MongoDB - How can I remove duplicated data without using the id?
I need to import a lot of data into MongoDB,
but much of the data may be duplicated. How can I remove the duplicated data in MongoDB in an elegant way?
It's ugly and inefficient to check collection.find({all_the_fields}).exist?
before importing each new record.
Adding an index on the fields is also
inefficient, because it would need to index hundreds of fields.
I want to import all the data and remove the duplicated records in the collection later.
How can I do this? Thanks.
The following is one sample document.
I need to import millions of documents of this sort, and there are many duplicated documents. I need to find a way to remove the duplicated documents.
{ "_id": objectid("5550649669702d1901070000"), "report_type": "com_disagg", "item": "wheat", "exchange": "chicago board of trade", "product_exchange": "wheat - chicago board of trade", "date": new date("2009-12-08t08:00:00+0800"), "timestamp_utc": numberlong(1260230400000), "updated_at": new date(1431331990505), "created_at": new date(1431331990498), "symbol_code_id": objectid("5550645169702d1864020000"), "symbol": "w", "cftc_contract_market_code": "001602", "cftc_market_code": "cbt", "cftc_region_code": "00", "cftc_commodity_code": "001", "open_interest_all": "421790.0", "prod_merc_positions_long_all": "34399.0", "prod_merc_positions_short_all": "176446.0", "swap_positions_long_all": "167220.0", "swap__positions_short_all": "18118.0", "swap__positions_spread_all": "11837.0", "m_money_positions_long_all": "61511.0", "m_money_positions_short_all": "42756.0", "m_money_positions_spread_all": "34393.0", "other_rept_positions_long_all": "14072.0", "other_rept_positions_short_all": "21546.0", "other_rept_positions_spread_all": "65296.0", "tot_rept_positions_long_all": "388728.0", "tot_rept_positions_short_all": "370392.0", "nonrept_positions_long_all": "33062.0", "nonrept_positions_short_all": "51398.0", "open_interest_old": "281017.0", "prod_merc_positions_long_old": "16525.0", "prod_merc_positions_short_old": "122260.0", "swap_positions_long_old": "117778.0", "swap_positions_short_old": "5864.0", "swap_positions_spread_old": "4270.0", "m_money_positions_long_old": "54489.0", "m_money_positions_short_old": "44462.0", "m_money_positions_spread_old": "10319.0", "other_rept_positions_long_old": "9407.0", "other_rept_positions_short_old": "15516.0", "other_rept_positions_spread_old": "47020.0", "tot_rept_positions_long_old": "259809.0", "tot_rept_positions_short_old": "249712.0", "nonrept_positions_long_old": "21208.0", "nonrept_positions_short_old": "31304.0", "open_interest_other": "140773.0", "prod_merc_positions_long_other": "17875.0", "prod_merc_positions_short_other": 
"54186.0", "swap_positions_long_other": "51313.0", "swap_positions_short_other": "14125.0", "swap_positions_spread_other": "5696.0", "m_money_positions_long_other": "21387.0", "m_money_positions_short_other": "12659.0", "m_money_positions_spread_other": "9708.0", "other_rept_positions_long_other": "7275.0", "other_rept_positions_short_other": "8640.0", "other_rept_positions_spread_othr": "15665.0", "tot_rept_positions_long_other": "128919.0", "tot_rept_positions_short_other": "120679.0", "nonrept_positions_long_other": "11855.0", "nonrept_positions_short_other": "20094.0", "change_in_open_interest_all": "88.0", "change_in_prod_merc_long_all": "-287.0", "change_in_prod_merc_short_all": "-4032.0", "change_in_swap_long_all": "-768.0", "change_in_swap_short_all": "1679.0", "change_in_swap_spread_all": "268.0", "change_in_m_money_long_all": "-1992.0", "change_in_m_money_short_all": "2243.0", "change_in_m_money_spread_all": "45.0", "change_in_other_rept_long_all": "1475.0", "change_in_other_rept_short_all": "-4019.0", "change_in_other_rept_spread_all": "1368.0", "change_in_tot_rept_long_all": "110.0", "change_in_tot_rept_short_all": "-2449.0", "change_in_nonrept_long_all": "-22.0", "change_in_nonrept_short_all": "2537.0", "pct_of_open_interest_all": "100.0", "pct_of_oi_prod_merc_long_all": "8.2", "pct_of_oi_prod_merc_short_all": "41.8", "pct_of_oi_swap_long_all": "39.6", "pct_of_oi_swap_short_all": "4.3", "pct_of_oi_swap_spread_all": "2.8", "pct_of_oi_m_money_long_all": "14.6", "pct_of_oi_m_money_short_all": "10.1", "pct_of_oi_m_money_spread_all": "8.2", "pct_of_oi_other_rept_long_all": "3.3", "pct_of_oi_other_rept_short_all": "5.1", "pct_of_oi_other_rept_spread_all": "15.5", "pct_of_oi_tot_rept_long_all": "92.2", "pct_of_oi_tot_rept_short_all": "87.8", "pct_of_oi_nonrept_long_all": "7.8", "pct_of_oi_nonrept_short_all": "12.2", "pct_of_open_interest_old": "100.0", "pct_of_oi_prod_merc_long_old": "5.9", "pct_of_oi_prod_merc_short_old": "43.5", "pct_of_oi_swap_long_old": 
"41.9", "pct_of_oi_swap_short_old": "2.1", "pct_of_oi_swap_spread_old": "1.5", "pct_of_oi_m_money_long_old": "19.4", "pct_of_oi_m_money_short_old": "15.8", "pct_of_oi_m_money_spread_old": "3.7", "pct_of_oi_other_rept_long_old": "3.3", "pct_of_oi_other_rept_short_old": "5.5", "pct_of_oi_other_rept_spread_old": "16.7", "pct_of_oi_tot_rept_long_old": "92.5", "pct_of_oi_tot_rept_short_old": "88.9", "pct_of_oi_nonrept_long_old": "7.5", "pct_of_oi_nonrept_short_old": "11.1", "pct_of_open_interest_other": "100.0", "pct_of_oi_prod_merc_long_other": "12.7", "pct_of_oi_prod_merc_short_other": "38.5", "pct_of_oi_swap_long_other": "36.5", "pct_of_oi_swap_short_other": "10.0", "pct_of_oi_swap_spread_other": "4.0", "pct_of_oi_m_money_long_other": "15.2", "pct_of_oi_m_money_short_other": "9.0", "pct_of_oi_m_money_spread_other": "6.9", "pct_of_oi_other_rept_long_other": "5.2", "pct_of_oi_other_rept_short_other": "6.1", "pct_of_oi_other_rept_spread_othr": "11.1", "pct_of_oi_tot_rept_long_other": "91.6", "pct_of_oi_tot_rept_short_other": "85.7", "pct_of_oi_nonrept_long_other": "8.4", "pct_of_oi_nonrept_short_other": "14.3", "traders_tot_all": "359.0", "traders_prod_merc_long_all": "48.0", "traders_prod_merc_short_all": "76.0", "traders_swap_long_all": "18.0", "traders_swap_short_all": "9.0", "traders_swap_spread_all": "21.0", "traders_m_money_long_all": "62.0", "traders_m_money_short_all": "60.0", "traders_m_money_spread_all": "45.0", "traders_other_rept_long_all": "59.0", "traders_other_rept_short_all": "61.0", "traders_other_rept_spread_all": "74.0", "traders_tot_rept_long_all": "263.0", "traders_tot_rept_short_all": "276.0", "traders_tot_old": "337.0", "traders_prod_merc_long_old": "40.0", "traders_prod_merc_short_old": "72.0", "traders_swap_long_old": "19.0", "traders_swap_short_old": "8.0", "traders_swap_spread_old": "13.0", "traders_m_money_long_old": "59.0", "traders_m_money_short_old": "56.0", "traders_m_money_spread_old": "34.0", "traders_other_rept_long_old": "50.0", 
"traders_other_rept_short_old": "65.0", "traders_other_rept_spread_old": "61.0", "traders_tot_rept_long_old": "224.0", "traders_tot_rept_short_old": "253.0", "traders_tot_other": "206.0", "traders_prod_merc_long_other": "25.0", "traders_prod_merc_short_other": "62.0", "traders_swap_long_other": "11.0", "traders_swap_short_other": "11.0", "traders_swap_spread_other": "17.0", "traders_m_money_long_other": "18.0", "traders_m_money_short_other": "22.0", "traders_m_money_spread_other": "16.0", "traders_other_rept_long_other": "37.0", "traders_other_rept_short_other": "37.0", "traders_other_rept_spread_other": "46.0", "traders_tot_rept_long_other": "133.0", "traders_tot_rept_short_other": "171.0", "conc_gross_le_4_tdr_long_all": "21.3", "conc_gross_le_4_tdr_short_all": "21.3", "conc_gross_le_8_tdr_long_all": "35.4", "conc_gross_le_8_tdr_short_all": "30.0", "conc_net_le_4_tdr_long_all": "20.5", "conc_net_le_4_tdr_short_all": "18.1", "conc_net_le_8_tdr_long_all": "34.2", "conc_net_le_8_tdr_short_all": "24.6", "conc_gross_le_4_tdr_long_old": "27.3", "conc_gross_le_4_tdr_short_old": "22.1", "conc_gross_le_8_tdr_long_old": "39.6", "conc_gross_le_8_tdr_short_old": "31.4", "conc_net_le_4_tdr_long_old": "27.2", "conc_net_le_4_tdr_short_old": "20.0", "conc_net_le_8_tdr_long_old": "38.9", "conc_net_le_8_tdr_short_old": "27.9", "conc_gross_le_4_tdr_long_other": "37.4", "conc_gross_le_4_tdr_short_other": "27.6", "conc_gross_le_8_tdr_long_other": "49.2", "conc_gross_le_8_tdr_short_other": "37.0", "conc_net_le_4_tdr_long_other": "35.5", "conc_net_le_4_tdr_short_other": "22.8", "conc_net_le_8_tdr_long_other": "42.6", "conc_net_le_8_tdr_short_other": "29.0", "contract_units": "(contracts of 5,000 bushels)", "cftc_subgroup_code": "a10", "futonly_or_combined": "combined" }
Comments
Post a Comment