"""Build import CSVs for the ``statistics`` and ``statistics_short_term`` tables.

Reads the metadata, statistics and short-term statistics CSV files named by
the module-level constants below from the current working directory, remaps
``metadata_id`` values through ``statistic_id``, and writes the ``*-import.csv``
files plus two diagnostic files (``meta_merge.csv`` and ``unique_merge.csv``).
"""
import csv

import pandas as pd

#################
# statistics_meta
#################
statistics_meta_archive_file = "statistics_meta.csv"
statistics_meta_export_file = "statistics_meta-export.csv"

############
# statistics
############
statistics_archive_file = "statistics.csv"
statistics_export_file = "statistics-export.csv"
statistics_import_file = "statistics-import.csv"

#######################
# statistics_short_term
#######################
statistics_short_term_archive_file = "statistics_short_term.csv"
statistics_short_term_export_file = "statistics_short_term-export.csv"
statistics_short_term_import_file = "statistics_short_term-import.csv"


def build_meta_lookup(meta_df, meta_archive_df):
    """Return ``{archive id: export id}`` for every shared ``statistic_id``.

    Args:
        meta_df: metadata frame read from the export file; must have the
            columns ``id`` and ``statistic_id``.
        meta_archive_df: metadata frame read from the archive file; same
            two columns.

    Returns:
        A plain dict keyed by the archive frame's ``id`` with the export
        frame's ``id`` as value. Statistics that exist in only one of the
        two frames are left out.
    """
    # An inner merge keeps only matched rows, so no NaN ids are introduced
    # and the id columns keep their original dtype.
    matched = meta_df[["id", "statistic_id"]].merge(
        meta_archive_df[["id", "statistic_id"]],
        on="statistic_id",
        how="inner",
    )
    # id_x comes from the export (left) frame, id_y from the archive (right).
    return dict(zip(matched["id_y"].tolist(), matched["id_x"].tolist()))


def shift_ids(stats_df):
    """Return a copy of *stats_df* whose index is raised by its own maximum.

    An empty frame is returned unchanged (as a copy).
    """
    shifted = stats_df.copy()
    if shifted.empty:
        return shifted
    # NOTE(review): the offset is taken from this frame's own ids, as in the
    # original script; presumably the goal is to avoid id collisions in the
    # table that receives the import -- confirm the offset source.
    offset = shifted.index.max()
    new_index = shifted.index + offset
    new_index.name = stats_df.index.name
    shifted.index = new_index
    return shifted


def key_tuples(stats_df):
    """Return an object Series of ``(start_ts, metadata_id)`` tuples."""
    pairs = list(zip(stats_df["start_ts"], stats_df["metadata_id"]))
    return pd.Series(pairs, index=stats_df.index, dtype=object)


def drop_rows_present_in_archive(stats_df, archive_df):
    """Drop rows whose ``(start_ts, metadata_id)`` pair also occurs in the archive.

    Args:
        stats_df: frame to filter; needs ``start_ts`` and ``metadata_id``.
        archive_df: frame supplying the pairs that count as duplicates.

    Returns:
        ``(remaining, overlap)`` where ``remaining`` is a copy of *stats_df*
        without the duplicated rows and ``overlap`` is the merge result
        restricted to pairs found in both frames (for diagnostics).
    """
    key_columns = ["start_ts", "metadata_id"]

    export_keys = stats_df[key_columns].copy()
    export_keys["unique_tuple"] = key_tuples(stats_df)
    archive_keys = archive_df[key_columns].copy()
    archive_keys["unique_tuple"] = key_tuples(archive_df)

    overlap = export_keys.merge(
        archive_keys, on=["unique_tuple"], how="left", indicator=True
    )
    overlap = overlap[overlap["_merge"] == "both"]

    duplicated_keys = set(overlap["unique_tuple"])
    is_duplicate = pd.Series(
        [key in duplicated_keys for key in export_keys["unique_tuple"]],
        index=stats_df.index,
        dtype=bool,
    )
    return stats_df[~is_duplicate].copy(), overlap


def remap_metadata_ids(stats_df, meta_lookup):
    """Keep rows whose ``metadata_id`` is a lookup key and translate that id.

    The input frame is not modified. ``Series.map`` is used instead of
    ``replace`` so that lookups whose keys and values overlap are translated
    in a single step.
    """
    kept = stats_df[stats_df["metadata_id"].isin(list(meta_lookup))].copy()
    kept["metadata_id"] = kept["metadata_id"].map(meta_lookup)
    return kept


def main():
    """Run the conversion using the file names defined at module level."""
    # --- statistics_meta -------------------------------------------------
    # read in current export, and the archive; only the id and the
    # statistic_id of each are needed
    meta_df = pd.read_csv(statistics_meta_export_file)[["id", "statistic_id"]]
    meta_archive_df = pd.read_csv(statistics_meta_archive_file)[
        ["id", "statistic_id"]
    ]

    # Diagnostic dump of the full left merge (every export statistic, with
    # the matching archive id where one exists).
    meta_merged = meta_df.merge(
        meta_archive_df, on=["statistic_id"], how="left", indicator=True
    )
    meta_merged.set_index("id_x").to_csv("meta_merge.csv")

    meta_lookup = build_meta_lookup(meta_df, meta_archive_df)

    # --- statistics ------------------------------------------------------
    # NOTE(review): the rows transformed and written below come from the
    # export file, while the original comments describe correcting the
    # archive -- confirm which data set is meant to be imported.
    statistics_df = pd.read_csv(statistics_export_file, index_col="id")
    statistics_archive_df = pd.read_csv(statistics_archive_file, index_col="id")

    # make unique indexes
    statistics_df = shift_ids(statistics_df)

    # find any duplicates where the tuple (start_ts, metadata_id) exists in
    # both export and archive, and drop them from the rows to be written
    statistics_df.info()
    statistics_df, unique_lookup = drop_rows_present_in_archive(
        statistics_df, statistics_archive_df
    )
    unique_lookup.to_csv("unique_merge.csv")
    statistics_df.info()

    # drop any statistics not covered by the metadata lookup, then correct
    # the meta column
    statistics_df = remap_metadata_ids(statistics_df, meta_lookup)

    # --- statistics_short_term --------------------------------------------
    statistics_short_term_df = pd.read_csv(
        statistics_short_term_export_file, index_col="id"
    )
    # TODO: the (start_ts, metadata_id) uniqueness rule applied to the
    # statistics table above is not applied here yet.
    statistics_short_term_df = remap_metadata_ids(
        statistics_short_term_df, meta_lookup
    )

    # --- write files for importing -----------------------------------------
    statistics_df.to_csv(statistics_import_file)
    statistics_short_term_df.to_csv(statistics_short_term_import_file)


if __name__ == "__main__":
    main()