diff --git a/data/msp_file_extraction.py b/data/msp_file_extraction.py index c27be7ae13b66ef4279a724c13a526656ba1a1de..3dca185dd8673f9de1930992b2a0da9ec2e327bc 100644 --- a/data/msp_file_extraction.py +++ b/data/msp_file_extraction.py @@ -66,6 +66,7 @@ if __name__ == '__main__': seq.append(s) df = pd.DataFrame(seq,columns=['sequence']) + df = df.drop_duplicates() df['irt_scaled']=0 df['state'] = 'holdout' df.to_csv('spectral_lib/df_predicted_library_oktoberfest.csv',index=False) @@ -77,6 +78,7 @@ if __name__ == '__main__': predicted_lib['seq'] = predicted_lib['seq'].map(numerical_to_alphabetical_str) predicted_lib['sequence']=predicted_lib['seq'] + pred_rt=predicted_lib['rt pred'] df_joined = pd.merge(df,predicted_lib[['rt pred','sequence']],on='sequence',how='left')