diff --git a/data/msp_file_extraction.py b/data/msp_file_extraction.py index ebd4b9ded7b0682e346372e183ea1a11d54a87d1..631c93358322424376144df5acc057cab3aaa4f7 100644 --- a/data/msp_file_extraction.py +++ b/data/msp_file_extraction.py @@ -60,7 +60,7 @@ if __name__ == '__main__': if 'Name:'in content[i]: s=content[i].split(':')[1].split('/')[0] - if 'C' in s : + if 'C' in s or len(s)>30: remove=True else : seq.append(s) @@ -69,17 +69,17 @@ if __name__ == '__main__': df['irt_scaled']=0 df['state'] = 'holdout' df.to_csv('spectral_lib/df_predicted_library_oktoberfest.csv',index=False) - - updated_content=[] - ind=0 - predicted_lib=pd.read_csv('../output/out_lib_oktoberfest.csv') - - predicted_lib['seq'] = predicted_lib['seq'].map(numerical_to_alphabetical_str) - - predicted_lib['sequence']=predicted_lib['seq'] - pred_rt=predicted_lib['rt pred'] - - df_joined = pd.merge(df,predicted_lib[['rt pred','sequence']],on='sequence',how='left') + # + # updated_content=[] + # ind=0 + # predicted_lib=pd.read_csv('../output/out_lib_oktoberfest.csv') + # + # predicted_lib['seq'] = predicted_lib['seq'].map(numerical_to_alphabetical_str) + # + # predicted_lib['sequence']=predicted_lib['seq'] + # pred_rt=predicted_lib['rt pred'] + # + # df_joined = pd.merge(df,predicted_lib[['rt pred','sequence']],on='sequence',how='left') #1787661 avec C , 15104040 sans \ No newline at end of file