diff --git a/data/msp_file_extraction.py b/data/msp_file_extraction.py index 64ade727c65783677d47e14e6c1be0f6c21743ec..46188a2d71f6a333c6e8fb79ca3fd8123936cedf 100644 --- a/data/msp_file_extraction.py +++ b/data/msp_file_extraction.py @@ -6,11 +6,25 @@ if __name__ == '__main__': file = open("spectral_lib/predicted_library.msp", "r") content=file.readlines() file.close() - for l in content : - if 'Name:'in l: - seq.append(l.split(':')[1].split('/')[0]) + remove = False + index_to_remove=[] + for i in range(len(content)) : + if remove: + if 'Name:' in content[i]: + remove = False + else : + index_to_remove.append(i) + + if 'Name:'in content[i]: + s=content[i].split(':')[1].split('/')[0] + if 'C' in s : + remove=True + else : + seq.append(s) df = pd.DataFrame(seq,columns=['sequence']) df['irt_scaled']=0 df['state'] = 'holdout' - df.to_csv('spectral_lib/df_predicted_library_oktoberfest.csv',index=False) \ No newline at end of file + df.to_csv('spectral_lib/df_predicted_library_oktoberfest.csv',index=False) + + #1787661 with C , 15104040 without \ No newline at end of file diff --git a/data/spectral_lib/df_predicted_library_oktoberfest.csv b/data/spectral_lib/df_predicted_library_oktoberfest.csv index 1387ae724c7d4a2bb6de0e881f852f4500e156c3..312e787c23eb764c15c2f7e3eec1afc558bc9e41 100644 Binary files a/data/spectral_lib/df_predicted_library_oktoberfest.csv and b/data/spectral_lib/df_predicted_library_oktoberfest.csv differ