"""Build a holdout sequence table from a predicted .msp library.

Run as a script, this module:

1. reads ``spectral_lib/predicted_library.msp`` and collects the sequence part
   of every ``Name:`` line that does not contain the letter ``C``;
2. writes those sequences, with constant ``irt_scaled`` and ``state`` columns,
   to ``spectral_lib/df_predicted_library_oktoberfest.csv``;
3. decodes the integer-encoded ``seq`` column of
   ``../output/out_lib_oktoberfest.csv`` and left-joins its ``rt pred`` column
   onto the collected sequences.
"""
import pandas as pd

try:
    # Not used anywhere in this script; kept only so that ``false`` remains
    # importable from this module for any code that relied on it.
    from sympy import false  # noqa: F401
except ImportError:
    # Nothing below needs sympy, so its absence must not stop the script.
    false = False

# Token -> integer code. Code 0 is the empty string, so it contributes nothing
# when a sequence is decoded.
ALPHABET_UNMOD = {
    "": 0,
    "A": 1,
    "C": 2,
    "D": 3,
    "E": 4,
    "F": 5,
    "G": 6,
    "H": 7,
    "I": 8,
    "K": 9,
    "L": 10,
    "M": 11,
    "N": 12,
    "P": 13,
    "Q": 14,
    "R": 15,
    "S": 16,
    "T": 17,
    "V": 18,
    "W": 19,
    "Y": 20,
    "M(UniMod:35)": 21,
    "CaC": 22,
}

# Integer code -> token (inverse of ALPHABET_UNMOD).
ALPHABET_UNMOD_REV = {v: k for k, v in ALPHABET_UNMOD.items()}


def numerical_to_alphabetical_str(s):
    """Decode a textual list of integer codes into its sequence string.

    Args:
        s: String such as ``"[1, 2, 3]"``. Every ``[`` and ``]`` is dropped and
            the remainder is split on commas.

    Returns:
        The concatenation of ``ALPHABET_UNMOD_REV[code]`` for each code, in
        order. Blank items (as produced by ``"[]"``, ``""`` or a trailing
        comma) are skipped, so an empty list decodes to ``""``.

    Raises:
        ValueError: If a non-blank item is not an integer literal.
        KeyError: If a code is not present in ``ALPHABET_UNMOD_REV``.
    """
    items = s.replace('[', '').replace(']', '').split(',')
    # Skip blank items: int('') would raise on an empty list.
    return ''.join(ALPHABET_UNMOD_REV[int(item)] for item in items if item.strip())


def _sequences_without_c(lines):
    """Return the sequence part of each ``Name:`` line that contains no 'C'.

    The sequence part is the text between the first ``:`` of the line and the
    first ``/`` after it. Splitting on the first colon only keeps names that
    themselves contain a colon (e.g. ``M(UniMod:35)``) intact.
    """
    kept = []
    for line in lines:
        if 'Name:' not in line:
            continue
        candidate = line.split(':', 1)[1].split('/')[0]
        # NOTE(review): the text is kept verbatim, including any space that
        # follows 'Name:'. If the .msp writes 'Name: PEPTIDE/2', the merge key
        # below will not match the decoded sequences -- confirm the format.
        if 'C' not in candidate:
            kept.append(candidate)
    return kept


if __name__ == '__main__':
    # Stream the library line by line; the context manager closes the file
    # even if parsing raises.
    with open("spectral_lib/predicted_library.msp", "r") as file:
        seq = _sequences_without_c(file)

    df = pd.DataFrame(seq, columns=['sequence'])
    df['irt_scaled'] = 0
    df['state'] = 'holdout'
    df.to_csv('spectral_lib/df_predicted_library_oktoberfest.csv', index=False)

    # Read the predictions once and expose the decoded sequence as the join key.
    predicted_lib = pd.read_csv('../output/out_lib_oktoberfest.csv')
    predicted_lib['seq'] = predicted_lib['seq'].map(numerical_to_alphabetical_str)
    predicted_lib['sequence'] = predicted_lib['seq']

    # NOTE(review): df_joined is computed but never written out or used
    # afterwards in this script -- confirm whether it should be saved.
    df_joined = pd.merge(
        df,
        predicted_lib[['rt pred', 'sequence']],
        on='sequence',
        how='left',
    )
    # Author's note: 1787661 with C, 15104040 without.