Skip to content
Snippets Groups Projects
Commit 1332cb6a authored by Schneider Leo's avatar Schneider Leo
Browse files

datasets

parent a5b96b61
No related branches found
No related tags found
No related merge requests found
...@@ -107,42 +107,48 @@ def alphabetical_to_numerical(seq): ...@@ -107,42 +107,48 @@ def alphabetical_to_numerical(seq):
# np.save('data/intensity/irt_train.npy',data_int.irt) # np.save('data/intensity/irt_train.npy',data_int.irt)
sources = ('data/intensity/sequence_holdout.npy', # sources = ('data/intensity/sequence_holdout.npy',
'data/intensity/intensity_holdout.npy', # 'data/intensity/intensity_holdout.npy',
'data/intensity/precursor_charge_holdout.npy', # 'data/intensity/precursor_charge_holdout.npy',
'data/intensity/precursor_charge_holdout.npy') # 'data/intensity/precursor_charge_holdout.npy')
#
#
data_rt = pd.read_csv('database/data_unique_ptms.csv') # data_rt = pd.read_csv('database/data_unique_ptms.csv')
data_rt['Sequence']=data_rt['mod_sequence'] # data_rt['Sequence']=data_rt['mod_sequence']
#
padding(data_rt, 'Sequence', 30) # padding(data_rt, 'Sequence', 30)
data_rt['Sequence'] = data_rt['Sequence'].map(alphabetical_to_numerical) # data_rt['Sequence'] = data_rt['Sequence'].map(alphabetical_to_numerical)
#
data_rt =data_rt.drop(columns='mod_sequence') # data_rt =data_rt.drop(columns='mod_sequence')
#
data_int = load_intensity_df_from_files(sources[0], sources[1], sources[2], sources[3]) # data_int = load_intensity_df_from_files(sources[0], sources[1], sources[2], sources[3])
#
seq_rt = data_rt.Sequence # seq_rt = data_rt.Sequence
seq_int = data_int.seq # seq_int = data_int.seq
seq_rt = seq_rt.tolist() # seq_rt = seq_rt.tolist()
seq_int = seq_int.tolist() # seq_int = seq_int.tolist()
seq_rt = [tuple(l) for l in seq_rt] # seq_rt = [tuple(l) for l in seq_rt]
seq_int = [tuple(l) for l in seq_int] # seq_int = [tuple(l) for l in seq_int]
#
ind_dict_rt = dict((k, i) for i, k in enumerate(seq_rt)) # ind_dict_rt = dict((k, i) for i, k in enumerate(seq_rt))
inter = set(ind_dict_rt).intersection(seq_int) # inter = set(ind_dict_rt).intersection(seq_int)
ind_dict_rt = [ind_dict_rt[x] for x in inter] # ind_dict_rt = [ind_dict_rt[x] for x in inter]
#
#
# data_int.irt = np.zeros(data_int.energy.shape)
#
# i=0
# for ind in ind_dict_rt :
# print(i,'/',len(ind_dict_rt))
# i+=1
# ind_int = [k for k, x in enumerate(seq_int) if x == seq_rt[ind]]
# data_int.irt[ind_int] = data_rt.irt[ind]
#
# np.save('data/intensity/irt_holdout.npy',data_int.irt)
data_int.irt = np.zeros(data_int.energy.shape) df = pd.read_pickle('database/data_prosit_merged_holdout.pkl')
i=0 df = df.head(100)
for ind in ind_dict_rt :
print(i,'/',len(ind_dict_rt))
i+=1
ind_int = [k for k, x in enumerate(seq_int) if x == seq_rt[ind]]
data_int.irt[ind_int] = data_rt.irt[ind]
np.save('data/intensity/irt_holdout.npy',data_int.irt) df.to_csv('database/data_head.csv')
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment