Skip to content
Snippets Groups Projects
Commit 11b1c39e authored by Schneider Leo's avatar Schneider Leo
Browse files

seq_length args

parent 02e48119
No related branches found
No related tags found
No related merge requests found
......@@ -165,16 +165,16 @@ def load_data(path_train, path_val, path_test, batch_size, length, pad=False, co
return train_loader, val_loader, test_loader
# irt_train = np.load('data/intensity/collision_irt_train.npy')
# seq_train = np.load('data/intensity/sequence_train.npy')
# charge_train = np.load('data/intensity/precursor_charge_train.npy')
# spectra_train = np.load('data/intensity/intensity_train.npy')
#
# irt_holdout = np.load('data/intensity/collision_irt_holdout.npy')
# seq_holdout = np.load('data/intensity/sequence_holdout.npy')
# charge_holdout = np.load('data/intensity/precursor_charge_holdout.npy')
# spectra_holdout = np.load('data/intensity/intensity_holdout.npy')
#
# Load the pre-extracted arrays for each split. The files are listed per split
# in the order iRT, sequence, precursor charge, spectra, and are read in
# exactly that order.
train_files = (
    'data/intensity/collision_irt_train.npy',
    'data/intensity/sequence_train.npy',
    'data/intensity/precursor_charge_train.npy',
    'data/intensity/intensity_train.npy',
)
holdout_files = (
    'data/intensity/collision_irt_holdout.npy',
    'data/intensity/sequence_holdout.npy',
    'data/intensity/precursor_charge_holdout.npy',
    'data/intensity/intensity_holdout.npy',
)
irt_train, seq_train, charge_train, spectra_train = [
    np.load(npy_path) for npy_path in train_files
]
irt_holdout, seq_holdout, charge_holdout, spectra_holdout = [
    np.load(npy_path) for npy_path in holdout_files
]
# dataset_train = pd.DataFrame({'Sequence':list(seq_train), 'Retention time':list(irt_train), 'Charge':list(charge_train), 'Spectra' : list(spectra_train)},index=list(range(6787933)))
# dataset_train.to_pickle('database/data_prosit_merged_train.pkl')
# dataset_test = pd.DataFrame({'Sequence':list(seq_holdout), 'Retention time':list(irt_holdout), 'Charge':list(charge_holdout), 'Spectra' : list(spectra_holdout)},index=list(range(754215)))
......
......@@ -195,9 +195,16 @@ class Intentsity_Dataset(Dataset):
return torch.tensor(self.seq[idx]), torch.tensor([self.energy[idx]]).float(), torch.tensor(
self.precursor_charge[idx]), torch.tensor(self.intensity[idx]).float()
# storage = H5ToStorage('database/traintest_hcd.hdf5')
# storage.make_npy_file('data/intensity/method_train.npy','method')
# storage.make_npy_file('data/intensity/sequence_train.npy','sequence_integer')
# storage.make_npy_file('data/intensity/intensity_train.npy', 'intensities_raw')
# storage.make_npy_file('data/intensity/collision_energy_train.npy', 'collision_energy_aligned_normed')
# storage.make_npy_file('data/intensity/precursor_charge_train.npy', 'precursor_charge_onehot')
# Export the required datasets of each HDF5 archive to standalone .npy files.
# Each entry is (HDF5 file, ((target .npy path, dataset key), ...)); the
# archives and their datasets are processed in the listed order.
export_plan = (
    ('database/traintest_hcd.hdf5', (
        ('data/intensity/method_train.npy', 'method'),
        ('data/intensity/sequence_train.npy', 'sequence_integer'),
        ('data/intensity/intensity_train.npy', 'intensities_raw'),
        ('data/intensity/collision_energy_train.npy', 'collision_energy_aligned_normed'),
        ('data/intensity/precursor_charge_train.npy', 'precursor_charge_onehot'),
    )),
    ('database/holdout_hcd.hdf5', (
        ('data/intensity/method_holdout.npy', 'method'),
        ('data/intensity/sequence_holdout.npy', 'sequence_integer'),
        ('data/intensity/intensity_holdout.npy', 'intensities_raw'),
        ('data/intensity/collision_energy_holdout.npy', 'collision_energy_aligned_normed'),
        ('data/intensity/precursor_charge_holdout.npy', 'precursor_charge_onehot'),
    )),
)
for h5_path, exports in export_plan:
    # `storage` stays bound to the last archive (holdout) once the loop ends.
    storage = H5ToStorage(h5_path)
    for npy_path, dataset_key in exports:
        storage.make_npy_file(npy_path, dataset_key)
......@@ -105,13 +105,45 @@ for ind in ind_dict_rt :
ind_int = [k for k, x in enumerate(seq_int) if x == seq_rt[ind]]
data_int.irt[ind_int] = data_rt.irt[ind]
np.save('data/intensity/collision_irt_train.npy',data_int.irt)
np.save('data/intensity/irt_train.npy',data_int.irt)
# indices_common = dict((k, i) for i, k in enumerate(seq_int))
# indices_common = [indices_common[x] for x in inter]
#
# data_int.irt[indices_common] = data_rt.irt[ind_dict_rt]
#
# Attach an iRT value to every holdout intensity sample by matching its
# integer-encoded sequence against the retention-time table, then save the
# resulting array. Samples without a matching sequence keep the value 0.
sources = ('data/intensity/sequence_holdout.npy',
           'data/intensity/intensity_holdout.npy',
           'data/intensity/collision_energy_holdout.npy',
           'data/intensity/precursor_charge_holdout.npy')

# Retention-time table: bring the modified sequences into the same encoding as
# the intensity data.
data_rt = pd.read_csv('database/data_unique_ptms.csv')
data_rt['Sequence'] = data_rt['mod_sequence']
# NOTE(review): presumably pads 'Sequence' in place to length 30 -- confirm
# against the definition of `padding`.
padding(data_rt, 'Sequence', 30)
data_rt['Sequence'] = data_rt['Sequence'].map(alphabetical_to_numerical)
data_rt = data_rt.drop(columns='mod_sequence')

data_int = load_intensity_df_from_files(sources[0], sources[1], sources[2], sources[3])

# Tuples are hashable, so sequences can be used as dict keys / set members.
seq_rt = [tuple(l) for l in data_rt.Sequence.tolist()]
seq_int = [tuple(l) for l in data_int.seq.tolist()]

# Row index of each retention-time sequence; when a sequence occurs several
# times the last row wins. Only sequences present in both tables are kept.
ind_dict_rt = {k: i for i, k in enumerate(seq_rt)}
inter = set(ind_dict_rt).intersection(seq_int)
ind_dict_rt = [ind_dict_rt[x] for x in inter]

# Index the intensity rows by sequence once, instead of rescanning the whole
# `seq_int` list for every matched sequence (which is quadratic).
positions_by_seq = {}
for pos, seq in enumerate(seq_int):
    positions_by_seq.setdefault(seq, []).append(pos)

data_int.irt = np.zeros(data_int.energy.shape)
total = len(ind_dict_rt)
for done, ind in enumerate(ind_dict_rt):
    print(done, '/', total)
    # Every intensity row sharing this sequence receives the same iRT value.
    data_int.irt[positions_by_seq[seq_rt[ind]]] = data_rt.irt[ind]

np.save('data/intensity/irt_holdout.npy', data_int.irt)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment