diff --git a/common_dataset.py b/common_dataset.py
index b2fca949fbdde16e17c82c4ca3b6b9813cfcc37c..dffa493811b218a1d3484e1fd20866fbdbf8db74 100644
--- a/common_dataset.py
+++ b/common_dataset.py
@@ -165,16 +165,16 @@ def load_data(path_train, path_val, path_test, batch_size, length, pad=False, co
     return train_loader, val_loader, test_loader
 
 
-# irt_train = np.load('data/intensity/collision_irt_train.npy')
-# seq_train = np.load('data/intensity/sequence_train.npy')
-# charge_train = np.load('data/intensity/precursor_charge_train.npy')
-# spectra_train = np.load('data/intensity/intensity_train.npy')
-#
-# irt_holdout = np.load('data/intensity/collision_irt_holdout.npy')
-# seq_holdout = np.load('data/intensity/sequence_holdout.npy')
-# charge_holdout = np.load('data/intensity/precursor_charge_holdout.npy')
-# spectra_holdout = np.load('data/intensity/intensity_holdout.npy')
-#
+# iRT arrays are the ones prosit_data_merge.py saves as irt_<split>.npy.
+irt_train = np.load('data/intensity/irt_train.npy')
+seq_train = np.load('data/intensity/sequence_train.npy')
+charge_train = np.load('data/intensity/precursor_charge_train.npy')
+spectra_train = np.load('data/intensity/intensity_train.npy')
+
+irt_holdout = np.load('data/intensity/irt_holdout.npy')
+seq_holdout = np.load('data/intensity/sequence_holdout.npy')
+charge_holdout = np.load('data/intensity/precursor_charge_holdout.npy')
+spectra_holdout = np.load('data/intensity/intensity_holdout.npy')
 # dataset_train = pd.DataFrame({'Sequence':list(seq_train), 'Retention time':list(irt_train), 'Charge':list(charge_train), 'Spectra' : list(spectra_train)},index=list(range(6787933)))
 # dataset_train.to_pickle('database/data_prosit_merged_train.pkl')
 # dataset_test = pd.DataFrame({'Sequence':list(seq_holdout), 'Retention time':list(irt_holdout), 'Charge':list(charge_holdout), 'Spectra' : list(spectra_holdout)},index=list(range(754215)))
diff --git a/dataloader.py b/dataloader.py
index 743676ecb9535962056cc4dc5b216b6a18aa0928..b6b3f941c41c7137fb3836913cf7a74f3bd75164 100644
--- a/dataloader.py
+++ b/dataloader.py
@@ -195,9 +195,16 @@ class Intentsity_Dataset(Dataset):
         return torch.tensor(self.seq[idx]), torch.tensor([self.energy[idx]]).float(), torch.tensor(
             self.precursor_charge[idx]), torch.tensor(self.intensity[idx]).float()
 
-# storage = H5ToStorage('database/traintest_hcd.hdf5')
-# storage.make_npy_file('data/intensity/method_train.npy','method')
-# storage.make_npy_file('data/intensity/sequence_train.npy','sequence_integer')
-# storage.make_npy_file('data/intensity/intensity_train.npy', 'intensities_raw')
-# storage.make_npy_file('data/intensity/collision_energy_train.npy', 'collision_energy_aligned_normed')
-# storage.make_npy_file('data/intensity/precursor_charge_train.npy', 'precursor_charge_onehot')
+if __name__ == '__main__':
+    # Export only when run as a script, so importing the datasets from this
+    # module does not re-read the HDF5 files and rewrite the .npy files.
+    for h5_path, split in (('database/traintest_hcd.hdf5', 'train'),
+                           ('database/holdout_hcd.hdf5', 'holdout')):
+        storage = H5ToStorage(h5_path)
+        prefix = 'data/intensity/'
+        suffix = '_' + split + '.npy'
+        storage.make_npy_file(prefix + 'method' + suffix, 'method')
+        storage.make_npy_file(prefix + 'sequence' + suffix, 'sequence_integer')
+        storage.make_npy_file(prefix + 'intensity' + suffix, 'intensities_raw')
+        storage.make_npy_file(prefix + 'collision_energy' + suffix, 'collision_energy_aligned_normed')
+        storage.make_npy_file(prefix + 'precursor_charge' + suffix, 'precursor_charge_onehot')
diff --git a/prosit_data_merge.py b/prosit_data_merge.py
index 3c37066bb189e259f22cea14b48fdfe7befc73bf..8cc8125091127df85a6eded82d254d9124880541 100644
--- a/prosit_data_merge.py
+++ b/prosit_data_merge.py
@@ -105,13 +105,45 @@ for ind in ind_dict_rt :
     ind_int = [k for k, x in enumerate(seq_int) if x == seq_rt[ind]]
     data_int.irt[ind_int] = data_rt.irt[ind]
 
-np.save('data/intensity/collision_irt_train.npy',data_int.irt)
+np.save('data/intensity/irt_train.npy',data_int.irt)
 
-# indices_common = dict((k, i) for i, k in enumerate(seq_int))
-# indices_common = [indices_common[x] for x in inter]
-#
 
-# data_int.irt[indices_common] = data_rt.irt[ind_dict_rt]
-#
+sources = ('data/intensity/sequence_holdout.npy',
+           'data/intensity/intensity_holdout.npy',
+           'data/intensity/collision_energy_holdout.npy',
+           'data/intensity/precursor_charge_holdout.npy')
 
 
+# --- Holdout split: copy iRT values onto the holdout intensity rows ---
+data_rt = pd.read_csv('database/data_unique_ptms.csv')
+data_rt['Sequence'] = data_rt['mod_sequence']
+
+padding(data_rt, 'Sequence', 30)
+data_rt['Sequence'] = data_rt['Sequence'].map(alphabetical_to_numerical)
+
+data_rt = data_rt.drop(columns='mod_sequence')
+
+data_int = load_intensity_df_from_files(*sources)
+
+# Tuples are hashable, so the encoded sequences can be used as dict keys.
+seq_rt = [tuple(row) for row in data_rt.Sequence.tolist()]
+seq_int = [tuple(row) for row in data_int.seq.tolist()]
+
+# Row of each sequence in data_rt; for duplicated sequences the last row wins.
+ind_dict_rt = dict((k, i) for i, k in enumerate(seq_rt))
+
+# Intensity rows whose sequence has no iRT entry keep the value 0.
+data_int.irt = np.zeros(data_int.energy.shape)
+
+# One pass over the intensity rows with a dict lookup each, instead of
+# rescanning all of seq_int once per shared sequence (quadratic).
+n_matched = 0
+for k, seq in enumerate(seq_int):
+    ind = ind_dict_rt.get(seq)
+    if ind is not None:
+        data_int.irt[k] = data_rt.irt[ind]
+        n_matched += 1
+print(n_matched, '/', len(seq_int))
+
+np.save('data/intensity/irt_holdout.npy',data_int.irt)
+