diff --git a/common_dataset.py b/common_dataset.py index 47678cf7e63ee7b4addb6088bde34a2c467fe9dd..462db86fbdbb8da4cee8e0f2e30b51f7cf4b7549 100644 --- a/common_dataset.py +++ b/common_dataset.py @@ -165,17 +165,17 @@ def load_data(path_train, path_val, path_test, batch_size, length, pad=False, co return train_loader, val_loader, test_loader if __name__ =='__main__' : - # irt_train = np.load('data/intensity/collision_irt_train.npy') - # seq_train = np.load('data/intensity/sequence_train.npy') - # charge_train = np.load('data/intensity/precursor_charge_train.npy') - # spectra_train = np.load('data/intensity/intensity_train.npy') - - irt_holdout = np.load('data/intensity/irt_holdout.npy') - seq_holdout = np.load('data/intensity/sequence_holdout.npy') - charge_holdout = np.load('data/intensity/precursor_charge_holdout.npy') - spectra_holdout = np.load('data/intensity/intensity_holdout.npy') - - # dataset_train = pd.DataFrame({'Sequence':list(seq_train), 'Retention time':list(irt_train), 'Charge':list(charge_train), 'Spectra' : list(spectra_train)},index=list(range(6787933))) - # dataset_train.to_pickle('database/data_prosit_merged_train.pkl') - dataset_test = pd.DataFrame({'Sequence':list(seq_holdout), 'Retention time':list(irt_holdout), 'Charge':list(charge_holdout), 'Spectra' : list(spectra_holdout)},index=list(range(754215))) - dataset_test.to_pickle('database/data_prosit_merged_holdout.pkl') \ No newline at end of file + irt_train = np.load('data/intensity/irt_train.npy') + seq_train = np.load('data/intensity/sequence_train.npy') + charge_train = np.load('data/intensity/precursor_charge_train.npy') + spectra_train = np.load('data/intensity/intensity_train.npy') + # + # irt_holdout = np.load('data/intensity/irt_holdout.npy') + # seq_holdout = np.load('data/intensity/sequence_holdout.npy') + # charge_holdout = np.load('data/intensity/precursor_charge_holdout.npy') + # spectra_holdout = np.load('data/intensity/intensity_holdout.npy') + # + dataset_train = pd.DataFrame({'Sequence':list(seq_train), 'Retention time':list(irt_train), 'Charge':list(charge_train), 'Spectra' : list(spectra_train)},index=list(range(6787933))) + dataset_train.to_pickle('database/data_prosit_merged_train.pkl') + # dataset_test = pd.DataFrame({'Sequence':list(seq_holdout), 'Retention time':list(irt_holdout), 'Charge':list(charge_holdout), 'Spectra' : list(spectra_holdout)},index=list(range(754215))) + # dataset_test.to_pickle('database/data_prosit_merged_holdout.pkl') \ No newline at end of file diff --git a/data/intensity/precursor_charge_holdout.npy b/data/intensity/precursor_charge_holdout.npy index 700204b09429a036b0966ea5d2e81bca3b65bf9c..a892440611560b14e9d21db3dfc1902d347ed0d2 100644 Binary files a/data/intensity/precursor_charge_holdout.npy and b/data/intensity/precursor_charge_holdout.npy differ diff --git a/data/intensity/precursor_charge_train.npy b/data/intensity/precursor_charge_train.npy index ca6379eab213f9118944f310b8ca894920be3de5..7e6f7f7133df305f55cdf3685131c85c1cd6a2d0 100644 Binary files a/data/intensity/precursor_charge_train.npy and b/data/intensity/precursor_charge_train.npy differ diff --git a/database/data_prosit_merged_holdout.pkl b/database/data_prosit_merged_holdout.pkl index 13d7f7b79d5782143028314dbd9e97fa1eb8c732..ff450be168b63fcf4968b530f3cab6cecccc6d95 100644 Binary files a/database/data_prosit_merged_holdout.pkl and b/database/data_prosit_merged_holdout.pkl differ