diff --git a/common_dataset.py b/common_dataset.py index dffa493811b218a1d3484e1fd20866fbdbf8db74..9e293f17be4dcc4e25317500ed3946982d8f0808 100644 --- a/common_dataset.py +++ b/common_dataset.py @@ -164,18 +164,18 @@ def load_data(path_train, path_val, path_test, batch_size, length, pad=False, co return train_loader, val_loader, test_loader - -irt_train = np.load('data/intensity/collision_irt_train.npy') -seq_train = np.load('data/intensity/sequence_train.npy') -charge_train = np.load('data/intensity/precursor_charge_train.npy') -spectra_train = np.load('data/intensity/intensity_train.npy') - -irt_holdout = np.load('data/intensity/collision_irt_holdout.npy') -seq_holdout = np.load('data/intensity/sequence_holdout.npy') -charge_holdout = np.load('data/intensity/precursor_charge_holdout.npy') -spectra_holdout = np.load('data/intensity/intensity_holdout.npy') - -# dataset_train = pd.DataFrame({'Sequence':list(seq_train), 'Retention time':list(irt_train), 'Charge':list(charge_train), 'Spectra' : list(spectra_train)},index=list(range(6787933))) -# dataset_train.to_pickle('database/data_prosit_merged_train.pkl') -# dataset_test = pd.DataFrame({'Sequence':list(seq_holdout), 'Retention time':list(irt_holdout), 'Charge':list(charge_holdout), 'Spectra' : list(spectra_holdout)},index=list(range(754215))) -# dataset_train.to_pickle('database/data_prosit_merged_holdout.pkl') \ No newline at end of file +if __name__ =='__main__' : + irt_train = np.load('data/intensity/collision_irt_train.npy') + seq_train = np.load('data/intensity/sequence_train.npy') + charge_train = np.load('data/intensity/precursor_charge_train.npy') + spectra_train = np.load('data/intensity/intensity_train.npy') + + irt_holdout = np.load('data/intensity/collision_irt_holdout.npy') + seq_holdout = np.load('data/intensity/sequence_holdout.npy') + charge_holdout = np.load('data/intensity/precursor_charge_holdout.npy') + spectra_holdout = np.load('data/intensity/intensity_holdout.npy') + + # dataset_train = 
pd.DataFrame({'Sequence':list(seq_train), 'Retention time':list(irt_train), 'Charge':list(charge_train), 'Spectra' : list(spectra_train)},index=list(range(6787933))) + # dataset_train.to_pickle('database/data_prosit_merged_train.pkl') + # dataset_test = pd.DataFrame({'Sequence':list(seq_holdout), 'Retention time':list(irt_holdout), 'Charge':list(charge_holdout), 'Spectra' : list(spectra_holdout)},index=list(range(754215))) + # dataset_test.to_pickle('database/data_prosit_merged_holdout.pkl') \ No newline at end of file