From d9cfb467e69cc2a4c79a2d9b8d2d8de3f561131a Mon Sep 17 00:00:00 2001 From: Schneider Leo <leo.schneider@etu.ec-lyon.fr> Date: Tue, 22 Oct 2024 14:11:57 +0200 Subject: [PATCH] datasets --- prosit_data_merge.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/prosit_data_merge.py b/prosit_data_merge.py index d864658..7a612c7 100644 --- a/prosit_data_merge.py +++ b/prosit_data_merge.py @@ -147,13 +147,14 @@ def alphabetical_to_numerical(seq): # np.save('data/intensity/irt_holdout.npy',data_int.irt) df = pd.read_pickle('database/data_prosit_merged_holdout.pkl') - print(len(df)) -print(df.head()) -df['Retention time']=df['Retention time'].apply(lambda x : x[0]) -df = df.loc[df['Retention time']!=0] +possible_charges = [1,2,3,4] +df = df[df['Charge'].isin(possible_charges)] +print(len(df)) +df = pd.read_pickle('database/data_prosit_merged_train.pkl') +print(len(df)) +df=df.loc[df['Retention time']!=0] +df = df[df['Charge'].isin(possible_charges)] print(len(df)) -print(df.head()) -df.to_pickle('database/data_prosit_merged_holdout_2.pkl') -- GitLab