diff --git a/prosit_data_merge.py b/prosit_data_merge.py
index d864658ab74243be7f8c41af195bdbb3e700fc82..7a612c7931579b6044fce06bbfff5c8076383588 100644
--- a/prosit_data_merge.py
+++ b/prosit_data_merge.py
@@ -147,13 +147,14 @@ def alphabetical_to_numerical(seq):
 # np.save('data/intensity/irt_holdout.npy',data_int.irt)
 
 df = pd.read_pickle('database/data_prosit_merged_holdout.pkl')
-
 print(len(df))
-print(df.head())
-df['Retention time']=df['Retention time'].apply(lambda x : x[0])
-df = df.loc[df['Retention time']!=0]
+possible_charges = [1,2,3,4]
+df = df[df['Charge'].isin(possible_charges)]
+print(len(df))
+df = pd.read_pickle('database/data_prosit_merged_train.pkl')
+print(len(df))
+df=df.loc[df['Retention time']!=0]
+df = df[df['Charge'].isin(possible_charges)]
 print(len(df))
-print(df.head())
-df.to_pickle('database/data_prosit_merged_holdout_2.pkl')
 
 