From d9cfb467e69cc2a4c79a2d9b8d2d8de3f561131a Mon Sep 17 00:00:00 2001
From: Schneider Leo <leo.schneider@etu.ec-lyon.fr>
Date: Tue, 22 Oct 2024 14:11:57 +0200
Subject: [PATCH] datasets: filter holdout and train sets by charge (1-4)

---
 prosit_data_merge.py | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/prosit_data_merge.py b/prosit_data_merge.py
index d864658..7a612c7 100644
--- a/prosit_data_merge.py
+++ b/prosit_data_merge.py
@@ -147,13 +147,14 @@ def alphabetical_to_numerical(seq):
 # np.save('data/intensity/irt_holdout.npy',data_int.irt)
 
 df = pd.read_pickle('database/data_prosit_merged_holdout.pkl')
-
 print(len(df))
-print(df.head())
-df['Retention time']=df['Retention time'].apply(lambda x : x[0])
-df = df.loc[df['Retention time']!=0]
+possible_charges = [1,2,3,4]
+df = df[df['Charge'].isin(possible_charges)]
+print(len(df))
+df = pd.read_pickle('database/data_prosit_merged_train.pkl')
+print(len(df))
+df=df.loc[df['Retention time']!=0]
+df = df[df['Charge'].isin(possible_charges)]
 print(len(df))
-print(df.head())
 
-df.to_pickle('database/data_prosit_merged_holdout_2.pkl')
 
-- 
GitLab