From dbabcb515f196b60f97c7e949b4edd0c4d9235ac Mon Sep 17 00:00:00 2001 From: Schneider Leo <leo.schneider@etu.ec-lyon.fr> Date: Thu, 3 Oct 2024 13:18:29 +0200 Subject: [PATCH] filter cysteine --- alignement.py | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/alignement.py b/alignement.py index 94b7821..2b88a0c 100644 --- a/alignement.py +++ b/alignement.py @@ -68,6 +68,12 @@ def align(dataset, reference): dataset['Retention time'] = yout return dataset +def filter_cysteine(df, col): + def map_cys(str): + return not('C' in str) + df['cys'] = df[col].map(map_cys) + data = df[df['cys']].reset_index(drop=True) + return data # data_ori = load_data('msms/msms30_01.txt').reset_index(drop=True) # # data_ori['sequence'] = data_ori['sequence'].map(numerical_to_alphabetical) @@ -141,9 +147,11 @@ def align(dataset, reference): # dataset_train.to_pickle('database/data_DIA_ISA_55_test.pkl') data_train_1 = pd.read_pickle('database/data_DIA_ISA_55_train.pkl').reset_index(drop=True) -data_train_2 = pd.read_pickle('database/data_DIA_ISA_55_test.pkl').reset_index(drop=True) -data_ori = pd.read_csv('database/data_train.csv').reset_index(drop=True) -data_ori['Sequence']=data_ori['sequence'] -data_ori['Retention time']=data_ori['irt'] -data_train = pd.concat([data_train_2,data_train_1]).reset_index(drop=True) -data_align = align(data_train, data_ori) +# data_train_2 = pd.read_pickle('database/data_DIA_ISA_55_test.pkl').reset_index(drop=True) +# data_ori = pd.read_csv('database/data_train.csv').reset_index(drop=True) +# data_ori['Sequence']=data_ori['sequence'] +# data_ori['Retention time']=data_ori['irt'] +# data_train = pd.concat([data_train_2,data_train_1]).reset_index(drop=True) +# data_align = align(data_train, data_ori) + +df = filter_cysteine(data_train_1,'Sequence') -- GitLab