def filter_cysteine(df, col):
    """Return the rows of *df* whose *col* value does not contain 'C'.

    The caller's frame is left untouched: the helper 'cys' column is added
    to a copy, not to *df* itself.

    Parameters
    ----------
    df : pandas.DataFrame
        Table to filter.
    col : str
        Name of the column whose values are tested for the letter 'C'.

    Returns
    -------
    pandas.DataFrame
        A new frame, re-indexed from 0, holding the surviving rows and an
        extra boolean 'cys' column (True on every returned row), kept so the
        output has the same columns as before.

    Raises
    ------
    KeyError
        If *col* is not a column of *df*.
    """
    def lacks_cys(value):
        try:
            return 'C' not in value
        except TypeError:
            # Non-iterable cells (NaN / None) cannot contain 'C'; keep the
            # row instead of aborting the whole filter.
            # NOTE(review): confirm that keeping rows with a missing value
            # is the desired policy.
            return True

    # astype(bool) guarantees a boolean mask even for an empty column, where
    # map() would otherwise give an object-dtype Series that DataFrame
    # indexing does not treat as a row mask.
    keep = df[col].map(lacks_cys).astype(bool)

    # assign() returns a copy, so the 'cys' column never leaks into `df`.
    return df.assign(cys=keep)[keep].reset_index(drop=True)