Skip to content
Snippets Groups Projects
Commit dbabcb51 authored by Schneider Leo's avatar Schneider Leo
Browse files

filter cysteine

parent 57aee571
No related branches found
No related tags found
No related merge requests found
...@@ -68,6 +68,12 @@ def align(dataset, reference): ...@@ -68,6 +68,12 @@ def align(dataset, reference):
dataset['Retention time'] = yout dataset['Retention time'] = yout
return dataset return dataset
def filter_cysteine(df, col):
    """Return the rows of *df* whose *col* value does not contain the letter 'C'.

    Args:
        df: DataFrame to filter. A boolean helper column ``'cys'`` is written
            onto this frame (True where the value has no 'C'), so the caller's
            object is modified in place.
        col: Name of the column holding the strings to inspect.

    Returns:
        A new DataFrame with only the rows where ``'cys'`` is True, re-indexed
        from 0. The ``'cys'`` column is kept in the result.

    Raises:
        TypeError: if a value in *col* does not support the ``in`` operator
            (e.g. a float NaN).
    """
    def has_no_cys(sequence):
        return 'C' not in sequence

    # Force a real bool dtype: an empty column maps to an object-dtype Series,
    # which pandas may not treat as a boolean mask when indexing the frame.
    mask = df[col].map(has_no_cys).astype(bool)
    df['cys'] = mask
    return df[mask].reset_index(drop=True)
# data_ori = load_data('msms/msms30_01.txt').reset_index(drop=True)
# # data_ori['sequence'] = data_ori['sequence'].map(numerical_to_alphabetical)
...@@ -141,9 +147,11 @@ def align(dataset, reference): ...@@ -141,9 +147,11 @@ def align(dataset, reference):
# Script section: load the pickled training frame and drop every row whose
# 'Sequence' contains a 'C'. The earlier merge-and-align steps are kept below
# as commented-out code.
# dataset_train.to_pickle('database/data_DIA_ISA_55_test.pkl')
data_train_1 = pd.read_pickle('database/data_DIA_ISA_55_train.pkl').reset_index(drop=True)
# data_train_2 = pd.read_pickle('database/data_DIA_ISA_55_test.pkl').reset_index(drop=True)
# data_ori = pd.read_csv('database/data_train.csv').reset_index(drop=True)
# data_ori['Sequence']=data_ori['sequence']
# data_ori['Retention time']=data_ori['irt']
# data_train = pd.concat([data_train_2,data_train_1]).reset_index(drop=True)
# data_align = align(data_train, data_ori)
df = filter_cysteine(data_train_1,'Sequence')
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment