diff --git a/alignement.py b/alignement.py index 39892fc10f955ff126ff8bca2dbd7aa97ce82e7b..744746df1eaab9754a9b29e6573aafb3723a0461 100644 --- a/alignement.py +++ b/alignement.py @@ -146,12 +146,14 @@ def filter_cysteine(df, col): # dataset_train.to_pickle('database/data_DIA_ISA_55_train.pkl') # dataset_train.to_pickle('database/data_DIA_ISA_55_test.pkl') -data_train_1 = pd.read_csv('database/data_holdout.csv').reset_index(drop=True) -# data_train_2 = pd.read_pickle('database/data_DIA_ISA_55_test.pkl').reset_index(drop=True) -# data_ori = pd.read_csv('database/data_train.csv').reset_index(drop=True) -# data_ori['Sequence']=data_ori['sequence'] -# data_ori['Retention time']=data_ori['irt'] -# data_train = pd.concat([data_train_2,data_train_1]).reset_index(drop=True) -# data_align = align(data_train, data_ori) +data_train_1 = pd.read_pickle('database/data_DIA_ISA_55_test_30_01.pkl').reset_index(drop=True) +data_train_2 = pd.read_pickle('database/data_DIA_ISA_55_train_30_01.pkl').reset_index(drop=True) +data_ori = pd.read_csv('database/data_train.csv').reset_index(drop=True) +data_ori['Sequence']=data_ori['sequence'] +data_ori['Retention time']=data_ori['irt'] +data_train = pd.concat([data_train_2,data_train_1]).reset_index(drop=True) +data_align = align(data_train, data_ori) + +data_align.to_pickle('database/data_ISA_dual_align.pkl') -df = filter_cysteine(data_train_1,'sequence') +# df = filter_cysteine(data_train_1,'sequence') diff --git a/data_viz.py b/data_viz.py index ff1665fe1cd4c0edfdb1b5929eecdb3ce628b49f..87b641d3271d27cfe044125a74f9466b6c9e214f 100644 --- a/data_viz.py +++ b/data_viz.py @@ -226,16 +226,17 @@ def histo_length_by_error(dataframe, bins, display=False, save=False, path=None) plt.savefig(path) def compare_error(df1, df2, display=False, save=False, path=None): - size = len(df2) - ind = np.random.choice(range(size), size=10, replace=False) - seq1 = df1['seq'][ind] - seq2 = df2['seq'][ind] - data_1 = df1['abs_error'][ind] - data_2 = df2['abs_error'][ind] + df1['abs err 1'] = abs(df1['rt pred'] - df1['true rt']) + df2['abs err 2'] = abs(df2['rt pred'] - df2['true rt']) + df_group_1 = df1.groupby(['seq'])['abs err 1'].mean().to_frame().reset_index() + df_group_2 = df2.groupby(['seq'])['abs err 2'].mean().to_frame().reset_index() + df = pd.concat([df_group_1,df_group_2],axis=1) + + fig, ax = plt.subplots() + ax.scatter(df['abs err 1'], df['abs err 2'], s=0.1, alpha=0.05) + + plt.savefig('temp.png') - fig, ax = plt.subplots(figsize=(2, 1)) - ax[0, 0].bar(seq1, data_1, width=0.8) - ax[1, 0].bar(seq2, data_2, width=0.8) if display: plt.show() @@ -268,12 +269,12 @@ def add_length(dataframe): # scatter_rt(df, display=False, save=True, path='fig/custom model res/RT_pred_prosit_prosit_eval.png', color=True) # histo_length_by_error(df, bins=10, display=False, save=True, path='fig/custom model res/histo_length_prosit_prosit_eval.png') # -# df = pd.read_csv('output/out_common_transfereval.csv') -# add_length(df) -# df['abs_error'] = np.abs(df['rt pred']-df['true rt']) -# histo_abs_error(df, display=False, save=True, path='fig/custom model res/histo_prosit_ISA_eval.png') -# scatter_rt(df, display=False, save=True, path='fig/custom model res/RT_pred_prosit_ISA_eval.png', color=True) -# histo_length_by_error(df, bins=10, display=False, save=True, path='fig/custom model res/histo_length_prosit_ISA_eval.png') +df = pd.read_csv('output/out_common_prosit_ISA_eval_3.csv') +add_length(df) +df['abs_error'] = np.abs(df['rt pred']-df['true rt']) +histo_abs_error(df, display=False, save=True, path='fig/custom model res/histo_prosit_ISA_eval_3.png') +scatter_rt(df, display=False, save=True, path='fig/custom model res/RT_pred_prosit_ISA_eval_3.png', color=True) +histo_length_by_error(df, bins=10, display=False, save=True, path='fig/custom model res/histo_length_prosit_ISA_eval_3.png') # df = pd.read_csv('output/out_common_ISA_prosit_eval.csv') # add_length(df) diff --git a/database/data_ISA_dual_align.pkl b/database/data_ISA_dual_align.pkl new file mode 100644 index 0000000000000000000000000000000000000000..b93184e745d30bcd8aeb109dfb4d432f7f98a5a9 Binary files /dev/null and b/database/data_ISA_dual_align.pkl differ