# Patch summary (preamble text placed before the first "diff --git" header).
#
# data_exploration.py
#   * adds `import pandas as pd` next to the existing numpy/matplotlib imports.
#   * appends an "ISA DATA" section after the commented-out RT_distrib calls:
#       - loads 'database/data_ISA_aligned_prosit.pkl' with pd.read_pickle,
#       - takes the unique values of the 'Sequence' column into `seq`,
#       - averages 'Retention time' per 'Sequence' (groupby + mean) into `df_mean`,
#       - calls RT_distrib(df_mean, 'fig/histo_RT_ISA_unique.png').
#     The feq_aa / dist_long calls and the `rt = ...` line are added commented out,
#     so `seq` is only referenced by commented-out code in this patch.
#
# data_viz.py
#   * comments out the active `df = pd.read_csv('output/out_common_ISA_ISA_eval_2.csv')`.
#   * in an already commented-out block, swaps 'out_common_ISA_ISA_eval_3.csv' and the
#     '*_ISA_ISA_eval_3*.png' figure paths for 'out_common_ISA_prosit_eval.csv' and
#     '*_ISA_prosit_eval.png', and changes col = 'file' to col = 'seq'.
#   * replaces the trailing blank line with a bare '#'.
#
# NOTE(review): this copy of the patch has lost its line breaks -- every record
#   (headers, hunk markers, +/-/context lines) is joined by spaces, and the only
#   remaining break falls inside the path 'fig/custom model res/histo_ISA_ISA_eval_3.png'.
#   Blank context lines and indentation are not recoverable from this text, so
#   regenerate the diff from the two commits in the index lines before applying it.
# NOTE(review): pd.read_pickle executes pickle deserialization; confirm that
#   'database/data_ISA_aligned_prosit.pkl' is a trusted, locally produced file.
# NOTE(review): the added statements appear to sit at module level (no
#   `if __name__ == "__main__":` guard is visible in these hunks) -- if so, they run
#   on import of data_exploration; TODO confirm against the full file.
diff --git a/data_exploration.py b/data_exploration.py index f74f487de2937bbc31503220c5cdb740671a29a6..09991181536e850becab4c0ae92c05fa500dd29a 100644 --- a/data_exploration.py +++ b/data_exploration.py @@ -1,6 +1,7 @@ import numpy as np import matplotlib.pyplot as plt import matplotlib +import pandas as pd matplotlib.use('agg') length = 30 @@ -284,3 +285,13 @@ def RT_distrib(Y, f_name): # RT_distrib(Y_validation,'fig/histo_RT_validation.png' ) # # +#ISA DATA + +df = pd.read_pickle('database/data_ISA_aligned_prosit.pkl') +seq = df['Sequence'].unique() +# rt = df['Retention time'] +df_mean = df.groupby(['Sequence'])['Retention time'].mean() +# feq_aa(seq, plot=False, save=True, f_name='fig/histo_aa_ISA_unique.png') +# dist_long(seq, plot=False, save=True, f_name='fig/histo_length_ISA_unique.png') + +RT_distrib(df_mean, 'fig/histo_RT_ISA_unique.png') diff --git a/data_viz.py b/data_viz.py index b655f79c63a48307ffa2a30d5decb2791c4df3c2..ff1665fe1cd4c0edfdb1b5929eecdb3ce628b49f 100644 --- a/data_viz.py +++ b/data_viz.py @@ -254,7 +254,7 @@ def add_length(dataframe): dataframe['length']=dataframe['seq'].map(fonc) -df = pd.read_csv('output/out_common_ISA_ISA_eval_2.csv') +# df = pd.read_csv('output/out_common_ISA_ISA_eval_2.csv') # add_length(df) # df['abs_error'] = np.abs(df['rt pred']-df['true rt']) # histo_abs_error(df, display=False, save=True, path='fig/custom model res/histo_ISA_ISA_eval.png') @@ -275,11 +275,11 @@ df = pd.read_csv('output/out_common_ISA_ISA_eval_2.csv') # scatter_rt(df, display=False, save=True, path='fig/custom model res/RT_pred_prosit_ISA_eval.png', color=True) # histo_length_by_error(df, bins=10, display=False, save=True, path='fig/custom model res/histo_length_prosit_ISA_eval.png') -# df = pd.read_csv('output/out_common_ISA_ISA_eval_3.csv') +# df = pd.read_csv('output/out_common_ISA_prosit_eval.csv') # add_length(df) # df['abs_error'] = np.abs(df['rt pred']-df['true rt']) -# histo_abs_error(df, display=False, save=True, path='fig/custom model 
res/histo_ISA_ISA_eval_3.png') -# scatter_rt(df, display=False, save=True, path='fig/custom model res/RT_pred_ISA_ISA_eval_3_file.png', color=True, col = 'file') -# histo_length_by_error(df, bins=10, display=False, save=True, path='fig/custom model res/histo_length_ISA_ISA_eval_3.png') - +# histo_abs_error(df, display=False, save=True, path='fig/custom model res/histo_ISA_prosit_eval.png') +# scatter_rt(df, display=False, save=True, path='fig/custom model res/RT_pred_ISA_prosit_eval.png', color=True, col = 'seq') +# histo_length_by_error(df, bins=10, display=False, save=True, path='fig/custom model res/histo_length_ISA_prosit_eval.png') +#