diff --git a/data_viz.py b/data_viz.py index d1686aeb63283fc607489cfa1090019d298a2576..52fd3095cda563f91c9a055a39ad2ef1c8c4740b 100644 --- a/data_viz.py +++ b/data_viz.py @@ -1,7 +1,9 @@ +from unittest.mock import inplace + import matplotlib.pyplot as plt import numpy as np import random - +import pandas as pd from sympy.utilities.misc import replace from mass_prediction import compute_frag_mz_ration @@ -11,7 +13,7 @@ def data(a): b=a+a return b -int = np.random.rand(174) +# int = np.random.rand(174) names = ['b1(+)', 'y1(+)', 'b1(2+)', 'y1(2+)', 'b1(3+)', 'y1(3+)','b2(+)', 'y2(+)', 'b2(2+)', 'y2(2+)', 'b2(3+)', 'y2(3+)', 'b3(+)', 'y3(+)', 'b3(2+)', 'y3(2+)', 'b3(3+)', 'y3(3+)', 'b4(+)', 'y4(+)', 'b4(2+)', 'y4(2+)', 'b4(3+)', @@ -123,19 +125,28 @@ def histo_abs_error(dataframe, display=False, save=False, path=None): data_to_plot = [points] # Create a figure instance - fig = plt.figure() - - # Create an axes instance - ax = fig.add_axes([0, 0, 1, 1]) + fig, ax = plt.subplots() # Create the boxplot - bp = ax.violinplot(data_to_plot, vert=False) + ax.set_xlabel('abs error') + ax.violinplot(data_to_plot, vert=False, side='high', showmedians=True, quantiles=[0.95]) if display : plt.show() if save : plt.savefig(path) +def scatter_rt(dataframe, display=False, save=False, path=None): + fig, ax = plt.subplots() + + ax.scatter(dataframe['true rt'], dataframe['rt pred'], s=.1) + ax.set_xlabel('true RT') + ax.set_ylabel('pred RT') + if display : + plt.show() + + if save : + plt.savefig(path) def histo_abs_error_by_length(dataframe, display=False, save=False, path=None): @@ -146,36 +157,45 @@ def histo_abs_error_by_length(dataframe, display=False, save=False, path=None): data_to_plot.append(dataframe['abs_error'].where(dataframe['length']==l)) - # Create a figure instance - fig = plt.figure() - - # Create an axes instance - ax = fig.add_axes([0, 0, 1, 1]) + fig, ax = plt.subplots() # Create the boxplot - bp = ax.violinplot(data_to_plot, vert=True) + bp = ax.violinplot(data_to_plot, vert=True, side='low') if display: plt.show() if save: plt.savefig(path) +def running_mean(x, N): + cumsum = np.cumsum(np.insert(x, 0, 0)) + return (cumsum[N:] - cumsum[:-N]) / float(N) + def histo_length_by_error(dataframe, bins, display=False, save=False, path=None): data_to_plot = [] - + quanti = [] max_error = max(dataframe['abs_error']) - inter = np.linspace(0, max_error, num=bins) + inter = np.linspace(0, max_error, num=bins+1) + inter_m = running_mean(inter, 2) + + inter_labels = list(map(lambda x : round(x,2),inter_m)) + inter_labels.insert(0,0) for i in range(bins): - data_to_plot.append(dataframe['length'].where(inter[i] < dataframe['abs_error'] < inter[i+1])) + a = dataframe.loc[(inter[i] < dataframe['abs_error']) & (dataframe['abs_error'] < inter[i+1])]['length'] + if len(a)>0: + data_to_plot.append(a) + quanti.append(0.95) + else : + data_to_plot.append([0]) + quanti.append(0.95) - # Create a figure instance - fig = plt.figure() - # Create an axes instance - ax = fig.add_axes([0, 0, 1, 1]) + fig, ax = plt.subplots() # Create the boxplot - bp = ax.violinplot(data_to_plot, vert=False) + ax.violinplot(data_to_plot, vert=True, side='high', showmedians=True) + ax.set_ylabel('length') + ax.set_xticks(range(len(inter)),inter_labels) if display: plt.show() @@ -199,3 +219,20 @@ def compare_error(df1, df2, display=False, save=False, path=None): if save: plt.savefig(path) + +def add_length(dataframe): + def fonc(a): + a = a.replace('[', '') + a = a.replace(']', '') + a = a.split(',') + a = list(map(int, a)) + return np.count_nonzero(np.array(a)) + dataframe['length']=dataframe['seq'].map(fonc) + + +df = pd.read_csv('output/out_ISA.csv') +add_length(df) +df['abs_error'] = np.abs(df['rt pred']-df['true rt']) +# histo_abs_error(df, display=False, save=True, path='temp.png') +# scatter_rt(df, display=False, save=True, path='temp.png') +histo_length_by_error(df, 10, save=True, path='temp.png') \ No newline at end of file diff --git a/output/out.csv b/output/out.csv deleted file mode 100644 index 7839a68e0ee4fbe295c5f98b25deaf8177ef8588..0000000000000000000000000000000000000000 Binary files a/output/out.csv and /dev/null differ