import matplotlib.pyplot as plt import numpy as np import random import pandas as pd from mass_prediction import compute_frag_mz_ration seq = 'YEEEFLR' def data(a): b=a+a return b # int = np.random.rand(174) names = ['b1(+)', 'y1(+)', 'b1(2+)', 'y1(2+)', 'b1(3+)', 'y1(3+)','b2(+)', 'y2(+)', 'b2(2+)', 'y2(2+)', 'b2(3+)', 'y2(3+)', 'b3(+)', 'y3(+)', 'b3(2+)', 'y3(2+)', 'b3(3+)', 'y3(3+)', 'b4(+)', 'y4(+)', 'b4(2+)', 'y4(2+)', 'b4(3+)', 'y4(3+)','b5(+)', 'y5(+)', 'b5(2+)', 'y5(2+)', 'b5(3+)', 'y5(3+)','b6(+)', 'y6(+)', 'b6(2+)', 'y6(2+)', 'b6(3+)', 'y6(3+)','b7(+)', 'y7(+)', 'b7(2+)', 'y7(2+)', 'b7(3+)', 'y7(3+)','b8(+)', 'y8(+)', 'b8(2+)', 'y8(2+)', 'b8(3+)', 'y8(3+)','b9(+)', 'y9(+)', 'b9(2+)', 'y9(2+)', 'b9(3+)', 'y9(3+)','b10(+)', 'y10(+)', 'b10(2+)', 'y10(2+)', 'b10(3+)', 'y10(3+)','b11(+)', 'y11(+)', 'b11(2+)', 'y11(2+)', 'b11(3+)', 'y11(3+)', 'b12(+)', 'y12(+)', 'b12(2+)', 'y12(2+)', 'b12(3+)', 'y12(3+)', 'b13(+)', 'y13(+)', 'b13(2+)', 'y13(2+)', 'b13(3+)', 'y13(3+)','b14(+)', 'y14(+)', 'b14(2+)', 'y14(2+)', 'b14(3+)', 'y14(3+)','b15(+)', 'y15(+)', 'b15(2+)', 'y15(2+)', 'b15(3+)', 'y15(3+)', 'b16(+)', 'y16(+)', 'b16(2+)', 'y16(2+)', 'b16(3+)', 'y16(3+)', 'b17(+)', 'y17(+)', 'b17(2+)', 'y17(2+)', 'b17(3+)', 'y17(3+)','b18(+)', 'y18(+)', 'b18(2+)', 'y18(2+)', 'b18(3+)', 'y18(3+)','b19(+)', 'y19(+)', 'b19(2+)', 'y19(2+)', 'b19(3+)', 'y19(3+)','b20(+)', 'y20(+)', 'b20(2+)', 'y20(2+)', 'b20(3+)', 'y20(3+)','b21(+)', 'y21(+)', 'b21(2+)', 'y21(2+)', 'b21(3+)', 'y21(3+)', 'b22(+)', 'y22(+)', 'b22(2+)', 'y22(2+)', 'b22(3+)', 'y22(3+)','b23(+)', 'y23(+)', 'b23(2+)', 'y23(2+)', 'b23(3+)', 'y23(3+)','b24(+)', 'y24(+)', 'b24(2+)', 'y24(2+)', 'b24(3+)', 'y24(3+)','b25(+)', 'y25(+)', 'b25(2+)', 'y25(2+)', 'b25(3+)', 'y25(3+)','b26(+)', 'y26(+)', 'b26(2+)', 'y26(2+)', 'b26(3+)', 'y26(3+)', 'b27(+)', 'y27(+)', 'b27(2+)', 'y27(2+)', 'b27(3+)', 'y27(3+)','b28(+)', 'y28(+)', 'b28(2+)', 'y28(2+)', 'b28(3+)', 'y28(3+)','b29(+)', 'y29(+)', 'b29(2+)', 'y29(2+)', 'b29(3+)', 'y29(3+)'] names = np.array(names) def frag_spectra(int, seq): masses = compute_frag_mz_ration(seq,'mono') msk = [el!=-1. for el in int] # Choose some nice levels levels = int[msk] dates = masses[msk] # Create figure and plot a stem plot with the date fig, ax = plt.subplots(figsize=(8.8, 4), constrained_layout=True) ax.set(title=seq + " fragmentation spectra") ax.vlines(dates, 0, levels, color="tab:red") # The vertical stems. ax.plot(dates, np.zeros_like(dates), color="k", markerfacecolor="w") # Baseline and markers on it. # annotate lines for d, l, r in zip(dates, levels, names): ax.annotate(r, xy=(d, l), xytext=(-3, np.sign(l) * 3), textcoords="offset points", horizontalalignment="right", verticalalignment="bottom" if l > 0 else "top") plt.setp(ax.get_xticklabels(), rotation=30, ha="right") # remove y axis and spines ax.yaxis.set_visible(False) ax.spines[["left", "top", "right"]].set_visible(False) ax.margins(y=0.1) plt.show() def frag_spectra_comparison(int_1, seq_1, int_2, seq_2=None): if seq_2 is None : seq_2 = seq_1 masses_1 = compute_frag_mz_ration(seq_1,'mono') msk_1 = [el!=-1 for el in int_1] levels_1 = int_1[msk_1] dates_1 = masses_1[msk_1] names_1 = names[msk_1] masses_2 = compute_frag_mz_ration(seq_2, 'mono') msk_2 = [el != -1. for el in int_2] levels_2 = int_2[msk_2] dates_2 = masses_2[msk_2] names_2 = names[msk_2] # Create figure and plot a stem plot with the date fig, ax = plt.subplots(figsize=(8.8, 4), constrained_layout=True) ax.set(title=seq_1 + " / " +seq_2 + " fragmentation spectra comparison") ax.vlines(dates_1, 0, levels_1, color="tab:red") # The vertical stems. ax.plot(dates_1, np.zeros_like(dates_1), color="k", markerfacecolor="w") # Baseline and markers on it. # annotate lines for d, l, r in zip(dates_1, levels_1, names_1): ax.annotate(r, xy=(d, l), xytext=(-3, np.sign(l) * 3), textcoords="offset points", horizontalalignment="right", verticalalignment="bottom" if l > 0 else "top") ax.vlines(dates_2, 0, -levels_2, color="tab:blue") # The vertical stems. ax.plot(dates_2, np.zeros_like(dates_2), color="k", markerfacecolor="w") # Baseline and markers on it. # annotate lines for d, l, r in zip(dates_2, -levels_2, names_2): ax.annotate(r, xy=(d, l), xytext=(-3, np.sign(l) * 3), textcoords="offset points", horizontalalignment="right", verticalalignment="bottom" if l > 0 else "top") plt.setp(ax.get_xticklabels(), rotation=30, ha="right") # remove y axis and spines ax.yaxis.set_visible(False) ax.spines[["left", "top", "right"]].set_visible(False) ax.margins(y=0.1) plt.show() def histo_abs_error(dataframe, display=False, save=False, path=None): points = dataframe['abs_error'] ## combine these different collections into a list data_to_plot = [points] # Create a figure instance fig, ax = plt.subplots() # Create the boxplot ax.set_xlabel('abs error') ax.violinplot(data_to_plot, vert=False, side='high', showmedians=True, quantiles=[0.95]) if display : plt.show() if save : plt.savefig(path) def scatter_rt(dataframe, display=False, save=False, path=None): fig, ax = plt.subplots() ax.scatter(dataframe['true rt'], dataframe['rt pred'], s=.1) ax.set_xlabel('true RT') ax.set_ylabel('pred RT') if display : plt.show() if save : plt.savefig(path) def histo_abs_error_by_length(dataframe, display=False, save=False, path=None): data_to_plot =[] max_length = max(dataframe['length']) min_length = min(dataframe['length']) for l in range(min_length, max_length): data_to_plot.append(dataframe['abs_error'].where(dataframe['length']==l)) fig, ax = plt.subplots() # Create the boxplot bp = ax.violinplot(data_to_plot, vert=True, side='low') if display: plt.show() if save: plt.savefig(path) def running_mean(x, N): cumsum = np.cumsum(np.insert(x, 0, 0)) return (cumsum[N:] - cumsum[:-N]) / float(N) def histo_length_by_error(dataframe, bins, display=False, save=False, path=None): data_to_plot = [] quanti = [] max_error = max(dataframe['abs_error']) inter = np.linspace(0, max_error, num=bins+1) inter_m = running_mean(inter, 2) inter_labels = list(map(lambda x : round(x,2),inter_m)) inter_labels.insert(0,0) for i in range(bins): a = dataframe.loc[(inter[i] < dataframe['abs_error']) & (dataframe['abs_error'] < inter[i+1])]['length'] if len(a)>0: data_to_plot.append(a) quanti.append(0.95) else : data_to_plot.append([0]) quanti.append(0.95) fig, ax = plt.subplots() # Create the boxplot ax.violinplot(data_to_plot, vert=True, side='high', showmedians=True) ax.set_ylabel('length') ax.set_xticks(range(len(inter)),inter_labels) if display: plt.show() if save: plt.savefig(path) def compare_error(df1, df2, display=False, save=False, path=None): size = len(df2) ind = np.random.choice(range(size), size=10, replace=False) seq1 = df1['seq'][ind] seq2 = df2['seq'][ind] data_1 = df1['abs_error'][ind] data_2 = df2['abs_error'][ind] fig, ax = plt.subplots(figsize=(2, 1)) ax[0, 0].bar(seq1, data_1, width=0.8) ax[1, 0].bar(seq2, data_2, width=0.8) if display: plt.show() if save: plt.savefig(path) def add_length(dataframe): def fonc(a): a = a.replace('[', '') a = a.replace(']', '') a = a.split(',') a = list(map(int, a)) return np.count_nonzero(np.array(a)) dataframe['length']=dataframe['seq'].map(fonc) df = pd.read_csv('output/out_ISA_no_tape.csv') add_length(df) df['abs_error'] = np.abs(df['rt pred']-df['true rt']) histo_abs_error(df, display=False, save=True, path='temp.png') # scatter_rt(df, display=False, save=True, path='temp.png') # histo_length_by_error(df, 10, save=True, path='temp.png')