Skip to content
Snippets Groups Projects
data_viz.py 7.12 KiB
Newer Older
Léo Schneider's avatar
Léo Schneider committed
import matplotlib.pyplot as plt
import numpy as np
import random
Léo Schneider's avatar
Léo Schneider committed

from sympy.utilities.misc import replace

Léo Schneider's avatar
Léo Schneider committed
from mass_prediction import compute_frag_mz_ration

# Sample sequence string kept at module level. The plotting functions in this
# file take their own `seq` argument, so none of the visible code reads this
# global. (Presumably a peptide in one-letter amino-acid codes -- TODO confirm.)
seq = 'YEEEFLR'
def data(a):
    """Return ``a + a``: the argument combined with itself via ``+``."""
    return a + a

# Random placeholder vector with 174 entries (the same count as `names` below).
# NOTE(review): this rebinds the builtin `int` for the whole module, so any
# `int(...)` call made later in this file would hit this ndarray instead of
# the builtin -- consider renaming once external users of the name are checked.
int = np.random.rand(174)

# Fragment labels, generated in the order the hand-written list used:
# for each fragment index 1..29, for each charge tag ('+', '2+', '3+'),
# the 'b' label followed by the 'y' label -- 29 * 3 * 2 = 174 entries.
names = np.array([
    f"{ion}{position}({charge})"
    for position in range(1, 30)
    for charge in ("+", "2+", "3+")
    for ion in ("b", "y")
])

def frag_spectra(int, seq):
    """Plot the fragmentation spectrum of one sequence as a stem plot.

    Every kept fragment is drawn as a vertical red line whose x position is
    the value returned by ``compute_frag_mz_ration(seq, 'mono')`` and whose
    height is its intensity, annotated with the matching entry of the
    module-level ``names`` array. The figure is displayed with ``plt.show()``.

    Args:
        int: Array-like of intensities, index-aligned with the masses and
            with ``names``. Entries equal to ``-1`` are treated as missing
            and are not drawn. (The name shadows the builtin ``int``; it is
            kept so existing keyword callers keep working.)
        seq: Sequence handed to ``compute_frag_mz_ration``; also used in the
            plot title.
    """
    intensities = np.asarray(int)
    masses = np.asarray(compute_frag_mz_ration(seq, 'mono'))

    # -1 marks a missing fragment. The same mask must be applied to the
    # labels as well, otherwise every annotation after the first dropped
    # entry is attached to the wrong peak.
    keep = intensities != -1.
    levels = intensities[keep]
    positions = masses[keep]
    labels = names[keep]

    # Create figure and plot a stem plot of the kept fragments
    fig, ax = plt.subplots(figsize=(8.8, 4), constrained_layout=True)
    ax.set(title=seq + " fragmentation spectra")

    ax.vlines(positions, 0, levels, color="tab:red")  # The vertical stems.
    ax.plot(positions, np.zeros_like(positions),
            color="k", markerfacecolor="w")  # Baseline at y = 0.

    # annotate lines
    for x, y, label in zip(positions, levels, labels):
        ax.annotate(label, xy=(x, y),
                    xytext=(-3, np.sign(y) * 3), textcoords="offset points",
                    horizontalalignment="right",
                    verticalalignment="bottom" if y > 0 else "top")

    plt.setp(ax.get_xticklabels(), rotation=30, ha="right")

    # remove y axis and spines
    ax.yaxis.set_visible(False)
    ax.spines[["left", "top", "right"]].set_visible(False)

    ax.margins(y=0.1)
    plt.show()

def _draw_annotated_stems(ax, positions, heights, labels, color):
    """Draw one spectrum on ``ax``: stems from 0 to ``heights`` and a label at each tip."""
    ax.vlines(positions, 0, heights, color=color)  # The vertical stems.
    ax.plot(positions, np.zeros_like(positions),
            color="k", markerfacecolor="w")  # Baseline at y = 0.

    for x, y, label in zip(positions, heights, labels):
        # Offset the text away from the baseline: above stems that point up,
        # below stems that point down.
        ax.annotate(label, xy=(x, y),
                    xytext=(-3, np.sign(y) * 3), textcoords="offset points",
                    horizontalalignment="right",
                    verticalalignment="bottom" if y > 0 else "top")


def frag_spectra_comparison(int_1, seq_1, int_2, seq_2=None):
    """Plot two fragmentation spectra mirrored around a shared baseline.

    The first spectrum is drawn upwards in red and the second downwards
    (negated intensities) in blue. For each spectrum the x positions come
    from ``compute_frag_mz_ration(sequence, 'mono')``, entries whose intensity
    equals ``-1`` are skipped, and the remaining peaks are labelled with the
    matching entries of the module-level ``names`` array. The figure is
    displayed with ``plt.show()``.

    Args:
        int_1: Array-like of intensities for the first spectrum,
            index-aligned with its masses and with ``names``.
        seq_1: Sequence of the first spectrum.
        int_2: Array-like of intensities for the second spectrum.
        seq_2: Sequence of the second spectrum; defaults to ``seq_1``.
    """
    if seq_2 is None:
        seq_2 = seq_1

    # Create figure shared by both spectra
    fig, ax = plt.subplots(figsize=(8.8, 4), constrained_layout=True)
    ax.set(title=seq_1 + " / " + seq_2 + " fragmentation spectra comparison")

    # (intensities, sequence, direction, colour): +1 draws upwards, -1 mirrors.
    spectra = (
        (int_1, seq_1, 1, "tab:red"),
        (int_2, seq_2, -1, "tab:blue"),
    )
    for intensities, sequence, direction, color in spectra:
        intensities = np.asarray(intensities)
        masses = np.asarray(compute_frag_mz_ration(sequence, 'mono'))
        keep = intensities != -1.  # -1 marks a missing fragment
        _draw_annotated_stems(ax, masses[keep], direction * intensities[keep],
                              names[keep], color)

    plt.setp(ax.get_xticklabels(), rotation=30, ha="right")

    # remove y axis and spines
    ax.yaxis.set_visible(False)
    ax.spines[["left", "top", "right"]].set_visible(False)

    ax.margins(y=0.1)
    plt.show()


def histo_abs_error(dataframe, display=False, save=False, path=None):
    """Draw a horizontal violin plot of the ``abs_error`` column.

    Args:
        dataframe: Table whose ``'abs_error'`` entry holds the values to plot.
        display: If true, show the figure with ``plt.show()``.
        save: If true, write the figure to ``path``.
        path: Output file used when ``save`` is true.

    Raises:
        ValueError: If ``save`` is true but no ``path`` is given.
    """
    if save and path is None:
        raise ValueError("path must be provided when save=True")

    fig = plt.figure()

    # Axes spanning the whole figure area
    ax = fig.add_axes([0, 0, 1, 1])

    # Single violin for the whole column
    ax.violinplot([dataframe['abs_error']], vert=False)

    # Save before showing: once a blocking plt.show() returns, the figure is
    # closed, and a later plt.savefig() would write a new, empty figure.
    if save:
        fig.savefig(path)

    if display:
        plt.show()



def histo_abs_error_by_length(dataframe, display=False, save=False, path=None):
    """Draw one vertical violin of ``abs_error`` per distinct ``length`` value.

    Each violin is placed at the x position equal to its length, so gaps in
    the observed lengths remain visible on the axis.

    Args:
        dataframe: pandas DataFrame with ``'length'`` and ``'abs_error'``
            columns.
        display: If true, show the figure with ``plt.show()``.
        save: If true, write the figure to ``path``.
        path: Output file used when ``save`` is true.

    Raises:
        ValueError: If ``save`` is true but no ``path`` is given.
    """
    if save and path is None:
        raise ValueError("path must be provided when save=True")

    lengths = []
    data_to_plot = []
    # groupby yields every distinct length in ascending order. This includes
    # the largest length (which range(min, max) skipped) and selects only the
    # matching rows, whereas Series.where kept all rows padded with NaN.
    for length, errors in dataframe.groupby('length')['abs_error']:
        errors = errors.dropna()
        if len(errors):  # violinplot cannot handle an empty dataset
            lengths.append(length)
            data_to_plot.append(errors.values)

    fig = plt.figure()

    # Axes spanning the whole figure area
    ax = fig.add_axes([0, 0, 1, 1])

    if data_to_plot:
        ax.violinplot(data_to_plot, positions=lengths, vert=True)

    # Save before showing: once a blocking plt.show() returns, the figure is
    # closed, and a later plt.savefig() would write a new, empty figure.
    if save:
        fig.savefig(path)

    if display:
        plt.show()

def histo_length_by_error(dataframe, bins, display=False, save=False, path=None):
    """Draw horizontal violins of ``length``, one per ``abs_error`` interval.

    The range ``[0, max(abs_error)]`` is split into ``bins`` equal-width
    intervals. Each interval is half-open ``[lower, upper)``, except the last
    one, which also includes its upper edge so the maximum error is counted.
    The violin of interval ``i`` (0-based) is drawn at position ``i + 1``;
    intervals without any row are skipped.

    Args:
        dataframe: pandas DataFrame with ``'abs_error'`` and ``'length'``
            columns.
        bins: Number of error intervals.
        display: If true, show the figure with ``plt.show()``.
        save: If true, write the figure to ``path``.
        path: Output file used when ``save`` is true.

    Raises:
        ValueError: If ``save`` is true but no ``path`` is given.
    """
    if save and path is None:
        raise ValueError("path must be provided when save=True")

    errors = dataframe['abs_error']
    lengths = dataframe['length']

    # `bins` intervals need `bins + 1` edges; with only `bins` edges the last
    # lookup of edges[i + 1] ran past the end of the array.
    edges = np.linspace(0, errors.max(), num=bins + 1)

    positions = []
    data_to_plot = []
    for i in range(bins):
        lower, upper = edges[i], edges[i + 1]
        # Element-wise masks combined with `&`: a chained `a < series < b`
        # asks for the truth value of a whole Series and raises ValueError.
        below_upper = errors <= upper if i == bins - 1 else errors < upper
        in_bin = (errors >= lower) & below_upper
        values = lengths[in_bin].dropna()
        if len(values):  # violinplot cannot handle an empty dataset
            positions.append(i + 1)
            data_to_plot.append(values.values)

    fig = plt.figure()

    # Axes spanning the whole figure area
    ax = fig.add_axes([0, 0, 1, 1])

    if data_to_plot:
        ax.violinplot(data_to_plot, positions=positions, vert=False)

    # Save before showing: once a blocking plt.show() returns, the figure is
    # closed, and a later plt.savefig() would write a new, empty figure.
    if save:
        fig.savefig(path)

    if display:
        plt.show()

def compare_error(df1, df2, display=False, save=False, path=None):
    """Show ``abs_error`` of the same random rows of two tables as stacked bar charts.

    Up to ten row positions are drawn at random without replacement; the top
    chart shows those rows of ``df1`` and the bottom chart the same positions
    of ``df2``, with the ``seq`` column used for the bar labels.

    Args:
        df1: pandas DataFrame with ``'seq'`` and ``'abs_error'`` columns.
        df2: Second DataFrame with the same columns, compared row by row
            (by position) with ``df1``.
        display: If true, show the figure with ``plt.show()``.
        save: If true, write the figure to ``path``.
        path: Output file used when ``save`` is true.

    Raises:
        ValueError: If ``save`` is true but no ``path`` is given.
    """
    if save and path is None:
        raise ValueError("path must be provided when save=True")

    # Only positions present in both tables can be compared, and sampling
    # without replacement cannot draw more rows than are available.
    size = min(len(df1), len(df2))
    ind = np.random.choice(size, size=min(10, size), replace=False)

    # `ind` holds positions, so select with iloc rather than by index label.
    seq1 = df1['seq'].iloc[ind].values
    seq2 = df2['seq'].iloc[ind].values
    data_1 = df1['abs_error'].iloc[ind].values
    data_2 = df2['abs_error'].iloc[ind].values

    # Two stacked axes (2 rows x 1 column). Passing (2, 1) as figsize created
    # a single Axes, which cannot be indexed.
    fig, (ax_top, ax_bottom) = plt.subplots(2, 1)
    ax_top.bar(seq1, data_1, width=0.8)
    ax_bottom.bar(seq2, data_2, width=0.8)

    # Save before showing: once a blocking plt.show() returns, the figure is
    # closed, and a later plt.savefig() would write a new, empty figure.
    if save:
        fig.savefig(path)

    if display:
        plt.show()