# result_extraction.py

import numpy as np
import pandas as pd
from matplotlib_venn import venn2
import matplotlib.pyplot as plt

def _unique_values(frame, column):
    """Return the set of non-missing values found in ``frame[column]``."""
    # Missing cells are dropped: an empty cell is not an identification and
    # would otherwise be counted as an element shared by both reports.
    return set(frame[column].dropna().tolist())


def _save_venn(first, second, title, out_path):
    """Draw a two-set Venn diagram on its own figure and save it to ``out_path``."""
    # A dedicated figure per diagram: nothing drawn earlier through pyplot
    # (including by a previous compare_id call) can bleed into this image.
    fig, ax = plt.subplots()
    try:
        # venn2 works for two sets
        venn2((first, second), ('custom lib', 'base lib'),
              set_colors=('g', 'r'), ax=ax)
        ax.set_title(title)
        fig.savefig(out_path)
    finally:
        # Always release the figure so repeated calls do not accumulate
        # open figures in pyplot's global state.
        plt.close(fig)


def compare_id(path_1, path_2, sample_name):
    """Compare peptide and protein identifications between two reports.

    Both files are read as tab-separated tables (latin-1 encoded). The unique
    values of the ``Stripped.Sequence`` column (peptides) and of the
    ``Protein.Ids`` column (proteins) are compared between the two reports,
    and one Venn diagram per comparison is written to the current working
    directory:

    * ``venn_diag_pep_<sample_name>.png``
    * ``venn_diag_prot_<sample_name>.png``

    Args:
        path_1: Path of the report plotted as the first set; it is labelled
            'custom lib' (green) in the diagrams.
        path_2: Path of the report plotted as the second set; it is labelled
            'base lib' (red) in the diagrams.
        sample_name: Text inserted into the plot titles and the output file
            names.

    Raises:
        KeyError: If either report lacks the ``Stripped.Sequence`` or
            ``Protein.Ids`` column.
    """
    df_1 = pd.read_csv(path_1, sep='\t', encoding='latin-1')
    df_2 = pd.read_csv(path_2, sep='\t', encoding='latin-1')

    peptides_1 = _unique_values(df_1, 'Stripped.Sequence')
    protein_1 = _unique_values(df_1, 'Protein.Ids')
    peptides_2 = _unique_values(df_2, 'Stripped.Sequence')
    protein_2 = _unique_values(df_2, 'Protein.Ids')

    _save_venn(
        peptides_1,
        peptides_2,
        'Peptide identifications on {} sample'.format(sample_name),
        'venn_diag_pep_{}.png'.format(sample_name),
    )
    _save_venn(
        protein_1,
        protein_2,
        'Protein identifications on {} sample'.format(sample_name),
        'venn_diag_prot_{}.png'.format(sample_name),
    )

# Script entry: compare the two reports of sample CITFRE_ANA_69. The first
# path is plotted as 'custom lib' and the second as 'base lib' by compare_id.
compare_id(
    'CITFRE_ANA_69/report_custom.tsv',
    'CITFRE_ANA_69/report_first_lib.tsv',
    sample_name='CITFRE_ANA_69',
)