import pyarrow.parquet as pq
import matplotlib.pyplot as plt
from matplotlib_venn import venn2


def load_lib(path):
    """Read the Parquet file at *path* and return it as a pandas DataFrame.

    Args:
        path: Location of the Parquet file, passed straight to
            ``pyarrow.parquet.read_table``.

    Returns:
        The file's contents converted with ``Table.to_pandas()``.
    """
    return pq.read_table(path).to_pandas()


if __name__ == '__main__':
    # Load the two Parquet tables to compare.
    # NOTE(review): file names suggest predicted libraries for two UniParc
    # proteomes -- confirm against the data source.
    df1 = load_lib('fasta/steigerwaltii variants/uniparc_proteome_UP000033376_2025_03_14.predicted.parquet')
    df2 = load_lib('fasta/steigerwaltii variants/uniparc_proteome_UP000033499_2025_03_14.predicted.parquet')

    # Reduce each table to its set of distinct 'Stripped.Sequence' values so
    # the diagram counts unique entries rather than rows.
    set1 = set(df1['Stripped.Sequence'].to_list())
    set2 = set(df2['Stripped.Sequence'].to_list())

    # Two-circle Venn diagram of the overlap between the two sets.
    venn2((set1, set2), ('Group1', 'Group2'))

    # Save BEFORE showing: with an interactive backend plt.show() blocks
    # until the window is closed and the figure is then discarded, so a
    # later savefig() would write an empty image.
    plt.savefig('fasta_similarity_diann.png')
    plt.show()