Skip to content
Snippets Groups Projects
Commit c766cc82 authored by Schneider Leo's avatar Schneider Leo
Browse files

fix image ref creation

parent ea776796
No related branches found
No related tags found
No related merge requests found
......@@ -44,6 +44,7 @@ def build_image_ms1(path, bin_mz):
break
total_ms1_mz = ms1_end_mz - ms1_start_mz
print('start',ms1_start_mz,'end',ms1_end_mz)
n_bin_ms1 = int(total_ms1_mz//bin_mz)
size_bin_ms1 = total_ms1_mz / n_bin_ms1
for spec in e: # data structure
......
import pandas as pd
import pyarrow.parquet as pq
import matplotlib.pyplot as plt
import numpy as np
from matplotlib_venn import venn2
def load_lib(path):
    """Read the parquet file at ``path`` and return it as a pandas DataFrame."""
    return pq.read_table(path).to_pandas()
# Compare the stripped peptide sequences of two predicted libraries and draw
# their overlap as a two-set Venn diagram.
# NOTE(review): these statements run at module level, so they also execute
# whenever this module is imported (e.g. to reuse ``load_lib``) -- consider
# moving them under an ``if __name__ == '__main__':`` guard.
df1 = load_lib('fasta/steigerwaltii variants/uniparc_proteome_UP000033376_2025_03_14.predicted.parquet')
df2 = load_lib('fasta/steigerwaltii variants/uniparc_proteome_UP000033499_2025_03_14.predicted.parquet')
set1 = set(df1['Stripped.Sequence'].to_list())
set2 = set(df2['Stripped.Sequence'].to_list())
venn2((set1, set2), ('Group1', 'Group2'))
# Save before showing: once the window opened by plt.show() is closed the
# figure is typically released, and a later savefig() writes an empty canvas.
plt.savefig('fasta_similarity_diann.png')
plt.show()
\ No newline at end of file
......@@ -2,8 +2,8 @@ import fastapy
import matplotlib.pyplot as plt
import numpy as np
from matplotlib_venn import venn2
from analyse_diann_digestion import load_lib
import matplotlib.image as mpimg
ALPHABET_UNMOD = {
......@@ -109,9 +109,9 @@ def fasta_similarity(path_fasta_1, path_fasta_2):
list_seq_1=[]
list_seq_2 = []
for record in fastapy.parse(path_fasta_1):
list_seq_1.append(record.seq)
list_seq_1.extend(digest(record.seq))
for record in fastapy.parse(path_fasta_2):
list_seq_2.append(record.seq)
list_seq_2.extend(digest(record.seq))
set1 = set(list_seq_1)
set2 = set(list_seq_2)
......@@ -143,12 +143,16 @@ def build_ref_image(path_fasta, possible_charge, ms1_end_mz, ms1_start_mz, bin_m
#compute m/z ratio
mz_ratio={}
i=0
list_peptides = list(set(list_peptides))
for seq in list_peptides:
mz_ratio['seq']=[]
for charge in possible_charge:
ratio = compute_mass(seq,'avg')/charge
if ms1_end_mz > ratio > ms1_start_mz:
mz_ratio['seq'].append(ratio)
i+=1
print(i)
#associate predicted rt
data=[]
......@@ -167,7 +171,30 @@ def build_ref_image(path_fasta, possible_charge, ms1_end_mz, ms1_start_mz, bin_m
return im
def build_ref_image_from_diann(path_parqet, ms1_end_mz, ms1_start_mz, bin_mz, max_cycle, rt_pred):
    """Rasterise the precursors of a parquet library into a binary RT x m/z image.

    The parquet file is loaded with ``load_lib``; only the columns
    'Stripped.Sequence', 'Precursor.Charge', 'RT' and 'Precursor.Mz' are kept
    and duplicate rows are dropped. Each remaining precursor sets one pixel
    to 1.

    Args:
        path_parqet: path of the parquet file handed to ``load_lib``.
        ms1_end_mz: upper m/z bound of the image (exclusive).
        ms1_start_mz: lower m/z bound of the image (inclusive).
        bin_mz: requested m/z bin width; the number of columns is
            ``int((ms1_end_mz - ms1_start_mz) // bin_mz)``.
        max_cycle: number of rows; the observed RT range is stretched
            linearly over ``[0, max_cycle)``.
        rt_pred: not used by this function; kept so the signature stays
            unchanged for existing callers.

    Returns:
        A ``numpy`` array of shape ``(max_cycle, n_bin_ms1)`` holding 0/1.
        Precursors whose m/z falls outside ``[ms1_start_mz, ms1_end_mz)`` and
        rows with a missing RT or m/z are ignored.
    """
    df = load_lib(path_parqet)
    df = df[['Stripped.Sequence', 'Precursor.Charge', 'RT', 'Precursor.Mz']]
    df_unique = df.drop_duplicates()

    # build image
    total_ms1_mz = ms1_end_mz - ms1_start_mz
    n_bin_ms1 = int(total_ms1_mz // bin_mz)
    im = np.zeros([max_cycle, n_bin_ms1])

    coords = df_unique[['RT', 'Precursor.Mz']].dropna()
    if coords.empty:
        return im
    rt = coords['RT'].to_numpy(dtype=float)
    mz = coords['Precursor.Mz'].to_numpy(dtype=float)

    min_rt = rt.min()
    # The small epsilon keeps the largest RT strictly below max_cycle.
    total_rt = rt.max() - min_rt + 1e-3
    rt_idx = ((rt - min_rt) / total_rt * max_cycle).astype(int)

    # floor (not truncation toward zero) so that m/z values slightly below
    # ms1_start_mz get a negative index and are rejected instead of being
    # drawn in bin 0.
    mz_idx = np.floor((mz - ms1_start_mz) / total_ms1_mz * n_bin_ms1).astype(int)

    # Bound by the real number of bins rather than a hard-coded 900, which
    # was only correct for one particular m/z range and bin width.
    in_range = (mz_idx >= 0) & (mz_idx < n_bin_ms1)
    im[rt_idx[in_range], mz_idx[in_range]] = 1
    return im
if __name__ == '__main__':
# fasta_similarity('fasta/uniprotkb_proteome_UP000742934_2025_03_12.fasta','fasta/uniprotkb_proteome_UP001182277_2025_03_12.fasta')
# mass = build_ref_image('fasta/uniprotkb_proteome_UP000742934_2025_03_12.fasta')
pass
\ No newline at end of file
# fasta_similarity('fasta/uniparc_proteome_UP000033376_2025_03_14.fasta','fasta/uniparc_proteome_UP000033499_2025_03_14.fasta')
im = build_ref_image_from_diann('fasta/steigerwaltii variants/uniparc_proteome_UP000033376_2025_03_14.predicted.parquet', ms1_end_mz=1250, ms1_start_mz=350, bin_mz=1, max_cycle=663, rt_pred=[])
plt.clf()
mpimg.imsave('test_img.png', im)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment