Skip to content
Snippets Groups Projects
Commit c766cc82 authored by Schneider Leo's avatar Schneider Leo
Browse files

fix image ref creation

parent ea776796
No related branches found
No related tags found
No related merge requests found
...@@ -44,6 +44,7 @@ def build_image_ms1(path, bin_mz): ...@@ -44,6 +44,7 @@ def build_image_ms1(path, bin_mz):
break break
total_ms1_mz = ms1_end_mz - ms1_start_mz total_ms1_mz = ms1_end_mz - ms1_start_mz
print('start',ms1_start_mz,'end',ms1_end_mz)
n_bin_ms1 = int(total_ms1_mz//bin_mz) n_bin_ms1 = int(total_ms1_mz//bin_mz)
size_bin_ms1 = total_ms1_mz / n_bin_ms1 size_bin_ms1 = total_ms1_mz / n_bin_ms1
for spec in e: # data structure for spec in e: # data structure
......
import pandas as pd
import pyarrow.parquet as pq
import matplotlib.pyplot as plt
import numpy as np
from matplotlib_venn import venn2
def load_lib(path):
    """Read the parquet file at ``path`` and return its content as a pandas DataFrame."""
    # pyarrow reads the file into an Arrow table; convert it in one chained step.
    return pq.read_table(path).to_pandas()
if __name__ == '__main__':
    # Guarded so that importing ``load_lib`` from this module does not load the
    # parquet files or open a (blocking) figure window as an import side effect.
    df1 = load_lib('fasta/steigerwaltii variants/uniparc_proteome_UP000033376_2025_03_14.predicted.parquet')
    df2 = load_lib('fasta/steigerwaltii variants/uniparc_proteome_UP000033499_2025_03_14.predicted.parquet')

    # Compare the two libraries on their sets of distinct stripped sequences.
    set1 = set(df1['Stripped.Sequence'].to_list())
    set2 = set(df2['Stripped.Sequence'].to_list())
    venn2((set1, set2), ('Group1', 'Group2'))

    # Save BEFORE show(): once the interactive window is closed the figure is
    # released, and a later savefig() would write an empty canvas.
    plt.savefig('fasta_similarity_diann.png')
    plt.show()
\ No newline at end of file
...@@ -2,8 +2,8 @@ import fastapy ...@@ -2,8 +2,8 @@ import fastapy
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
import numpy as np import numpy as np
from matplotlib_venn import venn2 from matplotlib_venn import venn2
from analyse_diann_digestion import load_lib
import matplotlib.image as mpimg
ALPHABET_UNMOD = { ALPHABET_UNMOD = {
...@@ -109,9 +109,9 @@ def fasta_similarity(path_fasta_1, path_fasta_2): ...@@ -109,9 +109,9 @@ def fasta_similarity(path_fasta_1, path_fasta_2):
list_seq_1=[] list_seq_1=[]
list_seq_2 = [] list_seq_2 = []
for record in fastapy.parse(path_fasta_1): for record in fastapy.parse(path_fasta_1):
list_seq_1.append(record.seq) list_seq_1.extend(digest(record.seq))
for record in fastapy.parse(path_fasta_2): for record in fastapy.parse(path_fasta_2):
list_seq_2.append(record.seq) list_seq_2.extend(digest(record.seq))
set1 = set(list_seq_1) set1 = set(list_seq_1)
set2 = set(list_seq_2) set2 = set(list_seq_2)
...@@ -143,12 +143,16 @@ def build_ref_image(path_fasta, possible_charge, ms1_end_mz, ms1_start_mz, bin_m ...@@ -143,12 +143,16 @@ def build_ref_image(path_fasta, possible_charge, ms1_end_mz, ms1_start_mz, bin_m
#compute m/z ration #compute m/z ration
mz_ratio={} mz_ratio={}
i=0
list_peptides = list(set(list_peptides))
for seq in list_peptides: for seq in list_peptides:
mz_ratio['seq']=[] mz_ratio['seq']=[]
for charge in possible_charge: for charge in possible_charge:
ratio = compute_mass(seq,'avg')/charge ratio = compute_mass(seq,'avg')/charge
if ms1_end_mz > ratio > ms1_start_mz: if ms1_end_mz > ratio > ms1_start_mz:
mz_ratio['seq'].append(ratio) mz_ratio['seq'].append(ratio)
i+=1
print(i)
#assocy predict rt #assocy predict rt
data=[] data=[]
...@@ -167,7 +171,30 @@ def build_ref_image(path_fasta, possible_charge, ms1_end_mz, ms1_start_mz, bin_m ...@@ -167,7 +171,30 @@ def build_ref_image(path_fasta, possible_charge, ms1_end_mz, ms1_start_mz, bin_m
return im return im
def build_ref_image_from_diann(path_parqet, ms1_end_mz, ms1_start_mz, bin_mz, max_cycle, rt_pred):
    """Build a binary reference image (RT x m/z) from a parquet library.

    The table is loaded with ``load_lib``; every distinct
    (sequence, charge, RT, m/z) row sets one pixel of the image to 1.

    Parameters:
        path_parqet: path of the parquet file, passed to ``load_lib``. The
            table must have the columns 'Stripped.Sequence',
            'Precursor.Charge', 'RT' and 'Precursor.Mz'.
        ms1_end_mz: upper bound of the m/z window covered by the image.
        ms1_start_mz: lower bound of the m/z window covered by the image.
        bin_mz: requested m/z bin width; the image has
            ``int((ms1_end_mz - ms1_start_mz) // bin_mz)`` columns.
        max_cycle: number of image rows; the RT range found in the table is
            rescaled linearly onto ``[0, max_cycle)``.
        rt_pred: not used by this function (kept for interface compatibility).

    Returns:
        A ``numpy`` array of shape ``(max_cycle, n_bin_ms1)`` holding 0/1.
    """
    df = load_lib(path_parqet)
    df = df[['Stripped.Sequence', 'Precursor.Charge', 'RT', 'Precursor.Mz']]
    df_unique = df.drop_duplicates()

    # build image
    total_ms1_mz = ms1_end_mz - ms1_start_mz
    n_bin_ms1 = int(total_ms1_mz // bin_mz)
    im = np.zeros([max_cycle, n_bin_ms1])
    if im.size == 0 or df_unique.empty:
        # Nothing to draw (no rows, or a zero-sized image).
        return im

    rt = df_unique['RT'].to_numpy(dtype=float)
    mz = df_unique['Precursor.Mz'].to_numpy(dtype=float)

    min_rt = np.nanmin(rt)
    max_rt = np.nanmax(rt)
    # Small epsilon keeps the largest RT strictly inside the last row.
    total_rt = max_rt - min_rt + 1e-3

    # floor() rather than int(): truncation toward zero would put m/z values
    # slightly below ms1_start_mz into column 0 instead of rejecting them.
    col = np.floor((mz - ms1_start_mz) / total_ms1_mz * n_bin_ms1)
    row = np.floor((rt - min_rt) / total_rt * max_cycle)
    # Defensive clamp against floating-point rounding at the upper RT edge.
    row = np.minimum(row, max_cycle - 1)

    # Keep only precursors inside the m/z window; the bound is the real number
    # of columns, not a hard-coded 900. NaN coordinates compare False and are
    # dropped as well.
    keep = np.isfinite(row) & (col >= 0) & (col < n_bin_ms1)
    im[row[keep].astype(int), col[keep].astype(int)] = 1
    return im
if __name__ == '__main__': if __name__ == '__main__':
# fasta_similarity('fasta/uniprotkb_proteome_UP000742934_2025_03_12.fasta','fasta/uniprotkb_proteome_UP001182277_2025_03_12.fasta') # fasta_similarity('fasta/uniparc_proteome_UP000033376_2025_03_14.fasta','fasta/uniparc_proteome_UP000033499_2025_03_14.fasta')
# mass = build_ref_image('fasta/uniprotkb_proteome_UP000742934_2025_03_12.fasta') im = build_ref_image_from_diann('fasta/steigerwaltii variants/uniparc_proteome_UP000033376_2025_03_14.predicted.parquet', ms1_end_mz=1250, ms1_start_mz=350, bin_mz=1, max_cycle=663, rt_pred=[])
pass plt.clf()
\ No newline at end of file mpimg.imsave('test_img.png', im)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment