diff --git a/image_processing/build_image.py b/image_processing/build_image.py
index 251af24fcae03dc9b1164102a2ec4982c1d52f62..336940a4073c9aae6ae9c7f307904b0b42c753ea 100644
--- a/image_processing/build_image.py
+++ b/image_processing/build_image.py
@@ -44,6 +44,7 @@ def build_image_ms1(path, bin_mz):
             break
 
     total_ms1_mz = ms1_end_mz - ms1_start_mz
+    print('start',ms1_start_mz,'end',ms1_end_mz)
     n_bin_ms1 = int(total_ms1_mz//bin_mz)
     size_bin_ms1 = total_ms1_mz / n_bin_ms1
     for spec in e:  # data structure
diff --git a/image_ref/analyse_diann_digestion.py b/image_ref/analyse_diann_digestion.py
new file mode 100644
index 0000000000000000000000000000000000000000..94f8a4c9987161c66a8ff9087b9dbaa9a8d42df6
--- /dev/null
+++ b/image_ref/analyse_diann_digestion.py
@@ -0,0 +1,23 @@
+import pandas as pd
+import pyarrow.parquet as pq
+import matplotlib.pyplot as plt
+import numpy as np
+from matplotlib_venn import venn2
+
+
+def load_lib(path):
+    """Return the parquet file at ``path`` as a pandas DataFrame.
+
+    The file is read with ``pyarrow.parquet.read_table`` and then converted."""
+    return pq.read_table(path).to_pandas()
+
+
+if __name__ == '__main__':  # guard: image_ref/utils.py imports load_lib from this module
+    df1 = load_lib('fasta/steigerwaltii variants/uniparc_proteome_UP000033376_2025_03_14.predicted.parquet')
+    df2 = load_lib('fasta/steigerwaltii variants/uniparc_proteome_UP000033499_2025_03_14.predicted.parquet')
+    # Compare the two libraries through their sets of distinct 'Stripped.Sequence' values.
+    set1 = set(df1['Stripped.Sequence'].to_list())
+    set2 = set(df2['Stripped.Sequence'].to_list())
+    venn2((set1, set2), ('Group1', 'Group2'))
+    plt.savefig('fasta_similarity_diann.png')  # save first: the figure may be empty once show() returns
+    plt.show()
\ No newline at end of file
diff --git a/image_ref/utils.py b/image_ref/utils.py
index e22fccbf3bb0ecfc4a94c293eda01f9d8e7b964a..13d6a69367d38196741a5c16a32b626103eb8486 100644
--- a/image_ref/utils.py
+++ b/image_ref/utils.py
@@ -2,8 +2,8 @@ import fastapy
 import matplotlib.pyplot as plt
 import numpy as np
 from matplotlib_venn import venn2
-
-
+from analyse_diann_digestion import load_lib
+import matplotlib.image as mpimg
 
 
 ALPHABET_UNMOD = {
@@ -109,9 +109,9 @@ def fasta_similarity(path_fasta_1, path_fasta_2):
     list_seq_1=[]
     list_seq_2 = []
     for record in fastapy.parse(path_fasta_1):
-        list_seq_1.append(record.seq)
+        list_seq_1.extend(digest(record.seq))
     for record in fastapy.parse(path_fasta_2):
-        list_seq_2.append(record.seq)
+        list_seq_2.extend(digest(record.seq))
 
     set1 = set(list_seq_1)
     set2 = set(list_seq_2)
@@ -143,12 +143,16 @@ def build_ref_image(path_fasta, possible_charge, ms1_end_mz, ms1_start_mz, bin_m
 
     #compute m/z ration
     mz_ratio={}
+    i=0
+    list_peptides = list(set(list_peptides))
     for seq in list_peptides:
         mz_ratio['seq']=[]
         for charge in possible_charge:
             ratio = compute_mass(seq,'avg')/charge
             if ms1_end_mz > ratio > ms1_start_mz:
                 mz_ratio['seq'].append(ratio)
+                i+=1
+    print(i)
 
     #assocy predict rt
     data=[]
@@ -167,7 +171,30 @@ def build_ref_image(path_fasta, possible_charge, ms1_end_mz, ms1_start_mz, bin_m
 
     return im
 
+
+def build_ref_image_from_diann(path_parqet, ms1_end_mz, ms1_start_mz, bin_mz, max_cycle, rt_pred):
+    """Build a binary (max_cycle, n_bin_ms1) image from a DIA-NN parquet library.
+
+    Each distinct row sets one pixel to 1: row index = 'RT' scaled linearly between the
+    library's min and max RT; column index = 'Precursor.Mz' binned over
+    [ms1_start_mz, ms1_end_mz). Rows outside that m/z window are skipped. ``rt_pred`` is unused.
+    """
+    df = load_lib(path_parqet)[['Stripped.Sequence', 'Precursor.Charge', 'RT', 'Precursor.Mz']].drop_duplicates()
+    total_ms1_mz = ms1_end_mz - ms1_start_mz
+    n_bin_ms1 = int(total_ms1_mz // bin_mz)
+    im = np.zeros([max_cycle, n_bin_ms1])
+    min_rt = df['RT'].min()
+    total_rt = df['RT'].max() - min_rt + 1e-3  # epsilon keeps the max-RT row index below max_cycle
+    rows = np.floor((df['RT'].to_numpy(dtype=float) - min_rt) / total_rt * max_cycle)
+    # floor, not int() truncation: m/z just below ms1_start_mz must be rejected, not drawn in bin 0
+    cols = np.floor((df['Precursor.Mz'].to_numpy(dtype=float) - ms1_start_mz) / total_ms1_mz * n_bin_ms1)
+    keep = (cols >= 0) & (cols < n_bin_ms1) & np.isfinite(rows)  # real image width, not a fixed 900
+    im[rows[keep].astype(int), cols[keep].astype(int)] = 1
+    return im
+
+
 if __name__ == '__main__':
-    # fasta_similarity('fasta/uniprotkb_proteome_UP000742934_2025_03_12.fasta','fasta/uniprotkb_proteome_UP001182277_2025_03_12.fasta')
-    # mass = build_ref_image('fasta/uniprotkb_proteome_UP000742934_2025_03_12.fasta')
-    pass
\ No newline at end of file
+    # fasta_similarity('fasta/uniparc_proteome_UP000033376_2025_03_14.fasta','fasta/uniparc_proteome_UP000033499_2025_03_14.fasta')
+    im = build_ref_image_from_diann('fasta/steigerwaltii variants/uniparc_proteome_UP000033376_2025_03_14.predicted.parquet', ms1_end_mz=1250, ms1_start_mz=350, bin_mz=1, max_cycle=663, rt_pred=[])  # 350-1250 m/z at 1 m/z per bin -> 663 x 900 image
+    plt.clf()  # clear the current pyplot figure; imsave below writes the array directly
+    mpimg.imsave('test_img.png', im)  # write the reference image to disk