diff --git a/data/label_raw/230804_strain_peptides_antibiogram_Enterobacterales.xlsx b/data/label_raw/230804_strain_peptides_antibiogram_Enterobacterales.xlsx
new file mode 100644
index 0000000000000000000000000000000000000000..f092ddf15aa3040458d480d46aaeba3d385f7d20
Binary files /dev/null and b/data/label_raw/230804_strain_peptides_antibiogram_Enterobacterales.xlsx differ
diff --git a/image_processing/build_dataset.py b/image_processing/build_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..27fda9745622ffae3a4ad27c272474a97d0b265e
--- /dev/null
+++ b/image_processing/build_dataset.py
@@ -0,0 +1,75 @@
+"""Build the antibiogram label table and the acquisition file names it implies."""
+import pandas as pd
+import re
+
+
+def _create_fname(sample_name, analyse):
+    """Return the ``.wiff`` file name built from a sample name and an analysis tag.
+
+    The sample name is cut at its first run of digits: the text before the
+    digits and the digits themselves become the first two dash-separated
+    fields of the file name, followed by ``analyse``.
+
+    Raises:
+        ValueError: if ``sample_name`` is not a string or contains no digit.
+    """
+    match = re.search(r'\d+', sample_name) if isinstance(sample_name, str) else None
+    if match is None:
+        # Fail with the offending value instead of an opaque IndexError/TypeError.
+        raise ValueError(
+            'sample_name {!r} does not contain a sample number'.format(sample_name))
+    prefix = sample_name[:match.start()]
+    return '{}-{}-{}_100vW_100SPD.wiff'.format(prefix, match.group(), analyse)
+
+
+def create_antibio_dataset(path='data/label_raw/230804_strain_peptides_antibiogram_Enterobacterales.xlsx'):
+    """Load the antibiogram spreadsheet and add the derived file-name columns.
+
+    Args:
+        path: location of the Excel workbook. The default points at
+            ``data/label_raw/``, where the workbook is stored in the repository.
+            Reading ``.xlsx`` files requires an Excel engine such as openpyxl.
+
+    Returns:
+        A DataFrame restricted to ``sample_name``, ``species`` and the other
+        columns selected below, plus ``path_ana`` and ``path_aer``: the file
+        names built from ``sample_name`` with the ``ANA`` and ``AER`` tags
+        respectively.
+
+    Raises:
+        KeyError: if one of the selected columns is missing from the sheet.
+        ValueError: if a sample name is not a string or contains no digit.
+    """
+    # header=1: the column names are read from the second row of the sheet.
+    df = pd.read_excel(path, header=1)
+    # .copy() detaches the column subset from the frame that was read, so adding
+    # columns below does not trigger pandas' SettingWithCopyWarning.
+    df = df[['sample_name','species','AMC (disk)','AMK (disk)','AMK (mic)','AMK (vitek)','AMP (vitek)','AMX (disk)',
+    'AMX (vitek)','ATM (disk)','ATM (vitek)','CAZ (disk)','CAZ (mic)','CAZ (vitek)','CHL (vitek)','CIP (disk)',
+    'CIP (vitek)','COL (disk)','COL (mic)','CRO (mic)','CRO (vitek)','CTX (disk)','CTX (mic)','CTX (vitek)',
+    'CXM (vitek)','CZA (disk)','CZA (vitek)','CZT (disk)','CZT (vitek)','ETP (disk)','ETP (mic)','ETP (vitek)',
+    'FEP (disk)','FEP (mic)','FEP (vitek)','FOS (disk)','FOX (disk)','FOX (vitek)','GEN (disk)','GEN (mic)',
+    'GEN (vitek)','IPM (disk)','IPM (mic)','IPM (vitek)','LTM (disk)','LVX (disk)','LVX (vitek)','MEC (disk)',
+    'MEM (disk)','MEM (mic)','MEM (vitek)','NAL (vitek)','NET (disk)','OFX (vitek)','PIP (vitek)','PRL (disk)',
+    'SXT (disk)','SXT (vitek)','TCC (disk)','TCC (vitek)','TEM (disk)','TEM (vitek)','TGC (disk)','TGC (vitek)',
+    'TIC (disk)','TIC (vitek)','TOB (disk)','TOB (vitek)','TZP (disk)','TZP (mic)','TZP (vitek)']].copy()
+
+    df['path_ana'] = df['sample_name'].map(lambda x: _create_fname(x, analyse='ANA'))
+    df['path_aer'] = df['sample_name'].map(lambda x: _create_fname(x, analyse='AER'))
+
+    return df
+
+
+def cut_fname(s):
+    """Return the part of ``s`` after its last ``/`` (all of ``s`` if there is none)."""
+    return s.split('/')[-1]
+
+
+def is_file_present(path, df):
+    """Return True if ``path`` equals a value of ``df['path_ana']`` or ``df['path_aer']``."""
+    return path in df['path_ana'].values or path in df['path_aer'].values
+
+
+def is_fname_present(path, df):
+    """Return True if ``path`` equals a value of ``df['fname']``."""
+    return path in df['fname'].values
diff --git a/image_processing/build_image.py b/image_processing/build_image.py
new file mode 100644
index 0000000000000000000000000000000000000000..b2f03475636a9fac2e12d1235f63478c75703442
--- /dev/null
+++ b/image_processing/build_image.py
@@ -0,0 +1,113 @@
+"""Plot mass-spectrometry experiments and bin their MS1 spectra into images."""
+import numpy as np
+import matplotlib.pyplot as plt
+import matplotlib.colors as colors
+
+
+def plot_spectra_2d(exp, ms_level=1, marker_size=5, out_path='temp.png'):
+    """Scatter-plot the peaks of every spectrum at ``ms_level`` and save the figure.
+
+    Each spectrum is drawn at its retention time (x axis) against the m/z of
+    its peaks (y axis), coloured by intensity on a logarithmic scale.
+
+    Args:
+        exp: experiment to plot; iterated spectrum by spectrum and queried for
+            its global minimum and maximum intensity.
+        ms_level: only spectra whose MS level equals this value are drawn.
+        marker_size: marker size handed to ``plt.scatter``.
+        out_path: file the figure is saved to.
+
+    NOTE(review): the function draws on the current pyplot figure and never
+    closes it, so repeated calls accumulate on one figure unless the caller
+    manages figures itself.
+    """
+    exp.updateRanges()
+    for spec in exp:
+        if spec.getMSLevel() == ms_level:
+            mz, intensity = spec.get_peaks()
+            p = intensity.argsort()  # sort by intensity to plot highest on top
+            rt = np.full([mz.shape[0]], spec.getRT(), float)
+            plt.scatter(
+                rt,
+                mz[p],
+                c=intensity[p],
+                cmap="afmhot_r",
+                s=marker_size,
+                # presumably +1 keeps the log-scale lower bound positive when the minimum is 0
+                norm=colors.LogNorm(
+                    exp.getMinIntensity() + 1, exp.getMaxIntensity()
+                ),
+            )
+    plt.clim(exp.getMinIntensity() + 1, exp.getMaxIntensity())
+    plt.xlabel("time (s)")
+    plt.ylabel("m/z")
+    plt.colorbar()
+    plt.savefig(out_path)  # slow for larger data sets
+
+
+def _parse_native_id(native_id):
+    """Split a native ID made of whitespace-separated ``key=value`` tokens into a dict."""
+    return dict(token.split('=', 1) for token in native_id.split())
+
+
+def build_image_ms1(e, bin_mz):
+    """Bin the MS1 spectra of an experiment into a (cycle x m/z bin) intensity image.
+
+    Row ``c - 1`` holds the summed intensities of the MS1 spectra whose native
+    ID has ``cycle=c``; columns are equal-width m/z bins covering the scan
+    window of the first MS1 spectrum.
+
+    Args:
+        e: experiment whose spectra carry native IDs with a ``cycle=<int>``
+            token. The number of rows is taken from the last spectrum's cycle,
+            which assumes that spectrum has the highest cycle number.
+        bin_mz: requested bin width in m/z. The window is divided into
+            ``floor(window / bin_mz)`` bins, so the effective width is
+            slightly larger whenever ``bin_mz`` does not divide the window.
+
+    Returns:
+        A float array of shape ``(max_cycle, n_bins)``.
+
+    Raises:
+        ValueError: if there is no MS1 spectrum, or if ``bin_mz`` is not
+            positive or is wider than the scan window.
+    """
+    e.updateRanges()
+    last_id = e.getSpectra()[-1].getNativeID()
+    max_cycle = int(_parse_native_id(last_id)['cycle'])
+
+    # The scan window of the first MS1 spectrum defines the m/z axis.
+    ms1_start_mz = ms1_end_mz = None
+    for spec in e:
+        if spec.getMSLevel() == 1:
+            window = spec.getInstrumentSettings().getScanWindows()[0]
+            ms1_start_mz, ms1_end_mz = window.begin, window.end
+            break
+    if ms1_start_mz is None:
+        raise ValueError('experiment contains no MS1 spectrum')
+
+    total_ms1_mz = ms1_end_mz - ms1_start_mz
+    n_bin_ms1 = int(total_ms1_mz // bin_mz)
+    if n_bin_ms1 < 1:
+        raise ValueError('bin_mz must be positive and no wider than the MS1 scan window')
+    size_bin_ms1 = total_ms1_mz / n_bin_ms1
+
+    im = np.zeros([max_cycle, n_bin_ms1])
+    for spec in e:
+        if spec.getMSLevel() != 1:
+            continue
+        cycle = int(_parse_native_id(spec.getNativeID())['cycle'])
+        mz, intensity = spec.get_peaks()
+        mz = np.asarray(mz)
+        intensity = np.asarray(intensity)
+        # Peaks outside the scan window are dropped: indexing them directly
+        # either raised IndexError (above the window) or, through a negative
+        # index, silently added them to the wrong bin (below the window).
+        inside = (mz >= ms1_start_mz) & (mz <= ms1_end_mz)
+        bins = ((mz[inside] - ms1_start_mz) // size_bin_ms1).astype(np.intp)
+        # A peak exactly on the upper edge belongs to the last bin.
+        np.minimum(bins, n_bin_ms1 - 1, out=bins)
+        row = np.bincount(bins, weights=intensity[inside], minlength=n_bin_ms1)
+        im[cycle - 1] += row
+
+    return im
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..de43a1ebe42a7d41074db486ee3bb8cc65368e1b
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,4 @@
+numpy~=2.2.3
+matplotlib~=3.10.0
+pandas~=2.2.3
+pyopenms~=3.3.0
\ No newline at end of file