diff --git a/dataset/dataset.py b/dataset/dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..b0d19939d869aa1d0aad126671689fa552a3f119
--- /dev/null
+++ b/dataset/dataset.py
@@ -0,0 +1,29 @@
+import torch
+import torchvision
+
+from torch.utils.data import DataLoader
+import torchvision.transforms.functional as TF
+import random
+
+# Images are read from the per-class sub-directories of this folder.
+root = '../data/processed_data'
+dataset = torchvision.datasets.ImageFolder(root, transform=None)
+# Every loader option is spelled out explicitly: one image per batch, in
+# dataset order, loaded in the main process, with default collation and
+# without pinned memory.
+data_loader = DataLoader(
+    dataset, batch_size=1, shuffle=False, num_workers=0,
+    collate_fn=None, pin_memory=False,
+)
+
+class Threshold_noise:
+    """Zero out values strictly below ``threshold`` (treated as noise)."""
+
+    def __init__(self, threshold):
+        self.threshold = threshold  # cut-off, compared directly with the tensor values
+
+    def __call__(self, x):
+        # x: torch tensor; the result keeps its shape and dtype, x is not modified.
+        return torch.where(x < self.threshold, torch.zeros_like(x), x)
+
+rotation_transform = Threshold_noise(threshold=100)  # NOTE(review): a Threshold_noise transform despite the "rotation" name
\ No newline at end of file
diff --git a/image_processing/build_dataset.py b/image_processing/build_dataset.py
index 27fda9745622ffae3a4ad27c272474a97d0b265e..2a127e0841ff7fef09c60301134ff6cb753da677 100644
--- a/image_processing/build_dataset.py
+++ b/image_processing/build_dataset.py
@@ -1,8 +1,24 @@
+import glob
+import os
 import pandas as pd
 import re
+import numpy as np
+from PIL import Image
+import matplotlib.image as mpimg
 
+from build_image import build_image_ms1
 
-def create_antibio_dataset(path='data/230804_strain_peptides_antibiogram_Enterobacterales.xlsx'):
+"""
+find . -name '*.mzML' -exec cp -prv '{}' '/home/leo/PycharmProjects/pseudo_image/data/raw_data' ';'
+Copy the .mzML files from the drive into data/raw_data.
+"""
+
+def create_antibio_dataset(path='../data/label_raw/230804_strain_peptides_antibiogram_Enterobacterales.xlsx'):
+    """
+    Extract and organise labels from raw excel file
+    :param path: excel path
+    :return: dataframe
+    """
     df = pd.read_excel(path, header=1)
     df = df[['sample_name','species','AMC (disk)','AMK (disk)','AMK (mic)','AMK (vitek)','AMP (vitek)','AMX (disk)',
     'AMX (vitek)','ATM (disk)','ATM (vitek)','CAZ (disk)','CAZ (mic)','CAZ (vitek)','CHL (vitek)','CIP (disk)',
@@ -22,7 +38,7 @@ def create_antibio_dataset(path='data/230804_strain_peptides_antibiogram_Enterob
         l = split_before_number(s)
         species = l[0]
         nb = l[1]
-        return '{}-{}-{}_100vW_100SPD.wiff'.format(species,nb,analyse)
+        return '{}-{}-{}_100vW_100SPD.mzML'.format(species,nb,analyse)
 
     df['path_ana'] = df['sample_name'].map(lambda x: create_fname(x,analyse='ANA'))
     df['path_aer'] = df['sample_name'].map(lambda x: create_fname(x, analyse='AER'))
@@ -30,11 +46,32 @@ def create_antibio_dataset(path='data/230804_strain_peptides_antibiogram_Enterob
     return df
 
 
-def cut_fname(s):
-    return s.split('/')[-1]
+def create_dataset():
+    """
+    Create images from raw .mzML files and sort them in their corresponding class directory
+    Each image is saved as .png and .npy in ../data/processed_data/<species>/;
+    files whose name is not listed in the label table are skipped.
+    :return: None
+    """
+    label = create_antibio_dataset()
+    for path in glob.glob("../data/raw_data/**.mzML"):
+        print(path)
+        fname = os.path.basename(path)  # portable, unlike path.split("/")[-1]
+        for column, analyse in (('path_ana', 'ANA'), ('path_aer', 'AER')):
+            match = label[label[column] == fname]
+            if match.empty:
+                continue
+            species = match['species'].values[0]
+            name = match['sample_name'].values[0]
+            directory_path = '../data/processed_data/{}'.format(species)
+            # exist_ok replaces the isdir() check and its check-then-create race
+            os.makedirs(directory_path, exist_ok=True)
+            mat = build_image_ms1(path, 1)
+            mpimg.imsave(directory_path + "/" + name + '_' + analyse + '.png', mat)
+            np.save(directory_path + "/" + name + '_' + analyse + '.npy', mat)
+            break  # path_ana takes precedence over path_aer (first match wins)
+
 
-def is_file_present(path, df):
-    return path in df['path_ana'].values or path in df['path_aer'].values
 
-def is_fname_present(path, df):
-    return path in df['fname'].values
\ No newline at end of file
+if __name__ == '__main__':
+    label = create_antibio_dataset()  # only the label table is built here
\ No newline at end of file
diff --git a/image_processing/build_image.py b/image_processing/build_image.py
index b2f03475636a9fac2e12d1235f63478c75703442..251af24fcae03dc9b1164102a2ec4982c1d52f62 100644
--- a/image_processing/build_image.py
+++ b/image_processing/build_image.py
@@ -1,6 +1,7 @@
 import numpy as np
 import matplotlib.pyplot as plt
 import matplotlib.colors as colors
+import pyopenms as oms
 
 def plot_spectra_2d(exp, ms_level=1, marker_size=5, out_path='temp.png'):
     exp.updateRanges()
@@ -26,7 +27,9 @@ def plot_spectra_2d(exp, ms_level=1, marker_size=5, out_path='temp.png'):
     plt.savefig(out_path)  # slow for larger data sets
 
 
-def build_image_ms1(e, bin_mz):
+def build_image_ms1(path, bin_mz):
+    e = oms.MSExperiment()
+    oms.MzMLFile().load(path, e)
     e.updateRanges()
     id = e.getSpectra()[-1].getNativeID()
 
diff --git a/requirements.txt b/requirements.txt
index de43a1ebe42a7d41074db486ee3bb8cc65368e1b..6780dc37ff73f4e4abb0c84a415145c2a67d5ac0 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,8 @@
 numpy~=2.2.3
 matplotlib~=3.10.0
 pandas~=2.2.3
-pyopenms~=3.3.0
\ No newline at end of file
+pyopenms~=3.3.0
+openpyxl
+torch~=2.6.0
+torchvision~=0.21.0
+pillow~=11.1.0
\ No newline at end of file