diff --git a/image_processing/build_dataset.py b/image_processing/build_dataset.py
index d2820b2881acdf283333f9756fd71ff63478328c..e2e0e8ad9d2d273f09825c014be7268aad2cc34b 100644
--- a/image_processing/build_dataset.py
+++ b/image_processing/build_dataset.py
@@ -3,7 +3,6 @@ import os
 import pandas as pd
 import re
 import numpy as np
-from PIL import Image
 import matplotlib.image as mpimg
 
 from build_image import build_image_ms1
diff --git a/image_processing/build_image.py b/image_processing/build_image.py
index a2d2143190acb2af6d96b96e171093392840bf3f..f390b40eef5d0ecd1d105aa6d76b79878b8e6766 100644
--- a/image_processing/build_image.py
+++ b/image_processing/build_image.py
@@ -32,12 +32,13 @@ def build_image_ms1(path, bin_mz):
     e = oms.MSExperiment()
     oms.MzMLFile().load(path, e)
     e.updateRanges()
-    id = e.getSpectra()[-1].getNativeID()
+    id_spectra = e.getSpectra()[-1].getNativeID()
 
-    dico = dict(s.split('=', 1) for s in id.split())
+    dico = dict(s.split('=', 1) for s in id_spectra.split())
     max_cycle = int(dico['cycle'])
     list_cycle = [[] for _ in range(max_cycle)]
     #get ms window size from first ms1 spectra (similar for all ms1 spectra)
+    ms1_start_mz,ms1_end_mz=None,None
     for s in e:
         if s.getMSLevel() == 1:
             ms1_start_mz = s.getInstrumentSettings().getScanWindows()[0].begin
@@ -50,8 +51,8 @@ def build_image_ms1(path, bin_mz):
     size_bin_ms1 = total_ms1_mz / n_bin_ms1
     #organise sepctra by their MSlevel (only MS1 are kept)
     for spec in e:  # data structure
-        id = spec.getNativeID()
-        dico = dict(s.split('=', 1) for s in id.split())
+        id_spectra = spec.getNativeID()
+        dico = dict(s.split('=', 1) for s in id_spectra.split())
         if spec.getMSLevel() == 1:
             list_cycle[int(dico['cycle']) - 1].insert(0, spec)
 
diff --git a/image_ref/analyse_diann_digestion.py b/image_ref/analyse_diann_digestion.py
index 920a5861329c90f43355b443529a127ce4a542e3..ddaee1481e548bb7119ce521a4d3b0724609caa5 100644
--- a/image_ref/analyse_diann_digestion.py
+++ b/image_ref/analyse_diann_digestion.py
@@ -1,7 +1,5 @@
-import pandas as pd
 import pyarrow.parquet as pq
 import matplotlib.pyplot as plt
-import numpy as np
 from matplotlib_venn import venn2
 
 
diff --git a/image_ref/dataset_ref.py b/image_ref/dataset_ref.py
index 8d7f1fb0abca6ae3171cb70a42f8e2b6b8d3a866..be91a4cf616adb2a6ff7ab6621e377ddb8e1a7df 100644
--- a/image_ref/dataset_ref.py
+++ b/image_ref/dataset_ref.py
@@ -1,5 +1,4 @@
 import random
-from cProfile import label
 
 import numpy as np
 import torch
@@ -9,10 +8,10 @@ import torch.utils.data as data
 from PIL import Image
 import os
 import os.path
-from typing import Any, Callable, cast, Dict, List, Optional, Tuple, Union
+from typing import Callable, cast, Dict, List, Optional, Tuple, Union
 from pathlib import Path
 from collections import OrderedDict
-from sklearn.model_selection import train_test_split
+
 IMG_EXTENSIONS = ".npy"
 
 class Threshold_noise:
diff --git a/image_ref/main.py b/image_ref/main.py
index 8fadee97c46b88a953b88014e46d80721cd61777..d65d0c8e7f31d2259271177123f609a225b6461e 100644
--- a/image_ref/main.py
+++ b/image_ref/main.py
@@ -1,8 +1,3 @@
-#TODO REFAIRE UN DATASET https://discuss.pytorch.org/t/upload-a-customize-data-set-for-multi-regression-task/43413?u=ptrblck
-"""1er methode load 1 image pour 1 ref
-2eme methode : load 1 image et toutes les refs : ok pour l'instant mais a voir comment est ce que cela scale avec l'augmentation du nb de classes
-3eme methods 2 datasets différents : plus efficace en stockage mais pas facil a maintenir"""
-
 import matplotlib.pyplot as plt
 import numpy as np