From 091411baeac48acc269e28ef6ea8634afe8104ea Mon Sep 17 00:00:00 2001
From: Schneider Leo <leo.schneider@etu.ec-lyon.fr>
Date: Mon, 31 Mar 2025 17:49:18 +0200
Subject: [PATCH] fix : optimize imports

---
 image_processing/build_dataset.py    | 1 -
 image_processing/build_image.py      | 9 +++++----
 image_ref/analyse_diann_digestion.py | 2 --
 image_ref/dataset_ref.py             | 5 ++---
 image_ref/main.py                    | 5 -----
 5 files changed, 7 insertions(+), 15 deletions(-)

diff --git a/image_processing/build_dataset.py b/image_processing/build_dataset.py
index d2820b28..e2e0e8ad 100644
--- a/image_processing/build_dataset.py
+++ b/image_processing/build_dataset.py
@@ -3,7 +3,6 @@ import os
 import pandas as pd
 import re
 import numpy as np
-from PIL import Image
 import matplotlib.image as mpimg
 
 from build_image import build_image_ms1
diff --git a/image_processing/build_image.py b/image_processing/build_image.py
index a2d21431..f390b40e 100644
--- a/image_processing/build_image.py
+++ b/image_processing/build_image.py
@@ -32,12 +32,13 @@ def build_image_ms1(path, bin_mz):
     e = oms.MSExperiment()
     oms.MzMLFile().load(path, e)
     e.updateRanges()
-    id = e.getSpectra()[-1].getNativeID()
+    id_spectra = e.getSpectra()[-1].getNativeID()
 
-    dico = dict(s.split('=', 1) for s in id.split())
+    dico = dict(s.split('=', 1) for s in id_spectra.split())
     max_cycle = int(dico['cycle'])
     list_cycle = [[] for _ in range(max_cycle)]
     #get ms window size from first ms1 spectra (similar for all ms1 spectra)
+    ms1_start_mz,ms1_end_mz=None,None
     for s in e:
         if s.getMSLevel() == 1:
             ms1_start_mz = s.getInstrumentSettings().getScanWindows()[0].begin
@@ -50,8 +51,8 @@ def build_image_ms1(path, bin_mz):
     size_bin_ms1 = total_ms1_mz / n_bin_ms1
     #organise sepctra by their MSlevel (only MS1 are kept)
     for spec in e:  # data structure
-        id = spec.getNativeID()
-        dico = dict(s.split('=', 1) for s in id.split())
+        id_spectra = spec.getNativeID()
+        dico = dict(s.split('=', 1) for s in id_spectra.split())
         if spec.getMSLevel() == 1:
             list_cycle[int(dico['cycle']) - 1].insert(0, spec)
 
diff --git a/image_ref/analyse_diann_digestion.py b/image_ref/analyse_diann_digestion.py
index 920a5861..ddaee148 100644
--- a/image_ref/analyse_diann_digestion.py
+++ b/image_ref/analyse_diann_digestion.py
@@ -1,7 +1,5 @@
-import pandas as pd
 import pyarrow.parquet as pq
 import matplotlib.pyplot as plt
-import numpy as np
 from matplotlib_venn import venn2
 
 
diff --git a/image_ref/dataset_ref.py b/image_ref/dataset_ref.py
index 8d7f1fb0..be91a4cf 100644
--- a/image_ref/dataset_ref.py
+++ b/image_ref/dataset_ref.py
@@ -1,5 +1,4 @@
 import random
-from cProfile import label
 
 import numpy as np
 import torch
@@ -9,10 +8,10 @@ import torch.utils.data as data
 from PIL import Image
 import os
 import os.path
-from typing import Any, Callable, cast, Dict, List, Optional, Tuple, Union
+from typing import Callable, cast, Dict, List, Optional, Tuple, Union
 from pathlib import Path
 from collections import OrderedDict
-from sklearn.model_selection import train_test_split
+
 IMG_EXTENSIONS = ".npy"
 
 class Threshold_noise:
diff --git a/image_ref/main.py b/image_ref/main.py
index 8fadee97..d65d0c8e 100644
--- a/image_ref/main.py
+++ b/image_ref/main.py
@@ -1,8 +1,3 @@
-#TODO REDO A DATASET https://discuss.pytorch.org/t/upload-a-customize-data-set-for-multi-regression-task/43413?u=ptrblck
-"""1st method: load 1 image for 1 ref
-2nd method: load 1 image and all the refs: fine for now, but it remains to be seen how this scales as the number of classes grows
-3rd method: 2 separate datasets: more storage-efficient but not easy to maintain"""
-
 import matplotlib.pyplot as plt
 import numpy as np
-- 
GitLab
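
Note: beyond the import cleanup, this patch makes two small behavioral fixes in build_image_ms1: it renames id to id_spectra (avoiding a shadow of Python's built-in id), and it initializes ms1_start_mz,ms1_end_mz=None,None before the scan loop, so an input containing no MS1 spectra no longer leaves those names unbound at their later use. The sketch below illustrates that pattern only; window_bounds and its dict-shaped spectra are hypothetical stand-ins for the pyOpenMS objects, not code from this repository.

# Minimal sketch of the pattern introduced by the patch: initialize the
# window bounds before the loop so "no MS1 spectrum" is an explicit None
# rather than an UnboundLocalError at the first later use.
def window_bounds(spectra):
    ms1_start_mz, ms1_end_mz = None, None  # the initialization the patch adds
    for s in spectra:
        if s["ms_level"] == 1:
            ms1_start_mz = s["start_mz"]
            ms1_end_mz = s["end_mz"]
    if ms1_start_mz is None:
        # Without the initialization, reaching a use of ms1_start_mz here
        # would raise UnboundLocalError instead of this explicit error.
        raise ValueError("no MS1 spectrum found; cannot infer scan window")
    return ms1_start_mz, ms1_end_mz

# Example: one MS1 spectrum with a 350-1250 m/z scan window.
print(window_bounds([{"ms_level": 1, "start_mz": 350.0, "end_mz": 1250.0}]))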