diff --git a/image_processing/build_dataset.py b/image_processing/build_dataset.py index d2820b2881acdf283333f9756fd71ff63478328c..e2e0e8ad9d2d273f09825c014be7268aad2cc34b 100644 --- a/image_processing/build_dataset.py +++ b/image_processing/build_dataset.py @@ -3,7 +3,6 @@ import os import pandas as pd import re import numpy as np -from PIL import Image import matplotlib.image as mpimg from build_image import build_image_ms1 diff --git a/image_processing/build_image.py b/image_processing/build_image.py index a2d2143190acb2af6d96b96e171093392840bf3f..f390b40eef5d0ecd1d105aa6d76b79878b8e6766 100644 --- a/image_processing/build_image.py +++ b/image_processing/build_image.py @@ -32,12 +32,13 @@ def build_image_ms1(path, bin_mz): e = oms.MSExperiment() oms.MzMLFile().load(path, e) e.updateRanges() - id = e.getSpectra()[-1].getNativeID() + id_spectra = e.getSpectra()[-1].getNativeID() - dico = dict(s.split('=', 1) for s in id.split()) + dico = dict(s.split('=', 1) for s in id_spectra.split()) max_cycle = int(dico['cycle']) list_cycle = [[] for _ in range(max_cycle)] #get ms window size from first ms1 spectra (similar for all ms1 spectra) + ms1_start_mz,ms1_end_mz=None,None for s in e: if s.getMSLevel() == 1: ms1_start_mz = s.getInstrumentSettings().getScanWindows()[0].begin @@ -50,8 +51,8 @@ def build_image_ms1(path, bin_mz): size_bin_ms1 = total_ms1_mz / n_bin_ms1 #organise sepctra by their MSlevel (only MS1 are kept) for spec in e: # data structure - id = spec.getNativeID() - dico = dict(s.split('=', 1) for s in id.split()) + id_spectra = spec.getNativeID() + dico = dict(s.split('=', 1) for s in id_spectra.split()) if spec.getMSLevel() == 1: list_cycle[int(dico['cycle']) - 1].insert(0, spec) diff --git a/image_ref/analyse_diann_digestion.py b/image_ref/analyse_diann_digestion.py index 920a5861329c90f43355b443529a127ce4a542e3..ddaee1481e548bb7119ce521a4d3b0724609caa5 100644 --- a/image_ref/analyse_diann_digestion.py +++ b/image_ref/analyse_diann_digestion.py @@ -1,7 +1,5 @@ -import pandas as pd import pyarrow.parquet as pq import matplotlib.pyplot as plt -import numpy as np from matplotlib_venn import venn2 diff --git a/image_ref/dataset_ref.py b/image_ref/dataset_ref.py index 8d7f1fb0abca6ae3171cb70a42f8e2b6b8d3a866..be91a4cf616adb2a6ff7ab6621e377ddb8e1a7df 100644 --- a/image_ref/dataset_ref.py +++ b/image_ref/dataset_ref.py @@ -1,5 +1,4 @@ import random -from cProfile import label import numpy as np import torch @@ -9,10 +8,10 @@ import torch.utils.data as data from PIL import Image import os import os.path -from typing import Any, Callable, cast, Dict, List, Optional, Tuple, Union +from typing import Callable, cast, Dict, List, Optional, Tuple, Union from pathlib import Path from collections import OrderedDict -from sklearn.model_selection import train_test_split + IMG_EXTENSIONS = ".npy" class Threshold_noise: diff --git a/image_ref/main.py b/image_ref/main.py index 8fadee97c46b88a953b88014e46d80721cd61777..d65d0c8e7f31d2259271177123f609a225b6461e 100644 --- a/image_ref/main.py +++ b/image_ref/main.py @@ -1,8 +1,3 @@ -#TODO REFAIRE UN DATASET https://discuss.pytorch.org/t/upload-a-customize-data-set-for-multi-regression-task/43413?u=ptrblck -"""1er methode load 1 image pour 1 ref -2eme methode : load 1 image et toutes les refs : ok pour l'instant mais a voir comment est ce que cela scale avec l'augmentation du nb de classes -3eme methods 2 datasets différents : plus efficace en stockage mais pas facil a maintenir""" - import matplotlib.pyplot as plt import numpy as np