diff --git a/image_processing/build_dataset.py b/image_processing/build_dataset.py index e2e0e8ad9d2d273f09825c014be7268aad2cc34b..1fa21bd2f65571c5ecfd8becc53aeee4142d36b1 100644 --- a/image_processing/build_dataset.py +++ b/image_processing/build_dataset.py @@ -6,6 +6,7 @@ import numpy as np import matplotlib.image as mpimg from build_image import build_image_ms1 +from image_processing.build_image import build_image_ms1_wiff """ find . -name '*.mzML' -exec cp -prv '{}' '/home/leo/PycharmProjects/pseudo_image/data/raw_data' ';' @@ -98,6 +99,7 @@ antibiotic_enterrobacter_breakpoints = { def create_antibio_dataset(path='../data/label_raw/230804_strain_peptides_antibiogram_Enterobacterales.xlsx',suffix='-d200'): """ Extract and build file name corresponding to each sample and transform antioresistance measurements to labels + :param suffix: file suffix :param path: excel path :return: dataframe """ @@ -144,7 +146,7 @@ def create_antibio_dataset(path='../data/label_raw/230804_strain_peptides_antibi l = split_before_number(s) species = l[0] nb = l[1] - return '{}-{}-{}{}.mzML'.format(species,nb,analyse,suffix) + return '{}-{}-{}{}.wiff'.format(species,nb,analyse,suffix) df['path_ana'] = df['sample_name'].map(lambda x: create_fname(x,analyse='ANA')) df['path_aer'] = df['sample_name'].map(lambda x: create_fname(x, analyse='AER')) @@ -158,7 +160,7 @@ def create_dataset(): :return: None """ label = create_antibio_dataset(suffix='-d200') - for path in glob.glob("../data/raw_data/**.mzML"): + for path in glob.glob("../data/raw_data/**.wiff"): print(path) species = None #check if file exists in the label table @@ -171,19 +173,21 @@ def create_dataset(): name = label[label['path_aer'] == path.split("/")[-1]]['sample_name'].values[0] analyse = 'AER' if species is not None: #save image in species specific dir - directory_path_png = '../data/processed_data/png_image/{}'.format(species) - directory_path_npy = '../data/processed_data/npy_image/{}'.format(species) + directory_path_png = '../data/processed_data_wiff/png_image/{}'.format(species) + directory_path_npy = '../data/processed_data_wiff/npy_image/{}'.format(species) if not os.path.isdir(directory_path_png): os.makedirs(directory_path_png) if not os.path.isdir(directory_path_npy): os.makedirs(directory_path_npy) - mat = build_image_ms1(path, 1) - mpimg.imsave(directory_path_png + "/" + name + '_' + analyse + '.png', mat) - np.save(directory_path_npy + "/" + name + '_' + analyse + '.npy', mat) + if not os.path.isfile(directory_path_png + "/" + name + '_' + analyse + '.png'): + mat = build_image_ms1_wiff(path, 1) + mpimg.imsave(directory_path_png + "/" + name + '_' + analyse + '.png', mat) + np.save(directory_path_npy + "/" + name + '_' + analyse + '.npy', mat) + print('image create') #reiterate for other kind of raw file label = create_antibio_dataset(suffix='_100vW_100SPD') - for path in glob.glob("../data/raw_data/**.mzML"): + for path in glob.glob("../data/raw_data/**.wiff"): print(path) species = None if path.split("/")[-1] in label['path_ana'].values: @@ -195,16 +199,18 @@ def create_dataset(): name = label[label['path_aer'] == path.split("/")[-1]]['sample_name'].values[0] analyse = 'AER' if species is not None: - directory_path_png = '../data/processed_data/png_image/{}'.format(species) - directory_path_npy = '../data/processed_data/npy_image/{}'.format(species) + directory_path_png = '../data/processed_data_wiff/png_image/{}'.format(species) + directory_path_npy = '../data/processed_data_wiff/npy_image/{}'.format(species) if not os.path.isdir(directory_path_png): os.makedirs(directory_path_png) if not os.path.isdir(directory_path_npy): os.makedirs(directory_path_npy) - mat = build_image_ms1(path, 1) - mpimg.imsave(directory_path_png + "/" + name + '_' + analyse + '.png', mat) - np.save(directory_path_npy + "/" + name + '_' + analyse + '.npy', mat) + if not os.path.isfile(directory_path_png + "/" + name + '_' + analyse + '.png'): + mat = build_image_ms1_wiff(path, 1) + mpimg.imsave(directory_path_png + "/" + name + '_' + analyse + '.png', mat) + np.save(directory_path_npy + "/" + name + '_' + analyse + '.npy', mat) + print('image create') if __name__ =='__main__' : - df = create_antibio_dataset() \ No newline at end of file + create_dataset() \ No newline at end of file diff --git a/image_processing/build_image.py b/image_processing/build_image.py index f390b40eef5d0ecd1d105aa6d76b79878b8e6766..7c4934c70d489e0bd92b87d7e5d63928f20ea594 100644 --- a/image_processing/build_image.py +++ b/image_processing/build_image.py @@ -2,29 +2,38 @@ import numpy as np import matplotlib.pyplot as plt import matplotlib.colors as colors import pyopenms as oms +from pyRawMSDataReader.pyRawMSDataReader.WiffFileReader_py import WiffFileReader -def plot_spectra_2d(exp, ms_level=1, marker_size=5, out_path='temp.png'): - exp.updateRanges() - for spec in exp: - if spec.getMSLevel() == ms_level: - mz, intensity = spec.get_peaks() - p = intensity.argsort() # sort by intensity to plot highest on top - rt = np.full([mz.shape[0]], spec.getRT(), float) - plt.scatter( - rt, - mz[p], - c=intensity[p], - cmap="afmhot_r", - s=marker_size, - norm=colors.LogNorm( - exp.getMinIntensity() + 1, exp.getMaxIntensity() - ), - ) - plt.clim(exp.getMinIntensity() + 1, exp.getMaxIntensity()) - plt.xlabel("time (s)") - plt.ylabel("m/z") - plt.colorbar() - plt.savefig(out_path) # slow for larger data sets + +def build_image_ms1_wiff(path, bin_mz): + #load raw data + rawFile = WiffFileReader(path) + max_cycle=0 + for scanNumber in range (rawFile.GetLastSpectrumNumber()): + if rawFile.GetMSOrderForScanNum(scanNumber) == 1 : + ms1_start_mz = rawFile.source.ScanInfos[scanNumber].LowMz + ms1_end_mz = rawFile.source.ScanInfos[scanNumber].HighMz + max_cycle+=1 + + # print('start', ms1_start_mz, 'end', ms1_end_mz) + total_ms1_mz = ms1_end_mz - ms1_start_mz + + n_bin_ms1 = int(total_ms1_mz // bin_mz) + size_bin_ms1 = total_ms1_mz / n_bin_ms1 + im = np.zeros([max_cycle, n_bin_ms1]) + + cycle = 0 + for scanNumber in range(rawFile.GetLastSpectrumNumber()): + if rawFile.GetMSOrderForScanNum(scanNumber) == 1: + masses, intensities = rawFile.GetCentroidMassListFromScanNum(scanNumber) + line = np.zeros(n_bin_ms1) + if len(masses) > 0: + for k in range(len(masses)): + line[int((masses[k] - ms1_start_mz) // size_bin_ms1)] += intensities[k] + im[cycle, :] = line + cycle += 1 + + return im def build_image_ms1(path, bin_mz): diff --git a/image_ref/dataset_ref.py b/image_ref/dataset_ref.py index 3472d627e5a61a4c7d5f2a7f0768486c80081375..f2bd8fcca265441697a533676454d0c2fb071592 100644 --- a/image_ref/dataset_ref.py +++ b/image_ref/dataset_ref.py @@ -170,9 +170,11 @@ def load_data_duo(base_dir_train, base_dir_test, batch_size, shuffle=True, noise ref_transform = transforms.Compose( [transforms.Resize((224, 224)), - Threshold_noise(noise_threshold), + Threshold_noise(0), Log_normalisation(), - transforms.Normalize(0.5, 0.5)]) + transforms.Normalize(0.5, 0.5) + ]) + print('Default val transform') train_dataset = ImageFolderDuo(root=base_dir_train, transform=train_transform, ref_dir = ref_dir, positive_prop=positive_prop, ref_transform=ref_transform) diff --git a/image_ref/grad_cam.py b/image_ref/grad_cam.py index 091753e669109e76ef24628d48a0d7ab0f600ac6..139bcd234d064d44ec834977b553b751176588c1 100644 --- a/image_ref/grad_cam.py +++ b/image_ref/grad_cam.py @@ -112,4 +112,24 @@ def compute_class_activation_map(): return heatmap if __name__ =='__main__': - compute_class_activation_map() \ No newline at end of file + # compute_class_activation_map() + + transform = transforms.Compose( + [transforms.Resize((224, 224)), + Threshold_noise(500), + Log_normalisation(), + transforms.Normalize(0.5, 0.5)]) + + ref_transform = transforms.Compose( + [transforms.Resize((224, 224)), + Threshold_noise(0), + Log_normalisation(), + transforms.Normalize(0.5, 0.5) + ]) + + path_ref = '../image_ref/img_ref/Enterobacter hormaechei.npy' # negative + tensor_ref = npy_loader(path_ref) + + ref_base = tensor_ref.squeeze() + ref_false = transform(tensor_ref).squeeze() + ref_true = ref_transform(tensor_ref).squeeze() \ No newline at end of file