diff --git a/image_processing/build_dataset.py b/image_processing/build_dataset.py index 3e6155f841b4419831db3aff898885ea878ac76f..1c1b6584f4a7eb14477b6f3af77bf3fc1c5a27a7 100644 --- a/image_processing/build_dataset.py +++ b/image_processing/build_dataset.py @@ -1,3 +1,4 @@ +import argparse import glob import os import pandas as pd @@ -154,12 +155,13 @@ def create_antibio_dataset(path='../data/label_raw/230804_strain_peptides_antibi return df -def create_dataset(): +def create_dataset(bin_mz=1,tolerance=0.005,noise=1000,apex='apex',suffix='-d200'): """ Create images from raw .mzML files and sort it in their corresponding class directory :return: None """ - label = create_antibio_dataset(suffix='-d200') + label = create_antibio_dataset(suffix=suffix) + tolerance_str=str(tolerance)[2::] for path in glob.glob("../data/raw_data/**.wiff"): print(path) species = None @@ -173,52 +175,44 @@ def create_dataset(): name = label[label['path_aer'] == path.split("/")[-1]]['sample_name'].values[0] analyse = 'AER' if species is not None: #save image in species specific dir - directory_path_png = '../data/processed_data_wiff_clean_005_10000_apex/png_image/{}'.format(species) - directory_path_npy = '../data/processed_data_wiff_clean_005_10000_apex/npy_image/{}'.format(species) + directory_path_png = '../data/processed_data_wiff_clean_{}_{}_{}_{}/png_image/{}'.format(tolerance_str,noise,apex,bin_mz,species) + directory_path_npy = '../data/processed_data_wiff_clean_{}_{}_{}_{}/npy_image/{}'.format(tolerance_str,noise,apex,bin_mz,species) if not os.path.isdir(directory_path_png): os.makedirs(directory_path_png) if not os.path.isdir(directory_path_npy): os.makedirs(directory_path_npy) if not os.path.isfile(directory_path_png + "/" + name + '_' + analyse + '.png'): - try : - mat = build_image_ms1_wiff_charge_filtered_apex_only(path, bin_mz=1,tolerance=0.005,noise=10000) - mpimg.imsave(directory_path_png + "/" + name + '_' + analyse + '.png', mat) - np.save(directory_path_npy + "/" + name 
+ '_' + analyse + '.npy', mat) - print('image create') - except: - print(name +' couldnt be laoded') - - #reiterate for other kind of raw file - label = create_antibio_dataset(suffix='_100vW_100SPD') - for path in glob.glob("../data/raw_data/**.wiff"): - print(path) - species = None - if path.split("/")[-1] in label['path_ana'].values: - species = label[label['path_ana'] == path.split("/")[-1]]['species'].values[0] - name = label[label['path_ana'] == path.split("/")[-1]]['sample_name'].values[0] - analyse = 'ANA' - elif path.split("/")[-1] in label['path_aer'].values: - species = label[label['path_aer'] == path.split("/")[-1]]['species'].values[0] - name = label[label['path_aer'] == path.split("/")[-1]]['sample_name'].values[0] - analyse = 'AER' - if species is not None: - directory_path_png = '../data/processed_data_wiff_clean_005_10000_apex/png_image/{}'.format(species) - directory_path_npy = '../data/processed_data_wiff_clean_005_10000_apex/npy_image/{}'.format(species) - if not os.path.isdir(directory_path_png): - os.makedirs(directory_path_png) - if not os.path.isdir(directory_path_npy): - os.makedirs(directory_path_npy) - if not os.path.isfile(directory_path_png + "/" + name + '_' + analyse + '.png'): - try : - mat = build_image_ms1_wiff_charge_filtered_apex_only(path, bin_mz=1,tolerance=0.005,noise=10000) - mpimg.imsave(directory_path_png + "/" + name + '_' + analyse + '.png', mat) - np.save(directory_path_npy + "/" + name + '_' + analyse + '.npy', mat) - print('image create') - except: - print(name +' couldnt be laoded') - + if apex == 'apex': + try : + mat = build_image_ms1_wiff_charge_filtered_apex_only(path, bin_mz=bin_mz,tolerance=tolerance,noise=noise) + mpimg.imsave(directory_path_png + "/" + name + '_' + analyse + '.png', mat) + np.save(directory_path_npy + "/" + name + '_' + analyse + '.npy', mat) + print('image create') + except: + print(name +' couldnt be loaded') + else : + try : + mat = build_image_ms1_wiff_charge_filtered(path, 
bin_mz=bin_mz,tolerance=tolerance,noise=noise) + mpimg.imsave(directory_path_png + "/" + name + '_' + analyse + '.png', mat) + np.save(directory_path_npy + "/" + name + '_' + analyse + '.npy', mat) + print('image create') + except: + print(name +' couldnt be loaded') + +def load_args_dataset(): + parser = argparse.ArgumentParser() + + parser.add_argument('--noise', type=float, default=1000.) + parser.add_argument('--tolerance', type=float, default=0.005) + parser.add_argument('--mz_bin', type=float, default=1.) + parser.add_argument('--apex', type=str, default='apex') + args = parser.parse_args() + + return args if __name__ =='__main__' : - create_dataset() + args = load_args_dataset() + create_dataset(noise=args.noise, tolerance=args.tolerance, bin_mz=args.mz_bin, apex=args.apex, suffix='-d200') + create_dataset(noise=args.noise, tolerance=args.tolerance, bin_mz=args.mz_bin, apex=args.apex, suffix='_100vW_100SPD') #KLEAER-12-AER-d200.wiff problème \ No newline at end of file diff --git a/image_processing/image_comparison.py b/image_processing/image_comparison.py new file mode 100644 index 0000000000000000000000000000000000000000..dc8dfb65ec726bb226723eaf4d68cfb51bbe1f78 --- /dev/null +++ b/image_processing/image_comparison.py @@ -0,0 +1,42 @@ +import numpy as np +import matplotlib.pyplot as plt +import matplotlib as mpl +import matplotlib.colors + +def compare_image(img_path,ref_path): + img = np.load(img_path) + img = np.log(img+1) + img = img/img.max() + + ref = np.load(ref_path) + + am1 = np.ones_like(img) # Find data to colour special + am1 = np.ma.masked_where(ref==0, am1) # Mask the data we are not colouring + + + # Colourmaps for each special colour to place. The left-hand colour (black) is + # not used because all black pixels are masked. 
The right-hand colour (red or + # green) is used because it represents the highest z-value of the mask matrices + cm1 = mpl.colors.ListedColormap(['black', 'green']) + cm2 = mpl.colors.ListedColormap(['black', 'red']) + + fig = plt.figure() # Make a new figure + ax = fig.add_subplot(111) # Add subplot to that figure, get ax + + # Plot the original data. We'll overlay the specially-coloured data + ax.imshow(img, aspect='auto', cmap='inferno', vmin=0, vmax=1,interpolation='nearest') + + # Plot the first mask. Pixels where `ref == 0` are masked, so they + # do not show up. The values that do show up are coloured using the `cm1` colour + # map. Since the range is constrained to `vmin=0, vmax=1` and every unmasked + # value of `am1` is 1, the top value of `cm1` is applied to all such + # pixels, thereby colouring them green. + ax.imshow(am1, aspect='auto', cmap=cm1, vmin=0, vmax=1,interpolation='nearest',alpha=0.4); + + plt.savefig('test.png') + +if __name__ == '__main__': + # img_path = '../data/processed_data_wiff_clean_005_10000_apex/npy_image/all data/Citrobacter portucalensis/CITPOR1_ANA.npy' + img_path = '../data/processed_data_wiff/npy_image/all data/Citrobacter portucalensis/CITPOR1_ANA.npy' + ref_path = '../image_ref/img_ref_aligned/Citrobacter portucalensis.npy' + compare_image(img_path,ref_path) \ No newline at end of file