Skip to content
Snippets Groups Projects
Commit 27124f64 authored by Schneider Leo's avatar Schneider Leo
Browse files

add : create image from .wiff file

fix : E.COLI sample name unification
parent 2845c788
No related branches found
No related tags found
No related merge requests found
......@@ -6,6 +6,7 @@ import numpy as np
import matplotlib.image as mpimg
from build_image import build_image_ms1
from image_processing.build_image import build_image_ms1_wiff
"""
find . -name '*.mzML' -exec cp -prv '{}' '/home/leo/PycharmProjects/pseudo_image/data/raw_data' ';'
......@@ -98,6 +99,7 @@ antibiotic_enterrobacter_breakpoints = {
def create_antibio_dataset(path='../data/label_raw/230804_strain_peptides_antibiogram_Enterobacterales.xlsx',suffix='-d200'):
"""
Extract and build the file name corresponding to each sample and transform antibiotic resistance measurements into labels
:param suffix: file suffix
:param path: excel path
:return: dataframe
"""
......@@ -144,7 +146,7 @@ def create_antibio_dataset(path='../data/label_raw/230804_strain_peptides_antibi
l = split_before_number(s)
species = l[0]
nb = l[1]
return '{}-{}-{}{}.mzML'.format(species,nb,analyse,suffix)
return '{}-{}-{}{}.wiff'.format(species,nb,analyse,suffix)
df['path_ana'] = df['sample_name'].map(lambda x: create_fname(x,analyse='ANA'))
df['path_aer'] = df['sample_name'].map(lambda x: create_fname(x, analyse='AER'))
......@@ -158,7 +160,7 @@ def create_dataset():
:return: None
"""
label = create_antibio_dataset(suffix='-d200')
for path in glob.glob("../data/raw_data/**.mzML"):
for path in glob.glob("../data/raw_data/**.wiff"):
print(path)
species = None
#check if file exists in the label table
......@@ -171,19 +173,21 @@ def create_dataset():
name = label[label['path_aer'] == path.split("/")[-1]]['sample_name'].values[0]
analyse = 'AER'
if species is not None: #save image in species specific dir
directory_path_png = '../data/processed_data/png_image/{}'.format(species)
directory_path_npy = '../data/processed_data/npy_image/{}'.format(species)
directory_path_png = '../data/processed_data_wiff/png_image/{}'.format(species)
directory_path_npy = '../data/processed_data_wiff/npy_image/{}'.format(species)
if not os.path.isdir(directory_path_png):
os.makedirs(directory_path_png)
if not os.path.isdir(directory_path_npy):
os.makedirs(directory_path_npy)
mat = build_image_ms1(path, 1)
mpimg.imsave(directory_path_png + "/" + name + '_' + analyse + '.png', mat)
np.save(directory_path_npy + "/" + name + '_' + analyse + '.npy', mat)
if not os.path.isfile(directory_path_png + "/" + name + '_' + analyse + '.png'):
mat = build_image_ms1_wiff(path, 1)
mpimg.imsave(directory_path_png + "/" + name + '_' + analyse + '.png', mat)
np.save(directory_path_npy + "/" + name + '_' + analyse + '.npy', mat)
print('image create')
#reiterate for other kind of raw file
label = create_antibio_dataset(suffix='_100vW_100SPD')
for path in glob.glob("../data/raw_data/**.mzML"):
for path in glob.glob("../data/raw_data/**.wiff"):
print(path)
species = None
if path.split("/")[-1] in label['path_ana'].values:
......@@ -195,16 +199,18 @@ def create_dataset():
name = label[label['path_aer'] == path.split("/")[-1]]['sample_name'].values[0]
analyse = 'AER'
if species is not None:
directory_path_png = '../data/processed_data/png_image/{}'.format(species)
directory_path_npy = '../data/processed_data/npy_image/{}'.format(species)
directory_path_png = '../data/processed_data_wiff/png_image/{}'.format(species)
directory_path_npy = '../data/processed_data_wiff/npy_image/{}'.format(species)
if not os.path.isdir(directory_path_png):
os.makedirs(directory_path_png)
if not os.path.isdir(directory_path_npy):
os.makedirs(directory_path_npy)
mat = build_image_ms1(path, 1)
mpimg.imsave(directory_path_png + "/" + name + '_' + analyse + '.png', mat)
np.save(directory_path_npy + "/" + name + '_' + analyse + '.npy', mat)
if not os.path.isfile(directory_path_png + "/" + name + '_' + analyse + '.png'):
mat = build_image_ms1_wiff(path, 1)
mpimg.imsave(directory_path_png + "/" + name + '_' + analyse + '.png', mat)
np.save(directory_path_npy + "/" + name + '_' + analyse + '.npy', mat)
print('image create')
if __name__ =='__main__' :
df = create_antibio_dataset()
\ No newline at end of file
create_dataset()
\ No newline at end of file
......@@ -2,29 +2,38 @@ import numpy as np
import matplotlib.pyplot as plt
import matplotlib.colors as colors
import pyopenms as oms
from pyRawMSDataReader.pyRawMSDataReader.WiffFileReader_py import WiffFileReader
def plot_spectra_2d(exp, ms_level=1, marker_size=5, out_path='temp.png'):
    """Scatter-plot the peaks of the spectra at ``ms_level`` and save the figure.

    Every matching spectrum is drawn as a column of points located at its
    retention time, one point per peak, coloured by intensity on a
    logarithmic scale.

    :param exp: iterable experiment object providing ``updateRanges``,
        ``getMinIntensity`` and ``getMaxIntensity``
    :param ms_level: only spectra whose ``getMSLevel()`` equals this are drawn
    :param marker_size: marker size forwarded to ``plt.scatter``
    :param out_path: destination passed to ``plt.savefig``
    :return: None
    """
    exp.updateRanges()

    for spectrum in exp:
        if spectrum.getMSLevel() != ms_level:
            continue

        mz, intensity = spectrum.get_peaks()
        # Draw in ascending intensity order so the strongest peaks end up on top.
        order = intensity.argsort()
        retention_time = np.full([mz.shape[0]], spectrum.getRT(), float)
        log_norm = colors.LogNorm(
            exp.getMinIntensity() + 1, exp.getMaxIntensity()
        )
        plt.scatter(
            retention_time,
            mz[order],
            c=intensity[order],
            cmap="afmhot_r",
            s=marker_size,
            norm=log_norm,
        )

    plt.clim(exp.getMinIntensity() + 1, exp.getMaxIntensity())
    plt.xlabel("time (s)")
    plt.ylabel("m/z")
    plt.colorbar()
    plt.savefig(out_path)  # slow for larger data sets
def build_image_ms1_wiff(path, bin_mz):
    """Build a 2D MS1 intensity image from a .wiff raw file.

    Each MS1 scan becomes one row of the image and each column is an m/z bin;
    every centroid peak adds its intensity to the bin its mass falls into.
    Peaks whose mass lies outside the scanned m/z window are ignored instead
    of raising ``IndexError`` or wrapping around into bins at the other end.

    :param path: path of the .wiff file, handed to ``WiffFileReader``
    :param bin_mz: requested m/z bin width; the effective width is adjusted so
        that a whole number of bins spans the scanned m/z window
    :return: array of shape (number of MS1 scans, number of m/z bins)
    :raises ValueError: if ``bin_mz`` is not positive, the file holds no MS1
        scan, or the m/z window is narrower than one bin
    """
    if bin_mz <= 0:
        raise ValueError('bin_mz must be positive, got {}'.format(bin_mz))

    # load raw data
    raw_file = WiffFileReader(path)

    # NOTE(review): scans are enumerated as 0 .. GetLastSpectrumNumber() - 1,
    # exactly as before; confirm against the reader whether the last scan
    # number is meant to be inclusive.
    ms1_scans = [
        scan_number
        for scan_number in range(raw_file.GetLastSpectrumNumber())
        if raw_file.GetMSOrderForScanNum(scan_number) == 1
    ]
    if not ms1_scans:
        raise ValueError('no MS1 scan found in {}'.format(path))

    # The m/z window is read from the last MS1 scan, which is what the
    # previous implementation ended up with after overwriting it per scan.
    scan_info = raw_file.source.ScanInfos[ms1_scans[-1]]
    ms1_start_mz = scan_info.LowMz
    ms1_end_mz = scan_info.HighMz

    total_ms1_mz = ms1_end_mz - ms1_start_mz
    n_bin_ms1 = int(total_ms1_mz // bin_mz)
    if n_bin_ms1 < 1:
        raise ValueError(
            'm/z range [{}, {}] is narrower than bin_mz={}'.format(
                ms1_start_mz, ms1_end_mz, bin_mz
            )
        )
    size_bin_ms1 = total_ms1_mz / n_bin_ms1

    im = np.zeros([len(ms1_scans), n_bin_ms1])
    for cycle, scan_number in enumerate(ms1_scans):
        masses, intensities = raw_file.GetCentroidMassListFromScanNum(scan_number)
        masses = np.asarray(masses, dtype=float)
        intensities = np.asarray(intensities, dtype=float)
        if masses.size == 0:
            continue  # empty scan: the row stays at zero

        in_range = (masses >= ms1_start_mz) & (masses <= ms1_end_mz)
        bins = ((masses[in_range] - ms1_start_mz) // size_bin_ms1).astype(np.intp)
        # A peak sitting exactly on the upper edge belongs to the last bin.
        np.minimum(bins, n_bin_ms1 - 1, out=bins)
        im[cycle, :] = np.bincount(
            bins, weights=intensities[in_range], minlength=n_bin_ms1
        )
    return im
def build_image_ms1(path, bin_mz):
......
......@@ -170,9 +170,11 @@ def load_data_duo(base_dir_train, base_dir_test, batch_size, shuffle=True, noise
ref_transform = transforms.Compose(
[transforms.Resize((224, 224)),
Threshold_noise(noise_threshold),
Threshold_noise(0),
Log_normalisation(),
transforms.Normalize(0.5, 0.5)])
transforms.Normalize(0.5, 0.5)
])
print('Default val transform')
train_dataset = ImageFolderDuo(root=base_dir_train, transform=train_transform, ref_dir = ref_dir, positive_prop=positive_prop, ref_transform=ref_transform)
......
......@@ -112,4 +112,24 @@ def compute_class_activation_map():
return heatmap
if __name__ =='__main__':
compute_class_activation_map()
\ No newline at end of file
# compute_class_activation_map()
transform = transforms.Compose(
[transforms.Resize((224, 224)),
Threshold_noise(500),
Log_normalisation(),
transforms.Normalize(0.5, 0.5)])
ref_transform = transforms.Compose(
[transforms.Resize((224, 224)),
Threshold_noise(0),
Log_normalisation(),
transforms.Normalize(0.5, 0.5)
])
path_ref = '../image_ref/img_ref/Enterobacter hormaechei.npy' # negative
tensor_ref = npy_loader(path_ref)
ref_base = tensor_ref.squeeze()
ref_false = transform(tensor_ref).squeeze()
ref_true = ref_transform(tensor_ref).squeeze()
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment