Skip to content
Snippets Groups Projects
Commit 4cf3b6cc authored by Schneider Leo's avatar Schneider Leo
Browse files

image construction and label

parent c1807d54
No related branches found
No related tags found
No related merge requests found
File added
import pandas as pd
import re
def create_antibio_dataset(path='data/230804_strain_peptides_antibiogram_Enterobacterales.xlsx'):
    """Load the antibiogram workbook and attach the expected .wiff file names.

    The workbook at *path* is read with ``pandas.read_excel`` using the
    second spreadsheet row as the header (``header=1``). Only the columns
    listed in ``kept_columns`` are retained. Two columns are then added,
    ``path_ana`` and ``path_aer``, each derived from ``sample_name``.

    A sample name is split around its first run of digits, and the file name
    is ``<text before digits>-<digits>-<ANA|AER>_100vW_100SPD.wiff``.

    Returns:
        The reduced DataFrame with the two extra path columns.

    Raises:
        KeyError: if one of the expected columns is missing from the sheet.
        IndexError: if a sample name contains no digit.
    """
    kept_columns = [
        'sample_name', 'species', 'AMC (disk)', 'AMK (disk)', 'AMK (mic)', 'AMK (vitek)', 'AMP (vitek)', 'AMX (disk)',
        'AMX (vitek)', 'ATM (disk)', 'ATM (vitek)', 'CAZ (disk)', 'CAZ (mic)', 'CAZ (vitek)', 'CHL (vitek)', 'CIP (disk)',
        'CIP (vitek)', 'COL (disk)', 'COL (mic)', 'CRO (mic)', 'CRO (vitek)', 'CTX (disk)', 'CTX (mic)', 'CTX (vitek)',
        'CXM (vitek)', 'CZA (disk)', 'CZA (vitek)', 'CZT (disk)', 'CZT (vitek)', 'ETP (disk)', 'ETP (mic)', 'ETP (vitek)',
        'FEP (disk)', 'FEP (mic)', 'FEP (vitek)', 'FOS (disk)', 'FOX (disk)', 'FOX (vitek)', 'GEN (disk)', 'GEN (mic)',
        'GEN (vitek)', 'IPM (disk)', 'IPM (mic)', 'IPM (vitek)', 'LTM (disk)', 'LVX (disk)', 'LVX (vitek)', 'MEC (disk)',
        'MEM (disk)', 'MEM (mic)', 'MEM (vitek)', 'NAL (vitek)', 'NET (disk)', 'OFX (vitek)', 'PIP (vitek)', 'PRL (disk)',
        'SXT (disk)', 'SXT (vitek)', 'TCC (disk)', 'TCC (vitek)', 'TEM (disk)', 'TEM (vitek)', 'TGC (disk)', 'TGC (vitek)',
        'TIC (disk)', 'TIC (vitek)', 'TOB (disk)', 'TOB (vitek)', 'TZP (disk)', 'TZP (mic)', 'TZP (vitek)',
    ]
    table = pd.read_excel(path, header=1)
    table = table[kept_columns]

    def wiff_name(sample, analyse):
        # The capturing group keeps the digit run in the split result:
        # pieces[0] is the text before the first number, pieces[1] the number.
        pieces = re.split(r'(\d+)', sample)
        prefix, number = pieces[0], pieces[1]
        return '{}-{}-{}_100vW_100SPD.wiff'.format(prefix, number, analyse)

    # Same column order as before: 'path_ana' first, then 'path_aer'.
    for column, analyse in (('path_ana', 'ANA'), ('path_aer', 'AER')):
        table[column] = table['sample_name'].map(
            lambda sample, tag=analyse: wiff_name(sample, tag)
        )
    return table
def cut_fname(s):
    """Return the part of *s* after its last ``'/'``.

    When *s* contains no ``'/'`` the whole string is returned; when it ends
    with ``'/'`` the result is the empty string.
    """
    _head, _sep, tail = s.rpartition('/')
    return tail
def is_file_present(path, df):
    """Tell whether *path* occurs in ``df['path_ana']`` or ``df['path_aer']``.

    The columns are checked in that order and the search stops at the first
    column that contains *path*.
    """
    return any(path in df[column].values for column in ('path_ana', 'path_aer'))
def is_fname_present(path, df):
    """Tell whether *path* is one of the values of the ``'fname'`` column of *df*."""
    known_names = df['fname'].values
    return path in known_names
\ No newline at end of file
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.colors as colors
def plot_spectra_2d(exp, ms_level=1, marker_size=5, out_path='temp.png'):
    """Save a retention-time vs. m/z scatter plot of the spectra in *exp*.

    Every spectrum whose ``getMSLevel()`` equals *ms_level* contributes one
    point per peak: x is the spectrum's ``getRT()``, y the peak m/z, and the
    colour encodes the peak intensity on a logarithmic scale.

    Args:
        exp: iterable experiment exposing ``updateRanges()``,
            ``getMinIntensity()`` and ``getMaxIntensity()``; its items expose
            ``getMSLevel()``, ``getRT()`` and ``get_peaks()``
            (presumably a pyopenms ``MSExperiment`` -- confirm with callers).
        ms_level: MS level of the spectra to draw.
        marker_size: marker size passed to ``scatter`` as ``s``.
        out_path: destination handed to ``savefig``.

    The plot is drawn on a figure created by this call and closed afterwards,
    so repeated calls neither pile points and colorbars onto the same pyplot
    figure nor leave figures open. If no spectrum matches *ms_level*, an
    empty, labelled plot without a colorbar is written.
    """
    exp.updateRanges()

    rt_parts, mz_parts, intensity_parts = [], [], []
    for spec in exp:
        if spec.getMSLevel() != ms_level:
            continue
        mz, intensity = spec.get_peaks()
        order = intensity.argsort()  # sort by intensity to plot highest on top
        rt_parts.append(np.full(mz.shape[0], spec.getRT(), float))
        mz_parts.append(mz[order])
        intensity_parts.append(intensity[order])

    fig, ax = plt.subplots()
    try:
        if rt_parts:
            # One scatter call for all spectra is much faster than one call
            # per spectrum and draws the points in the same order.
            points = ax.scatter(
                np.concatenate(rt_parts),
                np.concatenate(mz_parts),
                c=np.concatenate(intensity_parts),
                cmap="afmhot_r",
                s=marker_size,
                # "+ 1" presumably keeps the lower bound of the log scale
                # strictly positive when the minimum intensity is 0.
                norm=colors.LogNorm(
                    exp.getMinIntensity() + 1, exp.getMaxIntensity()
                ),
            )
            fig.colorbar(points, ax=ax)
        ax.set_xlabel("time (s)")
        ax.set_ylabel("m/z")
        fig.savefig(out_path)  # slow for larger data sets
    finally:
        # Release the figure even if drawing or saving fails.
        plt.close(fig)
def _native_id_cycle(spectrum):
    """Return the integer ``cycle`` field parsed from a spectrum's native ID."""
    fields = dict(item.split('=', 1) for item in spectrum.getNativeID().split())
    return int(fields['cycle'])


def build_image_ms1(e, bin_mz):
    """Bin the MS1 spectra of *e* into a (cycle x m/z bin) intensity image.

    The number of rows is the ``cycle`` value found in the native ID of the
    last spectrum of ``e.getSpectra()``. The m/z axis spans the first scan
    window of the first MS1 spectrum and is divided into
    ``int(window_width // bin_mz)`` bins of equal width. Each MS1 peak adds
    its intensity to the cell selected by its spectrum's cycle (1-based in
    the native ID) and its m/z bin.

    Peaks outside the scan window are ignored, and a peak lying exactly on
    the upper edge is counted in the last bin. Previously a peak below the
    window wrapped around to the end of the row through negative indexing,
    and a peak on the upper edge raised ``IndexError``.

    Args:
        e: iterable experiment exposing ``updateRanges()`` and
            ``getSpectra()``; its items expose ``getMSLevel()``,
            ``getNativeID()``, ``getInstrumentSettings()`` and ``get_peaks()``
            (presumably a pyopenms ``MSExperiment`` -- confirm with callers).
        bin_mz: requested bin width along the m/z axis.

    Returns:
        ``numpy.ndarray`` of shape ``(max_cycle, n_bins)`` holding summed
        intensities; cycles without an MS1 spectrum stay at zero.

    Raises:
        ValueError: if *e* contains no MS1 spectrum.
    """
    e.updateRanges()
    max_cycle = _native_id_cycle(e.getSpectra()[-1])

    first_ms1 = next((s for s in e if s.getMSLevel() == 1), None)
    if first_ms1 is None:
        raise ValueError("experiment contains no MS1 spectrum")
    window = first_ms1.getInstrumentSettings().getScanWindows()[0]
    ms1_start_mz = window.begin
    ms1_end_mz = window.end

    total_ms1_mz = ms1_end_mz - ms1_start_mz
    n_bin_ms1 = int(total_ms1_mz // bin_mz)
    # Actual bin width: the window is split evenly into n_bin_ms1 bins.
    size_bin_ms1 = total_ms1_mz / n_bin_ms1

    im = np.zeros([max_cycle, n_bin_ms1])
    for spec in e:
        if spec.getMSLevel() != 1:
            continue
        mz, intensity = spec.get_peaks()
        in_window = (mz >= ms1_start_mz) & (mz <= ms1_end_mz)
        bins = ((mz[in_window] - ms1_start_mz) // size_bin_ms1).astype(np.intp)
        # A peak exactly on the upper edge maps to n_bin_ms1; fold it into
        # the last bin instead of indexing past the end of the row.
        np.minimum(bins, n_bin_ms1 - 1, out=bins)
        im[_native_id_cycle(spec) - 1] += np.bincount(
            bins, weights=intensity[in_window], minlength=n_bin_ms1
        )
    return im
numpy~=2.2.3
matplotlib~=3.10.0
pandas~=2.2.3
pyopenms~=3.3.0
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment