Skip to content
Snippets Groups Projects
Commit 58509e21 authored by Schneider Leo's avatar Schneider Leo
Browse files

datasets

parent 2cb6249b
No related branches found
No related tags found
No related merge requests found
......@@ -4,6 +4,7 @@ import numpy as np
import matplotlib.pyplot as plt
import matplotlib.colors as colors
from PIL import Image
import re
def plot_spectra_2d(exp, ms_level=1, marker_size=5):
exp.updateRanges()
......@@ -282,7 +283,34 @@ def check_energy(im):
def create_antibio_dataset(path='data/230804_strain_peptides_antibiogram_Enterobacterales.xlsx'):
    """Load the antibiogram spreadsheet and attach raw-file base names.

    The sheet is read with ``header=1`` (the second row holds the column
    names), reduced to ``sample_name``, ``species`` and the 69 antibiotic
    measurement columns, and extended with two derived columns:

    * ``path_ana`` -- ``'<species>-<nb>-ANA_100vW_100SPD'``
    * ``path_aer`` -- ``'<species>-<nb>-AER_100vW_100SPD'``

    where ``<species>`` is the part of ``sample_name`` before its first run
    of digits and ``<nb>`` is that run of digits
    (e.g. ``'ESCCOL284'`` -> ``'ESCCOL-284-AER_100vW_100SPD'``).

    Args:
        path: Location of the Excel file to read.

    Returns:
        A new ``pandas.DataFrame`` holding the selected columns plus
        ``path_ana`` and ``path_aer``.

    Raises:
        KeyError: If one of the expected columns is missing from the sheet.
        IndexError: If a ``sample_name`` contains no digits.
    """
    df = pd.read_excel(path, header=1)
    # sample_name: link to the raw acquisition file.
    # The explicit .copy() makes the selection an independent frame, so the
    # column assignments below never go through a chained-assignment path
    # (no SettingWithCopyWarning, no ambiguity about what gets modified).
    df = df[['sample_name', 'species', 'AMC (disk)', 'AMK (disk)', 'AMK (mic)', 'AMK (vitek)', 'AMP (vitek)', 'AMX (disk)',
             'AMX (vitek)', 'ATM (disk)', 'ATM (vitek)', 'CAZ (disk)', 'CAZ (mic)', 'CAZ (vitek)', 'CHL (vitek)', 'CIP (disk)',
             'CIP (vitek)', 'COL (disk)', 'COL (mic)', 'CRO (mic)', 'CRO (vitek)', 'CTX (disk)', 'CTX (mic)', 'CTX (vitek)',
             'CXM (vitek)', 'CZA (disk)', 'CZA (vitek)', 'CZT (disk)', 'CZT (vitek)', 'ETP (disk)', 'ETP (mic)', 'ETP (vitek)',
             'FEP (disk)', 'FEP (mic)', 'FEP (vitek)', 'FOS (disk)', 'FOX (disk)', 'FOX (vitek)', 'GEN (disk)', 'GEN (mic)',
             'GEN (vitek)', 'IPM (disk)', 'IPM (mic)', 'IPM (vitek)', 'LTM (disk)', 'LVX (disk)', 'LVX (vitek)', 'MEC (disk)',
             'MEM (disk)', 'MEM (mic)', 'MEM (vitek)', 'NAL (vitek)', 'NET (disk)', 'OFX (vitek)', 'PIP (vitek)', 'PRL (disk)',
             'SXT (disk)', 'SXT (vitek)', 'TCC (disk)', 'TCC (vitek)', 'TEM (disk)', 'TEM (vitek)', 'TGC (disk)', 'TGC (vitek)',
             'TIC (disk)', 'TIC (vitek)', 'TOB (disk)', 'TOB (vitek)', 'TZP (disk)', 'TZP (mic)', 'TZP (vitek)']].copy()

    def split_before_number(s):
        # The capturing group keeps each digit run in the result:
        # 'ESCCOL284' -> ['ESCCOL', '284', ''].
        return re.split(r'(\d+)', s)

    def create_fname(s, analyse):
        # Build the raw-file base name for one sample and one analysis type.
        l = split_before_number(s)
        species = l[0]
        nb = l[1]  # IndexError here means the sample name had no digits
        return '{}-{}-{}_100vW_100SPD'.format(species, nb, analyse)

    df['path_ana'] = df['sample_name'].map(lambda x: create_fname(x, analyse='ANA'))
    df['path_aer'] = df['sample_name'].map(lambda x: create_fname(x, analyse='AER'))
    return df
# 69 antibiotic measurement columns + species
# sample_name: link to the raw file, e.g. ESCCOL-284-AER_100vW_100SPD corresponds to ESCCOL284; an ANA variant exists for each sample
# species: the species => the "simple" prediction label
# antibiogram: disk: larger value = less resistant; vitek: larger value = more resistant; mic: larger value = more resistant
#AMC, AMK, AMP, AMX, ATM, CAZ, CHL, CIP, COL, CRO, CTX, CXM, CZA, CZT, ETP, FEP, FOS, FOX, GEN, IPM, LVX, MEC, MEM,
......@@ -291,7 +319,7 @@ def create_antibio_dataset(path='data/230804_strain_peptides_antibiogram_Enterob
if __name__ == "__main__":
path = 'data/230804_strain_peptides_antibiogram_Enterobacterales.xlsx'
df = pd.read_excel(path, header=1)
df = create_antibio_dataset(path)
# e = oms.MSExperiment()
# oms.MzMLFile().load("data/STAPH140.mzML", e)
# im = build_image_frag(e, 2)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment