Skip to content
Snippets Groups Projects
Commit 92d24083 authored by Schneider Leo
Browse files

antibio resistance label extraction

parent 665863c7
No related branches found
No related tags found
No related merge requests found
No preview for this file type
import random
import numpy as np
import torch
import torchvision
......@@ -57,11 +55,9 @@ def load_data(base_dir, batch_size, shuffle=True, noise_threshold=0):
train_dataset = torchvision.datasets.ImageFolder(root=base_dir, transform=train_transform)
val_dataset = torchvision.datasets.ImageFolder(root=base_dir, transform=val_transform)
#Same seed to avoid overlap while having different transforms
seed = random.randint(0,1000)
train_dataset, _ = train_test_split(train_dataset, test_size=None, train_size=None, random_state=seed, shuffle=True,
train_dataset, _ = train_test_split(train_dataset, test_size=None, train_size=None, random_state=42, shuffle=True,
stratify=True)
_, val_dataset = train_test_split(val_dataset, test_size=None, train_size=None, random_state=seed, shuffle=True,
_, val_dataset = train_test_split(val_dataset, test_size=None, train_size=None, random_state=42, shuffle=True,
stratify=True)
data_loader_train = data.DataLoader(
......
......@@ -12,24 +12,129 @@ from build_image import build_image_ms1
find . -name '*.mzML' -exec cp -prv '{}' '/home/leo/PycharmProjects/pseudo_image/data/raw_data' ';'
copy des mzml depuis lecteur
"""
# Antibiotic susceptibility tests processed by create_antibio_dataset.
# Each entry is an excel column name of the form 'ANTIBIOTIC (method)', where the
# method is one of disk, mic or vitek. One antibiotic per line, alphabetical order.
antibiotic_tests = [
    'AMC (disk)',
    'AMK (disk)', 'AMK (mic)', 'AMK (vitek)',
    'AMP (vitek)',
    'AMX (disk)', 'AMX (vitek)',
    'ATM (disk)', 'ATM (vitek)',
    'CAZ (disk)', 'CAZ (mic)', 'CAZ (vitek)',
    'CHL (vitek)',
    'CIP (disk)', 'CIP (vitek)',
    'COL (mic)',
    'CRO (mic)', 'CRO (vitek)',
    'CTX (disk)', 'CTX (mic)', 'CTX (vitek)',
    'CXM (vitek)',
    'CZA (disk)', 'CZA (vitek)',
    'CZT (disk)', 'CZT (vitek)',
    'ETP (disk)', 'ETP (mic)', 'ETP (vitek)',
    'FEP (disk)', 'FEP (mic)', 'FEP (vitek)',
    'FOS (disk)',
    'FOX (disk)', 'FOX (vitek)',
    'GEN (disk)', 'GEN (mic)', 'GEN (vitek)',
    'IPM (disk)', 'IPM (mic)', 'IPM (vitek)',
    'LVX (disk)', 'LVX (vitek)',
    'MEC (disk)',
    'MEM (disk)', 'MEM (mic)', 'MEM (vitek)',
    'NAL (vitek)',
    'NET (disk)',
    'OFX (vitek)',
    'PIP (vitek)',
    'PRL (disk)',
    'SXT (disk)', 'SXT (vitek)',
    'TCC (disk)', 'TCC (vitek)',
    'TGC (disk)', 'TGC (vitek)',
    'TIC (disk)', 'TIC (vitek)',
    'TOB (disk)', 'TOB (vitek)',
    'TZP (disk)', 'TZP (mic)', 'TZP (vitek)',
]
# Susceptibility breakpoints per test, keyed by the excel column name.
# create_antibio_dataset compares each measurement against these limits:
#   * mic / vitek tests: value <= S -> 'S', value >= R -> 'R' (so S <= R is expected);
#   * disk tests:        value >= S -> 'S', value <= R -> 'R' (so S >= R is expected).
# None means that no breakpoint is available for the test.
# Units are presumably mg/L for mic/vitek and mm for disk diameters - TODO confirm.
antibiotic_enterrobacter_breakpoints = {
    'AMC (disk)': {"S": 14, "R": 14},
    'AMK (disk)': {"S": 18, "R": 18},
    'AMK (mic)': {"S": 8, "R": 8},
    'AMK (vitek)': {"S": 8, "R": 8},
    'AMP (vitek)': {"S": 8, "R": 8},
    'AMX (disk)': {"S": 14, "R": 14},
    'AMX (vitek)': {"S": 8, "R": 8},
    'ATM (disk)': {"S": 26, "R": 21},
    'ATM (vitek)': {"S": 1, "R": 4},
    'CAZ (disk)': {"S": 22, "R": 22},
    'CAZ (mic)': {"S": 1, "R": 4},
    'CAZ (vitek)': {"S": 1, "R": 4},
    'CHL (vitek)': {"S": 16, "R": 16},
    'CIP (disk)': {"S": 25, "R": 22},
    'CIP (vitek)': {"S": 0.25, "R": 0.5},
    # see https://academic-oup-com.docelec.univ-lyon1.fr/cid/article/71/9/e523/5735218
    # deleted since the method is not accurate (DO NOT USE IT)
    'COL (disk)': {"S": None, "R": None},
    'COL (mic)': {"S": 2, "R": 2},
    'CRO (mic)': {"S": 1, "R": 2},
    'CRO (vitek)': {"S": 1, "R": 2},
    'CTX (disk)': {"S": 20, "R": 17},
    'CTX (mic)': {"S": 1, "R": 2},
    'CTX (vitek)': {"S": 1, "R": 2},
    'CXM (vitek)': {"S": 0.001, "R": 8},
    'CZA (disk)': {"S": 13, "R": 13},
    'CZA (vitek)': {"S": 8, "R": 8},
    'CZT (disk)': {"S": 22, "R": 22},
    'CZT (vitek)': {"S": 2, "R": 2},
    'ETP (disk)': {"S": 23, "R": 23},
    'ETP (mic)': {"S": 0.5, "R": 0.5},
    'ETP (vitek)': {"S": 0.5, "R": 0.5},
    'FEP (disk)': {"S": 27, "R": 24},
    'FEP (mic)': {"S": 1, "R": 4},
    'FEP (vitek)': {"S": 1, "R": 4},
    'FOS (disk)': {"S": 24, "R": 24},  # unclear?
    'FOX (disk)': {"S": 19, "R": 19},  # screen only?
    # screen only? high sensitivity but poor specificity for identification of
    # AmpC-producing Enterobacterales
    'FOX (vitek)': {"S": 8, "R": 8},
    'GEN (disk)': {"S": 17, "R": 17},
    'GEN (mic)': {"S": 2, "R": 2},  # listed in parentheses
    # listed in parentheses, cf. https://www.eucast.org/eucastguidancedocuments/ ?
    'GEN (vitek)': {"S": 2, "R": 2},
    'IPM (disk)': {"S": 22, "R": 19},
    'IPM (mic)': {"S": 2, "R": 4},
    'IPM (vitek)': {"S": 2, "R": 4},
    'LTM (disk)': {"S": None, "R": None},  # Lactimidomycin?
    'LVX (disk)': {"S": 23, "R": 19},
    'LVX (vitek)': {"S": 0.5, "R": 1},
    'MEC (disk)': {"S": 15, "R": 15},
    'MEM (disk)': {"S": 22, "R": 16},
    'MEM (mic)': {"S": 2, "R": 8},
    'MEM (vitek)': {"S": 2, "R": 8},
    # not present in EUCAST, found in CLSI M100 (for urinary tract only)
    'NAL (vitek)': {"S": 2, "R": 8},
    # insufficient evidence for EUCAST, found in CLSI M100
    'NET (disk)': {"S": 15, "R": 12},
    'OFX (vitek)': {"S": 0.25, "R": 0.5},
    'PIP (vitek)': {"S": 8, "R": 8},
    'PRL (disk)': {"S": 20, "R": 20},
    'SXT (disk)': {"S": 14, "R": 11},
    'SXT (vitek)': {"S": 2, "R": 4},
    # The two TCC entries were swapped (the disk entry carried the 8/16 pair with S < R,
    # the vitek entry the 23/20 pair with S > R), contradicting the direction of every
    # other disk and mic/vitek entry. NOTE(review): confirm against the breakpoint table.
    'TCC (disk)': {"S": 23, "R": 20},
    'TCC (vitek)': {"S": 8, "R": 16},
    'TEM (disk)': {"S": None, "R": None},  # non-standard abbreviation
    'TEM (vitek)': {"S": None, "R": None},  # non-standard abbreviation
    'TGC (disk)': {"S": 18, "R": 18},  # for E. coli and C. koseri only
    'TGC (vitek)': {"S": 0.5, "R": 0.5},
    # NOTE(review): S < R is inconsistent with every other disk entry (possible typo
    # for S); value left unchanged until checked against the breakpoint table.
    'TIC (disk)': {"S": 13, "R": 20},
    'TIC (vitek)': {"S": 8, "R": 16},
    # listed in parentheses, cf. https://www.eucast.org/eucastguidancedocuments/ ?
    'TOB (disk)': {"S": 16, "R": 16},
    # listed in parentheses, cf. https://www.eucast.org/eucastguidancedocuments/ ?
    'TOB (vitek)': {"S": 2, "R": 2},
    'TZP (disk)': {"S": 20, "R": 20},
    'TZP (mic)': {"S": 8, "R": 8},
    'TZP (vitek)': {"S": 8, "R": 8},
}
def create_antibio_dataset(path='../data/label_raw/230804_strain_peptides_antibiogram_Enterobacterales.xlsx',suffix='-d200'):
"""
Extract and organise labels from raw excel file
Extract and build file name corresponding to each sample
:param path: excel path
:return: dataframe
"""
df = pd.read_excel(path, header=1)
df = df[['sample_name','species','AMC (disk)','AMK (disk)','AMK (mic)','AMK (vitek)','AMP (vitek)','AMX (disk)',
'AMX (vitek)','ATM (disk)','ATM (vitek)','CAZ (disk)','CAZ (mic)','CAZ (vitek)','CHL (vitek)','CIP (disk)',
'CIP (vitek)','COL (disk)','COL (mic)','CRO (mic)','CRO (vitek)','CTX (disk)','CTX (mic)','CTX (vitek)',
'CIP (vitek)','COL (mic)','CRO (mic)','CRO (vitek)','CTX (disk)','CTX (mic)','CTX (vitek)',
'CXM (vitek)','CZA (disk)','CZA (vitek)','CZT (disk)','CZT (vitek)','ETP (disk)','ETP (mic)','ETP (vitek)',
'FEP (disk)','FEP (mic)','FEP (vitek)','FOS (disk)','FOX (disk)','FOX (vitek)','GEN (disk)','GEN (mic)',
'GEN (vitek)','IPM (disk)','IPM (mic)','IPM (vitek)','LTM (disk)','LVX (disk)','LVX (vitek)','MEC (disk)',
'GEN (vitek)','IPM (disk)','IPM (mic)','IPM (vitek)','LVX (disk)','LVX (vitek)','MEC (disk)',
'MEM (disk)','MEM (mic)','MEM (vitek)','NAL (vitek)','NET (disk)','OFX (vitek)','PIP (vitek)','PRL (disk)',
'SXT (disk)','SXT (vitek)','TCC (disk)','TCC (vitek)','TEM (disk)','TEM (vitek)','TGC (disk)','TGC (vitek)',
'SXT (disk)','SXT (vitek)','TCC (disk)','TCC (vitek)','TGC (disk)','TGC (vitek)',
'TIC (disk)','TIC (vitek)','TOB (disk)','TOB (vitek)','TZP (disk)','TZP (mic)','TZP (vitek)']]
for test in antibiotic_tests :# S - Susceptible R - Resistant U- Uncertain
#convert to string and transform >8 to 8
df[test] = df[test].map(lambda x :float(str(x).replace('>','').replace('<','')))
df[test+' cat']= 'NA'
if 'mic' in test or 'vitek' in test :
try :
df.loc[df[test] <= antibiotic_enterrobacter_breakpoints[test]['S'], test+ ' cat'] = 'S'
df.loc[df[test] >= antibiotic_enterrobacter_breakpoints[test]['R'], test + ' cat'] = 'R'
df.loc[(antibiotic_enterrobacter_breakpoints[test]['S'] < df[test]) & (df[test] < antibiotic_enterrobacter_breakpoints[test]['R']), test + ' cat'] = 'U'
except:
pass
elif 'disk' in test:
try :
df.loc[df[test] >= antibiotic_enterrobacter_breakpoints[test]['S'], test + ' cat'] = 'S'
df.loc[df[test] <= antibiotic_enterrobacter_breakpoints[test]['R'], test + ' cat'] = 'R'
df.loc[
(antibiotic_enterrobacter_breakpoints[test]['S'] > df[test]) & (df[test] > antibiotic_enterrobacter_breakpoints[test][
'R']), test + ' cat'] = 'U'
except:
pass
def split_before_number(s):
    """
    Cut a string at every run of digits.

    :param s: string to split
    :return: list alternating non-digit parts and digit runs; the digit runs are
        kept as separate items, and empty strings appear where the input starts or
        ends with digits
    """
    digit_run = re.compile(r'(\d+)')
    return digit_run.split(s)
......@@ -98,5 +203,26 @@ def create_dataset():
np.save(directory_path_npy + "/" + name + '_' + analyse + '.npy', mat)
def extract_antio_res_labels():
    """
    Load the raw antibiogram excel file and keep only the label columns.

    The file location is hard-coded; column labels are read with ``header=1``.

    :return: dataframe holding ``sample_name``, ``species`` and one column per
        antibiotic test listed below
    """
    path = '../data/label_raw/230804_strain_peptides_antibiogram_Enterobacterales.xlsx'
    columns = [
        'sample_name', 'species', 'AMC (disk)', 'AMK (disk)', 'AMK (mic)', 'AMK (vitek)', 'AMP (vitek)', 'AMX (disk)',
        'AMX (vitek)', 'ATM (disk)', 'ATM (vitek)', 'CAZ (disk)', 'CAZ (mic)', 'CAZ (vitek)', 'CHL (vitek)', 'CIP (disk)',
        'CIP (vitek)', 'COL (disk)', 'COL (mic)', 'CRO (mic)', 'CRO (vitek)', 'CTX (disk)', 'CTX (mic)', 'CTX (vitek)',
        'CXM (vitek)', 'CZA (disk)', 'CZA (vitek)', 'CZT (disk)', 'CZT (vitek)', 'ETP (disk)', 'ETP (mic)', 'ETP (vitek)',
        'FEP (disk)', 'FEP (mic)', 'FEP (vitek)', 'FOS (disk)', 'FOX (disk)', 'FOX (vitek)', 'GEN (disk)', 'GEN (mic)',
        'GEN (vitek)', 'IPM (disk)', 'IPM (mic)', 'IPM (vitek)', 'LTM (disk)', 'LVX (disk)', 'LVX (vitek)', 'MEC (disk)',
        'MEM (disk)', 'MEM (mic)', 'MEM (vitek)', 'NAL (vitek)', 'NET (disk)', 'OFX (vitek)', 'PIP (vitek)', 'PRL (disk)',
        'SXT (disk)', 'SXT (vitek)', 'TCC (disk)', 'TCC (vitek)', 'TEM (disk)', 'TEM (vitek)', 'TGC (disk)', 'TGC (vitek)',
        'TIC (disk)', 'TIC (vitek)', 'TOB (disk)', 'TOB (vitek)', 'TZP (disk)', 'TZP (mic)', 'TZP (vitek)',
    ]
    df = pd.read_excel(path, header=1)
    # The selection used to be computed and then discarded; return it as the
    # docstring promises.
    return df[columns]
if __name__ == '__main__':
    # Script entry point: build the antibiotic-resistance label table.
    df = create_antibio_dataset()
......@@ -296,7 +296,7 @@ class Classification_model_duo(nn.Module):
self.predictor = nn.Linear(in_features=self.n_class*2,out_features=self.n_class)
def forward(self, input_aer, input_ana):
def forward(self, input_aer, input_ana, input_ref):
out_aer = self.im_encoder(input_aer)
out_ana = self.im_encoder(input_ana)
out = torch.concat([out_aer,out_ana],dim=1)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment