Skip to content
Snippets Groups Projects
Commit 92d24083 authored by Schneider Leo
Browse files

antibio resistance label extraction

parent 665863c7
No related branches found
No related tags found
No related merge requests found
No preview for this file type
import random
import numpy as np import numpy as np
import torch import torch
import torchvision import torchvision
...@@ -57,11 +55,9 @@ def load_data(base_dir, batch_size, shuffle=True, noise_threshold=0): ...@@ -57,11 +55,9 @@ def load_data(base_dir, batch_size, shuffle=True, noise_threshold=0):
train_dataset = torchvision.datasets.ImageFolder(root=base_dir, transform=train_transform) train_dataset = torchvision.datasets.ImageFolder(root=base_dir, transform=train_transform)
val_dataset = torchvision.datasets.ImageFolder(root=base_dir, transform=val_transform) val_dataset = torchvision.datasets.ImageFolder(root=base_dir, transform=val_transform)
#Same seed to avoid overlap while having different transforms train_dataset, _ = train_test_split(train_dataset, test_size=None, train_size=None, random_state=42, shuffle=True,
seed = random.randint(0,1000)
train_dataset, _ = train_test_split(train_dataset, test_size=None, train_size=None, random_state=seed, shuffle=True,
stratify=True) stratify=True)
_, val_dataset = train_test_split(val_dataset, test_size=None, train_size=None, random_state=seed, shuffle=True, _, val_dataset = train_test_split(val_dataset, test_size=None, train_size=None, random_state=42, shuffle=True,
stratify=True) stratify=True)
data_loader_train = data.DataLoader( data_loader_train = data.DataLoader(
......
...@@ -12,24 +12,129 @@ from build_image import build_image_ms1 ...@@ -12,24 +12,129 @@ from build_image import build_image_ms1
find . -name '*.mzML' -exec cp -prv '{}' '/home/leo/PycharmProjects/pseudo_image/data/raw_data' ';' find . -name '*.mzML' -exec cp -prv '{}' '/home/leo/PycharmProjects/pseudo_image/data/raw_data' ';'
copy des mzml depuis lecteur copy des mzml depuis lecteur
""" """
# Antibiogram test columns iterated when assigning susceptibility categories.
# Kept in alphabetical order, one antibiotic code per row, with its
# measurement methods (disk, mic, vitek) side by side.
antibiotic_tests = [
    'AMC (disk)',
    'AMK (disk)', 'AMK (mic)', 'AMK (vitek)',
    'AMP (vitek)',
    'AMX (disk)', 'AMX (vitek)',
    'ATM (disk)', 'ATM (vitek)',
    'CAZ (disk)', 'CAZ (mic)', 'CAZ (vitek)',
    'CHL (vitek)',
    'CIP (disk)', 'CIP (vitek)',
    'COL (mic)',
    'CRO (mic)', 'CRO (vitek)',
    'CTX (disk)', 'CTX (mic)', 'CTX (vitek)',
    'CXM (vitek)',
    'CZA (disk)', 'CZA (vitek)',
    'CZT (disk)', 'CZT (vitek)',
    'ETP (disk)', 'ETP (mic)', 'ETP (vitek)',
    'FEP (disk)', 'FEP (mic)', 'FEP (vitek)',
    'FOS (disk)',
    'FOX (disk)', 'FOX (vitek)',
    'GEN (disk)', 'GEN (mic)', 'GEN (vitek)',
    'IPM (disk)', 'IPM (mic)', 'IPM (vitek)',
    'LVX (disk)', 'LVX (vitek)',
    'MEC (disk)',
    'MEM (disk)', 'MEM (mic)', 'MEM (vitek)',
    'NAL (vitek)',
    'NET (disk)',
    'OFX (vitek)',
    'PIP (vitek)',
    'PRL (disk)',
    'SXT (disk)', 'SXT (vitek)',
    'TCC (disk)', 'TCC (vitek)',
    'TGC (disk)', 'TGC (vitek)',
    'TIC (disk)', 'TIC (vitek)',
    'TOB (disk)', 'TOB (vitek)',
    'TZP (disk)', 'TZP (mic)', 'TZP (vitek)',
]
# Susceptibility breakpoints per antibiogram test column.
#
# Each entry maps a test name to its "S" (susceptible) and "R" (resistant)
# thresholds. The labelling loop in create_antibio_dataset compares
# measurements as follows:
#   * '(mic)' / '(vitek)' columns: value <= S -> 'S', value >= R -> 'R'
#   * '(disk)' columns:            value >= S -> 'S', value <= R -> 'R'
# so every disk entry must satisfy S >= R and every mic/vitek entry S <= R.
# Presumably disk values are zone diameters (mm) and mic/vitek values are
# MICs (mg/L) taken from the EUCAST tables -- TODO confirm units and version.
# Entries whose thresholds are None have no usable breakpoint.
antibiotic_enterrobacter_breakpoints = {
    'AMC (disk)': {"S": 14, "R": 14},
    'AMK (disk)': {"S": 18, "R": 18},
    'AMK (mic)': {"S": 8, "R": 8},
    'AMK (vitek)': {"S": 8, "R": 8},
    'AMP (vitek)': {"S": 8, "R": 8},
    'AMX (disk)': {"S": 14, "R": 14},
    'AMX (vitek)': {"S": 8, "R": 8},
    'ATM (disk)': {"S": 26, "R": 21},
    'ATM (vitek)': {"S": 1, "R": 4},
    'CAZ (disk)': {"S": 22, "R": 22},
    'CAZ (mic)': {"S": 1, "R": 4},
    'CAZ (vitek)': {"S": 1, "R": 4},
    'CHL (vitek)': {"S": 16, "R": 16},
    'CIP (disk)': {"S": 25, "R": 22},
    'CIP (vitek)': {"S": 0.25, "R": 0.5},
    # Removed because the method is not accurate (DO NOT USE IT);
    # see https://academic.oup.com/cid/article/71/9/e523/5735218
    'COL (disk)': {"S": None, "R": None},
    'COL (mic)': {"S": 2, "R": 2},
    'CRO (mic)': {"S": 1, "R": 2},
    'CRO (vitek)': {"S": 1, "R": 2},
    'CTX (disk)': {"S": 20, "R": 17},
    'CTX (mic)': {"S": 1, "R": 2},
    'CTX (vitek)': {"S": 1, "R": 2},
    'CXM (vitek)': {"S": 0.001, "R": 8},
    'CZA (disk)': {"S": 13, "R": 13},
    'CZA (vitek)': {"S": 8, "R": 8},
    'CZT (disk)': {"S": 22, "R": 22},
    'CZT (vitek)': {"S": 2, "R": 2},
    'ETP (disk)': {"S": 23, "R": 23},
    'ETP (mic)': {"S": 0.5, "R": 0.5},
    'ETP (vitek)': {"S": 0.5, "R": 0.5},
    'FEP (disk)': {"S": 27, "R": 24},
    'FEP (mic)': {"S": 1, "R": 4},
    'FEP (vitek)': {"S": 1, "R": 4},
    'FOS (disk)': {"S": 24, "R": 24},  # unclear?
    'FOX (disk)': {"S": 19, "R": 19},  # screen only?
    # screen only? High sensitivity but poor specificity for identification
    # of AmpC-producing Enterobacterales.
    'FOX (vitek)': {"S": 8, "R": 8},
    'GEN (disk)': {"S": 17, "R": 17},
    'GEN (mic)': {"S": 2, "R": 2},  # breakpoint given in brackets
    # breakpoint given in brackets, see
    # https://www.eucast.org/eucastguidancedocuments/ ?
    'GEN (vitek)': {"S": 2, "R": 2},
    'IPM (disk)': {"S": 22, "R": 19},
    'IPM (mic)': {"S": 2, "R": 4},
    'IPM (vitek)': {"S": 2, "R": 4},
    'LTM (disk)': {"S": None, "R": None},  # Lactimidomycin?
    'LVX (disk)': {"S": 23, "R": 19},
    'LVX (vitek)': {"S": 0.5, "R": 1},
    'MEC (disk)': {"S": 15, "R": 15},
    'MEM (disk)': {"S": 22, "R": 16},
    'MEM (mic)': {"S": 2, "R": 8},
    'MEM (vitek)': {"S": 2, "R": 8},
    # Not present in EUCAST; found in CLSI M100 (urinary tract only).
    'NAL (vitek)': {"S": 2, "R": 8},
    # Insufficient evidence in EUCAST; found in CLSI M100.
    'NET (disk)': {"S": 15, "R": 12},
    'OFX (vitek)': {"S": 0.25, "R": 0.5},
    'PIP (vitek)': {"S": 8, "R": 8},
    'PRL (disk)': {"S": 20, "R": 20},
    'SXT (disk)': {"S": 14, "R": 11},
    'SXT (vitek)': {"S": 2, "R": 4},
    # FIX: the disk and vitek values were swapped (disk had 8/16 and vitek
    # had 23/20), which broke the S >= R (disk) / S <= R (vitek) ordering.
    'TCC (disk)': {"S": 23, "R": 20},
    'TCC (vitek)': {"S": 8, "R": 16},
    'TEM (disk)': {"S": None, "R": None},  # non-standard abbreviation
    'TEM (vitek)': {"S": None, "R": None},  # non-standard abbreviation
    'TGC (disk)': {"S": 18, "R": 18},  # for E. coli and C. koseri only
    'TGC (vitek)': {"S": 0.5, "R": 0.5},
    # FIX: S was 13, i.e. below R=20, which is impossible for a disk test;
    # 23 matches the ticarcillin-clavulanate entry. NOTE(review): confirm
    # against the breakpoint table used for the other entries.
    'TIC (disk)': {"S": 23, "R": 20},
    'TIC (vitek)': {"S": 8, "R": 16},
    # breakpoint given in brackets, see
    # https://www.eucast.org/eucastguidancedocuments/ ?
    'TOB (disk)': {"S": 16, "R": 16},
    # breakpoint given in brackets, see
    # https://www.eucast.org/eucastguidancedocuments/ ?
    'TOB (vitek)': {"S": 2, "R": 2},
    'TZP (disk)': {"S": 20, "R": 20},
    'TZP (mic)': {"S": 8, "R": 8},
    'TZP (vitek)': {"S": 8, "R": 8},
}
def create_antibio_dataset(path='../data/label_raw/230804_strain_peptides_antibiogram_Enterobacterales.xlsx',suffix='-d200'): def create_antibio_dataset(path='../data/label_raw/230804_strain_peptides_antibiogram_Enterobacterales.xlsx',suffix='-d200'):
""" """
Extract and organise labels from raw excel file Extract and build file name corresponding to each sample
:param path: excel path :param path: excel path
:return: dataframe :return: dataframe
""" """
df = pd.read_excel(path, header=1) df = pd.read_excel(path, header=1)
df = df[['sample_name','species','AMC (disk)','AMK (disk)','AMK (mic)','AMK (vitek)','AMP (vitek)','AMX (disk)', df = df[['sample_name','species','AMC (disk)','AMK (disk)','AMK (mic)','AMK (vitek)','AMP (vitek)','AMX (disk)',
'AMX (vitek)','ATM (disk)','ATM (vitek)','CAZ (disk)','CAZ (mic)','CAZ (vitek)','CHL (vitek)','CIP (disk)', 'AMX (vitek)','ATM (disk)','ATM (vitek)','CAZ (disk)','CAZ (mic)','CAZ (vitek)','CHL (vitek)','CIP (disk)',
'CIP (vitek)','COL (disk)','COL (mic)','CRO (mic)','CRO (vitek)','CTX (disk)','CTX (mic)','CTX (vitek)', 'CIP (vitek)','COL (mic)','CRO (mic)','CRO (vitek)','CTX (disk)','CTX (mic)','CTX (vitek)',
'CXM (vitek)','CZA (disk)','CZA (vitek)','CZT (disk)','CZT (vitek)','ETP (disk)','ETP (mic)','ETP (vitek)', 'CXM (vitek)','CZA (disk)','CZA (vitek)','CZT (disk)','CZT (vitek)','ETP (disk)','ETP (mic)','ETP (vitek)',
'FEP (disk)','FEP (mic)','FEP (vitek)','FOS (disk)','FOX (disk)','FOX (vitek)','GEN (disk)','GEN (mic)', 'FEP (disk)','FEP (mic)','FEP (vitek)','FOS (disk)','FOX (disk)','FOX (vitek)','GEN (disk)','GEN (mic)',
'GEN (vitek)','IPM (disk)','IPM (mic)','IPM (vitek)','LTM (disk)','LVX (disk)','LVX (vitek)','MEC (disk)', 'GEN (vitek)','IPM (disk)','IPM (mic)','IPM (vitek)','LVX (disk)','LVX (vitek)','MEC (disk)',
'MEM (disk)','MEM (mic)','MEM (vitek)','NAL (vitek)','NET (disk)','OFX (vitek)','PIP (vitek)','PRL (disk)', 'MEM (disk)','MEM (mic)','MEM (vitek)','NAL (vitek)','NET (disk)','OFX (vitek)','PIP (vitek)','PRL (disk)',
'SXT (disk)','SXT (vitek)','TCC (disk)','TCC (vitek)','TEM (disk)','TEM (vitek)','TGC (disk)','TGC (vitek)', 'SXT (disk)','SXT (vitek)','TCC (disk)','TCC (vitek)','TGC (disk)','TGC (vitek)',
'TIC (disk)','TIC (vitek)','TOB (disk)','TOB (vitek)','TZP (disk)','TZP (mic)','TZP (vitek)']] 'TIC (disk)','TIC (vitek)','TOB (disk)','TOB (vitek)','TZP (disk)','TZP (mic)','TZP (vitek)']]
for test in antibiotic_tests :# S - Susceptible R - Resistant U- Uncertain
#convert to string and transform >8 to 8
df[test] = df[test].map(lambda x :float(str(x).replace('>','').replace('<','')))
df[test+' cat']= 'NA'
if 'mic' in test or 'vitek' in test :
try :
df.loc[df[test] <= antibiotic_enterrobacter_breakpoints[test]['S'], test+ ' cat'] = 'S'
df.loc[df[test] >= antibiotic_enterrobacter_breakpoints[test]['R'], test + ' cat'] = 'R'
df.loc[(antibiotic_enterrobacter_breakpoints[test]['S'] < df[test]) & (df[test] < antibiotic_enterrobacter_breakpoints[test]['R']), test + ' cat'] = 'U'
except:
pass
elif 'disk' in test:
try :
df.loc[df[test] >= antibiotic_enterrobacter_breakpoints[test]['S'], test + ' cat'] = 'S'
df.loc[df[test] <= antibiotic_enterrobacter_breakpoints[test]['R'], test + ' cat'] = 'R'
df.loc[
(antibiotic_enterrobacter_breakpoints[test]['S'] > df[test]) & (df[test] > antibiotic_enterrobacter_breakpoints[test][
'R']), test + ' cat'] = 'U'
except:
pass
def split_before_number(s): def split_before_number(s):
return re.split(r'(\d+)', s) return re.split(r'(\d+)', s)
...@@ -98,5 +203,26 @@ def create_dataset(): ...@@ -98,5 +203,26 @@ def create_dataset():
np.save(directory_path_npy + "/" + name + '_' + analyse + '.npy', mat) np.save(directory_path_npy + "/" + name + '_' + analyse + '.npy', mat)
def extract_antio_res_labels(path='../data/label_raw/230804_strain_peptides_antibiogram_Enterobacterales.xlsx'):
    """
    Extract and organise antibiotic-resistance labels from the raw excel file.

    The column names are taken from the second row of the sheet (``header=1``)
    and only the sample identifier, the species and the antibiogram columns
    listed below are kept.

    :param path: path of the excel file; defaults to the project's raw
        antibiogram file so existing calls without arguments keep working
    :return: dataframe restricted to the selected columns (the original
        version built this dataframe but never returned it)
    :raises KeyError: if one of the selected columns is missing from the sheet
    """
    label_columns = [
        'sample_name', 'species',
        'AMC (disk)', 'AMK (disk)', 'AMK (mic)', 'AMK (vitek)', 'AMP (vitek)', 'AMX (disk)',
        'AMX (vitek)', 'ATM (disk)', 'ATM (vitek)', 'CAZ (disk)', 'CAZ (mic)', 'CAZ (vitek)',
        'CHL (vitek)', 'CIP (disk)', 'CIP (vitek)', 'COL (disk)', 'COL (mic)', 'CRO (mic)',
        'CRO (vitek)', 'CTX (disk)', 'CTX (mic)', 'CTX (vitek)', 'CXM (vitek)', 'CZA (disk)',
        'CZA (vitek)', 'CZT (disk)', 'CZT (vitek)', 'ETP (disk)', 'ETP (mic)', 'ETP (vitek)',
        'FEP (disk)', 'FEP (mic)', 'FEP (vitek)', 'FOS (disk)', 'FOX (disk)', 'FOX (vitek)',
        'GEN (disk)', 'GEN (mic)', 'GEN (vitek)', 'IPM (disk)', 'IPM (mic)', 'IPM (vitek)',
        'LTM (disk)', 'LVX (disk)', 'LVX (vitek)', 'MEC (disk)', 'MEM (disk)', 'MEM (mic)',
        'MEM (vitek)', 'NAL (vitek)', 'NET (disk)', 'OFX (vitek)', 'PIP (vitek)', 'PRL (disk)',
        'SXT (disk)', 'SXT (vitek)', 'TCC (disk)', 'TCC (vitek)', 'TEM (disk)', 'TEM (vitek)',
        'TGC (disk)', 'TGC (vitek)', 'TIC (disk)', 'TIC (vitek)', 'TOB (disk)', 'TOB (vitek)',
        'TZP (disk)', 'TZP (mic)', 'TZP (vitek)',
    ]
    df = pd.read_excel(path, header=1)
    return df[label_columns]
if __name__ =='__main__' : if __name__ =='__main__' :
create_dataset() df = create_antibio_dataset()
\ No newline at end of file \ No newline at end of file
...@@ -296,7 +296,7 @@ class Classification_model_duo(nn.Module): ...@@ -296,7 +296,7 @@ class Classification_model_duo(nn.Module):
self.predictor = nn.Linear(in_features=self.n_class*2,out_features=self.n_class) self.predictor = nn.Linear(in_features=self.n_class*2,out_features=self.n_class)
def forward(self, input_aer, input_ana): def forward(self, input_aer, input_ana, input_ref):
out_aer = self.im_encoder(input_aer) out_aer = self.im_encoder(input_aer)
out_ana = self.im_encoder(input_ana) out_ana = self.im_encoder(input_ana)
out = torch.concat([out_aer,out_ana],dim=1) out = torch.concat([out_aer,out_ana],dim=1)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment