import glob
import os
import pandas as pd
import re
import numpy as np
from PIL import Image
import matplotlib.image as mpimg

from build_image import build_image_ms1

"""
find . -name '*.mzML' -exec cp -prv '{}' '/home/leo/PycharmProjects/pseudo_image/data/raw_data' ';'
Copies the .mzML files from the drive into data/raw_data (command above).
"""

# Captures the text before the first run of digits and that run of digits,
# i.e. the same two pieces re.split(r'(\d+)', s) exposes as its items 0 and 1.
_SAMPLE_NAME_PATTERN = re.compile(r'(\D*)(\d+)')


def _mzml_filename(sample_name, analyse):
    """
    Build the .mzML file name expected for a sample and an analysis tag
    :param sample_name: sample identifier made of a prefix followed by a number
    :param analyse: tag inserted in the file name (the callers below use 'ANA' and 'AER')
    :return: file name, or None when sample_name is not a string containing a number
    """
    if not isinstance(sample_name, str):
        # Non-string cells (e.g. empty cells, which pandas reads as NaN) have no file name.
        return None
    match = _SAMPLE_NAME_PATTERN.match(sample_name)
    if match is None:
        # No digit in the name: nothing to build a file name from.
        return None
    prefix, number = match.groups()
    return '{}-{}-{}_100vW_100SPD.mzML'.format(prefix, number, analyse)


def create_antibio_dataset(path='../data/label_raw/230804_strain_peptides_antibiogram_Enterobacterales.xlsx'):
    """
    Extract and organise labels from raw excel file

    The second row of the sheet is used as header (header=1). Only the sample name, the species
    and the antibiotic columns listed below are kept, and two columns are added: 'path_ana' and
    'path_aer', the .mzML file names expected for the 'ANA' and 'AER' analyses of each sample.
    Rows whose sample name is empty or contains no number get a missing value in both columns.
    :param path: excel path
    :return: dataframe
    """
    df = pd.read_excel(path, header=1)
    # .copy() makes the selection an independent frame, so the column assignments below
    # neither emit SettingWithCopyWarning nor depend on pandas' view/copy rules.
    df = df[['sample_name','species','AMC (disk)','AMK (disk)','AMK (mic)','AMK (vitek)','AMP (vitek)','AMX (disk)',
    'AMX (vitek)','ATM (disk)','ATM (vitek)','CAZ (disk)','CAZ (mic)','CAZ (vitek)','CHL (vitek)','CIP (disk)',
    'CIP (vitek)','COL (disk)','COL (mic)','CRO (mic)','CRO (vitek)','CTX (disk)','CTX (mic)','CTX (vitek)',
    'CXM (vitek)','CZA (disk)','CZA (vitek)','CZT (disk)','CZT (vitek)','ETP (disk)','ETP (mic)','ETP (vitek)',
    'FEP (disk)','FEP (mic)','FEP (vitek)','FOS (disk)','FOX (disk)','FOX (vitek)','GEN (disk)','GEN (mic)',
    'GEN (vitek)','IPM (disk)','IPM (mic)','IPM (vitek)','LTM (disk)','LVX (disk)','LVX (vitek)','MEC (disk)',
    'MEM (disk)','MEM (mic)','MEM (vitek)','NAL (vitek)','NET (disk)','OFX (vitek)','PIP (vitek)','PRL (disk)',
    'SXT (disk)','SXT (vitek)','TCC (disk)','TCC (vitek)','TEM (disk)','TEM (vitek)','TGC (disk)','TGC (vitek)',
    'TIC (disk)','TIC (vitek)','TOB (disk)','TOB (vitek)','TZP (disk)','TZP (mic)','TZP (vitek)']].copy()

    df['path_ana'] = df['sample_name'].map(lambda x: _mzml_filename(x, analyse='ANA'))
    df['path_aer'] = df['sample_name'].map(lambda x: _mzml_filename(x, analyse='AER'))

    return df


def create_dataset():
    """
    Create images from raw .mzML files and sort them in their corresponding class directory

    Every .mzML file found directly in ../data/raw_data is looked up, by file name, among the
    'path_ana' and 'path_aer' columns of the label table. For a known file, the matrix returned
    by build_image_ms1 is saved twice in ../data/processed_data/<species>/: as an image
    (<sample_name>_<ANA|AER>.png) and as a raw array (<sample_name>_<ANA|AER>.npy).
    Files that are not referenced by the label table are only printed, then skipped.
    :return: None
    """
    label = create_antibio_dataset()

    # file name -> (species, sample name, analysis tag), built once instead of
    # re-scanning the label table for every file. setdefault keeps the first entry,
    # so 'ANA' wins over 'AER' and the first row wins among duplicates.
    known_files = {}
    for column, analyse in (('path_ana', 'ANA'), ('path_aer', 'AER')):
        for fname, species, name in zip(label[column], label['species'], label['sample_name']):
            known_files.setdefault(fname, (species, name, analyse))

    # Without recursive=True, '**' behaves like '*': only the top level of raw_data is scanned.
    for path in glob.glob("../data/raw_data/**.mzML"):
        print(path)
        # basename (rather than splitting on "/") also works with OS-specific separators.
        match = known_files.get(os.path.basename(path))
        if match is None:
            continue
        species, name, analyse = match

        directory_path = '../data/processed_data/{}'.format(species)
        os.makedirs(directory_path, exist_ok=True)

        # NOTE(review): the meaning of the second argument (1) is defined in build_image — confirm there.
        mat = build_image_ms1(path, 1)
        stem = os.path.join(directory_path, name + '_' + analyse)
        mpimg.imsave(stem + '.png', mat)
        np.save(stem + '.npy', mat)


# Script entry point: only the label table is built when the file is run directly.
if __name__ == "__main__":
    label = create_antibio_dataset()