# Basic import
import argparse
import json
import math
import os
import time

# Data Structure
import numpy as np
import geopandas as gpd
from shapely.geometry import Point,box

# NLP
from nltk.tokenize import word_tokenize
from ngram import NGram

# Visualisation and parallelisation
from tqdm import tqdm


class TokenizerCustom():
    """
    Map vocabulary tokens to integer ids and encode texts with them.

    Parameters
    ----------
    vocab : sequence of str
        vocabulary; the id of a token is its position in this sequence

    Attributes
    ----------
    word_index : dict
        token -> id
    index_word : dict
        id -> token
    N : int
        number of entries in `index_word`
    """

    def __init__(self,vocab):
        # Single pass over the vocabulary; if a token is duplicated, the
        # highest position wins in `word_index` (later entries overwrite).
        self.index_word = dict(enumerate(vocab))
        self.word_index = {word: i for i, word in self.index_word.items()}
        self.N = len(self.index_word)

    def texts_to_sequences(self,listText):
        """
        Encode each text as the list of ids of its known tokens.

        Texts are split with `nltk.tokenize.word_tokenize`; tokens that are
        not in the vocabulary are silently dropped.

        Parameters
        ----------
        listText : iterable of str
            texts to encode

        Returns
        -------
        list of list of int
            one id sequence per input text, in input order
        """
        word_index = self.word_index
        return [
            [word_index[word] for word in word_tokenize(text) if word in word_index]
            for text in listText
        ]


class ConfigurationReader(object):
    """
    Build an `argparse.ArgumentParser` from a JSON configuration file.

    The JSON document may contain a "description" entry (used as the parser
    description) and must contain an "args" entry: a list of dictionaries,
    each describing one argument (see `parse_conf`).

    Parameters
    ----------
    configuration_file : str
        path of the JSON configuration file

    Raises
    ------
    FileNotFoundError
        if `configuration_file` does not exist
    """

    def __init__(self,configuration_file):
        if not os.path.exists(configuration_file):
            raise FileNotFoundError("'{0} file could not be found ! '".format(configuration_file))

        # Context manager so the file handle is closed deterministically.
        with open(configuration_file) as conf_file:
            self.configuration = json.load(conf_file)

        self.__argparser_desc = (
            self.configuration["description"]
            if "description" in self.configuration
            else ""
        )
        self.parser = argparse.ArgumentParser(description=self.__argparser_desc)

        self.parse_conf()

    def parse_conf(self):
        """
        Register every argument of the configuration on the parser.

        Each entry of "args" must be a dictionary with at least one of the
        keys "short" / "long" (the option strings). The optional keys "help",
        "default", "action", "type" and "choices" are forwarded to
        `ArgumentParser.add_argument`; "type" is given as a string holding a
        Python expression (e.g. "int").

        Raises
        ------
        argparse.ArgumentError
            if the configuration has no "args" entry
        ValueError
            if an entry is not a dictionary or names no command
        """
        if "args" not in self.configuration:
            # First argument must be an Action or None: passing a string
            # makes argparse itself fail with an AttributeError.
            raise argparse.ArgumentError(None,"No args given in the configuration file")

        for dict_args in self.configuration["args"]:
            if not isinstance(dict_args,dict):
                raise ValueError("Args must be dictionnary")

            short_command = dict_args.get("short",None)
            long_command = dict_args.get("long",None)
            if not short_command and not long_command:
                raise ValueError("No command name was given !")

            # Options copied verbatim from the configuration entry.
            add_func_dict_ = {
                key: dict_args[key]
                for key in ("help", "default", "action", "choices")
                if key in dict_args
            }
            if "type" in dict_args:
                # NOTE(security): eval executes arbitrary code found in the
                # configuration file. Only load configuration files you trust.
                add_func_dict_["type"] = eval(dict_args["type"])

            # Short name first, then long name; whichever are provided.
            names = [name for name in (short_command, long_command) if name]
            self.parser.add_argument(*names,**add_func_dict_)

    def parse_args(self,input_=None):
        """
        Parse command line arguments with the configured parser.

        Parameters
        ----------
        input_ : list of str, optional
            arguments to parse. Any falsy value (None, but also an empty
            list) makes the parser fall back on the process command line.

        Returns
        -------
        argparse.Namespace
            the parsed arguments
        """
        if not input_:
            return self.parser.parse_args()
        return self.parser.parse_args(input_)


class MetaDataSerializer(object):
    """
    Hold the metadata of a run and dump it to a JSON file.

    Every constructor parameter is stored unchanged as an attribute of the
    same name and written under the same key by `save`; their meaning is
    defined by the caller.
    """

    def __init__(self,
                 dataset_name,
                 rel_code,
                 cooc_sample_size,
                 adj_iteration,
                 ngram_size,
                 tolerance_value,
                 epochs,
                 embedding_dim,
                 word2vec_iter_nb,
                 index_fn,
                 keras_model_fn,
                 train_test_history_fn):
        self.dataset_name = dataset_name
        self.rel_code = rel_code
        self.cooc_sample_size = cooc_sample_size
        self.adj_iteration = adj_iteration
        self.ngram_size = ngram_size
        self.tolerance_value = tolerance_value
        self.epochs = epochs
        self.embedding_dim = embedding_dim
        self.word2vec_iter_nb = word2vec_iter_nb
        self.index_fn = index_fn
        self.keras_model_fn = keras_model_fn
        self.train_test_history_fn = train_test_history_fn

    def save(self,fn):
        """
        Write the metadata as a JSON object.

        Parameters
        ----------
        fn : str
            path of the output file (overwritten if it exists)
        """
        metadata = {
            "dataset_name" : self.dataset_name,
            "rel_code" : self.rel_code,
            "cooc_sample_size" : self.cooc_sample_size,
            "adj_iteration" : self.adj_iteration,
            "ngram_size" : self.ngram_size,
            "tolerance_value" : self.tolerance_value,
            "epochs" : self.epochs,
            "embedding_dim" : self.embedding_dim,
            "word2vec_iter_nb" : self.word2vec_iter_nb,
            "index_fn" : self.index_fn,
            "keras_model_fn" : self.keras_model_fn,
            "train_test_history_fn" : self.train_test_history_fn
        }
        # Context manager guarantees the content is flushed and the file
        # closed before returning.
        with open(fn,'w') as out_file:
            json.dump(metadata,out_file)


class Chronometer:
    """
    Measure the duration of named tasks.
    """

    def __init__(self):
        # task name -> clock reading taken when the task was started
        self.__task_begin_timestamp = {}

    def start(self, task_name):
        """
        Start a new task chronometer

        Parameters
        ----------
        task_name : str
            task id

        Raises
        ------
        ValueError
            if a running task already exists with that name
        """
        if task_name in self.__task_begin_timestamp:
            raise ValueError(
                "A running task exists with the name {0}!".format(task_name)
            )
        # Monotonic clock: unlike the wall clock it cannot jump backwards or
        # forwards when the system time is adjusted.
        self.__task_begin_timestamp[task_name] = time.perf_counter()

    def stop(self, task_name):
        """
        Stop and return the duration of the task

        Parameters
        ----------
        task_name : str
            task id

        Returns
        -------
        float
            duration of the task in seconds

        Raises
        ------
        ValueError
            if no task exist with the id `task_name`
        """
        if task_name not in self.__task_begin_timestamp:
            raise ValueError("The {0} task does not exist!".format(task_name))
        # pop() reads and forgets the task, so the name can be reused.
        begin = self.__task_begin_timestamp.pop(task_name)
        return time.perf_counter() - begin