diff --git a/.gitignore b/.gitignore
index ddc1507dda5eebc7fbb67e9a3546f78022969b26..96d316e916ade4b9b4e4081aea3e4a2ed935b257 100644
--- a/.gitignore
+++ b/.gitignore
@@ -14,7 +14,6 @@ dist/
 downloads/
 eggs/
 .eggs/
-lib/
 lib64/
 parts/
 sdist/
diff --git a/combination_embeddings.py b/combination_embeddings.py
index c147f8064042bea90976c809d7ca42fe5012369e..025c586caa830963928ba0c47e0a816127966d88 100644
--- a/combination_embeddings.py
+++ b/combination_embeddings.py
@@ -12,6 +12,8 @@ import geopandas as gpd
 from keras.layers import Dense, Input, Embedding,concatenate,Bidirectional,LSTM
 from keras.models import Model
 from keras import backend as K
+from keras.callbacks import ModelCheckpoint
+
 import tensorflow as tf
 
 # Geometry
@@ -19,31 +21,15 @@ from shapely.geometry import Point
 
 # Custom module
 from helpers import read_geonames
-from utils import Grid
-from utils import  zero_one_encoding, NgramIndex,ConfigurationReader
-from metrics import lat_accuracy,lon_accuracy
+from lib.geo import Grid,zero_one_encoding, get_adjacency_rels, get_geonames_inclusion_rel,get_bounds
+from lib.ngram_index import NgramIndex
+from lib.utils import ConfigurationReader
+from lib.metrics import lat_accuracy,lon_accuracy
 
 # Logging
 from tqdm import tqdm
 import logging
-from helpers import Chronometer
-
-
-def parse_title_wiki(title_wiki):
-    """
-    Parse Wikipedia title
-    
-    Parameters
-    ----------
-    title_wiki : str
-        wikipedia title
-    
-    Returns
-    -------
-    str
-        parsed wikipedia title
-    """
-    return re.sub("\(.*\)","",title_wiki).strip().lower()
+from helpers import parse_title_wiki
 
 def get_new_ids(cooc_data,id_first_value):
     """
@@ -74,96 +60,122 @@ def get_new_ids(cooc_data,id_first_value):
                 topo_id[id_]=interlink
     return topo_id
 
-
-
 # LOGGING CONF
 logging.basicConfig(
     format='[%(asctime)s][%(levelname)s] %(message)s ', 
     datefmt='%m/%d/%Y %I:%M:%S %p',
     level=logging.INFO  
     )
-chrono = Chronometer()
 
 args = ConfigurationReader("./parser_config/toponym_combination_embedding.json")\
-    .parse_args()#("-n 4 -t 0.002 -e 20  -i data/geonamesData/FR.txt data/geonamesData/hierarchy.txt".split())
+    .parse_args()#("-i -e 5 ../data/geonamesData/FR.txt ../data/geonamesData/hierarchy.txt".split())
 
-# Initialisee CONSTANTS
-GEONAME_FN = args.geoname_input
-GEONAMES_HIERARCHY_FN = args.geoname_hierachy_input
+#
+#################################################
+############# MODEL TRAINING PARAMETERS #########
+#################################################
 NGRAM_SIZE = args.ngram_size
 ACCURACY_TOLERANCE = args.tolerance_value
 EPOCHS = args.epochs
 ITER_ADJACENCY = args.adjacency_iteration
-COOC_SAMPLING_NUMBER = 3
-WORDVEC_ITER = 50
+COOC_SAMPLING_NUMBER = args.cooc_sample_size
+WORDVEC_ITER = args.ngram_word2vec_dim
+#################################################
+########## FILENAME VARIABLE ####################
+#################################################
+GEONAME_FN = args.geoname_input
+GEONAMES_HIERARCHY_FN = args.geoname_hierachy_input
+REGION_SUFFIX_FN = "" if args.admin_code_1 == "None" else "_" + args.admin_code_1
+ADJACENCY_REL_FILENAME = "../data/geonamesData/{0}_{1}{2}adjacency.json".format(
+        GEONAME_FN.split("/")[-1],
+        ITER_ADJACENCY,
+        REGION_SUFFIX_FN)
+
+COOC_FN = "../data/wikipedia/cooccurrence_"+GEONAME_FN.split("/")[-1]
+PREFIX_OUTPUT_FN = "{0}_{1}_{2}_{3}_{4}".format(
+    GEONAME_FN.split("/")[-1],
+    EPOCHS,
+    NGRAM_SIZE,
+    ACCURACY_TOLERANCE,
+    REGION_SUFFIX_FN)
 
-# check for output dir
-if not os.path.exists("outputs/"):
-    os.makedirs("outputs/")
+if args.adjacency:
+    PREFIX_OUTPUT_FN += "_A"
+if args.inclusion:
+    PREFIX_OUTPUT_FN += "_I"
+if args.wikipedia_cooc:
+    PREFIX_OUTPUT_FN += "_C"
+
+MODEL_OUTPUT_FN = "outputs/{0}.h5".format(PREFIX_OUTPUT_FN)
+INDEX_FN = "outputs/{0}_index".format(PREFIX_OUTPUT_FN)
+
+#############################################################################################
+################################# LOAD DATA #################################################
+#############################################################################################
 
 # LOAD  Geonames DATA
 logging.info("Load Geonames data...")
 geoname_data = read_geonames(GEONAME_FN).fillna("")
-hierarchy_data = pd.read_csv(GEONAMES_HIERARCHY_FN,sep="\t",header=None,names="parentId,childId,type".split(",")).fillna("")
 
-train_indices,test_indices = pd.read_csv(GEONAME_FN+"_train.csv").geonameid.values, pd.read_csv(GEONAME_FN+"_test.csv").geonameid.values
-train_indices,test_indices = set(train_indices),set(test_indices)
+train_indices = set(pd.read_csv(GEONAME_FN+"_train.csv").geonameid.values)
+test_indices = set(pd.read_csv(GEONAME_FN+"_test.csv").geonameid.values)
 
 logging.info("Geonames data loaded!")
 
 # SELECT ENTRY with class == to A and P (Areas and Populated Places)
 filtered = geoname_data[geoname_data.feature_class.isin("A P".split())].copy() # Only take area and populated places
+#CLEAR RAM
+del geoname_data
 
-# IF REGION (ONLY FR for now !)
-admin_id_authorised_auth = "1 2 3 4 5 6 11 24 27 28 32 44 52 53 75 76 84 93 94".split()
-region_fn = "" if args.admin_code_1 == None else "_"+args.admin_code_1
-if args.admin_code_1 != None and args.admin_code_1 in admin_id_authorised_auth:
+
+# IF REGION
+if args.admin_code_1 != "None":
     filtered = filtered[filtered.admin1_code == args.admin_code_1].copy()
 
-# REDUCE DATA STORED
+# GET BOUNDS AND REDUCE DATA AVAILABLE FIELDS
 filtered = filtered["geonameid name longitude latitude".split()] # KEEP ONLY ID LABEL AND COORD
+bounds = get_bounds(filtered) # Required to get adjacency relationships
+
 
-# Geometry operation 
-filtered["geometry"] = filtered["longitude latitude".split()].apply(lambda x: Point(x.longitude,x.latitude),axis=1)
-filtered = gpd.GeoDataFrame(filtered)
-filtered["i"]=1
-bounds = filtered.dissolve("i").bounds.values[0] # Required to get adjacency relationships
+#############################################################################################
+################################# RETRIEVE RELATIONSHIPS ####################################
+#############################################################################################
 
 
+# INITIALIZE RELATION STORE
 rel_store = []
 
+# Retrieve adjacency relationships
 if args.adjacency:
-    # RETRIEVE ADJACENCY REL
     logging.info("Retrieve adjacency relationships ! ")
-    fn = "data/geonamesData/{0}_{1}{2}adjacency.json".format(GEONAME_FN.split("/")[-1],ITER_ADJACENCY,region_fn)
-    if not os.path.exists(fn):
-        g = Grid(*bounds,[360,180])
-        g.fit_data(filtered)
-        [g+(int(row.geonameid),row.latitude,row.longitude) for ix,row in tqdm(filtered["geonameid longitude latitude".split()].iterrows(),total=len(filtered))]
-        rel_store.extend([[int(i) for i in r.split("|")] for r in g.get_adjacent_relationships(ITER_ADJACENCY)])
-        json.dump(rel_store,open(fn,'w'))
+
+    if not os.path.exists(ADJACENCY_REL_FILENAME):
+        rel_store.extend(get_adjacency_rels(filtered,bounds,[360,180],ITER_ADJACENCY))
+        json.dump(rel_store,open(ADJACENCY_REL_FILENAME,'w'))
     else:
         logging.info("Open and load data from previous computation!")
-        rel_store=[[int(couple[0]),int(couple[1])] for couple in json.load(open(fn))]
+        rel_store=json.load(open(ADJACENCY_REL_FILENAME))
+
     logging.info("{0} adjacency relationships retrieved ! ".format(len(rel_store)))
 
+# Retrieve inclusion relationships
 if args.inclusion:
-    # RETRIEVE INCLUSION RELATIONSHIPS
     logging.info("Retrieve inclusion relationships ! ")
-    geonamesIDS = set(filtered.geonameid.values)
-    filter_mask = (hierarchy_data.childId.isin(geonamesIDS) & hierarchy_data.parentId.isin(geonamesIDS))
-    rel_store.extend((hierarchy_data[filter_mask]["childId parentId".split()].values.tolist()))
-    logging.info("{0} inclusion relationships retrieved ! ".format(len(hierarchy_data[filter_mask])))
 
-del filtered["geometry"]
+    cpt_rel = len(rel_store)
+    rel_store.extend(get_geonames_inclusion_rel(filtered,GEONAMES_HIERARCHY_FN))
+
+    logging.info("{0} inclusion relationships retrieved ! ".format(len(rel_store)-cpt_rel))
+
+
 
 if args.wikipedia_cooc:
     logging.info("Load Wikipedia Cooccurrence data and merge with geonames")
-    COOC_FN = "./data/wikipedia/cooccurrence_"+GEONAME_FN.split("/")[-1]
+    
     cooc_data = pd.read_csv(COOC_FN,sep="\t")
     cooc_data["title"] = cooc_data.title.apply(parse_title_wiki)
     cooc_data["interlinks"] = cooc_data.interlinks.apply(parse_title_wiki)
-    id_wikipediatitle = get_new_ids(cooc_data,geoname_data.geonameid.max())
+    id_wikipediatitle = get_new_ids(cooc_data,filtered.geonameid.max())
     wikipediatitle_id = {v:k for k,v in id_wikipediatitle.items()}
     title_coord = {row.title: (row.longitude,row.latitude) for _,row in cooc_data.iterrows()}
     cooc_data["geonameid"] = cooc_data.title.apply(lambda x: wikipediatitle_id[x])
@@ -191,37 +203,30 @@ geoname2name = dict(filtered["geonameid name".split()].values)
 # ENCODING NAME USING N-GRAM SPLITTING
 logging.info("Encoding toponyms to ngram...")
 index = NgramIndex(NGRAM_SIZE)
-filtered.name.apply(lambda x : index.split_and_add(x)) # Identify all ngram available
-if args.wikipedia_cooc:
-    [index.split_and_add(k) for k in wikipediatitle_id]
-filtered["encode_name"] = filtered.name.apply(lambda x : index.encode(x)) # First encoding
-max_len = filtered.encode_name.apply(len).max() # Retrieve the encodings max length
-if args.wikipedia_cooc:
-    extension = {v:index.encode(k) for k,v in wikipediatitle_id.items()}
 
-index.max_len = int(max_len) # For Index state dump
+# Identify all ngrams available
+filtered.name.apply(lambda x : index.split_and_add(x))
+if args.wikipedia_cooc:
+    [index.split_and_add(k) for k in wikipediatitle_id]
 
-filtered["encode_name"] = filtered.encode_name.apply(lambda x: index.complete(x,max_len)) # Expend encodings with size < max_len
-if args.wikipedia_cooc:
-    extension = {k:index.complete(v,max_len) for k,v in extension.items()}
-geoname2encodedname = dict(filtered["geonameid encode_name".split()].values) #init a dict with the 'geonameid' --> 'encoded toponym' association
+geoname2encodedname = {row.geonameid : index.encode(row.name) for row in filtered.itertuples()} #init a dict with the 'geonameid' --> 'encoded toponym' association
 
 if args.wikipedia_cooc:
-    geoname2encodedname.update(extension)
+    geoname2encodedname.update({v:index.encode(k) for k,v in wikipediatitle_id.items()})
 
+# SAVE THE INDEX TO REUSE THE MODEL
+index.save(INDEX_FN)
 
 logging.info("Done !")
 
-#CLEAR RAM
-del hierarchy_data
-del geoname_data
+
+#############################################################################################
+################################# ENCODE COORDINATES ########################################
+#############################################################################################
+
+
 
 # Encode each geonames entry coordinates
-filtered["cell_vec"]=filtered.apply(
-    lambda x : zero_one_encoding(x.longitude,x.latitude),
-    axis=1
-    )
-geoname_vec = dict(filtered["geonameid cell_vec".split()].values)
+geoname_vec = {row.geonameid : zero_one_encoding(row.longitude,row.latitude) for row in filtered.itertuples()}
 # CLEAR RAM
 del filtered
 
@@ -231,14 +236,17 @@ num_words = len(index.index_ngram) # necessary for the embedding matrix
 
 logging.info("Preparing Input and Output data...")
 
+
+#############################################################################################
+################################# BUILD TRAIN/TEST DATASETS #################################
+#############################################################################################
+
 X_1_train,X_2_train,y_lat_train,y_lon_train=[],[],[],[]
 X_1_test,X_2_test,y_lat_test,y_lon_test=[],[],[],[]
 
-cpt=0
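+# One sample per relationship: the two encoded toponyms are the model inputs,
+# and the pair is routed to train or test according to the first toponym's geonameid.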
 for couple in rel_store:
     geonameId_1,geonameId_2 = couple[0],couple[1]
     if not geonameId_1 in geoname2encodedname:
-        cpt+=1
         continue
     top1,top2 = geoname2encodedname[geonameId_1],geoname2encodedname[geonameId_2]
     if geonameId_1 in train_indices: #and geonameId_2 in train_indices:
@@ -270,29 +278,28 @@ y_lon_test = np.array(y_lon_test)
 logging.info("Data prepared !")
 
 
-# OUTPUT FN BASE
-name = "{0}_{1}_{2}_{3}{4}".format(GEONAME_FN.split("/")[-1],EPOCHS,NGRAM_SIZE,ACCURACY_TOLERANCE,region_fn)
-if args.adjacency:
-    name += "_A"
-if args.inclusion:
-    name += "_I"
-if args.wikipedia_cooc:
-    name += "_C"
+# check for output dir
+if not os.path.exists("outputs/"):
+    os.makedirs("outputs/")
 
-index.save("outputs/"+name+"_index")
+#############################################################################################
+################################# NGRAM EMBEDDINGS ##########################################
+#############################################################################################
 
 
-# NGRAM EMBDEDDING
 logging.info("Generating N-GRAM Embedding...")
 embedding_weights = index.get_embedding_layer(geoname2encodedname.values(),dim= embedding_dim,iter=WORDVEC_ITER)
 logging.info("Embedding generated !")
 
-# DEEP MODEL
-name = "LSTM_"+ name
-input_1 = Input(shape=(max_len,))
-input_2 = Input(shape=(max_len,))
+#############################################################################################
+################################# MODEL DEFINITION ##########################################
+#############################################################################################
 
-embedding_layer = Embedding(num_words, embedding_dim,input_length=max_len,weights=[embedding_weights],trainable=False)#, trainable=True)
+
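+# Both toponym inputs share the frozen n-gram embedding layer; each branch is then
+# summarised by its own Bidirectional LSTM before the coordinate regression heads.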
+input_1 = Input(shape=(index.max_len,))
+input_2 = Input(shape=(index.max_len,))
+
+embedding_layer = Embedding(num_words, embedding_dim,input_length=index.max_len,weights=[embedding_weights],trainable=False)#, trainable=True)
 
 x1 = Bidirectional(LSTM(98))(embedding_layer(input_1))
 x2 = Bidirectional(LSTM(98))(embedding_layer(input_2))
@@ -315,15 +322,29 @@ output_lat = Dense(1,activation="sigmoid",name="Output_LAT")(x2)
 model = Model(inputs = [input_1,input_2], outputs = [output_lon,output_lat])#input_3
 
 model.compile(loss=['mean_squared_error','mean_squared_error'], optimizer='adam',metrics={"Output_LON":lon_accuracy(),"Output_LAT":lat_accuracy()})
+
+
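+# Keep the best weights (lowest training loss) in a temporary ".part" file after each epoch;
+# this file is removed once the final model has been saved.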
+checkpoint = ModelCheckpoint(MODEL_OUTPUT_FN + ".part", monitor='loss', verbose=1,
+    save_best_only=True, mode='auto', period=1)
+
+
+#############################################################################################
+################################# TRAINING LAUNCH ###########################################
+#############################################################################################
+
 history = model.fit(x=[X_1_train,X_2_train],
     y=[y_lon_train,y_lat_train],
     verbose=True, batch_size=100,
     epochs=EPOCHS,
-    validation_data=([X_1_test,X_2_test],[y_lon_test,y_lat_test]))
+    validation_data=([X_1_test,X_2_test],[y_lon_test,y_lat_test]),
+    callbacks=[checkpoint])
 
 
 hist_df = pd.DataFrame(history.history)
-hist_df.to_csv("outputs/{0}.csv".format(name))
+hist_df.to_csv("outputs/{0}.csv".format(PREFIX_OUTPUT_FN))
 
-model.save("outputs/"+name+".h5")
+model.save(MODEL_OUTPUT_FN)
 
+# Erase Model Checkpoint file
+if os.path.exists(MODEL_OUTPUT_FN + ".part"):
+    os.remove(MODEL_OUTPUT_FN + ".part")
\ No newline at end of file
diff --git a/documentation/imgs/first_approach.png b/documentation/imgs/first_approach.png
new file mode 100644
index 0000000000000000000000000000000000000000..4b83d1184fc92e154510c934ecea41b4e80455ce
Binary files /dev/null and b/documentation/imgs/first_approach.png differ
diff --git a/documentation/imgs/second_approach.png b/documentation/imgs/second_approach.png
new file mode 100644
index 0000000000000000000000000000000000000000..bdff5964c3796980e518eb0f9aa724bd836e0ca6
Binary files /dev/null and b/documentation/imgs/second_approach.png differ
diff --git a/documentation/imgs/third_approach.png b/documentation/imgs/third_approach.png
new file mode 100644
index 0000000000000000000000000000000000000000..ea8e6aaa02e19084a61e346ebacff25139cc63cb
Binary files /dev/null and b/documentation/imgs/third_approach.png differ
diff --git a/helpers.py b/helpers.py
new file mode 100644
index 0000000000000000000000000000000000000000..0a47034ec466467011042648e9b43a1ccc4a7187
--- /dev/null
+++ b/helpers.py
@@ -0,0 +1,165 @@
+import os
+import time
+import re
+
+import numpy as np
+import pandas as pd
+
+
+def read_geonames(file):
+    """
+    Return a dataframe that contains Geonames data.
+    
+    Parameters
+    ----------
+    file : str
+        path of the Geonames Csv file
+    
+    Returns
+    -------
+    pd.DataFrame
+        geonames data
+    """
+    dtypes_dict = {
+    0: int, # geonameid
+    1: str,  # name
+    2: str,  # asciiname
+    3: str,  # alternatenames
+    4: float, # latitude
+    5: float, # longitude
+    6: str, # feature class
+    7: str, # feature code
+    8: str, # country code
+    9: str, # cc2
+    10: str, # admin1 code
+    11: str, # admin2 code
+    12: str, # admin3 code
+    13: str, # admin4 code
+    14: int, # population
+    15: str, # elevation
+    16: int, # dem (digital elevation model)
+    17: str, # timezone
+    18: str # modification date yyyy-MM-dd
+    }
+    rename_cols = {
+    0:"geonameid", # geonameid
+    1:"name",  # name
+    2:"asciiname",  # asciiname
+    3:"alternatenames",  # alternatenames
+    4:"latitude", # latitude
+    5:"longitude", # longitude
+    6:"feature_class", # feature class
+    7:"feature_code", # feature code
+    8:"country_code", # country code
+    9:"cc2", # cc2
+    10:"admin1_code", # admin1 code
+    11:"admin2_code", # admin2 code
+    12:"admin3_code", # admin3 code
+    13:"admin4_code", # admin4 code
+    14:"population", # population
+    15:"elevation", # elevation
+    16:"dem", # dem (digital elevation model)
+    17:"timezone", # timezone
+    18:"modification_date" # modification date yyyy-MM-dd
+    }
+    data = pd.read_csv(file, sep="\t", header = None, quoting=3,dtype=dtypes_dict,na_values='', keep_default_na=False,error_bad_lines=False)
+    data.rename(columns=rename_cols,inplace=True)
+    return data
+
+
+def parse_title_wiki(title_wiki):
+    """
+    Parse Wikipedia title
+    
+    Parameters
+    ----------
+    title_wiki : str
+        wikipedia title
+    
+    Returns
+    -------
+    str
+        parsed wikipedia title
+    """
+    return re.sub("\(.*\)","",title_wiki).strip().lower()
+
+
+def _split(lst,n,complete_chunk_value):
+    """
+    Split a list into chunk of n-size.
+    
+    Parameters
+    ----------
+    lst : list
+        input list
+    n : int
+        chunk size
+    complete_chunk_value : object
+        if last chunk size not equal to n, this value is used to complete it
+    
+    Returns
+    -------
+    list
+        chunked list
+    """
+    chunks = [lst[i:i + n] for i in range(0, len(lst), n)]
+    if not chunks:return chunks
+    if len(chunks[-1]) != n:
+        chunks[-1].extend([complete_chunk_value]*(n-len(chunks[-1])))
+    return np.array(chunks)
+
+class Chronometer():
+    def __init__(self):
+        self.__task_begin_timestamp = {}
+
+    def start(self,task_name):
+        """
+        Start a new task chronometer
+        
+        Parameters
+        ----------
+        task_name : str
+            task id
+        
+        Raises
+        ------
+        ValueError
+            if a running task already exists with that name
+        """
+        if task_name in self.__task_begin_timestamp:
+            raise ValueError("A running task exists with the name {0}!".format(task_name))
+        self.__task_begin_timestamp[task_name] = time.time()
+
+    def stop(self,task_name):
+        """
+        Stop and return the duration of the task
+        
+        Parameters
+        ----------
+        task_name : str
+            task id
+        
+        Returns
+        -------
+        float
+            duration of the task in seconds
+        
+        Raises
+        ------
+        ValueError
+            if no task exist with the id `task_name`
+        """
+        if not task_name in self.__task_begin_timestamp:
+             raise ValueError("The {0} task does not exist!".format(task_name))
+        duration = time.time() - self.__task_begin_timestamp[task_name]
+        del self.__task_begin_timestamp[task_name]
+        return duration
+
+if __name__ == "__main__":
+    chrono = Chronometer()
+    chrono.start("test")
+    chrono.start("test2")
+    time.sleep(3)
+    print(chrono.stop("test"))
+    time.sleep(3)
+    print(chrono.stop("test2"))
\ No newline at end of file
diff --git a/lib/__init__.py b/lib/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/lib/geo.py b/lib/geo.py
new file mode 100644
index 0000000000000000000000000000000000000000..c91d2d4f3062fd72f440b6b3f308d6b2827852fd
--- /dev/null
+++ b/lib/geo.py
@@ -0,0 +1,335 @@
+
+import geopandas as gpd
+import numpy as np
+import pandas as pd
+
+from shapely.geometry import Point,box 
+
+from tqdm import tqdm
+
+
+from joblib import Parallel,delayed
+
+
+
+def haversine_pd(lon1, lat1, lon2, lat2):
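+    """
+    Vectorised haversine (great-circle) distance in kilometres between points
+    given in decimal degrees (works element-wise on numpy/pandas inputs).
+    """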
+    lon1, lat1, lon2, lat2 = map(np.radians, [lon1, lat1, lon2, lat2])
+    dlon = lon2 - lon1
+    dlat = lat2 - lat1
+    a = np.sin(dlat/2.0)**2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon/2.0)**2
+    
+    return 6367 * 2 * np.arcsin(np.sqrt(a))
+
+
+def get_adjacent(ids,lon1, lat1, lon2, lat2,threshold):
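+    # Ids of the entries (lon1/lat1 series) lying within `threshold` km of the point (lon2, lat2).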
+    dist_ = haversine_pd(lon1, lat1, lon2, lat2)
+    return ids[dist_<threshold]
+
+def get_geonames_adjacency(geoname_data,threshold):
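+    # For every geonames entry, compute in parallel the ids of all entries closer than `threshold` km.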
+    return Parallel(n_jobs=-1,backend="multiprocessing")(delayed(get_adjacent)(geoname_data.geonameid.values,
+    geoname_data.longitude,
+    geoname_data.latitude,
+    row.longitude,
+    row.latitude,
+    threshold) for ix,row in tqdm(geoname_data.iterrows(),total=len(geoname_data)))
+
+
+def generate_couple(object_list):
+    """
+    Return a list of randomly selected couples from an object list.
+    
+    Parameters
+    ----------
+    object_list : list
+        object list
+    
+    Returns
+    -------
+    list
+        list of coupled objects
+    """
+    couples = []
+    lst = np.arange(len(object_list))
+    for _ in range(len(object_list)):
+        if len(lst) == 1:
+            break
+        idx = np.random.choice(np.arange(len(lst)))
+        idx2 = np.random.choice(np.arange(len(lst)))
+        while idx2 == idx:
+            idx2 = np.random.choice(np.arange(len(lst)))
+        couples.append([object_list[lst[idx]],object_list[lst[idx2]]])
+        lst = np.delete(lst,idx)
+    return couples
+
+def _hash_couple(o1,o2):
+    """
+    Return a hash for two object ids.
+    
+    Parameters
+    ----------
+    o1 : str or int
+        id of the first object
+    o2 : str or int
+        id of the second object
+    
+    Returns
+    -------
+    str
+        hash
+    """
+    return "|".join(map(str,sorted([int(o1),int(o2)])))
+
+
+
+def zero_one_encoding(long,lat):
+    """
+    Encode coordinates (WGS84) between 0 and 1
+    
+    Parameters
+    ----------
+    long : float
+        longitude value
+    lat : float
+        latitude value
+    
+    Returns
+    -------
+    float,float
+        longitude, latitude
+    """
+    return ((long + 180.0 ) / 360.0), ((lat + 90.0 ) / 180.0) 
+
+class Cell(object):
+    """
+    A cell is a box placed in geographical space.
+    """
+    def __init__(self,upperleft_x,upperleft_y,bottomright_x,bottomright_y,x,y):
+        """
+        Constructor
+        
+        Parameters
+        ----------
+        upperleft_x : float
+            upperleft longitude
+        upperleft_y : float
+            upperleft latitude
+        bottomright_x : float
+            bottom right longitude
+        bottomright_y : float
+            bottom right latitude
+        x : int
+            cell x coordinates in the grid
+        y : int
+            cell y coordinates in the grid
+        """
+        self.upperleft_x,self.upperleft_y,self.bottomright_x,self.bottomright_y = upperleft_x,upperleft_y,bottomright_x,bottomright_y
+        self.box_ = box(self.upperleft_x,self.upperleft_y,self.bottomright_x,self.bottomright_y)
+        self.list_object={} # {id:Point(coord)}
+
+        self.x,self.y = x, y
+
+    def contains(self,lat,lon):
+        """
+        Return true if the cell contains a point at given coordinates
+        
+        Parameters
+        ----------
+        lat : float
+            latitude
+        lon : float
+            longitude
+        
+        Returns
+        -------
+        bool
+            true if contains
+        """ 
+        x,y = lon,lat
+        if x < self.upperleft_x or x > self.bottomright_x:
+            return False
+        if y < self.upperleft_y or y > self.bottomright_y:
+            return False
+        return True
+    
+    def add_object(self,id_,lat,lon):
+        """
+        Connect an object to the cell
+        
+        Parameters
+        ----------
+        id_ : int
+            id
+        lat : float
+            latitude
+        lon : float
+            longitude
+        """
+        self.list_object[id_] = Point(lon,lat)
+            
+    def __repr__(self):
+        return  "upperleft:{0}_{1}_;bottom_right:{2}_{3}".format(self.upperleft_x,self.upperleft_y,self.bottomright_x,self.bottomright_y)
+    
+        
+class Grid(object):
+    """
+    Define a grid 
+    
+    """
+    def __init__(self,upperleft_x,upperleft_y,bottomright_x,bottomright_y,cell_sub_div_index=[100,50]):
+        """
+        Constructor
+        
+        Parameters
+        ----------
+        upperleft_x : float
+            upperleft longitude
+        upperleft_y : float
+            upperleft latitude
+        bottomright_x : float
+            bottom right longitude
+        bottomright_y : float
+            bottom right latitude
+        cell_sub_div_index : list, optional
+            number of division in both latitude and longitude axis (longitude first), by default [100,50]
+        """
+        self.upperleft_x,self.upperleft_y,self.bottomright_x,self.bottomright_y = upperleft_x,upperleft_y,bottomright_x,bottomright_y
+        
+        self.x_r = abs(self.bottomright_x - self.upperleft_x)/cell_sub_div_index[0]
+        self.y_r = abs(self.upperleft_y - self.bottomright_y )/cell_sub_div_index[1]
+        
+        self.c_x_r = self.x_r/cell_sub_div_index[0] # Redivide
+        self.c_y_r = self.y_r/cell_sub_div_index[1]
+        
+        self.cells = []
+        self.inter_cells = []
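+        # Overlapping "inter" cells, shifted by a fraction of a cell in x/y, used alongside the
+        # regular cells so that toponyms near a cell border can still end up in a common cell
+        # when sampling adjacency couples.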
+        for i in range(cell_sub_div_index[1]):
+            self.cells.append([])
+            for j in range(cell_sub_div_index[0]):
+                self.cells[-1].append(Cell(
+                    self.upperleft_x+j*self.x_r,
+                    self.upperleft_y+i*self.y_r,
+                    self.upperleft_x+((j+1)*self.x_r),
+                    self.upperleft_y+((i+1)*self.y_r),
+                    j,i)
+                )
+        dec_y = 0 
+        for i in range(cell_sub_div_index[1]):
+            self.inter_cells.append([])
+            dec_x = 0 
+            for j in range(cell_sub_div_index[0]):                 
+                self.inter_cells[-1].append(Cell(
+                    self.upperleft_x+(j*self.x_r)-self.c_x_r, # TOP
+                    self.upperleft_y+(i*self.y_r)-dec_y,
+                    self.upperleft_x+((j+1)*self.x_r)-self.c_x_r,#(self.u_pos*self.c_x_r),
+                    self.upperleft_y+((i+1)*self.y_r)+self.c_y_r,
+                    j,i)
+                )
+                self.inter_cells[-1].append(Cell(
+                    self.upperleft_x+(j*self.x_r)-self.c_x_r, # CENTER
+                    self.upperleft_y+(i*self.y_r)-self.c_y_r,
+                    self.upperleft_x+((j+1)*self.x_r)+self.c_x_r,
+                    self.upperleft_y+((i+1)*self.y_r)+self.c_y_r,
+                    j,i)
+                )
+                self.inter_cells[-1].append(Cell(
+                    self.upperleft_x+(j*self.x_r)+dec_x, # CENTER
+                    self.upperleft_y+(i*self.y_r)-self.c_y_r,
+                    self.upperleft_x+((j+1)*self.x_r)-self.c_x_r, #LEFT
+                    self.upperleft_y+((i+1)*self.y_r)+self.c_y_r,
+                    j,i)
+                )
+                dec_x = self.c_x_r
+            dec_y = self.c_y_r
+    
+    def fit_data(self,data = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))):
+        """
+        
+        To avoid unnecessary check when connecting an entity to one or multiple cells, we 
+        filter cells that does not appears in our geographic context (here countries surface).
+        
+        Parameters
+        ----------
+        data : GeoDataFrame
+            geographic context
+        """
+        world = data 
+        world["nn"] = 1
+        dissolved = world.dissolve(by="nn").iloc[0].geometry
+        new_cells= []
+        new_inter_cells=[]
+        for i in tqdm(range(len(self.cells))):
+            for j in range(len(self.cells[i])):
+                if dissolved.intersects(self.cells[i][j].box_):
+                    new_cells.append(self.cells[i][j])
+                    new_inter_cells.extend(self.inter_cells[i][j*3:(j+1)*3])
+                    
+        self.cells=new_cells
+        self.inter_cells = new_inter_cells
+        
+                    
+    def __add__(self,a): 
+        """
+        Add an object to the grid
+        
+        Parameters
+        ----------
+        a : tuple
+            (id, latitude, longitude)
+        """
+        for c1 in range(len(self.cells)):
+            if self.cells[c1].contains(a[1],a[2]):
+                self.cells[c1].add_object(*a)
+                
+        for c1 in range(len(self.inter_cells)):
+            if self.inter_cells[c1].contains(a[1],a[2]):
+                self.inter_cells[c1].add_object(*a)
+                
+    def get_adjacent_relationships(self,random_iteration=10):
+        """
+        Return a list of adjacent relationships founds in each cell.
+        
+        Parameters
+        ----------
+        random_iteration : int, optional
+            number of iteration for random selection of adjacency relationships, by default 10
+        
+        Returns
+        -------
+        list
+            adjacency relationships
+        """
+        relationships = set([])
+        for c1 in tqdm(range(len(self.cells))):
+            for _ in range(random_iteration):
+                for t in generate_couple(list(self.cells[c1].list_object.keys())):
+                    relationships.add(_hash_couple(t[0],t[1]))
+
+        for c1 in tqdm(range(len(self.inter_cells))):
+            for _ in range(random_iteration):
+                for t in generate_couple(list(self.inter_cells[c1].list_object.keys())):
+                    relationships.add(_hash_couple(t[0],t[1]))
+        return relationships
+    
+
+
+def get_adjacency_rels(geodataframe,bounds,subdiv_tuple,random_iter_adjacency):
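+    # Register every toponym of `geodataframe` in a Grid built over `bounds`, then sample
+    # adjacency couples cell by cell (see Grid.get_adjacent_relationships).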
+    g = Grid(*bounds,subdiv_tuple)
+    g.fit_data()
+    [g+(int(row.geonameid),row.latitude,row.longitude) for ix,row in tqdm(geodataframe["geonameid longitude latitude".split()].iterrows(),total=len(geodataframe))]
+    return [[int(i) for i in r.split("|")] for r in g.get_adjacent_relationships(random_iter_adjacency)]
+
+def get_geonames_inclusion_rel(geonames_data,geonames_hierarchy_data_fn):
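+    # Keep the (childId, parentId) couples of the hierarchy file whose both ends belong to `geonames_data`.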
+    geonames_hierarchy_data = pd.read_csv(geonames_hierarchy_data_fn,sep="\t",header=None,names="parentId,childId,type".split(",")).fillna("")
+    geonamesIDS = set(geonames_data.geonameid.values)
+    filter_mask = (geonames_hierarchy_data.childId.isin(geonamesIDS) & geonames_hierarchy_data.parentId.isin(geonamesIDS))
+    return (geonames_hierarchy_data[filter_mask]["childId parentId".split()].values.tolist())
+
+def get_bounds(geodataframe):
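+    # Bounding box (minx, miny, maxx, maxy) of all the coordinates in `geodataframe`.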
+    geodataframe["geometry"] = geodataframe["longitude latitude".split()].apply(lambda x: Point(x.longitude,x.latitude),axis=1)
+    geodataframe = gpd.GeoDataFrame(geodataframe)
+    geodataframe["i"]=1
+    return geodataframe.dissolve("i").bounds.values[0] # Required to get adjacency relationships
diff --git a/metrics.py b/lib/metrics.py
similarity index 100%
rename from metrics.py
rename to lib/metrics.py
diff --git a/lib/ngram_index.py b/lib/ngram_index.py
new file mode 100644
index 0000000000000000000000000000000000000000..4d6d3fdd64ee9148dc38f976a78ff0258bcd53f4
--- /dev/null
+++ b/lib/ngram_index.py
@@ -0,0 +1,178 @@
+import json
+
+import numpy as np
+
+from ngram import NGram
+
+# Machine learning 
+from gensim.models import Word2Vec
+
+class NgramIndex():
+    """
+    Class used for encoding words in ngram representation
+    """
+    def __init__(self,n):
+        """
+        Constructor
+        
+        Parameters
+        ----------
+        n : int
+            ngram size
+        """
+        self.ngram_gen = NGram(N=n)
+
+        self.size = n
+        self.ngram_index = {"":0}
+        self.index_ngram = {0:""}
+        self.cpt = 0
+        self.max_len = 0
+
+    def split_and_add(self,word):
+        """
+        Split a word into multiple ngrams and add each of them to the index
+        
+        Parameters
+        ----------
+        word : str
+            a word
+        """
+        ngrams = word.lower().replace(" ","$")
+        ngrams = list(self.ngram_gen.split(ngrams))
+        [self.add(ngram) for ngram in ngrams]
+        self.max_len = max(self.max_len,len(ngrams))
+
+    def add(self,ngram):
+        """
+        Add a ngram to the index
+        
+        Parameters
+        ----------
+        ngram : str
+            ngram
+        """
+        if not ngram in self.ngram_index:
+            self.cpt+=1
+            self.ngram_index[ngram]=self.cpt
+            self.index_ngram[self.cpt]=ngram
+        
+
+    def encode(self,word):
+        """
+        Return a ngram representation of a word
+        
+        Parameters
+        ----------
+        word : str
+            a word
+        
+        Returns
+        -------
+        list of int
+            list of ngram index
+        """
+        ngrams = word.lower().replace(" ","$")
+        ngrams = list(self.ngram_gen.split(ngrams))
+        [self.add(ng) for ng in ngrams if not ng in self.ngram_index]
+        return self.complete([self.ngram_index[ng] for ng in ngrams],self.max_len)
+
+    def complete(self,ngram_encoding,MAX_LEN,filling_item=0):
+        """
+        Pad an ngram-encoded word with void ngrams up to the desired length. This is necessary for the neural network (fixed-size inputs).
+        
+        Parameters
+        ----------
+        ngram_encoding : list of int
+            first encoding of a word
+        MAX_LEN : int
+            desired length of the encoding
+        filling_item : int, optional
+            ngram index you wish to use, by default 0
+        
+        Returns
+        -------
+        list of int
+            list of ngram index
+        """
+        assert len(ngram_encoding) <= MAX_LEN
+        diff = MAX_LEN - len(ngram_encoding)
+        ngram_encoding.extend([filling_item]*diff)  
+        return ngram_encoding
+    
+    def get_embedding_layer(self,texts,dim=100,**kwargs):
+        """
+        Return an embedding matrix for each ngram, learned with a gensim Word2Vec model trained on the encoded texts.
+        
+        Parameters
+        ----------
+        texts : list of [list of int]
+            list of encoded word
+        dim : int, optional
+            embedding dimension, by default 100
+        
+        Returns
+        -------
+        np.array
+            embedding matrix
+        """
+        model = Word2Vec([[str(w) for w in t] for t in texts], size=dim,window=5, min_count=1, workers=4,**kwargs)
+        N = len(self.ngram_index)
+        embedding_matrix = np.zeros((N,dim))
+        for i in range(N):
+            embedding_matrix[i] = model.wv[str(i)]
+        return embedding_matrix
+
+    def save(self,fn):
+        """
+
+        Save the NgramIndex
+        
+        Parameters
+        ----------
+        fn : str
+            output filename
+        """
+        data = {
+            "ngram_size": self.size,
+            "ngram_index": self.ngram_index,
+            "cpt_state": self.cpt,
+            "max_len_state": self.max_len
+        }
+        json.dump(data,open(fn,'w'))
+
+    @staticmethod
+    def load(fn):
+        """
+        
+        Load a NgramIndex state from a file.
+        
+        Parameters
+        ----------
+        fn : str
+            input filename
+        
+        Returns
+        -------
+        NgramIndex
+            ngram index
+        
+        Raises
+        ------
+        KeyError
+            raised if a required field does not appear in the input file
+        """
+        try:
+            data = json.load(open(fn))
+        except json.JSONDecodeError:
+            print("Data file must be a JSON")
+            raise
+        for key in ["ngram_size","ngram_index","cpt_state","max_len_state"]:
+            if not key in data:
+                raise KeyError("{0} field cannot be found in given file".format(key))
+        new_obj = NgramIndex(data["ngram_size"])
+        new_obj.ngram_index = data["ngram_index"]
+        new_obj.index_ngram = {v:k for k,v in new_obj.ngram_index.items()}
+        new_obj.cpt = data["cpt_state"]
+        new_obj.max_len = data["max_len_state"]
+        return new_obj
+
diff --git a/lib/utils.py b/lib/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..57326d86efe9bc26da23fcbc34047a2d66ed3a6d
--- /dev/null
+++ b/lib/utils.py
@@ -0,0 +1,80 @@
+# Basic import 
+import math
+import argparse
+import os
+import json
+
+# Data Structure
+import numpy as np
+import geopandas as gpd
+from shapely.geometry import Point,box
+
+# NLP 
+from nltk.tokenize import word_tokenize
+from ngram import NGram
+
+# Visualisation and parallelisation
+from tqdm import tqdm
+
+
+class TokenizerCustom():
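+    """
+    Tokenizer built from a fixed vocabulary list: `texts_to_sequences` maps each word
+    (tokenised with nltk `word_tokenize`) to its index, skipping out-of-vocabulary words.
+    """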
+    def __init__(self,vocab):
+        self.word_index = {vocab[i]:i for i in range(len(vocab))}
+        self.index_word = {i:vocab[i] for i in range(len(vocab))}
+        self.N = len(self.index_word)
+    def texts_to_sequences(self,listText):
+        seqs = []
+        for text in listText:
+            seqs.append([self.word_index[word] for word in word_tokenize(text) if word in self.word_index])
+        return seqs
+
+
+class ConfigurationReader(object):
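+    """
+    Build an argparse.ArgumentParser from a JSON configuration file listing the expected
+    arguments (see the files in parser_config/ for the expected format).
+    """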
+    def __init__(self,configuration_file):
+        if not os.path.exists(configuration_file):
+            raise FileNotFoundError("'{0}' file could not be found!".format(configuration_file))
+
+        self.configuration = json.load(open(configuration_file))
+
+        self.__argparser_desc = ("" if not "description" in self.configuration else self.configuration["description"])
+        self.parser = argparse.ArgumentParser(description=self.__argparser_desc)
+
+        self.parse_conf()
+    
+    def parse_conf(self):
+        if not "args" in self.configuration:
+            raise argparse.ArgumentError("","No args given in the configuration file")
+        
+        for dict_args in self.configuration["args"]:
+            if not isinstance(dict_args,dict):
+                raise ValueError("Args must be dictionnary")
+
+            short_command = dict_args.get("short",None)
+            long_command = dict_args.get("long",None)
+            
+            if not short_command and not long_command:
+                raise ValueError("No command name was given !") 
+            
+            add_func_dict_= {}
+            if "help" in dict_args:
+                add_func_dict_["help"]= dict_args["help"]
+            if "default" in dict_args:
+                add_func_dict_["default"]= dict_args["default"]
+            if "action" in dict_args:
+                add_func_dict_["action"]= dict_args["action"]
+            if "type" in dict_args:
+                add_func_dict_["type"]= eval(dict_args["type"])
+            if "choices" in dict_args:
+                add_func_dict_["choices"]= dict_args["choices"]
+
+            if not (short_command and long_command):
+                command = (short_command if not long_command else long_command)
+                self.parser.add_argument(command,**add_func_dict_)
+
+            elif long_command and short_command:
+                self.parser.add_argument(short_command,long_command,**add_func_dict_)
+    
+    def parse_args(self,input_=None):
+        if not input_:
+            return self.parser.parse_args()
+        return self.parser.parse_args(input_)
diff --git a/parser_config/embeddings_lat_lon.json b/parser_config/embeddings_lat_lon.json
deleted file mode 100644
index 1a0c774c47b9a6294bf3f54936c79773fc7027a9..0000000000000000000000000000000000000000
--- a/parser_config/embeddings_lat_lon.json
+++ /dev/null
@@ -1,12 +0,0 @@
-{
-    "description": "Toponym Combination",
-    "args": [
-        { "short": "input", "help": "Corpus used to learn the embeddings" },
-        { "short": "-g", "long": "--glove__dir", "default": "data/glove" },
-        {"long": "--max_sequence_length", "type":"int","default":15},
-        {"long": "--max_num_words", "type":"int","default":400000},
-        {"long": "--embedding_dimension", "type":"int","default":100},
-        {"long": "--batch_size", "type":"int","default":100},
-        { "short": "-e", "long": "--epochs", "type": "int", "default": 100 }
-    ]
-}
\ No newline at end of file
diff --git a/parser_config/toponym_combination_embedding.json b/parser_config/toponym_combination_embedding.json
index a2fd9f120b3e791f17948eba7d02b8e2a34116e3..7f57c885d5149a24db6e7830d9d8fef249f05227 100644
--- a/parser_config/toponym_combination_embedding.json
+++ b/parser_config/toponym_combination_embedding.json
@@ -7,8 +7,10 @@
         { "short": "-i", "long": "--inclusion", "action": "store_true" },
         { "short": "-a", "long": "--adjacency", "action": "store_true" },
         { "short": "-w", "long": "--wikipedia-cooc", "action": "store_true" },
+        { "long": "--cooc-sample-size", "type": "int", "default": 3 },
         {"long": "--adjacency-iteration", "type":"int","default":1},
         { "short": "-n", "long": "--ngram-size", "type": "int", "default": 2 },
+        { "long": "--ngram-word2vec-dim", "type": "int", "default": 50 },
         { "short": "-t", "long": "--tolerance-value", "type": "float", "default": 0.002 },
         { "short": "-e", "long": "--epochs", "type": "int", "default": 100 },
         { "short": "-d", "long": "--dimension", "type": "int", "default": 256 },
diff --git a/predict_toponym_coordinates.py b/predict_toponym_coordinates.py
index 5dcdb7f81a8fbc28826131b5d1680f3647bf6e68..1cf9221ada921077953f8b689fd75bae790b07ce 100644
--- a/predict_toponym_coordinates.py
+++ b/predict_toponym_coordinates.py
@@ -2,6 +2,7 @@ from keras.models import load_model
 import tensorflow as tf
 import keras.backend as K
-from utils import NgramIndex
+from lib.ngram_index import NgramIndex
+import numpy as np
 
 from tensorflow.python.keras.backend import set_session
 from tensorflow.python.keras.models import load_model
@@ -9,7 +10,41 @@ from tensorflow.python.keras.models import load_model
 sess = None
 graph = None
 
-from metrics import lat_accuracy,lon_accuracy
+def lat_accuracy(LAT_TOL =1/180.):
+    def accuracy_at_k_lat(y_true, y_pred):
+        """
+        Metric used to measure the accuracy of the coordinate prediction. In contrast to the
+        usual accuracy metric, a tolerance threshold is added, since it is (almost) impossible
+        for the neural network to predict the exact coordinate.
+
+        Parameters
+        ----------
+        y_true : tf.Tensor
+            truth data
+        y_pred : tf.Tensor
+            predicted output
+        """
+        diff = tf.abs(y_true - y_pred)
+        fit = tf.dtypes.cast(tf.less(diff,LAT_TOL),tf.int64)
+        return tf.reduce_sum(fit)/tf.size(y_pred,out_type=tf.dtypes.int64)
+    return accuracy_at_k_lat
+
+def lon_accuracy(LON_TOL=1/360.):
+    def accuracy_at_k_lon(y_true, y_pred):
+        """
+        Metric used to measure the accuracy of the coordinate prediction. In contrast to the
+        usual accuracy metric, a tolerance threshold is added, since it is (almost) impossible
+        for the neural network to predict the exact coordinate.
+
+        Parameters
+        ----------
+        y_true : tf.Tensor
+            truth data
+        y_pred : tf.Tensor
+            predicted output
+        """
+        diff = tf.abs(y_true - y_pred)
+        fit = tf.dtypes.cast(tf.less(diff,LON_TOL),tf.int64)
+        return tf.reduce_sum(fit)/tf.size(y_pred,out_type=tf.dtypes.int64)
+    return accuracy_at_k_lon
 
 class Geocoder(object):
     """
@@ -21,12 +56,12 @@ class Geocoder(object):
     if you want an interactive map using leafletJS, set to True the `interactive_map` parameter of `Geocoder.plot_coord()`
     """
     def __init__(self,keras_model_fn,ngram_index_file):
-        global sess
-        global graph
-        sess = tf.compat.v1.Session()
-        graph = tf.compat.v1.get_default_graph()
-        set_session(sess)
-        self.keras_model = load_model(keras_model_fn,custom_objects={"lat_accuracy":lat_accuracy,"lon_accuracy":lon_accuracy})
+        # global sess
+        # global graph
+        # sess = tf.compat.v1.Session()
+        # graph = tf.compat.v1.get_default_graph()
+        # set_session(sess)
+        self.keras_model = load_model(keras_model_fn,custom_objects={"accuracy_at_k_lat":lat_accuracy(),"accuracy_at_k_lon":lon_accuracy()})
         self.ngram_encoder = NgramIndex.load(ngram_index_file)
 
     def get_coord(self,toponym,context_toponym):
@@ -34,9 +69,11 @@ class Geocoder(object):
         global graph
         p = self.ngram_encoder.complete(self.ngram_encoder.encode(toponym),self.ngram_encoder.max_len)
         c = self.ngram_encoder.complete(self.ngram_encoder.encode(context_toponym),self.ngram_encoder.max_len)
-        with sess.as_default():
-            with graph.as_default():
-                lon,lat = self.keras_model.predict([[p],[c]])
+        p = np.array(p)
+        c = np.array(c)       
+        # with sess.as_default():
+        #     with graph.as_default():
+        lon,lat = self.keras_model.predict([[p],[c]])
         return lon[0][0],lat[0][0]
 
     def wgs_coord(self,lon,lat):
@@ -61,13 +98,19 @@ class Geocoder(object):
             ax.plot(lon,lat,marker='o', color='red', markersize=5)
             plt.show()
 
+geocoder = Geocoder("outputs/LSTM_FR.txt_100_4_0.002_None_A_I_C.h5","./outputs/FR.txt_100_4_0.002_None_A_I_C_index")
+top,topc = "Paris","Cherbourg"
+lon,lat = geocoder.get_coord(top,topc)
+lon,lat = geocoder.wgs_coord(lon,lat)
+geocoder.plot_coord("{0},{1}".format(top,topc),lat,lon)
+
 if __name__ == "__main__":
     from flask import Flask, escape, request, render_template
 
     app = Flask(__name__)
 
 
-    geocoder = Geocoder("outputs/LSTM_FR.txt_20_4_0.002_None_A_I_C.h5","outputs/index_4gram_FR_backup.txt")
+    geocoder = Geocoder("outputs/LSTM_FR.txt_100_4_0.002_None_A_I_C.h5","./outputs/FR.txt_100_4_0.002_None_A_I_C_index")
 
     @app.route('/',methods=["GET"])
     def display():
diff --git a/train_test_split_cooccurrence_data.py b/train_test_split_cooccurrence_data.py
index 4748f3edf1813f2dcebe90f5febc68a04490127b..a5366c5839f46be6505d53918121e61b8b0a53c7 100644
--- a/train_test_split_cooccurrence_data.py
+++ b/train_test_split_cooccurrence_data.py
@@ -13,7 +13,7 @@ logging.basicConfig(
 from sklearn.model_selection import train_test_split
 from shapely.geometry import Point
 
-from utils import Grid
+from lib.geo import Grid
 
 from tqdm import tqdm 
 
diff --git a/train_test_split_geonames.py b/train_test_split_geonames.py
index ff87967ed111a34283b9ef6fd0623b9eb953e59b..585d26722b3fbe52bd33c564dfa4c187281f48cd 100644
--- a/train_test_split_geonames.py
+++ b/train_test_split_geonames.py
@@ -14,7 +14,7 @@ logging.basicConfig(
 from sklearn.model_selection import train_test_split
 from shapely.geometry import Point
 
-from utils import Grid
+from lib.geo import Grid
 from helpers import read_geonames
 
 from tqdm import tqdm 
diff --git a/utils.py b/utils.py
deleted file mode 100644
index db250b77474f8e1a135a373b76461dad485f88c1..0000000000000000000000000000000000000000
--- a/utils.py
+++ /dev/null
@@ -1,614 +0,0 @@
-# Basic import 
-import math
-import argparse
-import os
-import json
-
-# Data Structure
-import numpy as np
-import geopandas as gpd
-from shapely.geometry import Point,box
-
-# NLP 
-from nltk.tokenize import word_tokenize
-from ngram import NGram
-
-# Machine learning 
-from gensim.models import Word2Vec
-
-# Visualisation and parallelisation
-from tqdm import tqdm
-
-
-class TokenizerCustom():
-    def __init__(self,vocab):
-        self.word_index = {vocab[i]:i for i in range(len(vocab))}
-        self.index_word = {i:vocab[i] for i in range(len(vocab))}
-        self.N = len(self.index_word)
-    def texts_to_sequences(self,listText):
-        seqs = []
-        for text in listText:
-            seqs.append([self.word_index[word] for word in word_tokenize(text) if word in self.word_index])
-        return seqs
-
-
-class CoordinatesEncoder:
-    """
-    Will be replaced by Grid in grid2.py
-    """
-    def __init__(self, cell_size_lat=0.5, cell_size_lon=0.5):
-        self.min_lon = -180
-        self.max_lon = -(self.min_lon)  #  Symetric
-        self.min_lat = -90
-        self.max_lat = -(self.min_lat)  # Symetric
-
-        self.ecart_lat = self.max_lat - self.min_lat
-        self.ecart_lon = self.max_lon - self.min_lon
-
-        self.cell_size_lat = cell_size_lat
-        self.cell_size_lon = cell_size_lon
-
-        self.unit_size_lat = self.ecart_lat / self.cell_size_lat
-        self.unit_size_lon = self.ecart_lon / self.cell_size_lon
-
-    def encode(self, lat, lon):
-        return (
-            math.floor(((lat + self.max_lat) / self.ecart_lat) * self.unit_size_lat),
-            math.floor(((lon + self.max_lon) / self.ecart_lon) * (self.unit_size_lon))
-        )
-
-    def number_lat_cell(self):
-        return int(self.unit_size_lat)
-
-    def number_lon_cell(self):
-        return int(self.unit_size_lon)
-
-    def oneDimensionOutputSize(self):
-        return self.number_lat_cell() * self.number_lon_cell()
-
-    def vector(self, lat, lon):
-        lat_v, lon_v = np.zeros(self.number_lat_cell()), np.zeros(self.number_lon_cell())
-        new_coords = self.encode(lat, lon)
-        lat_v[int(new_coords[0])] = 1
-        lon_v[int(new_coords[1])] = 1
-        return lat_v, lon_v
-
-    def vector_flatten(self, lat, lon):
-        vec = np.zeros(self.oneDimensionOutputSize())  # 2D Dense softmax isn't possible
-        new_coords = self.encode(lat, lon)
-        pos = self.number_lat_cell() * (new_coords[0]) + new_coords[1]
-        vec[pos] = 1  # lon * lon size
-        return vec
-
-
-class NgramIndex():
-    """
-    Class used for encoding words in ngram representation
-    """
-    def __init__(self,n):
-        """
-        Constructor
-        
-        Parameters
-        ----------
-        n : int
-            ngram size
-        """
-        self.ngram_gen = NGram(N=n)
-
-        self.size = n
-        self.ngram_index = {"":0}
-        self.index_ngram = {0:""}
-        self.cpt = 0
-        self.max_len = 0
-
-    def split_and_add(self,word):
-        """
-        Split word in multiple ngram and add each one of them to the index
-        
-        Parameters
-        ----------
-        word : str
-            a word
-        """
-        ngrams = word.lower().replace(" ","$")
-        ngrams = list(self.ngram_gen.split(ngrams))
-        [self.add(ngram) for ngram in ngrams]
-
-    def add(self,ngram):
-        """
-        Add a ngram to the index
-        
-        Parameters
-        ----------
-        ngram : str
-            ngram
-        """
-        if not ngram in self.ngram_index:
-            self.cpt+=1
-            self.ngram_index[ngram]=self.cpt
-            self.index_ngram[self.cpt]=ngram
-
-    def encode(self,word):
-        """
-        Return a ngram representation of a word
-        
-        Parameters
-        ----------
-        word : str
-            a word
-        
-        Returns
-        -------
-        list of int
-            listfrom shapely.geometry import Point,box
- of ngram index
-        """
-        ngrams = word.lower().replace(" ","$")
-        ngrams = list(self.ngram_gen.split(ngrams))
-        [self.add(ng) for ng in ngrams if not ng in self.ngram_index]
-        return [self.ngram_index[ng] for ng in ngrams]
-
-    def complete(self,ngram_encoding,MAX_LEN,filling_item=0):
-        """
-        Complete a ngram encoded version of word with void ngram. It's necessary for neural network.
-        
-        Parameters
-        ----------
-        ngram_encoding : list of int
-            first encoding of a word
-        MAX_LEN : int
-            desired length of the encoding
-        filling_item : int, optional
-            ngram index you wish to use, by default 0
-        
-        Returns
-        -------
-        list of int
-            list of ngram index
-        """
-        assert len(ngram_encoding) <= MAX_LEN
-        diff = MAX_LEN - len(ngram_encoding)
-        ngram_encoding.extend([filling_item]*diff)  
-        return ngram_encoding
-    
-    def get_embedding_layer(self,texts,dim=100,**kwargs):
-        """
-        Return an embedding matrix for each ngram using encoded texts. Using gensim.Word2vec model.
-        
-        Parameters
-        ----------
-        texts : list of [list of int]
-            list of encoded word
-        dim : int, optional
-            embedding dimension, by default 100
-        
-        Returns
-        -------
-        np.array
-            embedding matrix
-        """
-        model = Word2Vec([[str(w) for w in t] for t in texts], size=dim,window=5, min_count=1, workers=4,**kwargs)
-        N = len(self.ngram_index)
-        embedding_matrix = np.zeros((N,dim))
-        for i in range(N):
-            embedding_matrix[i] = model.wv[str(i)]
-        return embedding_matrix
-
-    def save(self,fn):
-        """
-
-        Save the NgramIndex
-        
-        Parameters
-        ----------
-        fn : str
-            output filename
-        """
-        data = {
-            "ngram_size": self.size,
-            "ngram_index": self.ngram_index,
-            "cpt_state": self.cpt,
-            "max_len_state": self.max_len
-        }
-        json.dump(data,open(fn,'w'))
-
-    @staticmethod
-    def load(fn):
-        """
-        
-        Load a NgramIndex state from a file.
-        
-        Parameters
-        ----------
-        fn : str
-            input filename
-        
-        Returns
-        -------
-        NgramIndex
-            ngram index
-        
-        Raises
-        ------
-        KeyError
-            raised if a required field does not appear in the input file
-        """
-        try:
-            data = json.load(open(fn))
-        except json.JSONDecodeError:
-            print("Data file must be a JSON")
-        for key in ["ngram_size","ngram_index","cpt_state","max_len_state"]:
-            if not key in data:
-                raise KeyError("{0} field cannot be found in given file".format(key))
-        new_obj = NgramIndex(data["ngram_size"])
-        new_obj.ngram_index = data["ngram_index"]
-        new_obj.index_ngram = {v:k for k,v in new_obj.ngram_index.items()}
-        new_obj.cpt = data["cpt_state"]
-        new_obj.max_len = data["max_len_state"]
-        return new_obj
-
-
-def zero_one_encoding(long,lat):
-    """
-    Encode coordinates (WGS84) between 0 and 1
-    
-    Parameters
-    ----------
-    long : float
-        longitude value
-    lat : float
-        latitude value
-    
-    Returns
-    -------
-    float,float
-        longitude, latitude
-    """
-    return ((long + 180.0 ) / 360.0), ((lat + 90.0 ) / 180.0) 
-
-def _split(lst,n,complete_chunk_value):
-    """
-    Split a list into chunk of n-size.
-    
-    Parameters
-    ----------
-    lst : list
-        input list
-    n : int
-        chunk size
-    complete_chunk_value : object
-        if last chunk size not equal to n, this value is used to complete it
-    
-    Returns
-    -------
-    list
-        chunked list
-    """
-    chunks = [lst[i:i + n] for i in range(0, len(lst), n)]
-    if not chunks: return np.array(chunks)
-    if len(chunks[-1]) != n:
-        chunks[-1].extend([complete_chunk_value]*(n-len(chunks[-1])))
-    return np.array(chunks)
-
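-# Example: _split([1, 2, 3, 4, 5], 2, 0) returns array([[1, 2], [3, 4], [5, 0]]).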
-def generate_couple(object_list):
-    """
-    Return a list of randomly selected couples (pairs) drawn from an object list.
-    
-    Parameters
-    ----------
-    object_list : list
-        object list
-    
-    Returns
-    -------
-    list
-        list of [object, object] couples
-    """
-    couples = []
-    lst = np.arange(len(object_list))
-    for _ in range(len(object_list)):
-        if len(lst) == 1:
-            break
-        idx = np.random.choice(np.arange(len(lst)))
-        idx2 = np.random.choice(np.arange(len(lst)))
-        while idx2 == idx:
-            idx2 = np.random.choice(np.arange(len(lst)))
-        couples.append([object_list[lst[idx]],object_list[lst[idx2]]])
-        lst = np.delete(lst,idx)
-    return couples
-
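-# Example (output is random): generate_couple([1, 2, 3]) may return
-# [[2, 3], [3, 1]]; the first member of each couple is removed from the
-# candidate pool before the next draw.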
-def _hash_couple(o1,o2):
-    """
-    Return a hash for two object ids.
-    
-    Parameters
-    ----------
-    o1 : str or int
-        id of the first object
-    o2 : str or int
-        id of the second object
-    
-    Returns
-    -------
-    str
-        hash
-    """
-    return "|".join(map(str,sorted([int(o1),int(o2)])))
-
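-# Example: _hash_couple(42, 7) returns "7|42"; ids are sorted so the hash is order-independent.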
-
-
-### GEO ADJAC BEGIN
-class Cell(object):
-    """
-    A cell is a box placed in geographical space.
-    """
-    def __init__(self,upperleft_x,upperleft_y,bottomright_x,bottomright_y,x,y):
-        """
-        Constructor
-        
-        Parameters
-        ----------
-        upperleft_x : float
-            upperleft longitude
-        upperleft_y : float
-            upperleft latitude
-        bottomright_x : float
-            bottom right longitude
-        bottomright_y : float
-            bottom right latitude
-        x : int
-            cell x coordinates in the grid
-        y : int
-            cell y coordinates in the grid
-        """
-        self.upperleft_x,self.upperleft_y,self.bottomright_x,self.bottomright_y = upperleft_x,upperleft_y,bottomright_x,bottomright_y
-        self.box_ = box(self.upperleft_x,self.upperleft_y,self.bottomright_x,self.bottomright_y)
-        self.list_object={} # {id:Point(coord)}
-
-        self.x,self.y = x, y
-
-    def contains(self,lat,lon):
-        """
-        Return true if the cell contains a point at given coordinates
-        
-        Parameters
-        ----------
-        lat : float
-            latitude
-        lon : float
-            longitude
-        
-        Returns
-        -------
-        bool
-            true if contains
-        """ 
-        x,y = lon,lat
-        if x < self.upperleft_x or x > self.bottomright_x:
-            return False
-        if y < self.upperleft_y or y > self.bottomright_y:
-            return False
-        return True
-    
-    def add_object(self,id_,lat,lon):
-        """
-        Connect an object to the cell
-        
-        Parameters
-        ----------
-        id_ : int
-            id
-        lat : float
-            latitude
-        lon : float
-            longitude
-        """
-        self.list_object[id_] = Point(lon,lat)
-            
-    def __repr__(self):
-        return  "upperleft:{0}_{1}_;bottom_right:{2}_{3}".format(self.upperleft_x,self.upperleft_y,self.bottomright_x,self.bottomright_y)
-        
-class Grid(object):
-    """
-    A grid that partitions a geographical bounding box into cells (plus overlapping inter-cells).
-    
-    """
-    def __init__(self,upperleft_x,upperleft_y,bottomright_x,bottomright_y,cell_sub_div_index=[100,50]):
-        """
-        Constructor
-        
-        Parameters
-        ----------
-        upperleft_x : float
-            upperleft longitude
-        upperleft_y : float
-            upperleft latitude
-        bottomright_x : float
-            bottom right longitude
-        bottomright_y : float
-            bottom right latitude
-        cell_sub_div_index : list, optional
-            number of divisions along the longitude and latitude axes (longitude first), by default [100,50]
-        """
-        self.upperleft_x,self.upperleft_y,self.bottomright_x,self.bottomright_y = upperleft_x,upperleft_y,bottomright_x,bottomright_y
-        
-        self.x_r = abs(self.bottomright_x - self.upperleft_x)/cell_sub_div_index[0]
-        self.y_r = abs(self.upperleft_y - self.bottomright_y )/cell_sub_div_index[1]
-        
-        self.c_x_r = self.x_r/cell_sub_div_index[0] # finer steps used to offset/extend the overlapping inter-cells
-        self.c_y_r = self.y_r/cell_sub_div_index[1]
-        
-        self.cells = []
-        self.inter_cells = []
-        for i in range(cell_sub_div_index[1]):
-            self.cells.append([])
-            for j in range(cell_sub_div_index[0]):
-                self.cells[-1].append(Cell(
-                    self.upperleft_x+j*self.x_r,
-                    self.upperleft_y+i*self.y_r,
-                    self.upperleft_x+((j+1)*self.x_r),
-                    self.upperleft_y+((i+1)*self.y_r),
-                    j,i)
-                )
-        dec_y = 0 
-        for i in range(cell_sub_div_index[1]):
-            self.inter_cells.append([])
-            dec_x = 0 
-            for j in range(cell_sub_div_index[0]):                 
-                self.inter_cells[-1].append(Cell(
-                    self.upperleft_x+(j*self.x_r)-self.c_x_r, # TOP
-                    self.upperleft_y+(i*self.y_r)-dec_y,
-                    self.upperleft_x+((j+1)*self.x_r)-self.c_x_r,#(self.u_pos*self.c_x_r),
-                    self.upperleft_y+((i+1)*self.y_r)+self.c_y_r,
-                    j,i)
-                )
-                self.inter_cells[-1].append(Cell(
-                    self.upperleft_x+(j*self.x_r)-self.c_x_r, # CENTER
-                    self.upperleft_y+(i*self.y_r)-self.c_y_r,
-                    self.upperleft_x+((j+1)*self.x_r)+self.c_x_r,
-                    self.upperleft_y+((i+1)*self.y_r)+self.c_y_r,
-                    j,i)
-                )
-                self.inter_cells[-1].append(Cell(
-                    self.upperleft_x+(j*self.x_r)+dec_x, # CENTER
-                    self.upperleft_y+(i*self.y_r)-self.c_y_r,
-                    self.upperleft_x+((j+1)*self.x_r)-self.c_x_r, #LEFT
-                    self.upperleft_y+((i+1)*self.y_r)+self.c_y_r,
-                    j,i)
-                )
-                dec_x = self.c_x_r
-            dec_y = self.c_y_r
-    
-    def fit_data(self, data=None):
-        """
-        To avoid unnecessary checks when connecting an entity to one or more cells, we
-        filter out cells that do not intersect our geographic context (here, country surfaces).
-        
-        Parameters
-        ----------
-        data : GeoDataFrame, optional
-            geographic context, by default the naturalearth_lowres dataset
-        """
-        # Load the default context lazily so the shapefile is not read at
-        # module import time (avoids an expensive default argument).
-        world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres')) if data is None else data
-        world["nn"] = 1
-        dissolved = world.dissolve(by="nn").iloc[0].geometry
-        new_cells= []
-        new_inter_cells=[]
-        for i in tqdm(range(len(self.cells))):
-            for j in range(len(self.cells[i])):
-                if dissolved.intersects(self.cells[i][j].box_):
-                    new_cells.append(self.cells[i][j])
-                    new_inter_cells.extend(self.inter_cells[i][j*3:(j+1)*3])
-                    
-        self.cells=new_cells
-        self.inter_cells = new_inter_cells
-        
-                    
-    def __add__(self,a): 
-        """
-        Add an object to the grid
-        
-        Parameters
-        ----------
-        a : tuple
-            (id, latitude, longitude)
-        """
-        for c1 in range(len(self.cells)):
-            if self.cells[c1].contains(a[1],a[2]):
-                self.cells[c1].add_object(*a)
-                
-        for c1 in range(len(self.inter_cells)):
-            if self.inter_cells[c1].contains(a[1],a[2]):
-                self.inter_cells[c1].add_object(*a)
-                
-    def get_adjacent_relationships(self,random_iteration=10):
-        """
-        Return the set of adjacency relationships found in each cell.
-        
-        Parameters
-        ----------
-        random_iteration : int, optional
-            number of iteration for random selection of adjacency relationships, by default 10
-        
-        Returns
-        -------
-        set
-            hashed adjacency relationships
-        """
-        relationships = set([])
-        for c1 in tqdm(range(len(self.cells))):
-            for i in range(random_iteration):
-                for t in generate_couple(list(self.cells[c1].list_object.keys())):
-                    relationships.add(_hash_couple(t[0],t[1]))
-
-        for c1 in tqdm(range(len(self.inter_cells))):
-            for i in range(random_iteration):
-                for t in generate_couple(list(self.inter_cells[c1].list_object.keys())):
-                    relationships.add(_hash_couple(t[0],t[1]))
-        return relationships
-    
-
-### GEO ADJAC END
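-# Minimal usage sketch (illustrative only: the bounds are rough values for
-# metropolitan France and the id is a placeholder, not a real geonameid).
-# Note that the constructor expects the smaller latitude first, as checked by
-# Cell.contains, and that __add__ returns None, so use it as a statement:
-#   g = Grid(-5.15, 41.3, 9.6, 51.1, [100, 50])
-#   g.fit_data()                              # keep only cells intersecting land
-#   g + (12345, 48.85, 2.35)                  # (id, latitude, longitude)
-#   rels = g.get_adjacent_relationships(random_iteration=10)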
-
-class ConfigurationReader(object):
-    def __init__(self,configuration_file):
-        if not os.path.exists(configuration_file):
-            raise FileNotFoundError("'{0}' file could not be found!".format(configuration_file))
-
-        self.configuration = json.load(open(configuration_file))
-
-        self.__argparser_desc = ("" if not "description" in self.configuration else self.configuration["description"])
-        self.parser = argparse.ArgumentParser(description=self.__argparser_desc)
-
-        self.parse_conf()
-    
-    def parse_conf(self):
-        if not "args" in self.configuration:
-            raise argparse.ArgumentError("","No args given in the configuration file")
-        
-        for dict_args in self.configuration["args"]:
-            if not isinstance(dict_args,dict):
-                raise ValueError("Args must be dictionnary")
-
-            short_command = dict_args.get("short",None)
-            long_command = dict_args.get("long",None)
-            
-            if not short_command and not long_command:
-                raise ValueError("No command name was given !") 
-            
-            add_func_dict_= {}
-            if "help" in dict_args:
-                add_func_dict_["help"]= dict_args["help"]
-            if "default" in dict_args:
-                add_func_dict_["default"]= dict_args["default"]
-            if "action" in dict_args:
-                add_func_dict_["action"]= dict_args["action"]
-            if "type" in dict_args:
-                add_func_dict_["type"]= eval(dict_args["type"])
-            if "choices" in dict_args:
-                add_func_dict_["choices"]= dict_args["choices"]
-
-            if short_command and long_command:
-                self.parser.add_argument(short_command,long_command,**add_func_dict_)
-            else:
-                self.parser.add_argument(short_command or long_command,**add_func_dict_)
-    
-    def parse_args(self,input_=None):
-        if not input_:
-            return self.parser.parse_args()
-        return self.parser.parse_args(input_)
-
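-# The configuration file is expected to look roughly like this (the keys come
-# from parse_conf; the argument names and values below are illustrative only):
-#   {
-#     "description": "Toponym combination embedding",
-#     "args": [
-#       {"short": "-n", "long": "--ngram-size", "type": "int", "default": 4, "help": "size of the ngrams"},
-#       {"long": "--verbose", "action": "store_true"}
-#     ]
-#   }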
-
-
-if __name__ == "__main__":
-
-    index = NgramIndex(3)
-    index.split_and_add("J'aime le paté")
-    encoding = index.encode("xxxyyyy")
-    index.complete(encoding,10)
\ No newline at end of file