diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..ddc1507dda5eebc7fbb67e9a3546f78022969b26
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,151 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+pip-wheel-metadata/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+.python-version
+
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+
+#### CUSTOM
+
+data/*
+deprecated/*
+*.ipynb_checkpoints
+notebooks/*
+outputs/*
+temp/*
+WikipediaExtract/*
+
+*.DS_Store
+test_comb.sh
+
+.vscode/*
+notes.md
+
+.idea/*
+other/*
+test*
\ No newline at end of file
diff --git a/README.md b/README.md
index 530b3d80c20df7912cbe3b5661ae793d9e74e714..3eee5842dba0dccf55dd541f5655c33ea92186ee 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,169 @@
-# Toponym Geocoding
+# Work on Place-embedding 
 
-Use of Ngram representation and colocation of toponyms in geography and text for geocoding.
\ No newline at end of file
+This repo contains various approaches to geographic place embedding, and more precisely to its use for geocoding. So far, we have designed three approaches:
+
+ * Use the Wikipedia pages of geographic places to learn an embedding for toponyms
+ * Use the Geonames place topology to produce an embedding with graph-embedding techniques
+ * Use toponym co-occurrences based on spatial relationships (inclusion, adjacency) for geocoding
+
+<hr>
+
+## Setup environment
+
+- Python 3.6+
+- OS independent (all dependencies work on Windows!)
+
+It is strongly advised to use Anaconda in a Windows environment!
+
+### Install dependencies
+
+    pip3 install -r requirements.txt
+
+For Anaconda users
+
+    while read requirement; do conda install --yes $requirement; done < requirements.txt
+
+<hr>
+
+## First approach: Embedding using the Wikipedia pages of places
+
+<div style="text-align:center">
+<img src="documentation/imgs/first_approach.png"/>
+<p>Figure 1 : First approach general workflow</p>
+</div>
+
+In this first approach, the goal is to produce an embedding for place names. To do so, we designed a neural network that takes:
+
+* **Input:** a text sequence (phrase)
+* **Output:** latitude, longitude, and the place type
+
+Input texts are selected using Wikidata to filter Wikipedia pages about geographic places. Then, the filtered pages are retrieved from the Wikipedia corpus file. For each page, we extract:
+
+* Title
+* Introduction text
+* Coordinates of the place (latitude, longitude)
+* Place type (using a mapping between Wikidata and DBpedia Place subclasses)
+
+### Step 1: Parse Wikipedia data
+
+First, download the Wikipedia corpus in the desired language, *e.g. enwiki-latest-pages-articles.xml.bz2*.
+
+Then, use the `gensim` parser (doc [here](https://radimrehurek.com/gensim/scripts/segment_wiki.html)) with the following command:
+
+    python3 -m gensim.scripts.segment_wiki -i -f <wikipedia_dump_file> -o <output>
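+
+As a reference, here is a minimal sketch of how the `segment_wiki` output can be inspected: each line of the resulting *.json.gz* file is one article serialised as JSON (with fields such as `title`, `section_titles` and `section_texts`); the filename below is a placeholder.
+
+    import gzip, json
+
+    with gzip.open("enwiki-latest.json.gz", "rt", encoding="utf-8") as f:
+        for line in f:
+            article = json.loads(line)
+            print(article["title"], article["section_titles"][0])
+            break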
+
+### Step 2: Select and filter entities from Wikidata
+
+We use Wikidata to identify which Wikipedia pages concern a place. Simply run the following command:
+
+    python3 1_extractDataFromWikidata.py <Wikidata Dump (.gz)> <output_filename>
+
+### Step 3: Extract data from Wikipedia pages
+
+Using the previous output, we extract text data from the selected Wikipedia pages with the following command:
+
+    python3 2_extractLearningDataset.py <wikipedia_filename (output from step 1)> <wikidata_extract(output from step2)> <output_filename>
+
+### Step 4: Run the embedding extraction
+
+To learn the place embeddings, use `embeddings_lat_lon_type.py`.
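+
+As a rough idea of the kind of architecture involved (a recurrent encoder with one output head for the coordinates and one for the place type), here is a minimal Keras sketch; it is **not** the actual script, and the sizes are placeholders or defaults taken from the parameter table and config file.
+
+    from keras.layers import Input, Embedding, Bidirectional, GRU, Dense
+    from keras.models import Model
+
+    max_sequence_length = 15    # --max_sequence_length default
+    num_words = 400000          # --max_num_words default
+    embedding_dimension = 100   # --embedding_dimension default
+    num_place_types = 50        # placeholder: depends on the Wikidata/DBpedia mapping
+
+    inp = Input(shape=(max_sequence_length,))
+    emb = Embedding(num_words, embedding_dimension)(inp)
+    hidden = Bidirectional(GRU(100))(emb)
+
+    out_coord = Dense(2, activation="sigmoid", name="coordinates")(hidden)  # normalised lat/lon
+    out_type = Dense(num_place_types, activation="softmax", name="place_type")(hidden)
+
+    model = Model(inputs=inp, outputs=[out_coord, out_type])
+    model.compile(optimizer="adam", loss=["mean_squared_error", "categorical_crossentropy"])
+    # model.fit(X, [y_coord, y_type], epochs=100, batch_size=100)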
+
+#### Available Parameters
+
+| Parameter              | Description (default)                |
+|------------------------|--------------------------------------|
+| --max_sequence_length  | Maximum sequence length (15)         |
+| --embedding_dimension  | Embedding vector size (100)          |
+| --batch_size           | Batch size used during training (100)|
+| --epochs               | Number of epochs (100)               |
+| -v                     | Display the Keras verbose output     |
+
+#### Output
+
+The different outputs (one for each neural network architecture) are stored in the `outputs` directory:
+
+* outputs/Bi-GRU_100dim_20epoch_1000batch__coord.png : **coordinates accuracy plot**
+* outputs/Bi-GRU_100dim_20epoch_1000batch__place_type.png : **place type accuracy plot**
+* outputs/Bi-GRU_100dim_20epoch_1000batch.csv : **training history**
+* outputs/Bi-GRU_100dim_20epoch_1000batch.txt : **embeddings**
+
+<hr>
+
+## 2nd Approach: Geonames place embedding
+
+From this point on, we shift our focus to models that rely heavily on spatial/geographical data, here a gazetteer. In this second approach, we propose to generate an embedding for places (not place toponyms) based on their topology.
+
+To do so, we use Geonames data to build a topology graph. This graph is built from the intersections between buffers drawn around each place.
+
+(image here)
+
+Then, using this topology network, we apply node-embedding techniques to generate an embedding for each vertex (place).
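+
+A minimal sketch of this idea (this is **not** the repo's `geonames_embedding.py`; the buffer size and node2vec parameters are placeholder values, and the `node2vec` package listed in `requirements.txt` is assumed):
+
+    import geopandas as gpd
+    import networkx as nx
+    from shapely.geometry import Point
+    from node2vec import Node2Vec
+
+    # toy set of places (geonameid -> point)
+    places = gpd.GeoDataFrame({
+        "geonameid": [1, 2, 3],
+        "geometry": [Point(2.35, 48.85), Point(2.36, 48.86), Point(5.37, 43.30)],
+    })
+    buffers = places.set_index("geonameid").buffer(0.05)
+
+    # connect two places when their buffers intersect
+    graph = nx.Graph()
+    graph.add_nodes_from(buffers.index)
+    for i in buffers.index:
+        for j in buffers.index:
+            if i < j and buffers.loc[i].intersects(buffers.loc[j]):
+                graph.add_edge(i, j)
+
+    # node2vec: random walks + skip-gram (gensim Word2Vec under the hood)
+    node2vec = Node2Vec(graph, dimensions=64, walk_length=30, num_walks=10, workers=1)
+    model = node2vec.fit(window=5, min_count=1)
+    model.wv.save_word2vec_format("geonames_embedding.txt")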
+
+<div style="text-align:center">
+<img src="documentation/imgs/second_approach.png"/>
+<p><strong>Figure 2</strong> : Second approach general workflow</p>
+</div>
+
+### Generate the embedding
+
+First, download the Geonames dump [here](https://download.geonames.org/export/dump/)
+
+*N.B.* We advise you to use the data from a single country only! The topology network can become really dense and large!
+
+    python3 geonames_embedding.py <geonames dump(*.txt)>
+
+### Available Parameters
+
+| Parameter              | Description (default)                                             |
+|------------------------|-------------------------------------------------------------------|
+| --nbcpu                | Number of CPUs used during the learning phase                     |
+| --vector-size          | Embedding size                                                    |
+| --walk-length          | Generated walk length                                             |
+| --num-walks            | Number of walks for each vertex (place)                           |
+| --word2vec-window-size | Window-size used in Word2vec                                      |
+| --buffer-size          | Buffer size used to detect adjacency relationships between places |
+| -d                     | Integrate distances between places in the topology graph          |
+| --dist                 | Distance used if '-d'                                             |
+
+### Output files 
+
+The embedding, in Gensim word2vec format, is saved in the execution directory.
+
+<hr>
+
+## Third approach: Embedding trained using the concatenation of close places
+
+<div style="text-align:center">
+<img src="documentation/imgs/third_approach.png"/>
+<p><strong>Figure 3</strong> : Third approach general workflow</p>
+</div>
+
+
+### Prepare required data
+
+ * download the Geonames data used to train the network [here](download.geonames.org/export/dump/)
+ * download the hierarchy data [here](http://download.geonames.org/export/dump/hierarchy.zip)
+ * unzip both files in the directory of your choice
+ * run the script `train_test_split_geonames.py <geoname_filename>`
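+
+The last step writes two files next to the input, `<geoname_filename>_train.csv` and `<geoname_filename>_test.csv`, which `combination_embeddings.py` reads back, e.g. (with the French extract `FR.txt`):
+
+    import pandas as pd
+
+    train_ids = set(pd.read_csv("FR.txt_train.csv").geonameid.values)
+    test_ids = set(pd.read_csv("FR.txt_test.csv").geonameid.values)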
+
+### Train the network
+
+The script `combination_embeddings.py` is responsible for training the neural network.
+
+To train the network with the default parameters, use the following command:
+
+    python3 combination_embeddings.py -a -i <geoname data filename> <hierarchy geonames data filename>
+
+### Available parameters
+
+
+| Parameter            | Description                                                          |
+|----------------------|----------------------------------------------------------------------|
+| -i,--inclusion       | Use inclusion relationships to train the network                     |
+| -a,--adjacency       | Use adjacency relationships to train the network                     |
+| -w,--wikipedia-coo   | Use Wikipedia place co-occurrences to train the network              |
+| -n,--ngram-size      | ngram size                                                           |
+| -t,--tolerance-value | K-value in the computation of the accuracy@k                         |
+| -e,--epochs          | number of epochs                                                     |
+| -d,--dimension       | size of the ngram embeddings                                         |
+| --admin_code_1       | (Optional) Train the network only on a specific region (admin1 code) |
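+
+Note that coordinates are normalised to [0,1] before training (see `zero_one_encoding` in `utils.py`), so a tolerance value expressed on that normalised scale can be converted back to degrees, e.g.:
+
+    tolerance = 0.002        # default --tolerance-value
+    print(tolerance * 360)   # 0.72 degree of longitude
+    print(tolerance * 180)   # 0.36 degree of latitude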
diff --git a/combination_embeddings.py b/combination_embeddings.py
new file mode 100644
index 0000000000000000000000000000000000000000..c147f8064042bea90976c809d7ca42fe5012369e
--- /dev/null
+++ b/combination_embeddings.py
@@ -0,0 +1,329 @@
+# Base module 
+import re
+import os
+import json
+
+# Structure
+import pandas as pd
+import numpy as np
+import geopandas as gpd
+
+# DEEPL module
+from keras.layers import Dense, Input, Embedding,concatenate,Bidirectional,LSTM
+from keras.models import Model
+from keras import backend as K
+import tensorflow as tf
+
+# Geometry
+from shapely.geometry import Point
+
+# Custom module
+from helpers import read_geonames
+from utils import Grid
+from utils import  zero_one_encoding, NgramIndex,ConfigurationReader
+from metrics import lat_accuracy,lon_accuracy
+
+# Logging
+from tqdm import tqdm
+import logging
+from helpers import Chronometer
+
+
+def parse_title_wiki(title_wiki):
+    """
+    Parse Wikipedia title
+    
+    Parameters
+    ----------
+    title_wiki : str
+        wikipedia title
+    
+    Returns
+    -------
+    str
+        parsed wikipedia title
+    """
+    return re.sub(r"\(.*\)","",title_wiki).strip().lower()
+
+def get_new_ids(cooc_data,id_first_value):
+    """
+    Return new ids from cooccurrence data
+    
+    Parameters
+    ----------
+    cooc_data : pd.DataFrame
+        cooccurrence data
+    id_first_value : int
+        id beginning value
+    
+    Returns
+    -------
+    dict
+        new ids for each toponyms
+    """
+    topo_id = {}
+    id_ = id_first_value
+    for title in cooc_data.title.values:
+        if not title in topo_id:
+            id_+=1
+            topo_id[id_]=title
+    for interlinks in cooc_data.interlinks.values:
+        for interlink in interlinks.split("|"):
+            if not interlink in topo_id:
+                id_+=1
+                topo_id[id_]=interlink
+    return topo_id
+
+
+
+# LOGGING CONF
+logging.basicConfig(
+    format='[%(asctime)s][%(levelname)s] %(message)s ', 
+    datefmt='%m/%d/%Y %I:%M:%S %p',
+    level=logging.INFO  
+    )
+chrono = Chronometer()
+
+args = ConfigurationReader("./parser_config/toponym_combination_embedding.json")\
+    .parse_args()#("-n 4 -t 0.002 -e 20  -i data/geonamesData/FR.txt data/geonamesData/hierarchy.txt".split())
+
+# Initialise CONSTANTS
+GEONAME_FN = args.geoname_input
+GEONAMES_HIERARCHY_FN = args.geoname_hierachy_input
+NGRAM_SIZE = args.ngram_size
+ACCURACY_TOLERANCE = args.tolerance_value
+EPOCHS = args.epochs
+ITER_ADJACENCY = args.adjacency_iteration
+COOC_SAMPLING_NUMBER = 3
+WORDVEC_ITER = 50
+
+# check for output dir
+if not os.path.exists("outputs/"):
+    os.makedirs("outputs/")
+
+# LOAD  Geonames DATA
+logging.info("Load Geonames data...")
+geoname_data = read_geonames(GEONAME_FN).fillna("")
+hierarchy_data = pd.read_csv(GEONAMES_HIERARCHY_FN,sep="\t",header=None,names="parentId,childId,type".split(",")).fillna("")
+
+train_indices,test_indices = pd.read_csv(GEONAME_FN+"_train.csv").geonameid.values, pd.read_csv(GEONAME_FN+"_test.csv").geonameid.values
+train_indices,test_indices = set(train_indices),set(test_indices)
+
+logging.info("Geonames data loaded!")
+
+# SELECT ENTRY with class == to A and P (Areas and Populated Places)
+filtered = geoname_data[geoname_data.feature_class.isin("A P".split())].copy() # Only take area and populated places
+
+# IF REGION (ONLY FR for now !)
+admin_id_authorised_auth = "1 2 3 4 5 6 11 24 27 28 32 44 52 53 75 76 84 93 94".split()
+region_fn = "" if args.admin_code_1 == None else "_"+args.admin_code_1
+if args.admin_code_1 != None and args.admin_code_1 in admin_id_authorised_auth:
+    filtered = filtered[filtered.admin1_code == args.admin_code_1].copy()
+
+# REDUCE DATA STORED
+filtered = filtered["geonameid name longitude latitude".split()] # KEEP ONLY ID LABEL AND COORD
+
+# Geometry operation 
+filtered["geometry"] = filtered["longitude latitude".split()].apply(lambda x: Point(x.longitude,x.latitude),axis=1)
+filtered = gpd.GeoDataFrame(filtered)
+filtered["i"]=1
+bounds = filtered.dissolve("i").bounds.values[0] # Required to get adjacency relationships
+
+
+rel_store = []
+
+if args.adjacency:
+    # RETRIEVE ADJACENCY REL
+    logging.info("Retrieve adjacency relationships ! ")
+    fn = "data/geonamesData/{0}_{1}{2}adjacency.json".format(GEONAME_FN.split("/")[-1],ITER_ADJACENCY,region_fn)
+    if not os.path.exists(fn):
+        g = Grid(*bounds,[360,180])
+        g.fit_data(filtered)
+        [g+(int(row.geonameid),row.latitude,row.longitude) for ix,row in tqdm(filtered["geonameid longitude latitude".split()].iterrows(),total=len(filtered))]
+        rel_store.extend([[int(i) for i in r.split("|")] for r in g.get_adjacent_relationships(ITER_ADJACENCY)])
+        json.dump(rel_store,open(fn,'w'))
+    else:
+        logging.info("Open and load data from previous computation!")
+        rel_store=[[int(couple[0]),int(couple[1])] for couple in json.load(open(fn))]
+    logging.info("{0} adjacency relationships retrieved ! ".format(len(rel_store)))
+
+if args.inclusion:
+    # RETRIEVE INCLUSION RELATIONSHIPS
+    logging.info("Retrieve inclusion relationships ! ")
+    geonamesIDS = set(filtered.geonameid.values)
+    filter_mask = (hierarchy_data.childId.isin(geonamesIDS) & hierarchy_data.parentId.isin(geonamesIDS))
+    rel_store.extend((hierarchy_data[filter_mask]["childId parentId".split()].values.tolist()))
+    logging.info("{0} inclusion relationships retrieved ! ".format(len(hierarchy_data[filter_mask])))
+
+del filtered["geometry"]
+
+if args.wikipedia_cooc:
+    logging.info("Load Wikipedia Cooccurrence data and merge with geonames")
+    COOC_FN = "./data/wikipedia/cooccurrence_"+GEONAME_FN.split("/")[-1]
+    cooc_data = pd.read_csv(COOC_FN,sep="\t")
+    cooc_data["title"] = cooc_data.title.apply(parse_title_wiki)
+    cooc_data["interlinks"] = cooc_data.interlinks.apply(parse_title_wiki)
+    id_wikipediatitle = get_new_ids(cooc_data,geoname_data.geonameid.max())
+    wikipediatitle_id = {v:k for k,v in id_wikipediatitle.items()}
+    title_coord = {row.title: (row.longitude,row.latitude) for _,row in cooc_data.iterrows()}
+    cooc_data["geonameid"] = cooc_data.title.apply(lambda x: wikipediatitle_id[x])
+    filtered = pd.concat((filtered,cooc_data["geonameid title longitude latitude".split()].rename(columns={"title":"name"}).copy()))
+
+    train_cooc_indices,test_cooc_indices = pd.read_csv(COOC_FN+"_train.csv"), pd.read_csv(COOC_FN+"_test.csv")
+    train_indices = train_indices.union(set(train_cooc_indices.title.apply(lambda x: wikipediatitle_id[parse_title_wiki(x)]).values))
+    test_indices = test_indices.union(set(test_cooc_indices.title.apply(lambda x: wikipediatitle_id[parse_title_wiki(x)]).values))
+
+    logging.info("Merged with Geonames data !")
+
+    # EXTRACT rel
+    logging.info("Extracting cooccurrence relationships")
+    cpt=0
+    for ix, row in tqdm(cooc_data.iterrows(),total=len(cooc_data),desc="Extracting Wikipedia Cooccurrence"):
+        for inter in np.random.choice(row.interlinks.split("|"),COOC_SAMPLING_NUMBER):
+            cpt+=1
+            rel_store.extend([[row.geonameid,wikipediatitle_id[inter]]])
+    logging.info("Extract {0} cooccurrence relationships !".format(cpt))
+
+
+# STORE ID to name
+geoname2name = dict(filtered["geonameid name".split()].values)
+
+# ENCODING NAME USING N-GRAM SPLITTING
+logging.info("Encoding toponyms to ngram...")
+index = NgramIndex(NGRAM_SIZE)
+filtered.name.apply(lambda x : index.split_and_add(x)) # Identify all ngram available
+if args.wikipedia_cooc:
+    [index.split_and_add(k) for k in wikipediatitle_id]
+filtered["encode_name"] = filtered.name.apply(lambda x : index.encode(x)) # First encoding
+max_len = filtered.encode_name.apply(len).max() # Retrieve the encodings max length
+if args.wikipedia_cooc:
+    extension = {v:index.encode(k) for k,v in wikipediatitle_id.items()}
+
+index.max_len = int(max_len) # For Index state dump
+
+filtered["encode_name"] = filtered.encode_name.apply(lambda x: index.complete(x,max_len)) # Expend encodings with size < max_len
+if args.wikipedia_cooc:
+    extension = {k:index.complete(v,max_len) for k,v in extension.items()}
+geoname2encodedname = dict(filtered["geonameid encode_name".split()].values) #init a dict with the 'geonameid' --> 'encoded toponym' association
+
+if args.wikipedia_cooc:
+    geoname2encodedname.update(extension)
+
+
+logging.info("Done !")
+
+#CLEAR RAM
+del hierarchy_data
+del geoname_data
+
+# Encode each geonames entry coordinates
+filtered["cell_vec"]=filtered.apply(
+    lambda x : zero_one_encoding(x.longitude,x.latitude),
+    axis=1
+    )
+geoname_vec = dict(filtered["geonameid cell_vec".split()].values)
+# CLEAR RAM
+del filtered
+
+
+embedding_dim = 256
+num_words = len(index.index_ngram) # necessary for the embedding matrix 
+
+logging.info("Preparing Input and Output data...")
+
+X_1_train,X_2_train,y_lat_train,y_lon_train=[],[],[],[]
+X_1_test,X_2_test,y_lat_test,y_lon_test=[],[],[],[]
+
+cpt=0
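+# Each entry of rel_store is a pair of ids (geonameid or Wikipedia-title id);
+# the first id of the pair provides the target coordinates of the training sample.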
+for couple in rel_store:
+    geonameId_1,geonameId_2 = couple[0],couple[1]
+    if not geonameId_1 in geoname2encodedname:
+        cpt+=1
+        continue
+    top1,top2 = geoname2encodedname[geonameId_1],geoname2encodedname[geonameId_2]
+    if geonameId_1 in train_indices: #and geonameId_2 in train_indices:
+        
+        X_1_train.append(top1)
+        X_2_train.append(top2)
+
+        y_lon_train.append(geoname_vec[geonameId_1][0])
+        y_lat_train.append(geoname_vec[geonameId_1][1])
+    
+    else:
+        X_1_test.append(top1)
+        X_2_test.append(top2)
+
+        y_lon_test.append(geoname_vec[geonameId_1][0])
+        y_lat_test.append(geoname_vec[geonameId_1][1])
+
+# NUMPYZE inputs and output lists
+X_1_train = np.array(X_1_train)
+X_2_train = np.array(X_2_train)
+y_lat_train = np.array(y_lat_train)
+y_lon_train = np.array(y_lon_train)
+
+X_1_test = np.array(X_1_test)
+X_2_test = np.array(X_2_test)
+y_lat_test = np.array(y_lat_test)
+y_lon_test = np.array(y_lon_test)
+
+logging.info("Data prepared !")
+
+
+# OUTPUT FN BASE
+name = "{0}_{1}_{2}_{3}{4}".format(GEONAME_FN.split("/")[-1],EPOCHS,NGRAM_SIZE,ACCURACY_TOLERANCE,region_fn)
+if args.adjacency:
+    name += "_A"
+if args.inclusion:
+    name += "_I"
+if args.wikipedia_cooc:
+    name += "_C"
+
+index.save("outputs/"+name+"_index")
+
+
+# NGRAM EMBEDDING
+logging.info("Generating N-GRAM Embedding...")
+embedding_weights = index.get_embedding_layer(geoname2encodedname.values(),dim= embedding_dim,iter=WORDVEC_ITER)
+logging.info("Embedding generated !")
+
+# DEEP MODEL
+name = "LSTM_"+ name
+input_1 = Input(shape=(max_len,))
+input_2 = Input(shape=(max_len,))
+
+embedding_layer = Embedding(num_words, embedding_dim,input_length=max_len,weights=[embedding_weights],trainable=False)#, trainable=True)
+
+x1 = Bidirectional(LSTM(98))(embedding_layer(input_1))
+x2 = Bidirectional(LSTM(98))(embedding_layer(input_2))
+
+x = concatenate([x1,x2])#,x3])
+
+x1 = Dense(500,activation="relu")(x)
+#x1 = Dropout(0.3)(x1)
+x1 = Dense(500,activation="relu")(x1)
+#x1 = Dropout(0.3)(x1)
+
+x2 = Dense(500,activation="relu")(x)
+#x2 = Dropout(0.3)(x2)
+x2 = Dense(500,activation="relu")(x2)
+#x2 = Dropout(0.3)(x2)
+
+output_lon = Dense(1,activation="sigmoid",name="Output_LON")(x1)
+output_lat = Dense(1,activation="sigmoid",name="Output_LAT")(x2)
+
+model = Model(inputs = [input_1,input_2], outputs = [output_lon,output_lat])#input_3
+
+model.compile(loss=['mean_squared_error','mean_squared_error'], optimizer='adam',metrics={"Output_LON":lon_accuracy(),"Output_LAT":lat_accuracy()})
+history = model.fit(x=[X_1_train,X_2_train],
+    y=[y_lon_train,y_lat_train],
+    verbose=True, batch_size=100,
+    epochs=EPOCHS,
+    validation_data=([X_1_test,X_2_test],[y_lon_test,y_lat_test]))
+
+
+hist_df = pd.DataFrame(history.history)
+hist_df.to_csv("outputs/{0}.csv".format(name))
+
+model.save("outputs/"+name+".h5")
+
diff --git a/metrics.py b/metrics.py
new file mode 100644
index 0000000000000000000000000000000000000000..e82c54809aa2a6bece60cd74875140d3719c1ea6
--- /dev/null
+++ b/metrics.py
@@ -0,0 +1,37 @@
+import tensorflow as tf
+
+def lat_accuracy(LAT_TOL =1/180.):
+    def accuracy_at_k_lat(y_true, y_pred):
+        """
+        Metric used to measure the accuracy of the coordinate prediction. Compared to the standard accuracy metric,
+        we add a tolerance threshold because it is (quasi) impossible for a neural network to predict the exact coordinate.
+
+        Parameters
+        ----------
+        y_true : tf.Tensor
+            truth data
+        y_pred : tf.Tensor
+            predicted output
+        """
+        diff = tf.abs(y_true - y_pred)
+        fit = tf.dtypes.cast(tf.less(diff,LAT_TOL),tf.int64)
+        return tf.reduce_sum(fit)/tf.size(y_pred,out_type=tf.dtypes.int64)
+    return accuracy_at_k_lat
+
+def lon_accuracy(LON_TOL=1/360.):
+    def accuracy_at_k_lon(y_true, y_pred):
+        """
+        Metric used to measure the accuracy of the coordinate prediction. Compared to the standard accuracy metric,
+        we add a tolerance threshold because it is (quasi) impossible for a neural network to predict the exact coordinate.
+
+        Parameters
+        ----------
+        y_true : tf.Tensor
+            truth data
+        y_pred : tf.Tensor
+            predicted output
+        """
+        diff = tf.abs(y_true - y_pred)
+        fit = tf.dtypes.cast(tf.less(diff,LON_TOL),tf.int64)
+        return tf.reduce_sum(fit)/tf.size(y_pred,out_type=tf.dtypes.int64)
+    return accuracy_at_k_lon
\ No newline at end of file
diff --git a/parser_config/embeddings_lat_lon.json b/parser_config/embeddings_lat_lon.json
new file mode 100644
index 0000000000000000000000000000000000000000..1a0c774c47b9a6294bf3f54936c79773fc7027a9
--- /dev/null
+++ b/parser_config/embeddings_lat_lon.json
@@ -0,0 +1,12 @@
+{
+    "description": "Toponym Combination",
+    "args": [
+        { "short": "input", "help": "Corpus used to learn the embeddings" },
+        { "short": "-g", "long": "--glove__dir", "default": "data/glove" },
+        {"long": "--max_sequence_length", "type":"int","default":15},
+        {"long": "--max_num_words", "type":"int","default":400000},
+        {"long": "--embedding_dimension", "type":"int","default":100},
+        {"long": "--batch_size", "type":"int","default":100},
+        { "short": "-e", "long": "--epochs", "type": "int", "default": 100 }
+    ]
+}
\ No newline at end of file
diff --git a/parser_config/toponym_combination_embedding.json b/parser_config/toponym_combination_embedding.json
new file mode 100644
index 0000000000000000000000000000000000000000..a2fd9f120b3e791f17948eba7d02b8e2a34116e3
--- /dev/null
+++ b/parser_config/toponym_combination_embedding.json
@@ -0,0 +1,17 @@
+{
+    "description": "Toponym Combination",
+    "args": [
+        { "short": "geoname_input", "help": "Filepath of the Geonames file you want to use." },
+        { "short": "geoname_hierachy_input", "help": "Filepath of the Geonames file you want to use." },
+        { "short": "-v", "long": "--verbose", "action": "store_true" },
+        { "short": "-i", "long": "--inclusion", "action": "store_true" },
+        { "short": "-a", "long": "--adjacency", "action": "store_true" },
+        { "short": "-w", "long": "--wikipedia-cooc", "action": "store_true" },
+        {"long": "--adjacency-iteration", "type":"int","default":1},
+        { "short": "-n", "long": "--ngram-size", "type": "int", "default": 2 },
+        { "short": "-t", "long": "--tolerance-value", "type": "float", "default": 0.002 },
+        { "short": "-e", "long": "--epochs", "type": "int", "default": 100 },
+        { "short": "-d", "long": "--dimension", "type": "int", "default": 256 },
+        {  "long": "--admin_code_1", "default": "None" }
+    ]
+}
\ No newline at end of file
diff --git a/predict_toponym_coordinates.py b/predict_toponym_coordinates.py
new file mode 100644
index 0000000000000000000000000000000000000000..5dcdb7f81a8fbc28826131b5d1680f3647bf6e68
--- /dev/null
+++ b/predict_toponym_coordinates.py
@@ -0,0 +1,80 @@
+from keras.models import load_model
+import tensorflow as tf
+import keras.backend as K
+from utils import NgramIndex
+
+from tensorflow.python.keras.backend import set_session
+from tensorflow.python.keras.models import load_model
+
+sess = None
+graph = None
+
+from metrics import lat_accuracy,lon_accuracy
+
+class Geocoder(object):
+    """
+    >>>geocoder = Geocoder("LSTM_FR.txt_20_4_0.002_None_A_I_C.h5","index_4gram_FR_backup.txt")
+    >>>lon,lat = geocoder.get_coord("Paris","New-York")
+    >>>lon,lat = geocoder.wgs_coord(lon,lat)
+    >>>geocoder.plot_coord("Paris,New-York",lat,lon)
+
+    if you want an interactive map using leafletJS, set to True the `interactive_map` parameter of `Geocoder.plot_coord()`
+    """
+    def __init__(self,keras_model_fn,ngram_index_file):
+        global sess
+        global graph
+        sess = tf.compat.v1.Session()
+        graph = tf.compat.v1.get_default_graph()
+        set_session(sess)
+        self.keras_model = load_model(keras_model_fn,custom_objects={"lat_accuracy":lat_accuracy,"lon_accuracy":lon_accuracy})
+        self.ngram_encoder = NgramIndex.load(ngram_index_file)
+
+    def get_coord(self,toponym,context_toponym):
+        global sess
+        global graph
+        p = self.ngram_encoder.complete(self.ngram_encoder.encode(toponym),self.ngram_encoder.max_len)
+        c = self.ngram_encoder.complete(self.ngram_encoder.encode(context_toponym),self.ngram_encoder.max_len)
+        with sess.as_default():
+            with graph.as_default():
+                lon,lat = self.keras_model.predict([[p],[c]])
+        return lon[0][0],lat[0][0]
+
+    def wgs_coord(self,lon,lat):
+        return ((lon*360)-180),((lat*180)-90)
+    
+    def plot_coord(self,toponym,lat,lon,interactive_map=False,**kwargs):
+        if interactive_map:
+            import folium
+            import tempfile
+            import webbrowser
+            fp = tempfile.NamedTemporaryFile(delete=False)
+            m = folium.Map()
+            folium.Marker([lat, lon], popup=toponym).add_to(m)
+            m.save(fp.name)
+            webbrowser.open('file://' + fp.name)
+        else:
+            import matplotlib.pyplot as plt
+            import geopandas
+            fig, ax = plt.subplots(1,**kwargs)
+            world = geopandas.read_file(geopandas.datasets.get_path('naturalearth_lowres'))
+            world.plot(color='white', edgecolor='black',ax=ax)
+            ax.plot(lon,lat,marker='o', color='red', markersize=5)
+            plt.show()
+
+if __name__ == "__main__":
+    from flask import Flask, escape, request, render_template
+
+    app = Flask(__name__)
+
+
+    geocoder = Geocoder("outputs/LSTM_FR.txt_20_4_0.002_None_A_I_C.h5","outputs/index_4gram_FR_backup.txt")
+
+    @app.route('/',methods=["GET"])
+    def display():
+        toponym = request.args.get("top", "Paris")
+        c_toponym = request.args.get("c_top", "Cherbourg")
+        lon,lat = geocoder.get_coord(toponym,c_toponym)
+        lon,lat = geocoder.wgs_coord(lon,lat)
+        return  render_template("skeleton.html",lat=lat,lon=lon)
+
+    app.run(host='0.0.0.0')
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..4eb9e43ed0fb7aefddd5bdfe87ddf049a489394c
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,21 @@
+#pyroutelib3
+node2vec
+#osrm
+geopandas
+pandas
+numpy
+tqdm
+networkx
+matplotlib
+joblib
+gensim
+scikit-learn
+tensorflow
+keras
+ngram
+shapely
+sqlitedict
+nltk
+folium
+flask
+numba
diff --git a/templates/cover.css b/templates/cover.css
new file mode 100644
index 0000000000000000000000000000000000000000..7c6d33cdd58d82b8936fd0209c691184883d5e67
--- /dev/null
+++ b/templates/cover.css
@@ -0,0 +1,106 @@
+/*
+ * Globals
+ */
+
+/* Links */
+a,
+a:focus,
+a:hover {
+  color: #fff;
+}
+
+/* Custom default button */
+.btn-secondary,
+.btn-secondary:hover,
+.btn-secondary:focus {
+  color: #333;
+  text-shadow: none; /* Prevent inheritance from `body` */
+  background-color: #fff;
+  border: .05rem solid #fff;
+}
+
+
+/*
+ * Base structure
+ */
+
+html,
+body {
+  height: 100%;
+  background-color: #333;
+}
+
+body {
+  display: -ms-flexbox;
+  display: flex;
+  color: #fff;
+  text-shadow: 0 .05rem .1rem rgba(0, 0, 0, .5);
+  box-shadow: inset 0 0 5rem rgba(0, 0, 0, .5);
+}
+
+.cover-container {
+  max-width: 42em;
+}
+
+
+/*
+ * Header
+ */
+.masthead {
+  margin-bottom: 2rem;
+}
+
+.masthead-brand {
+  margin-bottom: 0;
+}
+
+.nav-masthead .nav-link {
+  padding: .25rem 0;
+  font-weight: 700;
+  color: rgba(255, 255, 255, .5);
+  background-color: transparent;
+  border-bottom: .25rem solid transparent;
+}
+
+.nav-masthead .nav-link:hover,
+.nav-masthead .nav-link:focus {
+  border-bottom-color: rgba(255, 255, 255, .25);
+}
+
+.nav-masthead .nav-link + .nav-link {
+  margin-left: 1rem;
+}
+
+.nav-masthead .active {
+  color: #fff;
+  border-bottom-color: #fff;
+}
+
+@media (min-width: 48em) {
+  .masthead-brand {
+    float: left;
+  }
+  .nav-masthead {
+    float: right;
+  }
+}
+
+
+/*
+ * Cover
+ */
+.cover {
+  padding: 0 1.5rem;
+}
+.cover .btn-lg {
+  padding: .75rem 1.25rem;
+  font-weight: 700;
+}
+
+
+/*
+ * Footer
+ */
+.mastfoot {
+  color: rgba(255, 255, 255, .5);
+}
diff --git a/templates/skeleton.html b/templates/skeleton.html
new file mode 100644
index 0000000000000000000000000000000000000000..43fe21d207d3ebd53e955efdfc0ab9dedfa36081
--- /dev/null
+++ b/templates/skeleton.html
@@ -0,0 +1,88 @@
+<!DOCTYPE html>
+<html lang="en">
+
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=auto, initial-scale=1.0">
+    <meta http-equiv="X-UA-Compatible" content="ie=edge">
+    <title>Geocoder Interface</title>
+    <link rel="stylesheet" href="https://stackpath.bootstrapcdn.com/bootstrap/4.4.1/css/bootstrap.min.css"
+        integrity="sha384-Vkoo8x4CGsO3+Hhxv8T/Q5PaXtkKtu6ug5TOeNV6gBiFeWPGFN9MuhOf23Q9Ifjh" crossorigin="anonymous">
+
+    <!-- Load Leaflet -->
+    <link rel="stylesheet" href="https://unpkg.com/leaflet@1.3.4/dist/leaflet.css"
+        integrity="sha512-puBpdR0798OZvTTbP4A8Ix/l+A4dHDD0DGqYW6RQ+9jxkRFclaxxQb/SJAWZfWAkuyeQUytO7+7N4QKrDh+drA=="
+        crossorigin="" />
+    <script src="https://unpkg.com/leaflet@1.3.4/dist/leaflet.js"
+        integrity="sha512-nMMmRyTVoLYqjP9hrbed9S+FzjZHW5gY1TWCHA5ckwXZBadntCNs8kEqAWdrb9O7rxbCaA4lKTIWjDXZxflOcA=="
+        crossorigin=""></script>
+</head>
+
+<body>
+    <style>
+        body {
+            
+        }
+
+        #mapid {
+            height: 400px;
+            width: 100%;
+        }
+    </style>
+
+    <main class="container-fluid">
+        <h1 style="text-align: center;color:white;text-shadow: 1px 1px 2px black;background-color: #999;">Geocoder Demo</h1>
+        <div id="mapid"></div>
+        <div class="container" style="background-color: white;padding: 5px;">
+            <h2>Input</h2>
+            <form action="/" method="get">
+                <div class="form-group">
+                    <label for="formGroupExampleInput">Toponym</label>
+                    <input type="text" class="form-control" name="top"
+                        placeholder="Paris">
+                </div>
+                <div class="form-group">
+                    <label for="formGroupExampleInput2">Context Toponym</label>
+                    <input type="text" class="form-control" name="c_top"
+                        placeholder="Cherbourg">
+                </div>
+                <button type="submit" class="btn btn-primary">Get Coords !</button>
+            </form>
+        </div>
+    </main>
+
+    <!-- JS SCRIPTS -->
+    <script src="https://code.jquery.com/jquery-3.4.1.slim.min.js"
+        integrity="sha384-J6qa4849blE2+poT4WnyKhv5vZF5SrPo0iEjwBvKU7imGFAV0wwj1yYfoRSJoZ+n"
+        crossorigin="anonymous"></script>
+    <script src="https://cdn.jsdelivr.net/npm/popper.js@1.16.0/dist/umd/popper.min.js"
+        integrity="sha384-Q6E9RHvbIyZFJoft+2mJbHaEWldlvI9IOYy5n3zV9zzTtmI3UksdQRVvoxMfooAo"
+        crossorigin="anonymous"></script>
+    <script src="https://stackpath.bootstrapcdn.com/bootstrap/4.4.1/js/bootstrap.min.js"
+        integrity="sha384-wfSDF2E50Y2D1uUdj0O3uMBJnjuUD4Ih7YwaYd1iqfktj0Uod8GCExl3Og8ifwB6"
+        crossorigin="anonymous"></script>
+
+    <script>
+
+        // Initialize the map
+        // [50, -0.1] are the latitude and longitude
+        // 4 is the zoom
+        // mapid is the id of the div where the map will appear
+        var mymap = L
+            .map('mapid')
+            .setView([50, -0.1], 4);
+
+        // Add a tile to the map = a background. Comes from OpenStreetmap
+        L.tileLayer(
+            'http://tile.stamen.com/toner/{z}/{x}/{y}.png', {
+            attribution: 'Map data &copy; <a href="https://www.openstreetmap.org/">OpenStreetMap</a>',
+            maxZoom: 6,
+        }).addTo(mymap);
+
+        var marker = L.marker([{{lat}}, {{lon}}]).addTo(mymap);
+
+
+    </script>
+</body>
+
+</html>
\ No newline at end of file
diff --git a/train_test_split_cooccurrence_data.py b/train_test_split_cooccurrence_data.py
new file mode 100644
index 0000000000000000000000000000000000000000..4748f3edf1813f2dcebe90f5febc68a04490127b
--- /dev/null
+++ b/train_test_split_cooccurrence_data.py
@@ -0,0 +1,85 @@
+import argparse
+
+import pandas as pd
+import geopandas as gpd
+
+import logging
+logging.basicConfig(
+    format='[%(asctime)s][%(levelname)s] %(message)s ', 
+    datefmt='%m/%d/%Y %I:%M:%S %p',
+    level=logging.INFO
+    )
+
+from sklearn.model_selection import train_test_split
+from shapely.geometry import Point
+
+from utils import Grid
+
+from tqdm import tqdm 
+
+parser = argparse.ArgumentParser()
+parser.add_argument("cooccurrence_file")
+
+args = parser.parse_args("data/wikipedia/cooccurrence_FR.txt".split())#("data/geonamesData/FR.txt".split())
+
+# LOAD DATA
+COOC_FN = args.cooccurrence_file
+
+
+
+logging.info("Load Cooc DATA data...")
+cooc_data = pd.read_csv(COOC_FN,sep="\t").fillna("")
+cooc_data["geometry"] = cooc_data["longitude latitude".split()].apply(lambda x: Point(x.longitude,x.latitude),axis=1)
+cooc_data = gpd.GeoDataFrame(cooc_data)
+logging.info("Cooc data loaded!")
+
+# World Shape bounds
+world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))
+world["nn"] = 1
+dissolved = world.dissolve(by="nn").iloc[0].geometry
+
+#Creating Grid
+logging.info("Initializing Grid (360,180)...")
+g = Grid(*dissolved.bounds,[360,180])
+logging.info("Fit Data to the Grid...")
+g.fit_data(cooc_data)
+logging.info("Placing place into the grid...")
+[g+(row.title,row.latitude,row.longitude) for ix,row in tqdm(cooc_data.iterrows(),total=len(cooc_data))]
+
+#ASSOCIATE CELL NUMBER TO EACH PLACE IN THE GEONAME DATAFRAME
+logging.info("Associate a cell number to each place in the Geoname Dataframe")
+def get_cell_index(g,id_):
+    for ix,cell in enumerate(g.cells):
+        if id_ in cell.list_object:
+            return ix
+
+cooc_data["cat"] = cooc_data.title.apply(lambda x:get_cell_index(g,x))
+
+# TRAIN AND TEST SPLIT
+logging.info("Split Between Train and Test")
+
+#  Cell can be empty
+i=0
+while 1:
+    if len(cooc_data[cooc_data.cat == i])> 1:
+        X_train,X_test = train_test_split(cooc_data[cooc_data.cat == i])
+        break
+    i+=1
+
+for i in range(i+1,len(g.cells)):
+    try:
+        x_train,x_test = train_test_split(cooc_data[cooc_data.cat == i])
+        X_train,X_test = pd.concat((X_train,x_train)),pd.concat((X_test,x_test))
+    except Exception as e:
+        print(e) #print("Error",len(filtered[filtered.cat == i]))
+
+del X_train["geometry"]
+del X_train["nn"]
+del X_train["cat"]
+del X_test["cat"]
+del X_test["geometry"]
+del X_test["nn"]
+# SAVING THE DATA
+logging.info("Saving Output !")
+X_train.to_csv(COOC_FN+"_train.csv")
+X_test.to_csv(COOC_FN+"_test.csv")
\ No newline at end of file
diff --git a/train_test_split_geonames.py b/train_test_split_geonames.py
new file mode 100644
index 0000000000000000000000000000000000000000..ff87967ed111a34283b9ef6fd0623b9eb953e59b
--- /dev/null
+++ b/train_test_split_geonames.py
@@ -0,0 +1,92 @@
+import argparse
+
+import numpy as np
+import pandas as pd
+import geopandas as gpd
+
+import logging
+logging.basicConfig(
+    format='[%(asctime)s][%(levelname)s] %(message)s ', 
+    datefmt='%m/%d/%Y %I:%M:%S %p',
+    level=logging.INFO
+    )
+
+from sklearn.model_selection import train_test_split
+from shapely.geometry import Point
+
+from utils import Grid
+from helpers import read_geonames
+
+from tqdm import tqdm 
+
+parser = argparse.ArgumentParser()
+parser.add_argument("geoname_file")
+parser.add_argument("--feature_classes",help="List of class",default="A P")
+
+args = parser.parse_args()#("data/geonamesData/FR.txt".split())
+
+# LOAD DATA
+GEONAME_FN = args.geoname_file
+FEATURE_CLASSES = args.feature_classes
+
+
+logging.info("Load Geonames data...")
+geoname_data = read_geonames(GEONAME_FN).fillna("")
+geoname_data["geometry"] = geoname_data["longitude latitude".split()].apply(lambda x: Point(x.longitude,x.latitude),axis=1)
+geoname_data = gpd.GeoDataFrame(geoname_data)
+logging.info("Geonames data loaded!")
+
+# SELECT ENTRY with class == to A and P (Areas and Populated Places)
+filtered = geoname_data[geoname_data.feature_class.isin(FEATURE_CLASSES.split())].copy() # Only take area and populated places
+
+# World Shape bounds
+world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))
+world["nn"] = 1
+dissolved = world.dissolve(by="nn").iloc[0].geometry
+
+#Creating Grid
+logging.info("Initializing Grid (360,180)...")
+g = Grid(*dissolved.bounds,[360,180])
+logging.info("Fit Data to the Grid...")
+g.fit_data(filtered)
+logging.info("Placing place into the grid...")
+[g+(int(row.geonameid),row.latitude,row.longitude) for ix,row in tqdm(filtered.iterrows(),total=len(filtered))]
+
+#ASSOCIATE CELL NUMBER TO EACH PLACE IN THE GEONAME DATAFRAME
+logging.info("Associate a cell number to each place in the Geoname Dataframe")
+def get_cell_index(g,id_):
+    for ix,cell in enumerate(g.cells):
+        if id_ in cell.list_object:
+            return ix
+
+filtered["cat"] = filtered.geonameid.apply(lambda x:get_cell_index(g,x))
+
+# TRAIN AND TEST SPLIT
+logging.info("Split Between Train and Test")
+
+#  Cell can be empty
+i=0
+while 1:
+    if len(filtered[filtered.cat == i])> 1:
+        X_train,X_test = train_test_split(filtered[filtered.cat == i])
+        break
+    i+=1
+
+for i in range(i+1,len(g.cells)):
+    try:
+        x_train,x_test = train_test_split(filtered[filtered.cat == i])
+        X_train,X_test = pd.concat((X_train,x_train)),pd.concat((X_test,x_test))
+    except:
+        pass #print("Error",len(filtered[filtered.cat == i]))
+
+
+del X_train["geometry"]
+del X_train["nn"]
+del X_train["cat"]
+del X_test["cat"]
+del X_test["geometry"]
+del X_test["nn"]
+# SAVING THE DATA
+logging.info("Saving Output !")
+X_train.to_csv(GEONAME_FN+"_train.csv")
+X_test.to_csv(GEONAME_FN+"_test.csv")
\ No newline at end of file
diff --git a/utils.py b/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..db250b77474f8e1a135a373b76461dad485f88c1
--- /dev/null
+++ b/utils.py
@@ -0,0 +1,614 @@
+# Basic import 
+import math
+import argparse
+import os
+import json
+
+# Data Structure
+import numpy as np
+import geopandas as gpd
+from shapely.geometry import Point,box
+
+# NLP 
+from nltk.tokenize import word_tokenize
+from ngram import NGram
+
+# Machine learning 
+from gensim.models import Word2Vec
+
+# Visualisation and parallelisation
+from tqdm import tqdm
+
+
+class TokenizerCustom():
+    def __init__(self,vocab):
+        self.word_index = {vocab[i]:i for i in range(len(vocab))}
+        self.index_word = {i:vocab[i] for i in range(len(vocab))}
+        self.N = len(self.index_word)
+    def texts_to_sequences(self,listText):
+        seqs = []
+        for text in listText:
+            seqs.append([self.word_index[word] for word in word_tokenize(text) if word in self.word_index])
+        return seqs
+
+
+class CoordinatesEncoder:
+    """
+    Will be replaced by Grid in grid2.py
+    """
+    def __init__(self, cell_size_lat=0.5, cell_size_lon=0.5):
+        self.min_lon = -180
+        self.max_lon = -(self.min_lon)  # Symmetric
+        self.min_lat = -90
+        self.max_lat = -(self.min_lat)  # Symmetric
+
+        self.ecart_lat = self.max_lat - self.min_lat
+        self.ecart_lon = self.max_lon - self.min_lon
+
+        self.cell_size_lat = cell_size_lat
+        self.cell_size_lon = cell_size_lon
+
+        self.unit_size_lat = self.ecart_lat / self.cell_size_lat
+        self.unit_size_lon = self.ecart_lon / self.cell_size_lon
+
+    def encode(self, lat, lon):
+        return (
+            math.floor(((lat + self.max_lat) / self.ecart_lat) * self.unit_size_lat),
+            math.floor(((lon + self.max_lon) / self.ecart_lon) * (self.unit_size_lon))
+        )
+
+    def number_lat_cell(self):
+        return int(self.unit_size_lat)
+
+    def number_lon_cell(self):
+        return int(self.unit_size_lon)
+
+    def oneDimensionOutputSize(self):
+        return self.number_lat_cell() * self.number_lon_cell()
+
+    def vector(self, lat, lon):
+        lat_v, lon_v = np.zeros(self.number_lat_cell()), np.zeros(self.number_lon_cell())
+        new_coords = self.encode(lat, lon)
+        lat_v[int(new_coords[0])] = 1
+        lon_v[int(new_coords[1])] = 1
+        return lat_v, lon_v
+
+    def vector_flatten(self, lat, lon):
+        vec = np.zeros(self.oneDimensionOutputSize())  # 2D Dense softmax isn't possible
+        new_coords = self.encode(lat, lon)
+        pos = self.number_lat_cell() * (new_coords[0]) + new_coords[1]
+        vec[pos] = 1  # lon * lon size
+        return vec
+
+
+class NgramIndex():
+    """
+    Class used for encoding words in ngram representation
+    """
+    def __init__(self,n):
+        """
+        Constructor
+        
+        Parameters
+        ----------
+        n : int
+            ngram size
+        """
+        self.ngram_gen = NGram(N=n)
+
+        self.size = n
+        self.ngram_index = {"":0}
+        self.index_ngram = {0:""}
+        self.cpt = 0
+        self.max_len = 0
+
+    def split_and_add(self,word):
+        """
+        Split a word into multiple ngrams and add each of them to the index
+        
+        Parameters
+        ----------
+        word : str
+            a word
+        """
+        ngrams = word.lower().replace(" ","$")
+        ngrams = list(self.ngram_gen.split(ngrams))
+        [self.add(ngram) for ngram in ngrams]
+
+    def add(self,ngram):
+        """
+        Add a ngram to the index
+        
+        Parameters
+        ----------
+        ngram : str
+            ngram
+        """
+        if not ngram in self.ngram_index:
+            self.cpt+=1
+            self.ngram_index[ngram]=self.cpt
+            self.index_ngram[self.cpt]=ngram
+
+    def encode(self,word):
+        """
+        Return an ngram representation of a word
+        
+        Parameters
+        ----------
+        word : str
+            a word
+        
+        Returns
+        -------
+        list of int
+            list of ngram index
+        """
+        ngrams = word.lower().replace(" ","$")
+        ngrams = list(self.ngram_gen.split(ngrams))
+        [self.add(ng) for ng in ngrams if not ng in self.ngram_index]
+        return [self.ngram_index[ng] for ng in ngrams]
+
+    def complete(self,ngram_encoding,MAX_LEN,filling_item=0):
+        """
+        Pad an ngram-encoded word with empty ngrams up to a fixed length. This is necessary for the neural network (fixed-size input).
+        
+        Parameters
+        ----------
+        ngram_encoding : list of int
+            first encoding of a word
+        MAX_LEN : int
+            desired length of the encoding
+        filling_item : int, optional
+            ngram index you wish to use, by default 0
+        
+        Returns
+        -------
+        list of int
+            list of ngram index
+        """
+        assert len(ngram_encoding) <= MAX_LEN
+        diff = MAX_LEN - len(ngram_encoding)
+        ngram_encoding.extend([filling_item]*diff)  
+        return ngram_encoding
+    
+    def get_embedding_layer(self,texts,dim=100,**kwargs):
+        """
+        Return an embedding matrix for each ngram, learned from the encoded texts with a gensim Word2Vec model.
+        
+        Parameters
+        ----------
+        texts : list of [list of int]
+            list of encoded word
+        dim : int, optional
+            embedding dimension, by default 100
+        
+        Returns
+        -------
+        np.array
+            embedding matrix
+        """
+        model = Word2Vec([[str(w) for w in t] for t in texts], size=dim,window=5, min_count=1, workers=4,**kwargs)
+        N = len(self.ngram_index)
+        embedding_matrix = np.zeros((N,dim))
+        for i in range(N):
+            embedding_matrix[i] = model.wv[str(i)]
+        return embedding_matrix
+
+    def save(self,fn):
+        """
+
+        Save the NgramIndex
+        
+        Parameters
+        ----------
+        fn : str
+            output filename
+        """
+        data = {
+            "ngram_size": self.size,
+            "ngram_index": self.ngram_index,
+            "cpt_state": self.cpt,
+            "max_len_state": self.max_len
+        }
+        json.dump(data,open(fn,'w'))
+
+    @staticmethod
+    def load(fn):
+        """
+        
+        Load a NgramIndex state from a file.
+        
+        Parameters
+        ----------
+        fn : str
+            input filename
+        
+        Returns
+        -------
+        NgramIndex
+            ngram index
+        
+        Raises
+        ------
+        KeyError
+            raised if a required field does not appear in the input file
+        """
+        try:
+            data = json.load(open(fn))
+        except json.JSONDecodeError:
+            raise ValueError("Data file must be a JSON")
+        for key in ["ngram_size","ngram_index","cpt_state","max_len_state"]:
+            if not key in data:
+                raise KeyError("{0} field cannot be found in given file".format(key))
+        new_obj = NgramIndex(data["ngram_size"])
+        new_obj.ngram_index = data["ngram_index"]
+        new_obj.index_ngram = {v:k for k,v in new_obj.ngram_index.items()}
+        new_obj.cpt = data["cpt_state"]
+        new_obj.max_len = data["max_len_state"]
+        return new_obj
+
+
+def zero_one_encoding(long,lat):
+    """
+    Encode coordinates (WGS84) between 0 and 1
+    
+    Parameters
+    ----------
+    long : float
+        longitude value
+    lat : float
+        latitude value
+    
+    Returns
+    -------
+    float,float
+        longitude, latitude
+    """
+    return ((long + 180.0 ) / 360.0), ((lat + 90.0 ) / 180.0) 
+
+def _split(lst,n,complete_chunk_value):
+    """
+    Split a list into chunk of n-size.
+    
+    Parameters
+    ----------
+    lst : list
+        input list
+    n : int
+        chunk size
+    complete_chunk_value : object
+        if last chunk size not equal to n, this value is used to complete it
+    
+    Returns
+    -------
+    list
+        chunked list
+    """
+    chunks = [lst[i:i + n] for i in range(0, len(lst), n)]
+    if not chunks:return chunks
+    if len(chunks[-1]) != n:
+        chunks[-1].extend([complete_chunk_value]*(n-len(chunks[-1])))
+    return np.array(chunks)
+
+def generate_couple(object_list):
+    """
+    Return randomly selected couples (pairs) from an object list.
+    
+    Parameters
+    ----------
+    object_list : list
+        object list
+    
+    Returns
+    -------
+    list
+        list of coupled object
+    """
+    couples = []
+    lst = np.arange(len(object_list))
+    for _ in range(len(object_list)):
+        if len(lst) == 1:
+            break
+        idx = np.random.choice(np.arange(len(lst)))
+        idx2 = np.random.choice(np.arange(len(lst)))
+        while idx2 == idx:
+            idx2 = np.random.choice(np.arange(len(lst)))
+        couples.append([object_list[lst[idx]],object_list[lst[idx2]]])
+        lst = np.delete(lst,idx)
+    return couples
+
+def _hash_couple(o1,o2):
+    """
+    Return a hash for two object ids.
+    
+    Parameters
+    ----------
+    o1 : str or int
+        id of the first object
+    o2 : str or int
+        id of the second object
+    
+    Returns
+    -------
+    str
+        hash
+    """
+    return "|".join(map(str,sorted([int(o1),int(o2)])))
+
+
+
+### GEO ADJAC BEGIN
+class Cell(object):
+    """
+    A cell is a box placed in geographical space.
+    """
+    def __init__(self,upperleft_x,upperleft_y,bottomright_x,bottomright_y,x,y):
+        """
+        Constructor
+        
+        Parameters
+        ----------
+        object : [type]
+            [description]
+        upperleft_x : float
+            upperleft longitude
+        upperleft_y : float
+            upperleft latitude
+        bottomright_x : float
+            bottom right longitude
+        bottomright_y : float
+            bottom right latitude
+        x : int
+            cell x coordinates in the grid
+        y : int
+            cell y coordinates in the grid
+        """
+        self.upperleft_x,self.upperleft_y,self.bottomright_x,self.bottomright_y = upperleft_x,upperleft_y,bottomright_x,bottomright_y
+        self.box_ = box(self.upperleft_x,self.upperleft_y,self.bottomright_x,self.bottomright_y)
+        self.list_object={} # {id:Point(coord)}
+
+        self.x,self.y = x, y
+
+    def contains(self,lat,lon):
+        """
+        Return true if the cell contains a point at given coordinates
+        
+        Parameters
+        ----------
+        lat : float
+            latitude
+        lon : float
+            longitude
+        
+        Returns
+        -------
+        bool
+            true if contains
+        """ 
+        x,y = lon,lat
+        if x < self.upperleft_x or x > self.bottomright_x:
+            return False
+        if y < self.upperleft_y or y > self.bottomright_y:
+            return False
+        return True
+    
+    def add_object(self,id_,lat,lon):
+        """
+        Connect an object to the cell
+        
+        Parameters
+        ----------
+        id_ : int
+            id
+        lat : float
+            latitude
+        lon : float
+            longitude
+        """
+        self.list_object[id_] = Point(lon,lat)
+            
+    def __repr__(self):
+        return  "upperleft:{0}_{1}_;bottom_right:{2}_{3}".format(self.upperleft_x,self.upperleft_y,self.bottomright_x,self.bottomright_y)
+        
+class Grid(object):
+    """
+    Define a grid 
+    
+    """
+    def __init__(self,upperleft_x,upperleft_y,bottomright_x,bottomright_y,cell_sub_div_index=[100,50]):
+        """
+        Constructor
+        
+        Parameters
+        ----------
+        upperleft_x : float
+            upperleft longitude
+        upperleft_y : float
+            upperleft latitude
+        bottomright_x : float
+            bottom right longitude
+        bottomright_y : float
+            bottom right latitude
+        cell_sub_div_index : list, optional
+            number of division in both latitude and longitude axis (longitude first), by default [100,50]
+        """
+        self.upperleft_x,self.upperleft_y,self.bottomright_x,self.bottomright_y = upperleft_x,upperleft_y,bottomright_x,bottomright_y
+        
+        self.x_r = abs(self.bottomright_x - self.upperleft_x)/cell_sub_div_index[0]
+        self.y_r = abs(self.upperleft_y - self.bottomright_y )/cell_sub_div_index[1]
+        
+        self.c_x_r = self.x_r/cell_sub_div_index[0] # Redivide
+        self.c_y_r = self.y_r/cell_sub_div_index[1]
+        
+        self.cells = []
+        self.inter_cells = []
+        for i in range(cell_sub_div_index[1]):
+            self.cells.append([])
+            for j in range(cell_sub_div_index[0]):
+                self.cells[-1].append(Cell(
+                    self.upperleft_x+j*self.x_r,
+                    self.upperleft_y+i*self.y_r,
+                    self.upperleft_x+((j+1)*self.x_r),
+                    self.upperleft_y+((i+1)*self.y_r),
+                    j,i)
+                )
+        dec_y = 0 
+        for i in range(cell_sub_div_index[1]):
+            self.inter_cells.append([])
+            dec_x = 0 
+            for j in range(cell_sub_div_index[0]):                 
+                self.inter_cells[-1].append(Cell(
+                    self.upperleft_x+(j*self.x_r)-self.c_x_r, # TOP
+                    self.upperleft_y+(i*self.y_r)-dec_y,
+                    self.upperleft_x+((j+1)*self.x_r)-self.c_x_r,#(self.u_pos*self.c_x_r),
+                    self.upperleft_y+((i+1)*self.y_r)+self.c_y_r,
+                    j,i)
+                )
+                self.inter_cells[-1].append(Cell(
+                    self.upperleft_x+(j*self.x_r)-self.c_x_r, # CENTER
+                    self.upperleft_y+(i*self.y_r)-self.c_y_r,
+                    self.upperleft_x+((j+1)*self.x_r)+self.c_x_r,
+                    self.upperleft_y+((i+1)*self.y_r)+self.c_y_r,
+                    j,i)
+                )
+                self.inter_cells[-1].append(Cell(
+                    self.upperleft_x+(j*self.x_r)+dec_x, # CENTER
+                    self.upperleft_y+(i*self.y_r)-self.c_y_r,
+                    self.upperleft_x+((j+1)*self.x_r)-self.c_x_r, #LEFT
+                    self.upperleft_y+((i+1)*self.y_r)+self.c_y_r,
+                    j,i)
+                )
+                dec_x = self.c_x_r
+            dec_y = self.c_y_r
+    
+    def fit_data(self,data = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))):
+        """
+        
+    To avoid unnecessary checks when connecting an entity to one or multiple cells, we
+    filter out the cells that do not appear in our geographic context (here, country surfaces).
+        
+        Parameters
+        ----------
+        data : GeoDataFrame
+            geographic context
+        """
+        world = data 
+        world["nn"] = 1
+        dissolved = world.dissolve(by="nn").iloc[0].geometry
+        new_cells= []
+        new_inter_cells=[]
+        for i in tqdm(range(len(self.cells))):
+            for j in range(len(self.cells[i])):
+                if dissolved.intersects(self.cells[i][j].box_):
+                    new_cells.append(self.cells[i][j])
+                    new_inter_cells.extend(self.inter_cells[i][j*3:(j+1)*3])
+                    
+        self.cells=new_cells
+        self.inter_cells = new_inter_cells
+        
+                    
+    def __add__(self,a): 
+        """
+        Add an object to the grid
+        
+        Parameters
+        ----------
+        a : tuple
+            (id, latitude, longitude)
+        """
+        for c1 in range(len(self.cells)):
+            if self.cells[c1].contains(a[1],a[2]):
+                self.cells[c1].add_object(*a)
+                
+        for c1 in range(len(self.inter_cells)):
+            if self.inter_cells[c1].contains(a[1],a[2]):
+                self.inter_cells[c1].add_object(*a)
+                
+    def get_adjacent_relationships(self,random_iteration=10):
+        """
+        Return a list of adjacent relationships founds in each cell.
+        
+        Parameters
+        ----------
+        random_iteration : int, optional
+            number of iteration for random selection of adjacency relationships, by default 10
+        
+        Returns
+        -------
+        list
+            adjacency relationships
+        """
+        relationships = set([])
+        for c1 in tqdm(range(len(self.cells))):
+            for i in range(random_iteration):
+                for t in generate_couple(list(self.cells[c1].list_object.keys())):
+                    relationships.add(_hash_couple(t[0],t[1]))
+
+        for c1 in tqdm(range(len(self.inter_cells))):
+            for i in range(random_iteration):
+                for t in generate_couple(list(self.inter_cells[c1].list_object.keys())):
+                    relationships.add(_hash_couple(t[0],t[1]))
+        return relationships
+    
+
+### GEO ADJAC END
+
+class ConfigurationReader(object):
+    def __init__(self,configuration_file):
+        if not os.path.exists(configuration_file):
+            raise FileNotFoundError("'{0} file could not be found ! '".format(configuration_file))
+
+        self.configuration = json.load(open(configuration_file))
+
+        self.__argparser_desc = ("" if not "description" in self.configuration else self.configuration["description"])
+        self.parser = argparse.ArgumentParser(description=self.__argparser_desc)
+
+        self.parse_conf()
+    
+    def parse_conf(self):
+        if not "args" in self.configuration:
+            raise ValueError("No args given in the configuration file")
+        
+        for dict_args in self.configuration["args"]:
+            if not isinstance(dict_args,dict):
+                raise ValueError("Args must be dictionnary")
+
+            short_command = dict_args.get("short",None)
+            long_command = dict_args.get("long",None)
+            
+            if not short_command and not long_command:
+                raise ValueError("No command name was given !") 
+            
+            add_func_dict_= {}
+            if "help" in dict_args:
+                add_func_dict_["help"]= dict_args["help"]
+            if "default" in dict_args:
+                add_func_dict_["default"]= dict_args["default"]
+            if "action" in dict_args:
+                add_func_dict_["action"]= dict_args["action"]
+            if "type" in dict_args:
+                add_func_dict_["type"]= eval(dict_args["type"])
+            if "choices" in dict_args:
+                add_func_dict_["choices"]= dict_args["choices"]
+
+            if not (short_command and long_command):
+                command = (short_command if not long_command else long_command)
+                self.parser.add_argument(command,**add_func_dict_)
+
+            elif long_command and short_command:
+                self.parser.add_argument(short_command,long_command,**add_func_dict_)
+    
+    def parse_args(self,input_=None):
+        if not input_:
+            return self.parser.parse_args()
+        return self.parser.parse_args(input_)
+
+
+
+if __name__ == "__main__":
+
+    index = NgramIndex(3)
+    index.split_and_add("J'aime le paté")
+    encoding = index.encode("xxxyyyy")
+    index.complete(encoding,10)
\ No newline at end of file