diff --git a/combination_embeddings.py b/combination_embeddings.py
index 24712bae251b5673bdcd07ada9dfb9a3b07b55a0..599ad1a8993547b7b17b88e4d4348aafbb728c2d 100644
--- a/combination_embeddings.py
+++ b/combination_embeddings.py
@@ -70,7 +70,7 @@ logging.basicConfig(
     )
 
 args = ConfigurationReader("./parser_config/toponym_combination_embedding.json")\
-    .parse_args()#("-i -e 5 ../data/geonamesData/FR.txt ../data/geonamesData/hierarchy.txt".split())
+    .parse_args()#("-w --wikipedia-cooc-fn subsetCoocALL.csv ../data/geonamesData/allCountries.txt ../data/geonamesData/hierarchy.txt".split())
 
 #
 #################################################
@@ -205,11 +205,10 @@ if args.wikipedia_cooc:
     cooc_data["interlinks"] = cooc_data.interlinks.apply(parse_title_wiki)
     id_wikipediatitle = get_new_ids(cooc_data,filtered.geonameid.max())
     wikipediatitle_id = {v:k for k,v in id_wikipediatitle.items()}
-    title_coord = {row.title: (row.longitude,row.latitude) for _,row in cooc_data.iterrows()}
+    title_coord = {row.title: (row.longitude,row.latitude) for _,row in tqdm(cooc_data.iterrows(),total=len(cooc_data))}
     cooc_data["geonameid"] = cooc_data.title.apply(lambda x: wikipediatitle_id[x])
     filtered = pd.concat((filtered,cooc_data["geonameid title longitude latitude".split()].rename(columns={"title":"name"}).copy()))
-
-    train_cooc_indices,test_cooc_indices = pd.read_csv(COOC_FN+"_train.csv"), pd.read_csv(COOC_FN+"_test.csv")
+    train_cooc_indices,test_cooc_indices = pd.read_csv(COOC_FN+"_train.csv",sep="\t"), pd.read_csv(COOC_FN+"_test.csv",sep="\t")
     train_indices = train_indices.union(set(train_cooc_indices.title.apply(lambda x: wikipediatitle_id[parse_title_wiki(x)]).values))
     test_indices = test_indices.union(set(test_cooc_indices.title.apply(lambda x: wikipediatitle_id[parse_title_wiki(x)]).values))
 
diff --git a/combination_embeddingsv2.py b/combination_embeddingsv2.py
index 6bec212c0d54eabba808c0fe2c0a9f7783659e42..dcde16a21e388f460a4f91d31cd08c7c62c97353 100644
--- a/combination_embeddingsv2.py
+++ b/combination_embeddingsv2.py
@@ -3,16 +3,21 @@ import os
 
 # Structure
 import pandas as pd
+
 # DEEPL module
 from keras.layers import Dense, Input, Embedding,concatenate,Bidirectional,LSTM
 from keras.models import Model
 from keras.callbacks import ModelCheckpoint
+from keras.layers import Dropout, Lambda
+import keras.backend as K
+import tensorflow as tf
 
 # Custom module
 from lib.ngram_index import NgramIndex
 from lib.utils import ConfigurationReader, MetaDataSerializer
 from lib.metrics import lat_accuracy,lon_accuracy
 from data_generator import DataGenerator,CoOccurrences,load_embedding,Inclusion,Adjacency
+from lib.geo import haversine_tf,accuracy_k
 
 # Logging
 import logging
@@ -29,19 +34,22 @@ logging.basicConfig(
     )
 
 args = ConfigurationReader("./parser_config/toponym_combination_embedding_v2.json")\
-    .parse_args()#("-w -e 100 ../data/geonamesData/allCountries.txt ../data/geonamesData/hierarchy.txt".split())
+    .parse_args()#("-i --inclusion-fn ../data/geonamesData/hierarchy.txt ../data/geonamesData/allCountries.txt ../data/embeddings/word2vec4gram/4gramWiki+geonames_index.json ../data/embeddings/word2vec4gram/embedding4gramWiki+Geonames.bin".split())
+
+#.parse_args("-w --wikipedia-cooc-fn subsetCoocALLv2.csv ../data/geonamesData/allCountries.txt ../data/embeddings/word2vec4gram/4gramWiki+geonames_index.json ../data/embeddings/word2vec4gram/embedding4gramWiki+Geonames.bin".split())
 
 #
 #################################################
 ############# MODEL TRAINING PARAMETER ##########
 #################################################
 NGRAM_SIZE = args.ngram_size
-ACCURACY_TOLERANCE = args.tolerance_value
+ACCURACY_TOLERANCE = args.k_value # radius in kilometers for the accuracy@k metric
 EPOCHS = args.epochs
-ITER_ADJACENCY = args.adjacency_iteration
-COOC_SAMPLING_NUMBER = args.cooc_sample_size
-WORDVEC_ITER = args.ngram_word2vec_iter
-EMBEDDING_DIM = 100
+ADJACENCY_SAMPLING = args.adjacency_sample
+COOC_SAMPLING = args.cooc_sample
+WORDVEC_ITER = 50 # fixed: --ngram-word2vec-iter was removed from the v2 parser config
+EMBEDDING_DIM = args.dimension
+BATCH_SIZE = args.batch_size
 #################################################
 ########## FILENAME VARIABLE ####################
 #################################################
@@ -49,22 +57,17 @@ EMBEDDING_DIM = 100
 if not os.path.exists("outputs/"):
     os.makedirs("outputs/")
 
-GEONAME_FN = "ALL"#args.geoname_input
-DATASET_NAME = "ALL"#args.geoname_input.split("/")[-1]
-GEONAMES_HIERARCHY_FN = ""#args.geoname_hierachy_input
-REGION_SUFFIX_FN = "" if args.admin_code_1 == "None" else "_" + args.admin_code_1
-ADJACENCY_REL_FILENAME = "{0}_{1}{2}adjacency.json".format(
-        GEONAME_FN,
-        ITER_ADJACENCY,
-        REGION_SUFFIX_FN)
-
+GEONAME_FN = args.geoname_input
+DATASET_NAME = args.geoname_input.split("/")[-1]
+GEONAMES_HIERARCHY_FN = args.inclusion_fn
+ADJACENCY_REL_FILENAME = args.adjacency_fn
 COOC_FN = args.wikipedia_cooc_fn
-PREFIX_OUTPUT_FN = "{0}_{1}_{2}_{3}_{4}".format(
+
+PREFIX_OUTPUT_FN = "{0}_{1}_{2}_{3}".format(
     GEONAME_FN.split("/")[-1],
     EPOCHS,
     NGRAM_SIZE,
-    ACCURACY_TOLERANCE,
-    REGION_SUFFIX_FN)
+    ACCURACY_TOLERANCE)
 
 REL_CODE=""
 if args.adjacency:
@@ -85,8 +88,8 @@ HISTORY_FN = "outputs/{0}.csv".format(PREFIX_OUTPUT_FN)
 meta_data = MetaDataSerializer(
     DATASET_NAME,
     REL_CODE,
-    COOC_SAMPLING_NUMBER,
-    ITER_ADJACENCY,
+    COOC_SAMPLING,
+    ADJACENCY_SAMPLING,
     NGRAM_SIZE,
     ACCURACY_TOLERANCE,
     EPOCHS,
@@ -103,16 +106,30 @@ meta_data.save("outputs/{0}.json".format(PREFIX_OUTPUT_FN))
 
 index = NgramIndex.load(args.ngram_index_fn)
 
-#c_train = CoOccurrences(COOC_FN + "_train.csv",sampling=1)
-#c_test = CoOccurrences(COOC_FN + "_test.csv",sampling=1)
+train_src = []
+test_src = []
+
+if args.wikipedia_cooc:
+    train_src.append(CoOccurrences(COOC_FN + "_train.csv",sampling=COOC_SAMPLING))
+    test_src.append(CoOccurrences(COOC_FN + "_test.csv",sampling=COOC_SAMPLING))
+
+if args.adjacency:
+    a_train = Adjacency(ADJACENCY_REL_FILENAME + "_train.csv",GEONAME_FN,sampling=ADJACENCY_SAMPLING,gzip=False)
+    a_test = Adjacency(ADJACENCY_REL_FILENAME + "_test.csv",GEONAME_FN,sampling=ADJACENCY_SAMPLING,gzip=False)
+    train_src.append(a_train)
+    test_src.append(a_test)
+
+if args.inclusion:
+    i_train = Inclusion(GEONAME_FN,GEONAMES_HIERARCHY_FN+"_train.csv")
+    i_test = Inclusion(GEONAME_FN,GEONAMES_HIERARCHY_FN+"_test.csv")
+    train_src.append(i_train)
+    test_src.append(i_test)
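+# Each enabled relation (co-occurrence, adjacency, inclusion) contributes one
+# train/test source pair; the DataGenerator below batches whichever are active.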
 #Adjacency
-a_train = Adjacency(COOC_FN + "_train.csv","../data/geonamesData/allCountries.txt",sampling=1,gzip=False)
-a_test = Adjacency(COOC_FN + "_test.csv","../data/geonamesData/allCountries.txt",sampling=1,gzip=False)
 
-BATCH_SIZE = 100
 
-d_train = DataGenerator([a_train],index,batch_size=BATCH_SIZE) 
-d_test = DataGenerator([a_test],index,batch_size=BATCH_SIZE) 
+
+d_train = DataGenerator(train_src,index,batch_size=BATCH_SIZE)
+d_test = DataGenerator(test_src,index,batch_size=BATCH_SIZE)
 
 num_words = len(index.index_ngram)  
 
@@ -147,30 +164,43 @@ x1 = Dense(500,
     activation="relu",
     kernel_regularizer=regularizers.l2(0.01)
     )(x)
-# x1 = Dropout(0.3)(x1)
+x1 = Dropout(0.3)(x1)
 x1 = Dense(500,
     activation="relu",
     kernel_regularizer=regularizers.l2(0.01)
     )(x1)
-# x1 = Dropout(0.3)(x1)
+x1 = Dropout(0.3)(x1)
 
 x2 = Dense(500,
     activation="relu",
     kernel_regularizer=regularizers.l2(0.01)
     )(x)
-# x2 = Dropout(0.3)(x2)
+x2 = Dropout(0.3)(x2)
 x2 = Dense(500,
     activation="relu",
     kernel_regularizer=regularizers.l2(0.01)
     )(x2)
-# x2 = Dropout(0.3)(x2)
+x2 = Dropout(0.3)(x2)
 
 output_lon = Dense(1,activation="sigmoid",name="Output_LON")(x1)
 output_lat = Dense(1,activation="sigmoid",name="Output_LAT")(x2)
+
+# Rescaling helpers (also defined in lib.geo). The Lambda rescaling below is
+# deliberately disabled: outputs stay in [0,1] to avoid loss value explosion.
+def to_wgs84_lat(lat):
+    return ((lat*180)-90)
+def to_wgs84_lon(lon):
+    return ((lon*360)-180)
+
+#output_lon = Lambda(to_wgs84_lon)(output_lon)
+#output_lat = Lambda(to_wgs84_lat)(output_lat)
+
+
+
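+# Merge the two heads so a single loss/metric sees (lon, lat) pairs, matching
+# the (batch, 2) targets produced by DataGenerator.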
+output = concatenate([output_lon,output_lat],name="output_layer")
 
-model = Model(inputs = [input_1,input_2], outputs = [output_lon,output_lat])#input_3
+model = Model(inputs = [input_1,input_2], outputs = output)#input_3
 
-model.compile(loss=['mean_squared_error','mean_squared_error'], optimizer='rmsprop',metrics={"Output_LON":lon_accuracy(),"Output_LAT":lat_accuracy()})
+model.compile(loss=haversine_tf, optimizer='adam',metrics=[accuracy_k(ACCURACY_TOLERANCE)])
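+# Note: the loss applies haversine_tf to the raw [0,1]-encoded outputs (a relative
+# quantity, not true kilometers), whereas accuracy_k rescales both tensors to
+# WGS84 degrees before measuring real distances (see lib/geo.py).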
 
 
 #############################################################################################
diff --git a/data_generator.py b/data_generator.py
index 02e246117e02f2870f09189e435e91987e43888d..12d38bb17c42c3c6e469359018bc355fb11d17cb 100644
--- a/data_generator.py
+++ b/data_generator.py
@@ -295,7 +295,8 @@ class DataGenerator(keras.utils.Sequence):
             
             X[i] = [ self.ngram_index.encode(topo),self.ngram_index.encode(topo_context)]
             y[i] = [*zero_one_encoding(longitude,latitude)]
-        return [X[:,0],X[:,1]], [y[:,0],y[:,1]]
+            #y[i] = [longitude,latitude]
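+        # y stays a single (batch, 2) array of zero-one encoded (lon, lat),
+        # matching the concatenated "output_layer" head of the v2 model.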
+        return [X[:,0],X[:,1]], y#[y[:,0],y[:,1]]
 
     def on_epoch_end(self):
         'Updates indexes after each epoch'
diff --git a/helpers.py b/helpers.py
index d665f8f8f7419d9dbb02557b8dc360bf6dcf86da..b093f62f25a9dce7913cca5373141f13f1753554 100644
--- a/helpers.py
+++ b/helpers.py
@@ -90,7 +90,7 @@ def parse_title_wiki(title_wiki):
     str
         parsed wikipedia title
     """
-    return re.sub("\(.*\)", "", title_wiki).strip().lower()
+    return re.sub(r"\(.*\)", "", str(title_wiki)).strip().lower()
 
 
 def _split(lst, n, complete_chunk_value):
diff --git a/lib/geo.py b/lib/geo.py
index 5b809bb8708ed2f6dc942c14b594f5abf243fe1a..6841247eaf0b4d1137d73538dbf0359ff1f9dcc1 100644
--- a/lib/geo.py
+++ b/lib/geo.py
@@ -14,7 +14,54 @@ from helpers import read_geonames
 from tqdm import tqdm
 from joblib import Parallel,delayed
 
+import tensorflow as tf
+import keras.backend as K
 
+def tf_deg2rad(deg):
+    pi_on_180 = 0.017453292519943295
+    return deg * pi_on_180
+
+def haversine_tf(y_true,y_pred):
+    """
+    Return the haversine (great-circle) distance, in kilometers, between the
+    true and the predicted coordinates.
+
+    Parameters
+    ----------
+    y_true : tf.Tensor
+        ground-truth coordinates, shape (batch, 2), columns (longitude, latitude) in degrees
+    y_pred : tf.Tensor
+        predicted coordinates, shape (batch, 2), columns (longitude, latitude) in degrees
+
+    Returns
+    -------
+    tf.Tensor
+        distance value(s) in kilometers, shape (batch,)
+    """
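+    # Haversine formula: a = sin^2(dlat/2) + cos(lat1)*cos(lat2)*sin^2(dlon/2),
+    # distance = 2 * R * asin(sqrt(a)), with R = 6367 km below.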
+    lon1, lat1, lon2, lat2 = map(tf_deg2rad, [y_true[:,0], y_true[:,1], y_pred[:,0], y_pred[:,1]])
+    dlon = lon2 - lon1
+    dlat = lat2 - lat1
+    a = K.sin(dlat/2.0)**2 + K.cos(lat1) * K.cos(lat2) * K.sin(dlon/2.0)**2
+
+    return 6367 * 2 * tf.math.asin(K.sqrt(a))
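+# Illustrative sanity check (not part of the pipeline): with (lon, lat) degree
+# inputs of shape (batch, 2),
+#   haversine_tf(tf.constant([[2.3522, 48.8566]]),   # Paris
+#                tf.constant([[-0.1276, 51.5072]]))  # London
+# should come out around 343 km.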
+
+def to_wgs84_lat(lat):
+    return ((lat*180)-90)
+def to_wgs84_lon(lon):
+    return ((lon*360)-180)
+
+def to_wgs84(x):
+    lon=to_wgs84_lon(x[:,0])
+    lat=to_wgs84_lat(x[:,1])
+    return tf.stack([lon,lat],axis=1)
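+# to_wgs84 maps the model's [0,1] outputs back to degrees, e.g.
+# (0.5, 0.5) -> (0.0, 0.0) and (1.0, 1.0) -> (180.0, 90.0); presumably the
+# inverse of the zero_one_encoding used in DataGenerator.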
+
+def accuracy_k(k=100):
+    """Accuracy@k: fraction of predictions that fall within k kilometers of the
+    ground truth (both inputs are [0,1]-encoded and rescaled to WGS84 first)."""
+    def compute_metric(y_true,y_pred):
+        return K.less_equal(haversine_tf(to_wgs84(y_true),to_wgs84(y_pred)),k)
+    return compute_metric
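+# Usage sketch, mirroring the compile call in combination_embeddingsv2.py:
+#   model.compile(loss=haversine_tf, optimizer="adam", metrics=[accuracy_k(100)])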
 
 def haversine_pd(lon1, lat1, lon2, lat2):
     """
diff --git a/parser_config/toponym_combination_embedding.json b/parser_config/toponym_combination_embedding.json
index 13e622b173e2018ab4f7c8f5df1994b8295ffe8d..a7dc96c78f74d703cad17f6c64f4e4a90c97dfa9 100644
--- a/parser_config/toponym_combination_embedding.json
+++ b/parser_config/toponym_combination_embedding.json
@@ -8,7 +8,7 @@
         { "short": "-a", "long": "--adjacency", "action": "store_true" },
         { "short": "-w", "long": "--wikipedia-cooc", "action": "store_true" },
         { "long": "--wikipedia-cooc-fn","help":"Cooccurrence data filename"},
-        { "long": "--cooc-sample-size", "type": "int", "default": 3 },
+        { "long": "--cooc-sample-size", "type": "int", "default": 1 },
         {"long": "--adjacency-iteration", "type":"int","default":1},
         { "short": "-n", "long": "--ngram-size", "type": "int", "default": 2 },
         { "long": "--ngram-word2vec-iter", "type": "int", "default": 50 },
diff --git a/parser_config/toponym_combination_embedding_v2.json b/parser_config/toponym_combination_embedding_v2.json
index 050c8446e7d885c60706f05bfdbfe4a19029c0a3..f0fb1fd6ceee135e98990ab6c71662e892f1dc06 100644
--- a/parser_config/toponym_combination_embedding_v2.json
+++ b/parser_config/toponym_combination_embedding_v2.json
@@ -1,21 +1,22 @@
 {
     "description": "Toponym Combination",
     "args": [
+        { "short": "geoname_input", "help": "Filepath of the Geonames file you want to use." },
         { "short": "ngram_index_fn", "help": "Filepath of the NgramIndex file you want to use." },
         { "short": "embedding_fn", "help": "Filepath of the Embedding file you want to use." },
+        { "short": "-n", "long": "--ngram-size", "type": "int", "default": 4 },
+        { "short": "-d", "long": "--dimension", "type": "int", "default": 100 },
         { "short": "-v", "long": "--verbose", "action": "store_true" },
         { "short": "-i", "long": "--inclusion", "action": "store_true" },
         { "short": "-a", "long": "--adjacency", "action": "store_true" },
         { "short": "-w", "long": "--wikipedia-cooc", "action": "store_true" },
+        { "long": "--inclusion-fn","help":"Cooccurrence data filename"},
         { "long": "--wikipedia-cooc-fn","help":"Cooccurrence data filename"},
         { "long": "--adjacency-fn","help":"Adjacency data filename"},
-        { "long": "--cooc-sample-size", "type": "int", "default": 3 },
-        {"long": "--adjacency-iteration", "type":"int","default":1},
-        { "short": "-n", "long": "--ngram-size", "type": "int", "default": 2 },
-        { "long": "--ngram-word2vec-iter", "type": "int", "default": 50 },
-        { "short": "-t", "long": "--tolerance-value", "type": "float", "default": 0.002 },
+        { "long": "--cooc-sample", "type": "int", "default": 3 },
+        {"long": "--adjacency-sample", "type":"int","default":1},
         { "short": "-e", "long": "--epochs", "type": "int", "default": 100 },
-        { "short": "-d", "long": "--dimension", "type": "int", "default": 256 },
-        {  "long": "--admin_code_1", "default": "None" }
+        { "short": "-b", "long": "--batch-size", "type": "int", "default": 100 },
+        { "short": "-k", "long": "--k-value", "type": "float", "default": 100 ,"help":"Used for the accuracy@k metrics. Given in kilometers"}    
     ]
 }
\ No newline at end of file