diff --git a/lib/data_generator.py b/lib/data_generator.py
index 5183017b47d0a81c3bf2cfd2c0399ece4c34c761..fb871467542858a421f3ef2d33ad644825cae9cb 100644
--- a/lib/data_generator.py
+++ b/lib/data_generator.py
@@ -11,6 +11,9 @@ from .geo import zero_one_encoding
 from helpers import parse_title_wiki,read_geonames
 from gensim.models.keyedvectors import KeyedVectors
 
+from sklearn.preprocessing import LabelEncoder
+
+
 def wc_l(filename,gzip=True):
     lc = 0
     if not gzip:
@@ -40,7 +43,9 @@ class DataSource(object):
         self.name = name
         assert os.path.exists(input_filename)
         self.input_filename = input_filename
-        self.len = 0 
+        self.len = 0
+
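+        # Assumed behaviour, documented here for readability: set to True by data
+        # sources that provide a healpix class for each sample (e.g. CoOccurrences)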
+        self.is_there_healpix = False
 
     def __next__(self):
         raise NotImplementedError()
@@ -112,29 +117,6 @@ class Adjacency(DataSource):
         return (self.geonames_data_dict[self.topo],
         self.geonames_data_dict[self.context_topo_context[self.i-1]],
         self.lat,self.lon)
-    
-    def __nextv2__(self):
-        if  self.i >= len(self.context_topo_context):
-            line = self.data_src.readline()
-            if not line:
-                self.is_over = True
-                raise StopIteration
-            line = line.decode("utf-8").rstrip("\n")
-            geonameid, adjacent_geoname_id,latitude,longitude = tuple(line.split(","))
-
-            self.topo = int(geonameid)
-            self.context_topo_context = [int(x) for x in adjacent_geoname_id.split("|")]
-            if self.sampling:
-                self.curr_probs = [self.probs_storage(x) for x in self.context_topo_context]
-                self.context_topo_context = np.random.choice(self.context_topo_context,self.sampling,self.curr_probs)
-            self.lat, self.lon = float(latitude),float(longitude)
-
-            self.i = 0
-        
-        self.i += 1
-        return (self.topo,
-        self.context_topo_context[self.i-1],
-        self.lat,self.lon)
 
     def __reset__(self):
         if not self.gzip:
@@ -193,40 +175,48 @@ class Inclusion(DataSource):
         return (self.i == self.len)
     
 
-from sklearn.preprocessing import LabelEncoder
+
 
 class CoOccurrences(DataSource):
-    def __init__(self, filename, label_encoder,sampling=3):
+    def __init__(self, filename, label_encoder,sampling=3,resolution = 1):
         super().__init__("Co-Occurrence data",filename)
-
+        self.is_there_healpix = True
+        # LOAD DATA
         try:
             self.data_src = pd.read_csv(filename)
         except:
             self.data_src = pd.read_csv(filename,sep="\t")
-            
+        # CHECK THAT THE HEALPIX COLUMN FOR THE REQUESTED RESOLUTION EXISTS IN THE DATA
+        if "healpix_{0}".format(resolution) not in self.data_src.columns:
+            raise KeyError("healpix_{0} column does not exist!".format(resolution))
+        
+        # PARSE TOPONYMS
         self.data_src["title"] = self.data_src.title.apply(parse_title_wiki)
         try:
             self.data_src["interlinks"] = self.data_src.interlinks.apply(parse_title_wiki)
         except:
             pass
 
+        # SAMPLING PARAMETERS
+        self.sampling = sampling
+        if self.sampling:
+            self.probs_storage = SamplingProbabilities()
+            
+        # LOOP INDICES
         self.i = 0
         self.j = 0
         self.is_over = False
-
-        self.sampling = sampling
         self.len = len(self.data_src)*self.sampling
 
-        if self.sampling:
-            self.probs_storage = SamplingProbabilities()
         
+        # BUFFER VARIABLE
         self.topo = None
         self.context_topo_context = []
         self.curr_probs = None
         self.lat, self.lon = None, None
 
 
-        self.resolution = 64 #fixed for now
+        self.resolution = resolution
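+        # the distinct healpix cells observed at this resolution define the classification classes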
         self.classes = self.data_src["healpix_{0}".format(self.resolution)].unique().tolist()
 
         self.class_encoder = label_encoder
@@ -248,7 +238,9 @@ class CoOccurrences(DataSource):
                 self.curr_probs = [self.probs_storage(x) for x in self.context_topo_context]
                 self.context_topo_context = np.random.choice(self.context_topo_context,self.sampling,self.curr_probs)
             self.lat, self.lon = line.latitude,line.longitude
+            
             self.healpix = line["healpix_{0}".format(self.resolution)]
+            
             self.i += 1
             self.j = 0
         
@@ -264,9 +256,6 @@ class CoOccurrences(DataSource):
     def isOver(self):
         return self.is_over
     
-
-
-
 class DataGenerator(keras.utils.Sequence):
     'Generates data for Keras'
     def __init__(self,data_sources,ngram_index,class_encoder,**kwargs):
@@ -275,49 +264,68 @@ class DataGenerator(keras.utils.Sequence):
         self.ngram_index = ngram_index
 
         self.batch_size = kwargs.get("batch_size",1000)
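+        # when only_healpix is set, batches contain only the healpix class target (see return_)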
+        self.only_healpix = kwargs.get("only_healpix",False)
         
-
         self.len = sum([len(d) for d in self.data_src])
         self.datasrc_index = 0
 
         self.num_classes = class_encoder.get_num_classes()
 
-        #self.on_epoch_end()
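+        # track whether the current data source provides a healpix class target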
+        self.is_there_healpix = self.data_src[self.datasrc_index].is_there_healpix
 
     def __len__(self):
         'Denotes the number of batches per epoch'
         return int(np.floor(self.len / self.batch_size))
 
+    def return_(self,X,y,y2=None):
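+        # Select the output format expected by the model:
+        #  - only_healpix        : healpix class target only
+        #  - healpix available   : coordinates + healpix class
+        #  - no healpix          : coordinates only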
+        if self.is_there_healpix and self.only_healpix:
+            return [X[:,0],X[:,1]],y2
+
+        if self.is_there_healpix:
+            return [X[:,0],X[:,1]],[y,y2]
+        else:
+            return [X[:,0],X[:,1]],y
+
     def __getitem__(self, index):
         'Generate one batch of data'
         X = np.empty((self.batch_size,2,self.ngram_index.max_len),dtype=np.int32) # toponym
         y = np.empty((self.batch_size,2),dtype=float) #lat lon coord
-        y2 = np.empty((self.batch_size,self.num_classes),dtype=float) # healpix class
+
+        y2=None # For healpix
+        if self.is_there_healpix:
+            y2 = np.empty((self.batch_size,self.num_classes),dtype=float) # healpix class
+
         if self.data_src[self.datasrc_index].isOver():
                 self.datasrc_index += 1
+                if self.datasrc_index < len(self.data_src):
+                    self.is_there_healpix = self.data_src[self.datasrc_index].is_there_healpix
+
+
         if self.datasrc_index >= len(self.data_src):
-            return X,[y,y2]
+            return self.return_(X,y,y2)
         
         for i in range(self.batch_size):
             if self.data_src[self.datasrc_index].isOver():
-                return X, y
+                return self.return_(X,y,y2)
             try:
                 topo, topo_context, latitude, longitude, healpix_class = self.data_src[self.datasrc_index].__next__()
             except StopIteration as e:
-                return X, [y,y2]
+                return self.return_(X,y,y2)
             
             X[i] = [ self.ngram_index.encode(topo),self.ngram_index.encode(topo_context)]
             y[i] =  [*zero_one_encoding(longitude,latitude)]
-            y2[i] = to_categorical(healpix_class, num_classes=self.num_classes, dtype='int32'
+            if self.is_there_healpix:
+                y2[i] = to_categorical(healpix_class, num_classes=self.num_classes, dtype='int32'
 )
 
             #y[i] = [longitude,latitude]
-        return [X[:,0],X[:,1]], [y,y2]#[y[:,0],y[:,1]]
+        return self.return_(X,y,y2)
 
     def on_epoch_end(self):
         'Updates indexes after each epoch'
         [d.__reset__() for d in self.data_src]
         self.datasrc_index = 0
+
+
     
 def load_embedding(model_fn,dim_vector=100):
     model = KeyedVectors.load(model_fn)
diff --git a/region_embedding.py b/region_embedding.py
new file mode 100644
index 0000000000000000000000000000000000000000..906a422ba6ac0d8ecd4003325f8462f6fb70ec44
--- /dev/null
+++ b/region_embedding.py
@@ -0,0 +1,199 @@
+# Base module 
+import os
+
+# Structure
+import pandas as pd
+
+# DEEPL module
+from keras.layers import Dense, Input, Embedding,concatenate,Bidirectional,LSTM,Dropout
+from keras.models import Model
+from keras.callbacks import ModelCheckpoint
+from tensorflow.keras.layers import Lambda
+import keras.backend as K 
+import tensorflow as tf 
+from lib.custom_layer import *
+
+# Custom module
+from lib.ngram_index import NgramIndex
+from lib.utils import ConfigurationReader, MetaDataSerializer,LabelEncoder
+from lib.metrics import lat_accuracy,lon_accuracy
+from lib.data_generator import DataGenerator,CoOccurrences,load_embedding,Inclusion,Adjacency
+from lib.geo import haversine_tf,accuracy_k,haversine_tf_1circle
+
+# Logging
+import logging
+
+logging.getLogger('gensim').setLevel(logging.WARNING)
+
+from helpers import EpochTimer
+
+# LOGGING CONF
+logging.basicConfig(
+    format='[%(asctime)s][%(levelname)s] %(message)s ', 
+    datefmt='%m/%d/%Y %I:%M:%S %p',
+    level=logging.INFO  
+    )
+
+args = ConfigurationReader("./parser_config/toponym_combination_embedding_v2.json")\
+    .parse_args()#("-i --inclusion-fn ../data/geonamesData/hierarchy.txt ../data/geonamesData/allCountries.txt ../data/embeddings/word2vec4gram/4gramWiki+geonames_index.json ../data/embeddings/word2vec4gram/embedding4gramWiki+Geonames.bin".split())
+
+#.parse_args("-w  --wikipedia-cooc-fn  subsetCoocALLv2.csv ../data/geonamesData/allCountries.txt ../data/embeddings/word2vec4gram/4gramWiki+geonames_index.json ../data/embeddings/word2vec4gram/embedding4gramWiki+Geonames.bin".split())
+
+#
+#################################################
+############# MODEL TRAINING PARAMETER ##########
+#################################################
+NGRAM_SIZE = args.ngram_size
+ACCURACY_TOLERANCE = args.k_value
+EPOCHS = args.epochs
+ADJACENCY_SAMPLING = args.adjacency_sample
+COOC_SAMPLING = args.cooc_sample
+WORDVEC_ITER = 50
+EMBEDDING_DIM = args.dimension
+BATCH_SIZE = args.batch_size
+#################################################
+########## FILENAME VARIABLE ####################
+#################################################
+# check for output dir
+if not os.path.exists("outputs/"):
+    os.makedirs("outputs/")
+
+GEONAME_FN = args.geoname_input
+DATASET_NAME = args.geoname_input.split("/")[-1]
+GEONAMES_HIERARCHY_FN = args.inclusion_fn
+ADJACENCY_REL_FILENAME = args.adjacency_fn
+COOC_FN = args.wikipedia_cooc_fn
+
+PREFIX_OUTPUT_FN = "REGION_{0}_{1}_{2}_{3}".format(
+    GEONAME_FN.split("/")[-1],
+    EPOCHS,
+    NGRAM_SIZE,
+    ACCURACY_TOLERANCE)
+
+REL_CODE=""
+if args.adjacency:
+    PREFIX_OUTPUT_FN += "_A"
+    REL_CODE+= "A"
+if args.inclusion:
+    PREFIX_OUTPUT_FN += "_I"
+    REL_CODE+= "I"
+if args.wikipedia_cooc:
+    PREFIX_OUTPUT_FN += "_C"
+    REL_CODE+= "C"
+
+MODEL_OUTPUT_FN = "outputs/{0}.h5".format(PREFIX_OUTPUT_FN)
+INDEX_FN = "outputs/{0}_index".format(PREFIX_OUTPUT_FN)
+HISTORY_FN = "outputs/{0}.csv".format(PREFIX_OUTPUT_FN)
+
+
+meta_data = MetaDataSerializer(
+    DATASET_NAME,
+    REL_CODE,
+    COOC_SAMPLING,
+    ADJACENCY_SAMPLING,
+    NGRAM_SIZE,
+    ACCURACY_TOLERANCE,
+    EPOCHS,
+    EMBEDDING_DIM,
+    WORDVEC_ITER,
+    INDEX_FN,
+    MODEL_OUTPUT_FN,
+    HISTORY_FN
+)
+meta_data.save("outputs/{0}.json".format(PREFIX_OUTPUT_FN))
+
+
+### PUT DATASRC + GENERATOR
+
+index = NgramIndex.load(args.ngram_index_fn)
+
+train_src = []
+test_src = []
+
+class_encoder = LabelEncoder()
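+# the same LabelEncoder is shared by the train and test sources so healpix cells map to identical class indices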
+if args.wikipedia_cooc:
+    train_src.append(CoOccurrences(COOC_FN + "_train.csv",class_encoder,sampling=4))
+    test_src.append(CoOccurrences(COOC_FN + "_test.csv",class_encoder,sampling=4))
+
+if args.adjacency:
+    a_train = Adjacency(ADJACENCY_REL_FILENAME + "_train.csv",GEONAME_FN,sampling=ADJACENCY_SAMPLING,gzip=False)
+    a_test = Adjacency(ADJACENCY_REL_FILENAME + "_test.csv",GEONAME_FN,sampling=ADJACENCY_SAMPLING,gzip=False)
+    train_src.append(a_train)
+    test_src.append(a_test)
+
+if args.inclusion:
+    i_train = Inclusion(GEONAME_FN,GEONAMES_HIERARCHY_FN+"_train.csv")
+    i_test = Inclusion(GEONAME_FN,GEONAMES_HIERARCHY_FN+"_test.csv")
+    train_src.append(i_train)
+    test_src.append(i_test)
+#Adjacency
+
+
+
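+# only_healpix=True : the generators yield only the healpix class target, matching the single softmax output of the model below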
+d_train = DataGenerator(train_src,index,class_encoder,batch_size=BATCH_SIZE,only_healpix=True) 
+d_test = DataGenerator(test_src,index,class_encoder,batch_size=BATCH_SIZE,only_healpix=True) 
+
+num_words = len(index.index_ngram)  
+
+#############################################################################################
+################################# NGRAM EMBEDDINGS ##########################################
+#############################################################################################
+
+embedding_weights = load_embedding(args.embedding_fn) 
+
+
+#############################################################################################
+################################# MODEL DEFINITION ##########################################
+#############################################################################################
+
+from keras import regularizers
+
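+# The two toponyms (target and context) share the same embedding layer and BiLSTM encoder;
+# their representations are concatenated to predict the healpix cell of the target toponym.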
+input_1 = Input(shape=(index.max_len,))
+input_2 = Input(shape=(index.max_len,))
+
+embedding_layer = Embedding(num_words, EMBEDDING_DIM,input_length=index.max_len,weights=[embedding_weights],trainable=False)#, trainable=True)
+
+x1 = embedding_layer(input_1)
+x2 = embedding_layer(input_2)
+
+# Each LSTM learn on a permutation of the input toponyms
+biLSTM = Bidirectional(LSTM(32,activation="pentanh", recurrent_activation="pentanh"))
+x1 = biLSTM(x1)
+x2 = biLSTM(x2)
+x = concatenate([x1,x2])#,x3])
+
+#x = Dense(class_encoder.get_num_classes()*2,activation="relu")(x)
+
+
+aux_layer = Dense(class_encoder.get_num_classes(),activation="softmax",name="aux_layer")(x)
+
+model = Model(inputs = [input_1,input_2], outputs = aux_layer)#input_3
+
+model.compile(loss={"aux_layer":"categorical_crossentropy"}, optimizer='adam',metrics={"aux_layer":"accuracy"})
+
+
+#############################################################################################
+################################# TRAINING LAUNCH ###########################################
+#############################################################################################
+
+checkpoint = ModelCheckpoint(MODEL_OUTPUT_FN + ".part", monitor='loss', verbose=1,
+    save_best_only=True, mode='auto', period=1)
+
+epoch_timer = EpochTimer("outputs/"+PREFIX_OUTPUT_FN+"_epoch_timer_output.csv")
+
+
+history = model.fit_generator(generator=d_train,
+    validation_data=d_test,
+    verbose=True,
+    epochs=EPOCHS,
+    callbacks=[checkpoint,epoch_timer])
+
+
+hist_df = pd.DataFrame(history.history)
+hist_df.to_csv(HISTORY_FN)
+
+model.save(MODEL_OUTPUT_FN)
+
+# Erase Model Checkpoint file
+if os.path.exists(MODEL_OUTPUT_FN + ".part"):
+    os.remove(MODEL_OUTPUT_FN + ".part")
\ No newline at end of file
diff --git a/scripts/gethealpix.py b/scripts/gethealpix.py
index 387cacfbd700a175ec96b1957cb9dff819dc77c1..6e572fdb256e92c9a6690df2b7e806e1b11e1573 100644
--- a/scripts/gethealpix.py
+++ b/scripts/gethealpix.py
@@ -27,5 +27,6 @@ df = pd.read_csv(args.input_file,sep="\t")
 df["healpix_256"] = df.progress_apply(lambda row:latlon2healpix(lat=row.latitude,lon=row.longitude,res=256),axis=1)
 df["healpix_64"] = df.progress_apply(lambda row:latlon2healpix(lat=row.latitude,lon=row.longitude,res=64),axis=1)
 df["healpix_32"] = df.progress_apply(lambda row:latlon2healpix(lat=row.latitude,lon=row.longitude,res=32),axis=1)
+df["healpix_1"] = df.progress_apply(lambda row:latlon2healpix(lat=row.latitude,lon=row.longitude,res=1),axis=1)
 
 df.to_csv(args.output_file,sep="\t",index=False)
\ No newline at end of file