diff --git a/mongiris/__init__.py b/mongiris/__init__.py
deleted file mode 100644
index 70f0388d609953c032c4804fce7d9712de31e35c..0000000000000000000000000000000000000000
--- a/mongiris/__init__.py
+++ /dev/null
@@ -1,9 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-
-from .mongiris import Mongiris
-
-# all modules
-__all__ = ['Mongiris', 'config', 'integrator']
-
-
diff --git a/mongiris/config.py b/mongiris/config.py
index ec0614d67014aff07c21a072f6fe24674e564b2d..b6ac1d19ca028b9eea765022273538ef60acfc21 100644
--- a/mongiris/config.py
+++ b/mongiris/config.py
@@ -4,28 +4,38 @@
 # Configuration file
 # =============================================================================
 
+from os import path
+
 # mongoDB parameters
 database_iris = "dbhil"  # name of the database
-collection_iris = "colliris"  # collection containing the ~50,000 IRIS
-collection_meta = "collmeta"  # collection containing metadata information (eg, dict of indicators)
+collection_iris = "colliris"  # collection containing the ~50,000 IRIS and values for indicators
+collection_indic = "collindic"  # collection containing metadata information about indicators (short label, long label)
+collection_sources = "collsources"  # collection containing metadata information about sources (filepath, title, etc.)
 max_timeout = 3  # delay before connection timeout
 
+# data parameters
+insee_dir = path.join('data', 'insee', '2019-01')  # the date indicates the last check for updates of INSEE files
+
 # labels for geojson iris files and for the json indicators dictionary
-geojson_indicators_label = 'raw_indicators'
-geojson_insee_files_label = 'insee_files'
-geojson_shortname_label = 'short_fieldname'
-geojson_longname_label = 'long_fieldname'
+geojson_raw_indicators_label = 'raw_indicators'
 geojson_grouped_indicators_label = 'grouped_indicators'
+geojson_insee_files_label = 'insee_files'
+geojson_shortname_label = 'short_label'
+geojson_longname_label = 'full_label'
+geojson_from_files_label = 'from_insee_files'
 
-indicators_dictionary_init = {'CODE_IRIS': {"long_fieldname": 'Code IRIS'}, 'NOM_IRIS': {"long_fieldname": 'Nom IRIS'},
-                              'INSEE_COM': {"long_fieldname": 'Code postal commune'},
-                              'NOM_COM': {"long_fieldname": 'Nom commune'},
-                              'TYP_IRIS': {"long_fieldname": 'Type IRIS'}, 'REG': {"long_fieldname": 'Code région'},
-                              'DEP': {"long_fieldname": 'Code département'}}
-# some properties (found in XLS INSEE files) are actually data about IRIS (and not indicators)
-metadata_about_iris = ['DEP', 'REG', 'TRIRIS']
+# the following labels are metadata about an IRIS, not indicators
+labels_dictionary_init = {'CODE_IRIS': {"long_fieldname": 'Code IRIS'}, 'NOM_IRIS': {"long_fieldname": 'Nom IRIS'},
+                          'INSEE_COM': {"long_fieldname": 'Code postal commune'},
+                          'NOM_COM': {"long_fieldname": 'Nom commune'}, 'IRIS': {"long_fieldname": 'Code IRIS'},
+                          'TYP_IRIS': {"long_fieldname": 'Type IRIS'}, 'REG': {"long_fieldname": 'Code région'},
+                          'DEP': {"long_fieldname": 'Code département'}, 'TRIRIS': {"long_fieldname": 'TRIRIS'},
+                          'UU2010': {"long_fieldname": 'Unité urbaine'}, 'LIBIRIS': {"long_fieldname": 'Libellé IRIS'},
+                          'GRD_QUART': {"long_fieldname": 'Grand quartier'}, 'LAB_IRIS': {"long_fieldname": 'Label qualité IRIS'},
+                          'MODIF_IRIS': {"long_fieldname": 'Type de modification de IRIS'},
+                          }
 
-list_grouped_indicators = ['logement-resident', 'education-superieur-prive', 'animation-culturel',
+labels_grouped_indicators = ['logement-resident', 'education-superieur-prive', 'animation-culturel',
                            'education-secondaire-cycle1-public', 'education-secondaire-cycle2-professionnel-public',
                            'animation-commerce-nonalimentaire', 'education-primaire-prive', 'espacevert',
                            'service-sante', 'service-divers-public', 'education-secondaire-cycle2-professionnel-prive',
diff --git a/mongiris/data/insee/action-sociale-2016.xls b/mongiris/data/insee/2019-01/action-sociale-2016.xls
similarity index 100%
rename from mongiris/data/insee/action-sociale-2016.xls
rename to mongiris/data/insee/2019-01/action-sociale-2016.xls
diff --git a/mongiris/data/insee/activite-residents-2014.xls b/mongiris/data/insee/2019-01/activite-residents-2014.xls
similarity index 100%
rename from mongiris/data/insee/activite-residents-2014.xls
rename to mongiris/data/insee/2019-01/activite-residents-2014.xls
diff --git a/mongiris/data/insee/commerces-2016.xls b/mongiris/data/insee/2019-01/commerces-2016.xls
similarity index 100%
rename from mongiris/data/insee/commerces-2016.xls
rename to mongiris/data/insee/2019-01/commerces-2016.xls
diff --git a/mongiris/data/insee/diplomes-formation-2014.xls b/mongiris/data/insee/2019-01/diplomes-formation-2014.xls
similarity index 100%
rename from mongiris/data/insee/diplomes-formation-2014.xls
rename to mongiris/data/insee/2019-01/diplomes-formation-2014.xls
diff --git a/mongiris/data/insee/education-colleges-lycees-2016.xls b/mongiris/data/insee/2019-01/education-colleges-lycees-2016.xls
similarity index 100%
rename from mongiris/data/insee/education-colleges-lycees-2016.xls
rename to mongiris/data/insee/2019-01/education-colleges-lycees-2016.xls
diff --git a/mongiris/data/insee/education-ecoles-2016.xls b/mongiris/data/insee/2019-01/education-ecoles-2016.xls
similarity index 100%
rename from mongiris/data/insee/education-ecoles-2016.xls
rename to mongiris/data/insee/2019-01/education-ecoles-2016.xls
diff --git a/mongiris/data/insee/education-univ-2016.xls b/mongiris/data/insee/2019-01/education-univ-2016.xls
similarity index 100%
rename from mongiris/data/insee/education-univ-2016.xls
rename to mongiris/data/insee/2019-01/education-univ-2016.xls
diff --git a/mongiris/data/insee/familles-menages-2014.xls b/mongiris/data/insee/2019-01/familles-menages-2014.xls
similarity index 100%
rename from mongiris/data/insee/familles-menages-2014.xls
rename to mongiris/data/insee/2019-01/familles-menages-2014.xls
diff --git a/mongiris/data/insee/fr-en-annuaire-education.geojson b/mongiris/data/insee/2019-01/fr-en-annuaire-education.geojson
similarity index 100%
rename from mongiris/data/insee/fr-en-annuaire-education.geojson
rename to mongiris/data/insee/2019-01/fr-en-annuaire-education.geojson
diff --git a/mongiris/data/insee/fr-en-reussite-au-baccalaureat-origine-sociale.json b/mongiris/data/insee/2019-01/fr-en-reussite-au-baccalaureat-origine-sociale.json
similarity index 100%
rename from mongiris/data/insee/fr-en-reussite-au-baccalaureat-origine-sociale.json
rename to mongiris/data/insee/2019-01/fr-en-reussite-au-baccalaureat-origine-sociale.json
diff --git a/mongiris/data/insee/insee-geo-ontologie.rdf b/mongiris/data/insee/2019-01/insee-geo-ontologie.rdf
similarity index 100%
rename from mongiris/data/insee/insee-geo-ontologie.rdf
rename to mongiris/data/insee/2019-01/insee-geo-ontologie.rdf
diff --git a/mongiris/data/insee/insee-geo-ontologie.ttl b/mongiris/data/insee/2019-01/insee-geo-ontologie.ttl
similarity index 100%
rename from mongiris/data/insee/insee-geo-ontologie.ttl
rename to mongiris/data/insee/2019-01/insee-geo-ontologie.ttl
diff --git a/mongiris/data/insee/logement-2014.xls b/mongiris/data/insee/2019-01/logement-2014.xls
similarity index 100%
rename from mongiris/data/insee/logement-2014.xls
rename to mongiris/data/insee/2019-01/logement-2014.xls
diff --git a/mongiris/data/insee/medical-para-2016.xls b/mongiris/data/insee/2019-01/medical-para-2016.xls
similarity index 100%
rename from mongiris/data/insee/medical-para-2016.xls
rename to mongiris/data/insee/2019-01/medical-para-2016.xls
diff --git a/mongiris/data/insee/mobilite-residentielle-2015.xls b/mongiris/data/insee/2019-01/mobilite-residentielle-2015.xls
similarity index 100%
rename from mongiris/data/insee/mobilite-residentielle-2015.xls
rename to mongiris/data/insee/2019-01/mobilite-residentielle-2015.xls
diff --git a/mongiris/data/insee/population-2014.xls b/mongiris/data/insee/2019-01/population-2014.xls
similarity index 100%
rename from mongiris/data/insee/population-2014.xls
rename to mongiris/data/insee/2019-01/population-2014.xls
diff --git a/mongiris/data/insee/revenus-declares-2014.xls b/mongiris/data/insee/2019-01/revenus-declares-2014.xls
similarity index 100%
rename from mongiris/data/insee/revenus-declares-2014.xls
rename to mongiris/data/insee/2019-01/revenus-declares-2014.xls
diff --git a/mongiris/data/insee/services-2016.xls b/mongiris/data/insee/2019-01/services-2016.xls
similarity index 100%
rename from mongiris/data/insee/services-2016.xls
rename to mongiris/data/insee/2019-01/services-2016.xls
diff --git a/mongiris/data/insee/sport-loisirs-2016.csv b/mongiris/data/insee/2019-01/sport-loisirs-2016.csv
similarity index 100%
rename from mongiris/data/insee/sport-loisirs-2016.csv
rename to mongiris/data/insee/2019-01/sport-loisirs-2016.csv
diff --git a/mongiris/data/insee/sport-loisirs-2016.xls b/mongiris/data/insee/2019-01/sport-loisirs-2016.xls
similarity index 100%
rename from mongiris/data/insee/sport-loisirs-2016.xls
rename to mongiris/data/insee/2019-01/sport-loisirs-2016.xls
diff --git a/mongiris/data/insee/tourisme-transports-2016.xls b/mongiris/data/insee/2019-01/tourisme-transports-2016.xls
similarity index 100%
rename from mongiris/data/insee/tourisme-transports-2016.xls
rename to mongiris/data/insee/2019-01/tourisme-transports-2016.xls
diff --git a/mongiris/integrator.py b/mongiris/integrator.py
index 06a29e951c94e5348f54b500414f2a7657255b40..09abc06c624e288516a5684c65abfac099adfeb8 100644
--- a/mongiris/integrator.py
+++ b/mongiris/integrator.py
@@ -6,182 +6,104 @@
 # =============================================================================
 
 import os
-from . import config
-import json_utils
-import xls_utils
-import mongiris
 import logging
-#from vizliris import regroupement_indicateurs
+from mongiris import config
+from mongiris import xls_utils
+from mongiris import main
 
-#TODO delete this but check for index build - an index function should be created in mongiris
-'''
-def convert_geojson_files_to_mongo(self, ):
-    """
-    This method should not be used (already run once). Implemented for inserting geojson files into MongoDB.
-    :return: the number of iris inserted in the collection
-    """
-    import os
-    from os import path
-    # print(os.getcwd())  # check working directory, and set it to Hil-quartiers in project settings
-    path_hil = path.join('..', 'HiL-recommender')
-    web_dir = path.join(path_hil, 'static', 'data')  # from HiL-recommender.config
-    geojson_integrated_output_departement_dir = path.join(web_dir, 'iris_by_departments')  # from HiL-recommender.config
-    self.logger.info("#documents in collection " + str(self.count_documents(self.iris_collection, {})))
-    self.iris_collection.delete_many({})  # empty collection
-    self.logger.info("#documents in collection " + str(self.count_documents(self.iris_collection, {})))
-    for file in os.listdir(geojson_integrated_output_departement_dir):
-        if file.endswith('.geojson'):  # read each geojson (department) file and insert each iris in MongoDB
-            geojson = self._parse_json_to_dict(path.join(geojson_integrated_output_departement_dir, file))
-            try:
-                # all_iris = geojson['features']  # get a list of iris (geojson dict)
-                # result = iris_collection.insert_many(all_iris)  # pb of 592730103 iris, and many 59 iris not inserted
-                for iris in geojson['features']:  # inserting each iris one by one
-                    if iris["properties"]["CODE_IRIS"] != "592730103":  # pb of 592730103 iris in Gravelines
-                        result = self.iris_collection.insert_one(iris)  # result.inserted_id
-                logging.info("Documents inserted for file " + file)
-            except Exception as e:
-                self.logger.error('Error with MongoDB connection: ' + str(e))
-    nb_docs = self.count_documents(self.iris_collection, {})
-    self.logger.info("#documents in collection " + str(nb_docs))
-    self.logger.info("Creating index on 'geometry' using " + pymongo.GEOSPHERE)
-    self.iris_collection.create_index([("geometry", pymongo.GEOSPHERE)])
-    self.logger.info("Index created")
-    assert (nb_docs == 49403), 'Error: expecting 49403 IRIS to be stored in MongoDB, but stored %i' % nb_docs
-    return nb_docs
-'''
-
-
-def build_dictionary_indicators(dict_indicators, new_short_fieldnames, new_long_fieldnames, from_insee_file):
-    """
-    Adds new indicators (code, label and source file) in the dictionary dict_variables
-    :param dict_indicators: a dict containing information about relevant indicators
-    :param new_short_fieldnames: indicators codes to be added
-    :param new_long_fieldnames: indicators full labels to be added
-    :param from_insee_file: filepath of the INSEE file in which new indicators are extracted
-    :return: res_dict_indicators: an updated version of dict_variables
-    {ind1: {label: indicator1, insee_files=[file1, file2], ...}, ind2: {...}, ...}
-    """
-    res_dict_indicators = dict(dict_indicators)
-    for i in range(0, len(new_short_fieldnames)):
-        shortname = new_short_fieldnames[i]
-        if shortname not in res_dict_indicators:
-            res_dict_indicators[shortname] = dict()
-            # res_dict_indicators[shortname]["short_fieldname"] = shortname
-            res_dict_indicators[shortname][config.geojson_longname_label] = new_long_fieldnames[i]
-        if config.geojson_insee_files_label not in res_dict_indicators[shortname]:
-            res_dict_indicators[shortname][config.geojson_insee_files_label] = list()
-        if from_insee_file not in res_dict_indicators[shortname][config.geojson_insee_files_label]:
-            res_dict_indicators[shortname][config.geojson_insee_files_label].append(from_insee_file)
-    return res_dict_indicators
-
-
-def integrate_xls_file(iris_dict, indicators_xls):
-    """
-    Integrate IRIS data with indicators (about IRIS) and produce a dict of IRIS.
-    Careful : indicators are not available for all IRIS, and some indicators concern IRIS #69029ZZZZ (ZZZZ meaning the
-    neighbourhood, not the IRIS).
-    :param iris_dict: a dictionary with IRIS data (geojson format)
-    :param indicators_xls: a csv file path containing INSEE indicators
-    :return: short_fieldnames: a list containing field ID (or abbreviated field names)
-    :return: long_fieldnames: a list containing the complete field names
-    :return: res_iris_dict: a geojson merged dict that integrates both IRIS data and INSEE indicators
-    """
-    short_fieldnames, long_fieldnames, indicators = xls_utils.parse_xls_to_dict(indicators_xls)
-    ''' # only store relevant indicators (those in config.indicators_ids)
-    sf = list(short_fieldnames)  # need to create a temp list
-    for field in sf:  
-        if field not in config.indicators_ids:  # todo : delete if we store all indicators
-            index_field = short_fieldnames.index(field)
-            del short_fieldnames[index_field]
-            del long_fieldnames[index_field]
-            for key in indicators.keys():
-                del indicators[key][field]
-    '''
 
-    res_iris_dict = dict(iris_dict)
-    for key, prop_values in indicators.items():
-        for feature in res_iris_dict["features"]:
-            if key == feature["properties"]["CODE_IRIS"]:  # indicator record concerns an iris
-                if config.geojson_indicators_label not in feature["properties"]:
-                    feature["properties"][config.geojson_indicators_label] = dict()
-                for prop, value in prop_values.items():
-                    if prop not in feature["properties"] and prop in config.metadata_about_iris: # adding a metadata
-                        feature["properties"][prop] = value
-                    if prop not in feature["properties"][config.geojson_indicators_label] and prop not in feature["properties"]:
-                        feature["properties"][config.geojson_indicators_label][prop] = value
-                break
-    return short_fieldnames, long_fieldnames, res_iris_dict
-
-
-def build_store_index(input_iris_dict, index_output_filepath):
-    """
-    Create an index for iris and store it in a JSON file.s
-    :param input_iris_dict: a dictionary with IRIS data (geojson format)
-    :param index_output_filepath: a filepath to the json file in which the index is stored
-    :return:
-    """
-    index = dict()
-    nb_iris = len(input_iris_dict["features"])
-    for i in range(0, nb_iris):
-        code_iris = input_iris_dict["features"][i]["properties"]["CODE_IRIS"]
-        index[code_iris] = i
-    json_utils.save_dict_to_json(index_output_filepath, index)  # store index
+def get_all_xlsx_files():
+    # generate a list of INSEE xlsx files to be integrated
+    insee_files = list()
+    for file in os.listdir(config.insee_dir):
+        filepath = os.path.join(config.insee_dir, file)
+        if os.path.isfile(filepath) and filepath.endswith(".xls"):
+            insee_files.append(filepath)
+    return insee_files
 
 
-def integrate_from_to(input_iris_dict, dict_indicators, iris_indicators_output_file):
+def integrate_xls_file(xls_file):
     """
-    Main integration program, integrates XLS data from a geojson dict and store the result as a geojson with indicators.
-    All iris are (possibly) enriched with raw and grouped indicators.
-    :param input_iris_dict: a dictionary with IRIS data (geojson format)
-    :param dict_indicators: a dictionary with information about indicators (shortname, longname, etc.)
-    :param iris_indicators_output_file: output filename for integrated geojson file
-    :return: nothing :(
+    Integrate data from the xls file to update an IRIS and its indicators.
+    :param xls_file: an xls file path containing INSEE indicators
+    :return: True when the file has been integrated
     """
+    indicator_metadata, indicators, source_metadata = xls_utils.parse_xls_to_dict(xls_file)
 
-    config.logger.info("Integrating raw indicators")
-    for f in config.indicators_files:  # integrate each xlsx INSEE file (both indicators in IRIS and indicators dict)
-        config.logger.info("Integration of xls INSEE file: " + f)
-        short_fields, long_fields, input_iris_dict = integrate_xls_file(input_iris_dict, f)
-        dict_indicators = build_dictionary_indicators(dict_indicators, short_fields, long_fields, f)
+    # update iris (metadata about the iris and values of its indicators)
+    '''
+    for code_iris, prop_values in indicators.items():
+        doc = connexion.get_iris_from_code(code_iris)
+        if doc is None:
+            print(f"Oops, doc was not found for iris {code_iris}")
+            # TODO should a new doc be added?
+        else:  # update the doc
+            doc_id = doc["_id"]
+            query_clause = {"_id": doc_id}
+            dict_updates = {}
+            for prop, value in prop_values.items():
+                if prop in config.labels_dictionary_init:  # metadata, not an indicator
+                    dict_updates["properties." + prop] = value
+                if prop not in config.labels_dictionary_init:  # indicator
+                    dict_updates["properties." + config.geojson_raw_indicators_label + "." + prop] = value
+            update_clause = {"$set": dict_updates}
+            #print(update_clause)
+            #connexion.update_one_document(connexion.collection_iris, query_clause, update_clause)
+    '''
 
-    # all xlsx files have been integrated, computing grouped indicators
-    config.logger.info("Computing grouped indicators")
-    dict_grouping_indicators = json_utils.parse_json_to_dict(config.grouping_indicators_file)  # how to group raw indicators
-    for iris in input_iris_dict["features"]:
-        if config.geojson_indicators_label in iris["properties"]:
-            grouped_indicators = regroupement_indicateurs.compute_grouped_indicators(iris["properties"]
-                                                            [config.geojson_indicators_label], dict_grouping_indicators)
-            iris["properties"][config.geojson_grouped_indicators_label] = grouped_indicators
+    # add the source metadata
+    doc = connexion.find_one_document(connexion.collection_sources, {"filename": xls_file})
+    if doc is None:
+        connexion.insert_one_document(connexion.collection_sources, source_metadata)
 
-    config.logger.info("Storing output file: " + iris_indicators_output_file)
-    json_utils.save_dict_to_json(iris_indicators_output_file, input_iris_dict)  # store enriched IRIS
-    # the dictionary has the same name, except it ends with "-dictionnaire.json" instead of "geojson"
-    dict_output_file = os.path.splitext(iris_indicators_output_file)[0] + "-dictionnaire.json"
-    config.logger.info("Storing output file: " + dict_output_file)
-    json_utils.save_dict_to_json(dict_output_file, dict_indicators)  # store dict indicators
-    index_output_file = os.path.splitext(iris_indicators_output_file)[0] + "-index.json"
-    config.logger.info("Building and storing output file: " + index_output_file)
-    build_store_index(input_iris_dict, index_output_file)  # create and store an index file (code_iris to iris)
+    # add the indicators labels
+    for ind in indicator_metadata:
+        short_name = ind[config.geojson_shortname_label]
+        doc = connexion.find_one_document(connexion.collection_indic, {config.geojson_shortname_label: short_name})
+        if doc is not None:  # only update field from_insee_files, $addToSet does not add duplicate values
+            connexion.update_one_document(connexion.collection_indic, {config.geojson_shortname_label: short_name},
+                                          {"$addToSet": {config.geojson_from_files_label: xls_file}})
+        else:  # add the document
+            connexion.insert_one_document(connexion.collection_indic, ind)
+    return True
 
 
+#########################
+# main integration script
+#########################
 
+if __name__ == '__main__':
 
+    logging.basicConfig(format='[%(levelname)s] - %(name)s - %(asctime)s : %(message)s')
+    logger = logging.getLogger()
+    logger.setLevel(logging.INFO)
 
-#######################
-# starting integration
-#######################
+    connexion = main.Mongiris()
 
-logging.basicConfig(format='[%(levelname)s] - %(name)s - %(asctime)s : %(message)s')
-logger = logging.getLogger()
-logger.setLevel(logging.INFO)
+    logger.info("Searching xlsx files...")
+    insee_files = get_all_xlsx_files()  # get the list of all xlsx files to be integrated
+    insee_files = [os.path.join(config.insee_dir, 'population-2014.xls'), os.path.join(config.insee_dir, 'logement-2014.xls')]
+    logger.info(f'Found {len(insee_files)} xlsx files to be integrated.')
 
-connexion_db = mongiris.Mongiris()
-logger.info("Reading dictionary files")
-dict_indicators = dict(config.indicators_dictionary_init)  # the initial dictionary (labels of main indicators)
-logger.info("Reading IRIS geojson file")
+    logger.info("Initializing dictionary files")
+    dict_labels = dict(config.labels_dictionary_init)  # the initial dictionary (labels of main indicators)
 
+    logger.info("Integrating sources files (metadata for source and indicators, data for iris)")
+    for file in insee_files:  # integrate each xlsx INSEE file (both indicators in IRIS and indicators dict)
+        logger.info(f"\t- INSEE xlsx file: {file}")
+        integrate_xls_file(file)
+
+    # TODO check why update is soooo long
+    # TODO update the grouped indicators of each document/iris
+    '''
+    config.logger.info("Computing grouped indicators")
+    dict_grouping_indicators = json_utils.parse_json_to_dict(config.grouping_indicators_file)  # how to group raw indicators
+    for iris in input_iris_dict["features"]:
+        if config.geojson_indicators_label in iris["properties"]:
+            grouped_indicators = regroupement_indicateurs.compute_grouped_indicators(iris["properties"]
+                                                                                     [config.geojson_indicators_label],
+                                                                                     dict_grouping_indicators)
+            iris["properties"][config.geojson_grouped_indicators_label] = grouped_indicators
+    '''
 
-#######################
-# end integration
-#######################
-logger.info("Done !")
+    logger.info("Done !")
diff --git a/mongiris/mongiris.py b/mongiris/main.py
similarity index 85%
rename from mongiris/mongiris.py
rename to mongiris/main.py
index 62a77f62edd1e870dc3401fdb8f53fc525d2e76b..adb2615ae92b4de34ed76a003621f65e602a18df 100755
--- a/mongiris/mongiris.py
+++ b/mongiris/main.py
@@ -3,6 +3,7 @@
 # =============================================================================
 # Abstraction layer for the MongoDB database
 # Performs operations such as find, update, convert_geojson_files, intersect, etc.
+# Some methods are not static because they require a valid DB connection (performed in __init__)
 # =============================================================================
 # Path to MongoDB tools (under MacOS): /Applications/MongoDB.app/Contents/Resources/Vendor/mongodb/bin/
 # Export et import d'une collection MongoDB (plus rapide, inclut index et métadonnes mais binaire, option --gzip)
@@ -20,7 +21,7 @@ import pymongo
 from bson import json_util  # used to convert BSON to JSON (especially ObjectId type of "_id")
 import json
 import logging
-from . import config
+from mongiris import config
 
 
 class Mongiris:
@@ -32,7 +33,8 @@ class Mongiris:
         self.connection = self.init_connection()  # default MongoDB connection on 'localhost', 27017
         self.database = self.connection[config.database_iris]  # database for HiL project
         self.collection_iris = self.database[config.collection_iris]
-        self.collection_meta = self.database[config.collection_meta]
+        self.collection_indic = self.database[config.collection_indic]
+        self.collection_sources = self.database[config.collection_sources]
 
     @staticmethod
     def bson_to_json(doc_bson):
@@ -53,16 +55,24 @@ class Mongiris:
             self.logger.error('Could not connect to the MongoDB database ! Have you launched MongoDB ? ' + str(e))
         return connection
 
-    def _parse_json_to_dict(self, json_file_path):
+    @staticmethod
+    def _parse_json_to_dict(json_file_path):
         with open(json_file_path) as data_file:
             data = json.load(data_file)
             data_file.close()
             return data
 
-    def _save_dict_to_json(self, json_file_path, dict_geo):
+    @staticmethod
+    def _save_dict_to_json(json_file_path, dict_geo):
         with open(json_file_path, 'w') as data_file:
             json.dump(dict_geo, data_file)
 
+    def create_index(self, iris_collection):
+        # this method is used in case of restoration/import
+        self.logger.info("Creating index on 'geometry' using " + pymongo.GEOSPHERE)
+        iris_collection.create_index([("geometry", pymongo.GEOSPHERE)])
+        self.logger.info("Index created")
+
     def count_documents(self, collection, json_query):
         """
         Counts the number of documents that satisfy json_query in the given collection
@@ -105,6 +115,36 @@ class Mongiris:
         doc_json = Mongiris.bson_to_json(random_iris)
         return doc_json
 
+    def update_one_document(self, collection, json_query, json_updates):
+        """
+        Updates the first document found by json_query by setting new values from json_updates
+        :param collection: the collection to update into
+        :param json_query: the query criteria
+        :param json_updates: a json document containing values to be updated (using $set operator)
+        :return: json_result: an UpdateResult json document containing information about the update
+        """
+        json_result = collection.update_one(json_query, json_updates)
+        return json_result
+
+    def insert_one_document(self, collection, doc):
+        """
+        Insert a new document in the collection
+        :param collection: the collection to add in
+        :param doc: the document to be added
+        :return: json_result: an InsertOneResult json document containing information about the insertion
+        """
+        json_result = collection.insert_one(doc)
+        return json_result  # eg, the new _id is in json_result.inserted_id
+
+    def delete_all(self, collection):
+        """
+        Delete all documents in the collection. Use with caution!
+        :param collection: the collection to empty
+        :return:
+        """
+        collection.delete_many({})  # empty collection
+        return True
+
     def geo_within(self, collection, geometry, json_projection=None):
         """
         Find all documents from given collection and which contain totally the given geometry
diff --git a/mongiris/tests/__init__.py b/mongiris/tests/__init__.py
deleted file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000
diff --git a/mongiris/tests/mongoiris_tests.py b/mongiris/tests/mongiris_tests.py
similarity index 98%
rename from mongiris/tests/mongoiris_tests.py
rename to mongiris/tests/mongiris_tests.py
index 8a877e30a58bd60908a5a8b2a9660f382dafad94..74f609599fa8dbd40b06dd900839d5989e6c4529 100644
--- a/mongiris/tests/mongoiris_tests.py
+++ b/mongiris/tests/mongiris_tests.py
@@ -4,15 +4,16 @@
 # Unit tests for mongiris
 # =============================================================================
 
-import mongiris
+from mongiris.main import Mongiris
 import unittest
 import random
 import re
 
+
 class TestCase(unittest.TestCase):
 
     def setUp(self):
-        self.db = mongiris.Mongiris()
+        self.db = Mongiris()
 
     def test_count(self):
         count = self.db.count_documents(self.db.collection_iris, {})
diff --git a/mongiris/xls_utils.py b/mongiris/xls_utils.py
index 59c6c66ddd1e335a13edfa83e70fdb825c40f519..87450f8c4e95950c40d7664a75e170ebdf18bd54 100755
--- a/mongiris/xls_utils.py
+++ b/mongiris/xls_utils.py
@@ -1,15 +1,14 @@
 #!/usr/bin/env python
-# -*- coding: utf-8 -*-
+# encoding: utf-8
 # =============================================================================
 #   XLS utilities, mainly for INSEE indicators (parsing Excel files, etc.) and mobilipass data from HiL
 #   https://pypi.python.org/pypi/openpyxl/
 #   https://xlrd.readthedocs.io/en/latest/api.html
 # =============================================================================
 
-from vizliris import config
-import unittest
 import sys
-import xlrd  # for old Excel files (.xls) and new files (.xlsx Excel 2010)
+from mongiris import config
+import xlrd  # for old Excel files (.xls) and new files (.xlsx Excel 2010)
 #import openpyxl  # for new Excel files (.xlsx Excel 2010)
 
 
@@ -17,25 +16,45 @@ def parse_xls_to_dict(xls_file_path):
     """
     Parse an XLS file (excel/calc) produced by INSEE and containing indicators about IRIS.
     :param xls_file_path: the path to the XLS file to be parsed
-    :return: short_fieldnames: a list containing field ID (or abbreviated field names)
-    :return: long_fieldnames: a list containing the complete field names
-    :return: indicators: a dictionary such as {id_iris1: {ind1: val1, ind2: val2, ...}, id_iris2: {ind1: val1, ind2: val2, ...}, ...}
+    :return: indicator_metadata: a list of dict containing information about indicator labels (shortname, fullname)
+    :return: indicators: a dictionary such as {id_iris1: {ind1: val1, ind2: val2, ...}, id_iris2: {ind1: val1, ...}, ...}
+    :return: source_metadata: a dictionary containing metadata information about the document (title, filepath, etc.)
     """
-    logger = config.logging.getLogger(__name__)
     indicators = dict()
+    source_metadata = dict()  # metadata about the document (name, date mise en ligne, infoGeo, etc.)
+    indicator_metadata = list()  # list of dict, each containing metadata about an indicator (short and full labels)
     try:
         wb = xlrd.open_workbook(xls_file_path,
                                 ragged_rows=True)  # ragged_rows to True to avoid empty cells at the end of rows)
     except Exception as e:
-        sys.exit('Error while parsing XLS file {}: {}'.format(xls_file_path, e))
-    #sheet = wb.sheet_by_name("IRIS")  # data is stored in the sheet "IRIS", else wb.sheet_names()
+        sys.exit(f'Error while parsing xlsx file {xls_file_path}: {e}')
+    # sheet = wb.sheet_by_name("IRIS")  # data is stored in the sheet "IRIS", else wb.sheet_names()
     sheet = wb.sheet_by_index(0)  # sheet are sometimes called IRIS, also IRIS_DEC
+
+    # extracting source metadata
+    source_metadata['filepath'] = xls_file_path  # filepath of the document
+    source_metadata['title'] = sheet.cell_value(0, 0)  # title of the document
+    source_metadata['infoGeo'] = sheet.cell_value(1, 0)  # geographic information (area + level of granularity)
+    cell_dates = sheet.cell_value(2, 0)
+    if cell_dates.startswith("Mise en ligne le "):
+        cell_dates = cell_dates[17:27]
+    source_metadata['datePublication'] = cell_dates  # date of online publication
+
+    # extracting labels/fieldnames
     long_fieldnames = sheet.row_values(4)  # row 4 contains the long labels
     short_fieldnames = sheet.row_values(5)  # row 5 contains the short labels (usually not meaningful)
+    for i in range(0, len(short_fieldnames)):
+        shortname = short_fieldnames[i]
+        longname = long_fieldnames[i]
+        ind_dict = {config.geojson_shortname_label: shortname, config.geojson_longname_label: longname,
+                    config.geojson_from_files_label: [xls_file_path]}
+        indicator_metadata.append(ind_dict)
+
+    # extracting indicators values
     nb_fields = len(short_fieldnames)
     for i in range(6, sheet.nrows, 1):
         iris_id = sheet.cell_value(i, 0)  # IRIS id is in the first column
-        #print(sheet.row_values(i))
+        # print(sheet.row_values(i))
         if sheet.row_len(i) == nb_fields:  # some rows may not include all fields
             if iris_id not in indicators:
                 indicators[iris_id] = dict()
@@ -44,12 +63,11 @@ def parse_xls_to_dict(xls_file_path):
                 val = sheet.cell_value(i, j)
                 indicators[iris_id][field] = val
         else:
-            logger.warning("Ignored row (missing fields) : " + str(sheet.row_values(i)))
-    return short_fieldnames, long_fieldnames, indicators
+            print("\tIgnored row (missing fields) : " + str(sheet.row_values(i)))
+    return indicator_metadata, indicators, source_metadata
 
 
-def parse_data_HiL_to_dict(xls_file_path):
-    logger = config.logging.getLogger(__name__)
+def parse_data_mobilipass_to_dict(xls_file_path):
     data = dict()
     try:
         wb = xlrd.open_workbook(xls_file_path,
@@ -69,31 +87,6 @@ def parse_data_HiL_to_dict(xls_file_path):
                 val = sheet.cell_value(i, j)
                 data[person_id][field] = val
         else:
-            logger.warning("Ignored row: " + str(sheet.row_values(i)))
+            print("Ignored row: " + str(sheet.row_values(i)))
     return long_fieldnames, data
 
-
-class TestCase(unittest.TestCase):
-
-    def atest_parse_xls_to_dict(self):
-        f = config.indicators_files[0]  # careful, no guarantee that the first file is always the same (new or deleted ones)
-        _, _, records = parse_xls_to_dict(f)
-        assert (len(records) == 14089), 'Error: expecting 14089 IRIS, extracted %i' % len(records)
-
-    def atest_parse_all_xls_to_dict(self):
-        for f in config.indicators_files:
-            print("Parsing: " + f)
-            parse_xls_to_dict(f)
-        print("Done !")
-
-    def test_parse_HiL_mobilipass(self):
-        f = config.xls_data_HiL_file_path
-        l, d = parse_data_HiL_to_dict(f)
-        print(l)
-        #print(d)
-
-
-if __name__ == "__main__":
-    unittest.main(verbosity=2)  # run all tests with verbose mode
-
-
diff --git a/setup.cfg b/setup.cfg
index f28c68b3c4c918bf1daf48e367d65c3a6b3243c8..00bc165c3bf84d5f46d48790a09e91972dc14af9 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,7 +1,7 @@
 
 [metadata]
 name = mongiris
-version = 0.21
+version = 0.3
 description = This package is an interface for querying INSEE IRIS stored as documents in MongoDB. Requires loading the IRIS files into MongoDB prior to using this package.
 author = Fabien Duchateau
 author_email = fabien.duchateau@univ-lyon1.fr
@@ -15,6 +15,7 @@ include_package_data = True
 zip_safe = False
 install_requires =
     pymongo >= 3.7.2
+    xlrd >= 1.2.0
 
 [options.packages.find]
 exclude =