From e2cf5f705a236e82b0e278088fdf58b835db5805 Mon Sep 17 00:00:00 2001
From: Duchateau Fabien <fabien.duchateau@univ-lyon1.fr>
Date: Mon, 22 Mar 2021 14:58:29 +0100
Subject: [PATCH] [M] fix connection issues in tests.py and dummy.py + update
 README

---
 README.md                   | 67 +++++++++++++++++++++++--------------
 import-data.sh              | 14 ++++----
 mongiris/tests/api_tests.py |  4 +--
 mongiris/tests/dummy.py     |  6 ++--
 4 files changed, 53 insertions(+), 38 deletions(-)

diff --git a/README.md b/README.md
index eb50d6a..613361d 100644
--- a/README.md
+++ b/README.md
@@ -1,46 +1,61 @@
 # mongiris package
 
-This Python package is an interface for querying French administrative areas ([IRIS](https://www.insee.fr/fr/metadonnees/definition/c1523), similar to neighborhoods) stored as documents in MongoDB.
+**Note: a web application (predihood) is available at [https://gitlab.com/fduchate/predihood](https://gitlab.com/fduchate/predihood) for visualizing (and predicting) neighbourhoods data.**
 
-Each IRIS includes indicators (e.g., average income, types of housings, number of bakeries or schools) that are useful for social sciences studies, for house/neighborhood recommendation, etc.
+This Python package is an interface for querying neighbourhoods stored as documents in MongoDB.
 
-In this package, the ~50,000 IRIS and their 350-650 indicators have been integrated and stored in the [GeoJSON format](https://geojson.org/), and an API enables the manipulation of these data.
+The package mongiris includes a dataset of 50,000 French neighbourhoods (`hil`) used in a [research project](https://imu.universite-lyon.fr/appels-en-cours-et-bilans/2017-en-cours/hil-artificial-intelligence-to-facilitate-property-searches-system-of-recommendations-with-spatial-and-non-spatial-visualisation-for-property-search-2017/) to predict the environment of a neighbourhood (e.g., social class, type of landscape) based on hundreds of indicators (about population, shops, buildings, etc.). In this context, neighbourhoods are perceived as [IRIS](https://www.insee.fr/fr/metadonnees/definition/c1523) (a French administrative unit area).
 
-## Prerequisites
+The tool also includes a small test dataset (`bird-migration`) to demonstrate how to add new datasets. 
+
+## Installation
+
+### Installation using Docker
+
+Check [https://gitlab.com/fduchate/predihood](https://gitlab.com/fduchate/predihood) for the Docker installation.
+
+### Manual installation
+
+Prerequisites:
 
 - Python, version >=3
-- [MongoDB](https://www.mongodb.com/), version >=4, in which it is necessary to import the IRIS database (see Installation).
+- [MongoDB](https://www.mongodb.com/), version >=4, in which it is necessary to import the datasets.
 
-## Installation
+First, clone the repository with the following command:
 
-To install mongiris (and its dependencies):
+```
+git clone https://gitlab.liris.cnrs.fr/fduchate/mongiris
+```
+
+To install mongiris (and its dependencies), go into the `mongiris` directory and run:
 
 ```
-python3 -m pip install git+https://fduchate@gitlab.liris.cnrs.fr/fduchate/mongiris.git#egg=mongiris
+python3 -m pip install -e .
 ```
 
+Then import the datasets into the MongoDB database: run the MongoDB server (`mongod`) and execute the following commands (from MongoDB's executable directory if needed):
 
-Next, you need to load the IRIS data into MongoDB (using the `mongorestore` tool).
-- download the [dump of the database](https://gitlab.liris.cnrs.fr/fduchate/mongiris/raw/master/mongiris/data/dump/dump-dbinsee.bin) (724 MB)
-- open a terminal and run:
 ```
-mongorestore --archive=/path/to/dump-dbinsee.bin
+# import dataset 'hil' as a MongoDB dump
+./mongorestore --archive=/path/to/dump-dbinsee.bin
+# import dataset 'bird-migration' as a collection of JSON documents
+./mongoimport --db=dbmigration -c=collmigration --file=/path/to/dump-bird-neighbourhoods.json
+./mongoimport --db=dbmigration -c=collindic --file=/path/to/dump-bird-indicators.json
 ```
 
-where `/path/to/` indicates the path to the downloaded dump database. <!--(provided with the source package mongiris in `mongiris/data/dump/dump-dbinsee.bin`).--> 
-This restoration may take a few minutes as the geospatial indexes are rebuilt.
+where `/path/to/` is the path to the dataset files (provided with the package mongiris in `mongiris/data/dumps/`). Tip: move the dataset files into the MongoDB binary directory (`PATH/TO/MONGODB/bin`). You may have to create the folder `data/db` under `PATH/TO/MONGODB/bin` and start the server with `./mongod --dbpath=./data/db`.
+This restoration may take a few minutes as the geospatial indexes are rebuilt for dataset _hil_.
 
-## Usage
+## Datasets
+
+More details about the datasets are provided at [https://gitlab.com/fduchate/predihood](https://gitlab.com/fduchate/predihood).
 
-In MongoDB, the database is named `dbinsee`. It contains three collections: 
-- `collsources` stores information about original data sources (title, release date, geographical information)
-- `collindic` stores information about indicators (short label, full label, data sources in which it appears). 
-- `colliris` is the main collection, which stores each IRIS with its indicators (according to the [GeoJSON format](https://geojson.org/)) 
+## Usage
 
-To manipulate the database, simply connect to MongoDB by creating an object of the `Mongiris` class.
-Using this object, twenty methods are available for querying the data.
+To manipulate the database, simply connect to MongoDB (by default at `127.0.0.1:27017`) by creating an object of the `Mongiris` class.
+Using this object, around 20 methods are available for querying the data.
 
-Below is a minimal example of connection and queries (from `tests/dummy.py` file):
+Below is a minimal example of connection and queries (from `mongiris/tests/dummy.py` file):
 
 ```
 from mongiris.api import Mongiris
@@ -48,14 +63,14 @@ from mongiris.api import Mongiris
 db = Mongiris()
 
 # return the number of documents in a collection
-counts = db.count_documents(db.collection_indic, {})
+counts = db.count_documents(db.collection_indicators, {})
 
 # get complete information about iris identified with code 593500203
-iris = db.find_one_document(db.collection_iris, {"properties.CODE_IRIS": "593500203"})
+iris = db.find_one_document(db.collection_neighbourhoods, {"properties.CODE_IRIS": "593500203"})
 print(iris)
 
 # get iris which contains coordinates 3.685111, 46.514643
-iris2 = db.point_in_which_iris([3.685111, 46.514643])
+iris2 = db.point_in_which_neighbourhood([3.685111, 46.514643])
 print(iris2)
 ```
 
@@ -64,7 +79,7 @@ More examples, including testing geospatial queries, are available in the `tests
 
 ## Contributors
 
-- Fabien Duchateau, Franck Favetta (laboratory [LIRIS](https://liris.cnrs.fr/), Université Lyon 1)
+- Fabien Duchateau, Franck Favetta, Nelly Barret (laboratory [LIRIS](https://liris.cnrs.fr/), Université Lyon 1)
 
 - Loïc Bonneval (laboratory [CMW](https://www.centre-max-weber.fr/), Université Lyon 2)
 
diff --git a/import-data.sh b/import-data.sh
index e4953c4..68ecb8d 100644
--- a/import-data.sh
+++ b/import-data.sh
@@ -1,4 +1,4 @@
-# import-data.sh: restores mongodata if needed
+# import-data.sh: restores Mongo data if needed
 # mongod should be already launched by the container
 
 echo "Executing import-data.sh"
@@ -6,21 +6,21 @@ echo "Executing import-data.sh"
 dbs=$(mongo --eval 'db.getMongo().getDBNames()')
 echo "$dbs";
 
-# teste si la BD dbinsee existe, et importe les données du dataset dbinsee si besoin 
+# check whether database dbinsee (dataset hil) exists, and import data if needed
 if [ $(mongo --eval 'db.getMongo().getDBNames().indexOf("dbinsee")' --quiet) -lt 0 ]; then
-    echo "Database dbinsee does not exist, restoring dbinsee."
+    echo "Database dbinsee does not exist (dataset 'hil'), restoring dbinsee."
 	mongorestore --archive=/tmp/dumps/dump-dbinsee.bin;
 else
-    echo "Database dbinsee already exists, not restoring data."
+    echo "Database dbinsee already exists (dataset 'hil'), not restoring data."
 fi
 
-# teste si la BD dbmigration existe, et importe les données du dataset bird migration si besoin
+# check whether database dbmigration (dataset bird-migration) exists, and import data if needed
 if [ $(mongo --eval 'db.getMongo().getDBNames().indexOf("dbmigration")' --quiet) -lt 0 ]; then
-    echo "Database dbmigration does not exist, importing collections in dbmigration."
+    echo "Database dbmigration does not exist (dataset 'bird-migration'), importing collections in dbmigration."
 	mongoimport --db=dbmigration -c=collmigration --file=/tmp/dumps/dump-bird-neighbourhoods.json	
 	mongoimport --db=dbmigration -c=collindic --file=/tmp/dumps/dump-bird-indicators.json
 else
-    echo "Database dbmigration already exists, not restoring data."
+    echo "Database dbmigration already exists (dataset 'bird-migration'), not restoring data."
 fi
 
 echo "End of script import-data.sh"
diff --git a/mongiris/tests/api_tests.py b/mongiris/tests/api_tests.py
index bcde16e..3bd2191 100755
--- a/mongiris/tests/api_tests.py
+++ b/mongiris/tests/api_tests.py
@@ -13,8 +13,8 @@ import unittest
 import random
 import re
 
-host="db-mongihood"
-port=27017
+host = "localhost"
+port = 27017
 
 class TestCase(unittest.TestCase):
     """
diff --git a/mongiris/tests/dummy.py b/mongiris/tests/dummy.py
index 686ddcc..73711c5 100644
--- a/mongiris/tests/dummy.py
+++ b/mongiris/tests/dummy.py
@@ -12,13 +12,13 @@ import json
 db = Mongiris()
 
 # return the number of documents in a collection
-counts = db.count_documents(db.collection_indic, {})
+counts = db.count_documents(db.collection_indicators, {})
 
 # get complete information about iris identified with code 593500203
-iris = db.find_one_document(db.collection_iris, {"properties.CODE_IRIS": "593500203"})
+iris = db.find_one_document(db.collection_neighbourhoods, {"properties.CODE_IRIS": "593500203"})
 
 # get iris which contains coordinates 3.685111, 46.514643
-iris2 = db.point_in_which_iris([3.685111, 46.514643])
+iris2 = db.point_in_which_neighbourhood([3.685111, 46.514643])
 
 print(counts)
 print(iris)
-- 
GitLab