From e2cf5f705a236e82b0e278088fdf58b835db5805 Mon Sep 17 00:00:00 2001 From: Duchateau Fabien <fabien.duchateau@univ-lyon1.fr> Date: Mon, 22 Mar 2021 14:58:29 +0100 Subject: [PATCH] [M] fix connection issues in tests.py and dummpy.py + update README --- README.md | 67 +++++++++++++++++++++++-------------- import-data.sh | 14 ++++---- mongiris/tests/api_tests.py | 4 +-- mongiris/tests/dummy.py | 6 ++-- 4 files changed, 53 insertions(+), 38 deletions(-) diff --git a/README.md b/README.md index eb50d6a..613361d 100644 --- a/README.md +++ b/README.md @@ -1,46 +1,61 @@ # mongiris package -This Python package is an interface for querying French administrative areas ([IRIS](https://www.insee.fr/fr/metadonnees/definition/c1523), similar to neighborhoods) stored as documents in MongoDB. +**Note: a web application (predihood) is available at [https://gitlab.com/fduchate/predihood](https://gitlab.com/fduchate/predihood) for visualizing (and predicting) neighbourhoods data.** -Each IRIS includes indicators (e.g., average income, types of housings, number of bakeries or schools) that are useful for social sciences studies, for house/neighborhood recommendation, etc. +This Python package is an interface for querying neighbourhoods stored as documents in MongoDB. -In this package, the ~50,000 IRIS and their 350-650 indicators have been integrated and stored in the [GeoJSON format](https://geojson.org/), and an API enables the manipulation of these data. +The package mongiris includes a dataset of 50,000 French neighbourhoods (`hil`) used in a [research project](https://imu.universite-lyon.fr/appels-en-cours-et-bilans/2017-en-cours/hil-artificial-intelligence-to-facilitate-property-searches-system-of-recommendations-with-spatial-and-non-spatial-visualisation-for-property-search-2017/) to predict the environment of a neighbourhood (e.g., social class, type of landscape) based on hundreds of indicators (about population, shops, buildings, etc.). 
In this context, neighbourhoods are perceived as [IRIS](https://www.insee.fr/fr/metadonnees/definition/c1523) (a French administrative unit area). -## Prerequisites +The tool also includes a small test dataset (`bird-migration`) to demonstrate how to add new datasets. + +## Installation + +### Installation using Docker + +Check [https://gitlab.com/fduchate/predihood](https://gitlab.com/fduchate/predihood) for the Docker installation. + +### Manual installation + +Prerequisites: - Python, version >=3 -- [MongoDB](https://www.mongodb.com/), version >=4, in which it is necessary to import the IRIS database (see Installation). +- [MongoDB](https://www.mongodb.com/), version >=4, in which it is necessary to import the datasets. -## Installation +First, clone the repository with the following command: -To install mongiris (and its dependencies): +``` +git clone https://gitlab.liris.cnrs.fr/fduchate/mongiris +``` + +To install mongiris (and its dependencies), go into the `mongiris` directory and run: ``` -python3 -m pip install git+https://fduchate@gitlab.liris.cnrs.fr/fduchate/mongiris.git#egg=mongiris +python3 -m pip install -e . ``` +Then import datasets into the MongoDB database: run the MongoDB server (`mongod`) and execute the following commands (from MongoDB's executable directory if needed): -Next, you need to load the IRIS data into MongoDB (using the `mongorestore` tool). 
-- download the [dump of the database](https://gitlab.liris.cnrs.fr/fduchate/mongiris/raw/master/mongiris/data/dump/dump-dbinsee.bin) (724 MB) -- open a terminal and run: ``` -mongorestore --archive=/path/to/dump-dbinsee.bin +# import dataset 'hil' as a MongoDB dump +./mongorestore --archive=/path/to/dump-dbinsee.bin +# import dataset 'bird-migration' as a collection of JSON documents +./mongoimport --db=dbmigration -c=collmigration --file=/path/to/dump-bird-neighbourhoods.json +./mongoimport --db=dbmigration -c=collindic --file=/path/to/dump-bird-indicators.json ``` -where `/path/to/` indicates the path to the downloaded dump database. <!--(provided with the source package mongiris in `mongiris/data/dump/dump-dbinsee.bin`).--> -This restoration may take a few minutes as the geospatial indexes are rebuilt. +where `/path/to/` is the path to the dataset files (provided with the package mongiris in `mongiris/data/dumps/`). Tip: move the dataset files into the MongoDB binary directory (`PATH/TO/MONGODB/bin`). You may have to create the folder `data/db` under `PATH/TO/MONGODB/bin` for MongoDB, then run `./mongod --dbpath=./data/db`. +This restoration may take a few minutes as the geospatial indexes are rebuilt for dataset _hil_. -## Usage +## Datasets + +More details about the datasets are provided at [https://gitlab.com/fduchate/predihood](https://gitlab.com/fduchate/predihood). -In MongoDB, the database is named `dbinsee`. It contains three collections: -- `collsources` stores information about original data sources (title, release date, geographical information) -- `collindic` stores information about indicators (short label, full label, data sources in which it appears). -- `colliris` is the main collection, which stores each IRIS with its indicators (according to the [GeoJSON format](https://geojson.org/)) +## Usage -To manipulate the database, simply connect to MongoDB by creating an object of the `Mongiris` class. 
-Using this object, twenty methods are available for querying the data. +To manipulate the database, simply connect to MongoDB (default on '127.0.0.1:27017') by creating an object of the `Mongiris` class. +Using this object, around 20 methods are available for querying the data. -Below is a minimal example of connection and queries (from `tests/dummy.py` file): +Below is a minimal example of connection and queries (from `mongiris/tests/dummy.py` file): ``` from mongiris.api import Mongiris @@ -48,14 +63,14 @@ from mongiris.api import Mongiris db = Mongiris() # return the number of documents in a collection -counts = db.count_documents(db.collection_indic, {}) +counts = db.count_documents(db.collection_indicators, {}) # get complete information about iris identified with code 593500203 -iris = db.find_one_document(db.collection_iris, {"properties.CODE_IRIS": "593500203"}) +iris = db.find_one_document(db.collection_neighbourhoods, {"properties.CODE_IRIS": "593500203"}) print(iris) # get iris which contains coordinates 3.685111, 46.514643 -iris2 = db.point_in_which_iris([3.685111, 46.514643]) +iris2 = db.point_in_which_neighbourhood([3.685111, 46.514643]) print(iris2) ``` @@ -64,7 +79,7 @@ More examples, including testing geospatial queries, are available in the `tests ## Contributors -- Fabien Duchateau, Franck Favetta (laboratory [LIRIS](https://liris.cnrs.fr/), Université Lyon 1) +- Fabien Duchateau, Franck Favetta, Nelly Barret (laboratory [LIRIS](https://liris.cnrs.fr/), Université Lyon 1) - Loïc Bonneval (laboratory [CMW](https://www.centre-max-weber.fr/), Université Lyon 2) diff --git a/import-data.sh b/import-data.sh index e4953c4..68ecb8d 100644 --- a/import-data.sh +++ b/import-data.sh @@ -1,4 +1,4 @@ -# import-data.sh: restores mongodata if needed +# import-data.sh: restores Mongo data if needed # mongod should be already launched by the container echo "Executing import-data.sh" @@ -6,21 +6,21 @@ echo "Executing import-data.sh" dbs=$(mongo --eval 
'db.getMongo().getDBNames()') echo "$dbs"; -# teste si la BD dbinsee existe, et importe les données du dataset dbinsee si besoin +# check whether database dbinsee (dataset hil) exists, and import its data if needed if [ $(mongo --eval 'db.getMongo().getDBNames().indexOf("dbinsee")' --quiet) -lt 0 ]; then - echo "Database dbinsee does not exist, restoring dbinsee." + echo "Database dbinsee does not exist (dataset 'hil'), restoring dbinsee." mongorestore --archive=/tmp/dumps/dump-dbinsee.bin; else - echo "Database dbinsee already exists, not restoring data." + echo "Database dbinsee already exists (dataset 'hil'), not restoring data." fi -# teste si la BD dbmigration existe, et importe les données du dataset bird migration si besoin +# check whether database dbmigration (dataset bird-migration) exists, and import its data if needed if [ $(mongo --eval 'db.getMongo().getDBNames().indexOf("dbmigration")' --quiet) -lt 0 ]; then - echo "Database dbmigration does not exist, importing collections in dbmigration." + echo "Database dbmigration does not exist (dataset 'bird-migration'), importing collections in dbmigration." mongoimport --db=dbmigration -c=collmigration --file=/tmp/dumps/dump-bird-neighbourhoods.json mongoimport --db=dbmigration -c=collindic --file=/tmp/dumps/dump-bird-indicators.json else - echo "Database dbmigration already exists, not restoring data." + echo "Database dbmigration already exists (dataset 'bird-migration'), not restoring data." 
fi echo "End of script import-data.sh" diff --git a/mongiris/tests/api_tests.py b/mongiris/tests/api_tests.py index bcde16e..3bd2191 100755 --- a/mongiris/tests/api_tests.py +++ b/mongiris/tests/api_tests.py @@ -13,8 +13,8 @@ import unittest import random import re -host="db-mongihood" -port=27017 +host = "localhost" +port = 27017 class TestCase(unittest.TestCase): """ diff --git a/mongiris/tests/dummy.py b/mongiris/tests/dummy.py index 686ddcc..73711c5 100644 --- a/mongiris/tests/dummy.py +++ b/mongiris/tests/dummy.py @@ -12,13 +12,13 @@ import json db = Mongiris() # return the number of documents in a collection -counts = db.count_documents(db.collection_indic, {}) +counts = db.count_documents(db.collection_indicators, {}) # get complete information about iris identified with code 593500203 -iris = db.find_one_document(db.collection_iris, {"properties.CODE_IRIS": "593500203"}) +iris = db.find_one_document(db.collection_neighbourhoods, {"properties.CODE_IRIS": "593500203"}) # get iris which contains coordinates 3.685111, 46.514643 -iris2 = db.point_in_which_iris([3.685111, 46.514643]) +iris2 = db.point_in_which_neighbourhood([3.685111, 46.514643]) print(counts) print(iris) -- GitLab