From 5c852d89113feafb53c3ac425086dfab60f37c54 Mon Sep 17 00:00:00 2001
From: Ludovic Moncla <300641+ludovicmoncla@users.noreply.github.com>
Date: Thu, 29 Sep 2022 10:57:54 +0200
Subject: [PATCH] Update Tutoriel-geoparsing.ipynb

---
 Tutoriel-geoparsing.ipynb | 1331 +++++++++++++++++++++++++++++++++++--
 1 file changed, 1262 insertions(+), 69 deletions(-)

diff --git a/Tutoriel-geoparsing.ipynb b/Tutoriel-geoparsing.ipynb
index 7ad8796..b41b762 100644
--- a/Tutoriel-geoparsing.ipynb
+++ b/Tutoriel-geoparsing.ipynb
@@ -114,6 +114,8 @@
    "metadata": {},
    "outputs": [],
    "source": [
+    "# On définit une fonction qui prend en paramètre le chemin d'un fichier et qui retourne son contenu\n",
+    "\n",
     "def load_txt(filepath):\n",
     "    with open(filepath) as f:\n",
     "        return f.read()"
@@ -125,6 +127,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
+    "# On utilise la fonction précédente pour récupérer le contenu de l'article encyclopédique 'Arques' (volume01-4083.txt) présent dans le dossier data\n",
     "arques = load_txt('data/volume01-4083.txt')"
    ]
   },
@@ -158,7 +161,7 @@
    "source": [
     "### 3.2 Chargement d'un jeu de données à partir de la librairie Perdido\n",
     "\n",
-    "La libraire de geoparsing Perdido embarque deux jeux de données : \n",
+    "La librairie de geoparsing [Perdido](https://github.com/ludovicmoncla/perdido) embarque deux jeux de données : \n",
     " 1. des articles encyclopédiques (volume 7 de l'Encyclopédie de Diderot et d'Alembert (1751-1772)), fournit par l'[ARTFL](https://encyclopedie.uchicago.edu) dans le cadre du projet [GEODE](https://geode-project.github.io) ;\n",
     " 2. des descriptions de randonnées (chaque description est associée à sa trace GPS. Elles proviennent du site [www.visorando.fr](https://www.visorando.com) et ont été collectées dans le cadre du projet [ANR CHOUCAS](http://choucas.ign.fr).\n",
     "\n",
@@ -450,7 +453,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 13,
+   "execution_count": 11,
    "metadata": {},
    "outputs": [
     {
@@ -506,7 +509,7 @@
        "5  FRONTIGNAN, (Géog.) petite ville de France. au...  "
       ]
      },
-     "execution_count": 13,
+     "execution_count": 11,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -525,7 +528,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 15,
+   "execution_count": 12,
    "metadata": {},
    "outputs": [
     {
@@ -555,7 +558,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 16,
+   "execution_count": 13,
    "metadata": {},
    "outputs": [
     {
@@ -583,7 +586,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 17,
+   "execution_count": 14,
    "metadata": {},
    "outputs": [
     {
@@ -694,7 +697,7 @@
        "38  Fruit, (art de conserver le) Economie rustiq. ...  "
       ]
      },
-     "execution_count": 17,
+     "execution_count": 14,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -722,7 +725,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 18,
+   "execution_count": 15,
    "metadata": {},
    "outputs": [
     {
@@ -743,7 +746,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 21,
+   "execution_count": 16,
    "metadata": {},
    "outputs": [
     {
@@ -780,25 +783,36 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 19,
+   "execution_count": 17,
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
        "author\n",
-       "Desmarest                            1\n",
-       "Diderot                              1\n",
-       "Jaucourt                           476\n",
-       "La Condamine                         1\n",
-       "Mallet                               1\n",
-       "Robert de Vaugondy                   2\n",
-       "Robert de Vaugondy & d'Alembert      1\n",
-       "unsigned                            13\n",
+       "Anonymous5                 1\n",
+       "Beauzée & Douchet          1\n",
+       "Boucher d'Argis           14\n",
+       "Bouchu                     1\n",
+       "Desmarest                  1\n",
+       "Diderot                    2\n",
+       "Jaucourt                 141\n",
+       "Le Blond                   1\n",
+       "Le Blond & d'Alembert      1\n",
+       "Le Roy                     1\n",
+       "Lucotte5                   1\n",
+       "Mallet                     1\n",
+       "Quesnay                    1\n",
+       "Robert de Vaugondy         1\n",
+       "Tressan                    1\n",
+       "Voltaire                   2\n",
+       "d'Alembert                 2\n",
+       "d'Holbach                  1\n",
+       "unsigned                   3\n",
        "Name: filename, dtype: int64"
       ]
      },
-     "execution_count": 19,
+     "execution_count": 17,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -818,12 +832,13 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "## 5. Reconnaissance d'Entités Nommées (NER)\n",
+    "## 4. Reconnaissance d'Entités Nommées (NER)\n",
     "\n",
     "La reconnaissance d'entités nommées, *Named Entity Recognition* (NER) en anglais, est une tâche très importante et incontournable en traitement automatique des langues (TAL) et en compréhension du langage naturel (NLU en anglais). \n",
     "Cette tâche consiste à rechercher des objets textuels (un mot, ou un groupe de mots, souvent associés aux noms propres) catégorisables dans des classes telles que noms de personnes, noms d'organisations ou d'entreprises, noms de lieux, quantités, distances, valeurs, dates, etc.\n",
+    "Les typologies et les jeux d'étiquettes dépendent de chaque outil.\n",
     "\n",
-    "Dans cet atelier nous allons expérimenter et comparer trois outils de NER. \n",
+    "Dans cet atelier nous allons expérimenter et comparer trois outils de NER :\n",
     "\n",
     "1. [Stanza](https://stanfordnlp.github.io/stanza/index.html)\n",
     "2. [spaCy](https://spacy.io)\n",
@@ -834,7 +849,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "### 5.1 Stanza NER\n",
+    "### 4.1 Stanza NER\n",
     "\n",
     "`Stanza` est une librairie Python de traitement du langage naturel. Elle contient des outils, qui peuvent être utilisés dans une chaîne de traitement, pour convertir du texte en listes de phrases et de mots, pour générer les formes de base de ces mots, leurs parties du discours et leurs caractéristiques morphologiques, pour produire une analyse syntaxique de dépendance, et pour reconnaître les entités nommées. \n",
     "\n",
@@ -847,17 +862,55 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "* Importer la librairie `Stanza` et télécharger le modèle pré-entrainé pour le français : "
+    "* Importer la librairie `Stanza` : "
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 19,
    "metadata": {},
    "outputs": [],
    "source": [
-    "import stanza\n",
-    "\n",
+    "import stanza"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "* Télécharger le modèle pré-entrainé pour le français : "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "e23cc371ca6e46e695d7e4200dbcee84",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Downloading https://raw.githubusercontent.com/stanfordnlp/stanza-resources/main/resources_1.2.2.json:   0%|   …"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "2022-09-29 08:23:00 INFO: Downloading default packages for language: fr (French)...\n",
+      "2022-09-29 08:23:01 INFO: File exists: /Users/lmoncla/stanza_resources/fr/default.zip.\n",
+      "2022-09-29 08:23:05 INFO: Finished downloading models and saved to /Users/lmoncla/stanza_resources.\n"
+     ]
+    }
+   ],
+   "source": [
     "stanza.download('fr')"
    ]
   },
@@ -870,9 +923,31 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 21,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "2022-09-29 08:23:58 WARNING: Language fr package default expects mwt, which has been added\n",
+      "2022-09-29 08:23:58 INFO: Loading these models for language: fr (French):\n",
+      "=======================\n",
+      "| Processor | Package |\n",
+      "-----------------------\n",
+      "| tokenize  | gsd     |\n",
+      "| mwt       | gsd     |\n",
+      "| ner       | wikiner |\n",
+      "=======================\n",
+      "\n",
+      "2022-09-29 08:23:58 INFO: Use device: cpu\n",
+      "2022-09-29 08:23:58 INFO: Loading: tokenize\n",
+      "2022-09-29 08:23:58 INFO: Loading: mwt\n",
+      "2022-09-29 08:23:58 INFO: Loading: ner\n",
+      "2022-09-29 08:23:59 INFO: Done loading processors!\n"
+     ]
+    }
+   ],
    "source": [
     "stanza_parser = stanza.Pipeline(lang='fr', processors='tokenize,ner')"
    ]
@@ -886,9 +961,17 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 22,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "* ARQUES, (Géog.) petite ville de France, en Normandie, au pays de Caux, sur la petite riviere d'Arques. Long. 18. 50. lat. 49. 54.\n"
+     ]
+    }
+   ],
    "source": [
     "print(arques)"
    ]
@@ -902,7 +985,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 23,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -918,10 +1001,11 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 26,
    "metadata": {},
    "outputs": [],
    "source": [
+    "# On définit une fonction qui prend en paramètre le retour du traitement par Stanza, qui parcourt et affiche la liste des entités et leur type\n",
     "def show_ents(stanza_output):\n",
     "    for ent in stanza_output.ents:\n",
     "        print(ent.text, ent.type)"
@@ -929,10 +1013,23 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 27,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "ARQUES LOC\n",
+      "France LOC\n",
+      "Normandie LOC\n",
+      "pays de Caux LOC\n",
+      "Arques LOC\n"
+     ]
+    }
+   ],
    "source": [
+    "# On utilise la fonction précédente pour afficher la liste des entités repérées\n",
     "show_ents(arques_stanza)"
    ]
   },
@@ -940,11 +1037,11 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "### 5.2 SpaCy NER\n",
+    "### 4.2 SpaCy NER\n",
     "\n",
     "\n",
     "`spaCy` est également une librairie Python de traitement du langage naturel. \n",
-    "Elle se compose de modèles pré-entrainés et supporte actuellement la tokenisation et l'entrainement pour plus de 60 langues. Elle est doté de modèles de réseaux neuronaux pour le balisage, l'analyse syntaxique, la reconnaissance d'entités nommées, la classification de textes, l'apprentissage multi-tâches avec des transformateurs pré-entraînés comme BERT, ainsi qu'un système d'entraînement prêt pour la production et un déploiement simple des modèles. `spaCy` est un logiciel commercial, publié en open-source sous la licence MIT.\n",
+    "Elle se compose de modèles pré-entrainés et supporte actuellement la tokenisation et l'entrainement pour plus de 60 langues. Elle est dotée de modèles de réseaux de neurones pour l'étiquetage, l'analyse syntaxique, la reconnaissance d'entités nommées, la classification de textes, l'apprentissage multi-tâches avec des transformateurs pré-entraînés comme BERT, ainsi qu'un système d'entraînement prêt pour la production et un déploiement simple des modèles. `spaCy` est un logiciel commercial, publié en open-source sous la licence MIT.\n",
     "\n",
     "Dans cette partie nous allons voir comment utiliser `spaCy` pour la reconnaissance d'entités nommées toujours à partir de notre exemple en français."
    ]
@@ -958,9 +1055,51 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 28,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Collecting fr-core-news-sm==3.3.0\n",
+      "  Downloading https://github.com/explosion/spacy-models/releases/download/fr_core_news_sm-3.3.0/fr_core_news_sm-3.3.0-py3-none-any.whl (16.3 MB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m16.3/16.3 MB\u001b[0m \u001b[31m10.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n",
+      "\u001b[?25hRequirement already satisfied: spacy<3.4.0,>=3.3.0.dev0 in /usr/local/Caskroom/miniforge/base/envs/tdm-geoparsing-py39/lib/python3.9/site-packages (from fr-core-news-sm==3.3.0) (3.3.1)\n",
+      "Requirement already satisfied: requests<3.0.0,>=2.13.0 in /usr/local/Caskroom/miniforge/base/envs/tdm-geoparsing-py39/lib/python3.9/site-packages (from spacy<3.4.0,>=3.3.0.dev0->fr-core-news-sm==3.3.0) (2.28.1)\n",
+      "Requirement already satisfied: spacy-legacy<3.1.0,>=3.0.9 in /usr/local/Caskroom/miniforge/base/envs/tdm-geoparsing-py39/lib/python3.9/site-packages (from spacy<3.4.0,>=3.3.0.dev0->fr-core-news-sm==3.3.0) (3.0.10)\n",
+      "Requirement already satisfied: packaging>=20.0 in /usr/local/Caskroom/miniforge/base/envs/tdm-geoparsing-py39/lib/python3.9/site-packages (from spacy<3.4.0,>=3.3.0.dev0->fr-core-news-sm==3.3.0) (21.3)\n",
+      "Requirement already satisfied: cymem<2.1.0,>=2.0.2 in /usr/local/Caskroom/miniforge/base/envs/tdm-geoparsing-py39/lib/python3.9/site-packages (from spacy<3.4.0,>=3.3.0.dev0->fr-core-news-sm==3.3.0) (2.0.6)\n",
+      "Requirement already satisfied: jinja2 in /usr/local/Caskroom/miniforge/base/envs/tdm-geoparsing-py39/lib/python3.9/site-packages (from spacy<3.4.0,>=3.3.0.dev0->fr-core-news-sm==3.3.0) (3.1.2)\n",
+      "Requirement already satisfied: srsly<3.0.0,>=2.4.3 in /usr/local/Caskroom/miniforge/base/envs/tdm-geoparsing-py39/lib/python3.9/site-packages (from spacy<3.4.0,>=3.3.0.dev0->fr-core-news-sm==3.3.0) (2.4.4)\n",
+      "Requirement already satisfied: murmurhash<1.1.0,>=0.28.0 in /usr/local/Caskroom/miniforge/base/envs/tdm-geoparsing-py39/lib/python3.9/site-packages (from spacy<3.4.0,>=3.3.0.dev0->fr-core-news-sm==3.3.0) (1.0.8)\n",
+      "Requirement already satisfied: tqdm<5.0.0,>=4.38.0 in /usr/local/Caskroom/miniforge/base/envs/tdm-geoparsing-py39/lib/python3.9/site-packages (from spacy<3.4.0,>=3.3.0.dev0->fr-core-news-sm==3.3.0) (4.64.1)\n",
+      "Requirement already satisfied: pathy>=0.3.5 in /usr/local/Caskroom/miniforge/base/envs/tdm-geoparsing-py39/lib/python3.9/site-packages (from spacy<3.4.0,>=3.3.0.dev0->fr-core-news-sm==3.3.0) (0.6.2)\n",
+      "Requirement already satisfied: thinc<8.1.0,>=8.0.14 in /usr/local/Caskroom/miniforge/base/envs/tdm-geoparsing-py39/lib/python3.9/site-packages (from spacy<3.4.0,>=3.3.0.dev0->fr-core-news-sm==3.3.0) (8.0.17)\n",
+      "Requirement already satisfied: langcodes<4.0.0,>=3.2.0 in /usr/local/Caskroom/miniforge/base/envs/tdm-geoparsing-py39/lib/python3.9/site-packages (from spacy<3.4.0,>=3.3.0.dev0->fr-core-news-sm==3.3.0) (3.3.0)\n",
+      "Requirement already satisfied: blis<0.8.0,>=0.4.0 in /usr/local/Caskroom/miniforge/base/envs/tdm-geoparsing-py39/lib/python3.9/site-packages (from spacy<3.4.0,>=3.3.0.dev0->fr-core-news-sm==3.3.0) (0.7.8)\n",
+      "Requirement already satisfied: preshed<3.1.0,>=3.0.2 in /usr/local/Caskroom/miniforge/base/envs/tdm-geoparsing-py39/lib/python3.9/site-packages (from spacy<3.4.0,>=3.3.0.dev0->fr-core-news-sm==3.3.0) (3.0.7)\n",
+      "Requirement already satisfied: wasabi<1.1.0,>=0.9.1 in /usr/local/Caskroom/miniforge/base/envs/tdm-geoparsing-py39/lib/python3.9/site-packages (from spacy<3.4.0,>=3.3.0.dev0->fr-core-news-sm==3.3.0) (0.10.1)\n",
+      "Requirement already satisfied: pydantic!=1.8,!=1.8.1,<1.9.0,>=1.7.4 in /usr/local/Caskroom/miniforge/base/envs/tdm-geoparsing-py39/lib/python3.9/site-packages (from spacy<3.4.0,>=3.3.0.dev0->fr-core-news-sm==3.3.0) (1.8.2)\n",
+      "Requirement already satisfied: setuptools in /usr/local/Caskroom/miniforge/base/envs/tdm-geoparsing-py39/lib/python3.9/site-packages (from spacy<3.4.0,>=3.3.0.dev0->fr-core-news-sm==3.3.0) (65.3.0)\n",
+      "Requirement already satisfied: catalogue<2.1.0,>=2.0.6 in /usr/local/Caskroom/miniforge/base/envs/tdm-geoparsing-py39/lib/python3.9/site-packages (from spacy<3.4.0,>=3.3.0.dev0->fr-core-news-sm==3.3.0) (2.0.8)\n",
+      "Requirement already satisfied: spacy-loggers<2.0.0,>=1.0.0 in /usr/local/Caskroom/miniforge/base/envs/tdm-geoparsing-py39/lib/python3.9/site-packages (from spacy<3.4.0,>=3.3.0.dev0->fr-core-news-sm==3.3.0) (1.0.3)\n",
+      "Requirement already satisfied: numpy>=1.15.0 in /usr/local/Caskroom/miniforge/base/envs/tdm-geoparsing-py39/lib/python3.9/site-packages (from spacy<3.4.0,>=3.3.0.dev0->fr-core-news-sm==3.3.0) (1.23.3)\n",
+      "Requirement already satisfied: typer<0.5.0,>=0.3.0 in /usr/local/Caskroom/miniforge/base/envs/tdm-geoparsing-py39/lib/python3.9/site-packages (from spacy<3.4.0,>=3.3.0.dev0->fr-core-news-sm==3.3.0) (0.4.2)\n",
+      "Requirement already satisfied: pyparsing!=3.0.5,>=2.0.2 in /usr/local/Caskroom/miniforge/base/envs/tdm-geoparsing-py39/lib/python3.9/site-packages (from packaging>=20.0->spacy<3.4.0,>=3.3.0.dev0->fr-core-news-sm==3.3.0) (3.0.9)\n",
+      "Requirement already satisfied: smart-open<6.0.0,>=5.2.1 in /usr/local/Caskroom/miniforge/base/envs/tdm-geoparsing-py39/lib/python3.9/site-packages (from pathy>=0.3.5->spacy<3.4.0,>=3.3.0.dev0->fr-core-news-sm==3.3.0) (5.2.1)\n",
+      "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/Caskroom/miniforge/base/envs/tdm-geoparsing-py39/lib/python3.9/site-packages (from pydantic!=1.8,!=1.8.1,<1.9.0,>=1.7.4->spacy<3.4.0,>=3.3.0.dev0->fr-core-news-sm==3.3.0) (4.3.0)\n",
+      "Requirement already satisfied: charset-normalizer<3,>=2 in /usr/local/Caskroom/miniforge/base/envs/tdm-geoparsing-py39/lib/python3.9/site-packages (from requests<3.0.0,>=2.13.0->spacy<3.4.0,>=3.3.0.dev0->fr-core-news-sm==3.3.0) (2.1.1)\n",
+      "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/Caskroom/miniforge/base/envs/tdm-geoparsing-py39/lib/python3.9/site-packages (from requests<3.0.0,>=2.13.0->spacy<3.4.0,>=3.3.0.dev0->fr-core-news-sm==3.3.0) (2022.9.14)\n",
+      "Requirement already satisfied: idna<4,>=2.5 in /usr/local/Caskroom/miniforge/base/envs/tdm-geoparsing-py39/lib/python3.9/site-packages (from requests<3.0.0,>=2.13.0->spacy<3.4.0,>=3.3.0.dev0->fr-core-news-sm==3.3.0) (3.4)\n",
+      "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/Caskroom/miniforge/base/envs/tdm-geoparsing-py39/lib/python3.9/site-packages (from requests<3.0.0,>=2.13.0->spacy<3.4.0,>=3.3.0.dev0->fr-core-news-sm==3.3.0) (1.26.12)\n",
+      "Requirement already satisfied: click<9.0.0,>=7.1.1 in /usr/local/Caskroom/miniforge/base/envs/tdm-geoparsing-py39/lib/python3.9/site-packages (from typer<0.5.0,>=0.3.0->spacy<3.4.0,>=3.3.0.dev0->fr-core-news-sm==3.3.0) (8.1.3)\n",
+      "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/Caskroom/miniforge/base/envs/tdm-geoparsing-py39/lib/python3.9/site-packages (from jinja2->spacy<3.4.0,>=3.3.0.dev0->fr-core-news-sm==3.3.0) (2.1.1)\n",
+      "\u001b[38;5;2m✔ Download and installation successful\u001b[0m\n",
+      "You can now load the package via spacy.load('fr_core_news_sm')\n"
+     ]
+    }
+   ],
    "source": [
     "!python -m spacy download fr_core_news_sm"
    ]
@@ -974,7 +1113,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 4,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -990,7 +1129,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 5,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1006,7 +1145,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 6,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1022,9 +1161,24 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 7,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "ARQUES LOC\n",
+      "Géog LOC\n",
+      "de France LOC\n",
+      "Normandie LOC\n",
+      "pays de Caux LOC\n",
+      "Arques LOC\n",
+      "Long LOC\n",
+      "lat LOC\n"
+     ]
+    }
+   ],
    "source": [
     "for ent in arques_spacy.ents:\n",
     "    print(ent.text, ent.label_)"
@@ -1034,14 +1188,67 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "* Mais SpaCy fournit également une fonction pour effectuer un rendu plus graphique des annotations avec `displaCy` :"
+    "* `spaCy` fournit également une fonction pour effectuer un rendu plus graphique des annotations avec `displaCy` :"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 8,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<span class=\"tex2jax_ignore\"><div class=\"entities\" style=\"line-height: 2.5; direction: ltr\">* \n",
+       "<mark class=\"entity\" style=\"background: #ff9561; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n",
+       "    ARQUES\n",
+       "    <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">LOC</span>\n",
+       "</mark>\n",
+       ", (\n",
+       "<mark class=\"entity\" style=\"background: #ff9561; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n",
+       "    Géog\n",
+       "    <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">LOC</span>\n",
+       "</mark>\n",
+       ".) petite ville \n",
+       "<mark class=\"entity\" style=\"background: #ff9561; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n",
+       "    de France\n",
+       "    <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">LOC</span>\n",
+       "</mark>\n",
+       ", en \n",
+       "<mark class=\"entity\" style=\"background: #ff9561; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n",
+       "    Normandie\n",
+       "    <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">LOC</span>\n",
+       "</mark>\n",
+       ", au \n",
+       "<mark class=\"entity\" style=\"background: #ff9561; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n",
+       "    pays de Caux\n",
+       "    <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">LOC</span>\n",
+       "</mark>\n",
+       ", sur la petite riviere d'\n",
+       "<mark class=\"entity\" style=\"background: #ff9561; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n",
+       "    Arques\n",
+       "    <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">LOC</span>\n",
+       "</mark>\n",
+       ". \n",
+       "<mark class=\"entity\" style=\"background: #ff9561; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n",
+       "    Long\n",
+       "    <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">LOC</span>\n",
+       "</mark>\n",
+       ". 18. 50. \n",
+       "<mark class=\"entity\" style=\"background: #ff9561; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n",
+       "    lat\n",
+       "    <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">LOC</span>\n",
+       "</mark>\n",
+       ". 49. 54.</div></span>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
    "source": [
     "displacy.render(arques_spacy, style=\"ent\", jupyter=True) "
    ]
@@ -1050,7 +1257,21 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "### 5.3 Perdido Geoparser"
+    "On remarque des différences entre les résultats de Stanza et de spaCy. En particulier, spaCy repère trois entités à tort (faux positifs) : `Géog`, `Long` et `lat`."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 4.3 Perdido Geoparser\n",
+    "\n",
+    "\n",
+    "`Perdido` est une librairie Python pour le geoparsing de texte en français. Le geoparsing se décompose en deux tâches : le geotagging et le geocoding. Le geotagging est similaire à la tâche de reconnaissance des entités nommées avec un focus particulier pour le repérage d'information spatiale. En plus des entités nommées, nous nous intéressons en particulier aux relations entre ces entités telles que les relations spatiales (distances, topologie, orientation, etc.).\n",
+    "Le geocoding (ou résolution de toponymes) a pour rôle d'attribuer aux entités de lieux des coordonnées géographiques non ambiguës.\n",
+    "`Perdido` s'appuie sur une approche hybride principalement construite à base de règles pour le repérage et la classification des entités nommées. La librairie est disponible en 2 versions : une version standard et une version spécialement adaptée pour les articles encyclopédiques.\n",
+    "\n",
+    "Dans cette partie nous allons voir comment utiliser `Perdido` pour la reconnaissance d'entités nommées toujours à partir de notre exemple `Arques`."
    ]
   },
   {
@@ -1062,7 +1283,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 34,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1078,13 +1299,20 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 35,
    "metadata": {},
    "outputs": [],
    "source": [
     "arques_perdido = geoparser(arques)"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Perdido effectuant la tâche de geocoding en plus du NER, le temps de traitement est plus long qu'avec Stanza ou spaCy, du fait de l'interrogation de ressources géographiques externes pour chaque nom de lieu repéré."
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -1094,9 +1322,22 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 36,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "ARQUES place\n",
+      "France place\n",
+      "Normandie place\n",
+      "Caux place\n",
+      "Arques place\n",
+      "Long . 18 . 50 . lat . 49 . 54 . latlong\n"
+     ]
+    }
+   ],
    "source": [
     "for ent in arques_perdido.named_entities:\n",
     "    print(ent.text, ent.tag)"
@@ -1111,9 +1352,52 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 37,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<span class=\"tex2jax_ignore\"><div class=\"entities\" style=\"line-height: 2.5; direction: ltr\">* \n",
+       "<mark class=\"entity\" style=\"background: #ff9561; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n",
+       "    ARQUES\n",
+       "    <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">LOC</span>\n",
+       "</mark>\n",
+       " , ( Géog . ) petite ville de \n",
+       "<mark class=\"entity\" style=\"background: #ff9561; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n",
+       "    France\n",
+       "    <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">LOC</span>\n",
+       "</mark>\n",
+       " , en \n",
+       "<mark class=\"entity\" style=\"background: #ff9561; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n",
+       "    Normandie\n",
+       "    <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">LOC</span>\n",
+       "</mark>\n",
+       " , au pays de \n",
+       "<mark class=\"entity\" style=\"background: #ff9561; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n",
+       "    Caux\n",
+       "    <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">LOC</span>\n",
+       "</mark>\n",
+       " , sur la petite riviere d' \n",
+       "<mark class=\"entity\" style=\"background: #ff9561; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n",
+       "    Arques\n",
+       "    <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">LOC</span>\n",
+       "</mark>\n",
+       " . \n",
+       "<mark class=\"entity\" style=\"background: #ddd; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n",
+       "    Long . 18 . 50 . lat . 49 . 54 .\n",
+       "    <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">MISC</span>\n",
+       "</mark>\n",
+       " </div></span>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
    "source": [
     "displacy.render(arques_perdido.to_spacy_doc(), style=\"ent\", jupyter=True)"
    ]
@@ -1122,14 +1406,352 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "* Un rendu similaire mais qui permet de visualiser les inclusions (`style=\"ent\"` -> `style=\"span\"`) :"
+    "* Un rendu similaire mais qui permet de visualiser les entités imbriquées (`style=\"ent\"` -> `style=\"span\"`) :"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 38,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<span class=\"tex2jax_ignore\"><div class=\"spans\" style=\"line-height: 2.5; direction: ltr\">\n",
+       "<span style=\"font-weight: bold; display: inline-block; position: relative;\">\n",
+       "    *\n",
+       "    \n",
+       "<span style=\"background: #ddd; top: 40px; height: 4px; left: -1px; width: calc(100% + 2px); position: absolute;\">\n",
+       "</span>\n",
+       "\n",
+       "    \n",
+       "<span style=\"background: #ddd; top: 40px; height: 4px; border-top-left-radius: 3px; border-bottom-left-radius: 3px; left: -1px; width: calc(100% + 2px); position: absolute;\">\n",
+       "    <span style=\"background: #ddd; z-index: 10; color: #000; top: -0.5em; padding: 2px 3px; position: absolute; font-size: 0.6em; font-weight: bold; line-height: 1; border-radius: 3px\">\n",
+       "        MISC\n",
+       "    </span>\n",
+       "</span>\n",
+       "\n",
+       "\n",
+       "</span>\n",
+       "\n",
+       "<span style=\"font-weight: bold; display: inline-block; position: relative;\">\n",
+       "    ARQUES\n",
+       "    \n",
+       "<span style=\"background: #ddd; top: 40px; height: 4px; left: -1px; width: calc(100% + 2px); position: absolute;\">\n",
+       "</span>\n",
+       "\n",
+       "<span style=\"background: #ff9561; top: 57px; height: 4px; left: -1px; width: calc(100% + 2px); position: absolute;\">\n",
+       "</span>\n",
+       "\n",
+       "    \n",
+       "<span style=\"background: #ff9561; top: 57px; height: 4px; border-top-left-radius: 3px; border-bottom-left-radius: 3px; left: -1px; width: calc(100% + 2px); position: absolute;\">\n",
+       "    <span style=\"background: #ff9561; z-index: 10; color: #000; top: -0.5em; padding: 2px 3px; position: absolute; font-size: 0.6em; font-weight: bold; line-height: 1; border-radius: 3px\">\n",
+       "        LOC\n",
+       "    </span>\n",
+       "</span>\n",
+       "\n",
+       "\n",
+       "</span>\n",
+       ", ( Géog . ) \n",
+       "<span style=\"font-weight: bold; display: inline-block; position: relative;\">\n",
+       "    petite\n",
+       "    \n",
+       "<span style=\"background: #ff9561; top: 40px; height: 4px; left: -1px; width: calc(100% + 2px); position: absolute;\">\n",
+       "</span>\n",
+       "\n",
+       "    \n",
+       "<span style=\"background: #ff9561; top: 40px; height: 4px; border-top-left-radius: 3px; border-bottom-left-radius: 3px; left: -1px; width: calc(100% + 2px); position: absolute;\">\n",
+       "    <span style=\"background: #ff9561; z-index: 10; color: #000; top: -0.5em; padding: 2px 3px; position: absolute; font-size: 0.6em; font-weight: bold; line-height: 1; border-radius: 3px\">\n",
+       "        LOC\n",
+       "    </span>\n",
+       "</span>\n",
+       "\n",
+       "\n",
+       "</span>\n",
+       "\n",
+       "<span style=\"font-weight: bold; display: inline-block; position: relative;\">\n",
+       "    ville\n",
+       "    \n",
+       "<span style=\"background: #ff9561; top: 40px; height: 4px; left: -1px; width: calc(100% + 2px); position: absolute;\">\n",
+       "</span>\n",
+       "\n",
+       "    \n",
+       "</span>\n",
+       "\n",
+       "<span style=\"font-weight: bold; display: inline-block; position: relative;\">\n",
+       "    de\n",
+       "    \n",
+       "<span style=\"background: #ff9561; top: 40px; height: 4px; left: -1px; width: calc(100% + 2px); position: absolute;\">\n",
+       "</span>\n",
+       "\n",
+       "    \n",
+       "</span>\n",
+       "\n",
+       "<span style=\"font-weight: bold; display: inline-block; position: relative;\">\n",
+       "    France\n",
+       "    \n",
+       "<span style=\"background: #ff9561; top: 40px; height: 4px; left: -1px; width: calc(100% + 2px); position: absolute;\">\n",
+       "</span>\n",
+       "\n",
+       "<span style=\"background: #ff9561; top: 57px; height: 4px; left: -1px; width: calc(100% + 2px); position: absolute;\">\n",
+       "</span>\n",
+       "\n",
+       "    \n",
+       "<span style=\"background: #ff9561; top: 57px; height: 4px; border-top-left-radius: 3px; border-bottom-left-radius: 3px; left: -1px; width: calc(100% + 2px); position: absolute;\">\n",
+       "    <span style=\"background: #ff9561; z-index: 10; color: #000; top: -0.5em; padding: 2px 3px; position: absolute; font-size: 0.6em; font-weight: bold; line-height: 1; border-radius: 3px\">\n",
+       "        LOC\n",
+       "    </span>\n",
+       "</span>\n",
+       "\n",
+       "\n",
+       "</span>\n",
+       ", en \n",
+       "<span style=\"font-weight: bold; display: inline-block; position: relative;\">\n",
+       "    Normandie\n",
+       "    \n",
+       "<span style=\"background: #ff9561; top: 40px; height: 4px; left: -1px; width: calc(100% + 2px); position: absolute;\">\n",
+       "</span>\n",
+       "\n",
+       "    \n",
+       "<span style=\"background: #ff9561; top: 40px; height: 4px; border-top-left-radius: 3px; border-bottom-left-radius: 3px; left: -1px; width: calc(100% + 2px); position: absolute;\">\n",
+       "    <span style=\"background: #ff9561; z-index: 10; color: #000; top: -0.5em; padding: 2px 3px; position: absolute; font-size: 0.6em; font-weight: bold; line-height: 1; border-radius: 3px\">\n",
+       "        LOC\n",
+       "    </span>\n",
+       "</span>\n",
+       "\n",
+       "\n",
+       "</span>\n",
+       ", au \n",
+       "<span style=\"font-weight: bold; display: inline-block; position: relative;\">\n",
+       "    pays\n",
+       "    \n",
+       "<span style=\"background: #ff9561; top: 40px; height: 4px; left: -1px; width: calc(100% + 2px); position: absolute;\">\n",
+       "</span>\n",
+       "\n",
+       "    \n",
+       "<span style=\"background: #ff9561; top: 40px; height: 4px; border-top-left-radius: 3px; border-bottom-left-radius: 3px; left: -1px; width: calc(100% + 2px); position: absolute;\">\n",
+       "    <span style=\"background: #ff9561; z-index: 10; color: #000; top: -0.5em; padding: 2px 3px; position: absolute; font-size: 0.6em; font-weight: bold; line-height: 1; border-radius: 3px\">\n",
+       "        LOC\n",
+       "    </span>\n",
+       "</span>\n",
+       "\n",
+       "\n",
+       "</span>\n",
+       "\n",
+       "<span style=\"font-weight: bold; display: inline-block; position: relative;\">\n",
+       "    de\n",
+       "    \n",
+       "<span style=\"background: #ff9561; top: 40px; height: 4px; left: -1px; width: calc(100% + 2px); position: absolute;\">\n",
+       "</span>\n",
+       "\n",
+       "    \n",
+       "</span>\n",
+       "\n",
+       "<span style=\"font-weight: bold; display: inline-block; position: relative;\">\n",
+       "    Caux\n",
+       "    \n",
+       "<span style=\"background: #ff9561; top: 40px; height: 4px; left: -1px; width: calc(100% + 2px); position: absolute;\">\n",
+       "</span>\n",
+       "\n",
+       "<span style=\"background: #ff9561; top: 57px; height: 4px; left: -1px; width: calc(100% + 2px); position: absolute;\">\n",
+       "</span>\n",
+       "\n",
+       "    \n",
+       "<span style=\"background: #ff9561; top: 57px; height: 4px; border-top-left-radius: 3px; border-bottom-left-radius: 3px; left: -1px; width: calc(100% + 2px); position: absolute;\">\n",
+       "    <span style=\"background: #ff9561; z-index: 10; color: #000; top: -0.5em; padding: 2px 3px; position: absolute; font-size: 0.6em; font-weight: bold; line-height: 1; border-radius: 3px\">\n",
+       "        LOC\n",
+       "    </span>\n",
+       "</span>\n",
+       "\n",
+       "\n",
+       "</span>\n",
+       ", sur \n",
+       "<span style=\"font-weight: bold; display: inline-block; position: relative;\">\n",
+       "    la\n",
+       "    \n",
+       "<span style=\"background: #ff9561; top: 40px; height: 4px; left: -1px; width: calc(100% + 2px); position: absolute;\">\n",
+       "</span>\n",
+       "\n",
+       "    \n",
+       "<span style=\"background: #ff9561; top: 40px; height: 4px; border-top-left-radius: 3px; border-bottom-left-radius: 3px; left: -1px; width: calc(100% + 2px); position: absolute;\">\n",
+       "    <span style=\"background: #ff9561; z-index: 10; color: #000; top: -0.5em; padding: 2px 3px; position: absolute; font-size: 0.6em; font-weight: bold; line-height: 1; border-radius: 3px\">\n",
+       "        LOC\n",
+       "    </span>\n",
+       "</span>\n",
+       "\n",
+       "\n",
+       "</span>\n",
+       "\n",
+       "<span style=\"font-weight: bold; display: inline-block; position: relative;\">\n",
+       "    petite\n",
+       "    \n",
+       "<span style=\"background: #ff9561; top: 40px; height: 4px; left: -1px; width: calc(100% + 2px); position: absolute;\">\n",
+       "</span>\n",
+       "\n",
+       "    \n",
+       "</span>\n",
+       "\n",
+       "<span style=\"font-weight: bold; display: inline-block; position: relative;\">\n",
+       "    riviere\n",
+       "    \n",
+       "<span style=\"background: #ff9561; top: 40px; height: 4px; left: -1px; width: calc(100% + 2px); position: absolute;\">\n",
+       "</span>\n",
+       "\n",
+       "    \n",
+       "</span>\n",
+       "\n",
+       "<span style=\"font-weight: bold; display: inline-block; position: relative;\">\n",
+       "    d'\n",
+       "    \n",
+       "<span style=\"background: #ff9561; top: 40px; height: 4px; left: -1px; width: calc(100% + 2px); position: absolute;\">\n",
+       "</span>\n",
+       "\n",
+       "    \n",
+       "</span>\n",
+       "\n",
+       "<span style=\"font-weight: bold; display: inline-block; position: relative;\">\n",
+       "    Arques\n",
+       "    \n",
+       "<span style=\"background: #ff9561; top: 40px; height: 4px; left: -1px; width: calc(100% + 2px); position: absolute;\">\n",
+       "</span>\n",
+       "\n",
+       "<span style=\"background: #ff9561; top: 57px; height: 4px; left: -1px; width: calc(100% + 2px); position: absolute;\">\n",
+       "</span>\n",
+       "\n",
+       "    \n",
+       "<span style=\"background: #ff9561; top: 57px; height: 4px; border-top-left-radius: 3px; border-bottom-left-radius: 3px; left: -1px; width: calc(100% + 2px); position: absolute;\">\n",
+       "    <span style=\"background: #ff9561; z-index: 10; color: #000; top: -0.5em; padding: 2px 3px; position: absolute; font-size: 0.6em; font-weight: bold; line-height: 1; border-radius: 3px\">\n",
+       "        LOC\n",
+       "    </span>\n",
+       "</span>\n",
+       "\n",
+       "\n",
+       "</span>\n",
+       ". \n",
+       "<span style=\"font-weight: bold; display: inline-block; position: relative;\">\n",
+       "    Long\n",
+       "    \n",
+       "<span style=\"background: #ddd; top: 40px; height: 4px; left: -1px; width: calc(100% + 2px); position: absolute;\">\n",
+       "</span>\n",
+       "\n",
+       "    \n",
+       "<span style=\"background: #ddd; top: 40px; height: 4px; border-top-left-radius: 3px; border-bottom-left-radius: 3px; left: -1px; width: calc(100% + 2px); position: absolute;\">\n",
+       "    <span style=\"background: #ddd; z-index: 10; color: #000; top: -0.5em; padding: 2px 3px; position: absolute; font-size: 0.6em; font-weight: bold; line-height: 1; border-radius: 3px\">\n",
+       "        MISC\n",
+       "    </span>\n",
+       "</span>\n",
+       "\n",
+       "\n",
+       "</span>\n",
+       "\n",
+       "<span style=\"font-weight: bold; display: inline-block; position: relative;\">\n",
+       "    .\n",
+       "    \n",
+       "<span style=\"background: #ddd; top: 40px; height: 4px; left: -1px; width: calc(100% + 2px); position: absolute;\">\n",
+       "</span>\n",
+       "\n",
+       "    \n",
+       "</span>\n",
+       "\n",
+       "<span style=\"font-weight: bold; display: inline-block; position: relative;\">\n",
+       "    18\n",
+       "    \n",
+       "<span style=\"background: #ddd; top: 40px; height: 4px; left: -1px; width: calc(100% + 2px); position: absolute;\">\n",
+       "</span>\n",
+       "\n",
+       "    \n",
+       "</span>\n",
+       "\n",
+       "<span style=\"font-weight: bold; display: inline-block; position: relative;\">\n",
+       "    .\n",
+       "    \n",
+       "<span style=\"background: #ddd; top: 40px; height: 4px; left: -1px; width: calc(100% + 2px); position: absolute;\">\n",
+       "</span>\n",
+       "\n",
+       "    \n",
+       "</span>\n",
+       "\n",
+       "<span style=\"font-weight: bold; display: inline-block; position: relative;\">\n",
+       "    50\n",
+       "    \n",
+       "<span style=\"background: #ddd; top: 40px; height: 4px; left: -1px; width: calc(100% + 2px); position: absolute;\">\n",
+       "</span>\n",
+       "\n",
+       "    \n",
+       "</span>\n",
+       "\n",
+       "<span style=\"font-weight: bold; display: inline-block; position: relative;\">\n",
+       "    .\n",
+       "    \n",
+       "<span style=\"background: #ddd; top: 40px; height: 4px; left: -1px; width: calc(100% + 2px); position: absolute;\">\n",
+       "</span>\n",
+       "\n",
+       "    \n",
+       "</span>\n",
+       "\n",
+       "<span style=\"font-weight: bold; display: inline-block; position: relative;\">\n",
+       "    lat\n",
+       "    \n",
+       "<span style=\"background: #ddd; top: 40px; height: 4px; left: -1px; width: calc(100% + 2px); position: absolute;\">\n",
+       "</span>\n",
+       "\n",
+       "    \n",
+       "</span>\n",
+       "\n",
+       "<span style=\"font-weight: bold; display: inline-block; position: relative;\">\n",
+       "    .\n",
+       "    \n",
+       "<span style=\"background: #ddd; top: 40px; height: 4px; left: -1px; width: calc(100% + 2px); position: absolute;\">\n",
+       "</span>\n",
+       "\n",
+       "    \n",
+       "</span>\n",
+       "\n",
+       "<span style=\"font-weight: bold; display: inline-block; position: relative;\">\n",
+       "    49\n",
+       "    \n",
+       "<span style=\"background: #ddd; top: 40px; height: 4px; left: -1px; width: calc(100% + 2px); position: absolute;\">\n",
+       "</span>\n",
+       "\n",
+       "    \n",
+       "</span>\n",
+       "\n",
+       "<span style=\"font-weight: bold; display: inline-block; position: relative;\">\n",
+       "    .\n",
+       "    \n",
+       "<span style=\"background: #ddd; top: 40px; height: 4px; left: -1px; width: calc(100% + 2px); position: absolute;\">\n",
+       "</span>\n",
+       "\n",
+       "    \n",
+       "</span>\n",
+       "\n",
+       "<span style=\"font-weight: bold; display: inline-block; position: relative;\">\n",
+       "    54\n",
+       "    \n",
+       "<span style=\"background: #ddd; top: 40px; height: 4px; left: -1px; width: calc(100% + 2px); position: absolute;\">\n",
+       "</span>\n",
+       "\n",
+       "    \n",
+       "</span>\n",
+       "\n",
+       "<span style=\"font-weight: bold; display: inline-block; position: relative;\">\n",
+       "    .\n",
+       "    \n",
+       "<span style=\"background: #ddd; top: 40px; height: 4px; left: -1px; width: calc(100% + 2px); position: absolute;\">\n",
+       "</span>\n",
+       "\n",
+       "    \n",
+       "</span>\n",
+       "</div></span>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
    "source": [
     "displacy.render(arques_perdido.to_spacy_doc(), style=\"span\", jupyter=True)"
    ]
@@ -1138,16 +1760,39 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "### 5.4 Expérimentations et comparaison"
+    "Cet exemple permet d'illustrer les différences qui peuvent exister entre des outils de NER généralistes et un outil de geoparsing. On observe ici que Perdido permet une annotation plus fine grâce aux entités imbriquées (ville de, petite rivière) ainsi que le repérage des coordonnées géographiques. En fonction du besoin, le repérage de ces éléments peut être utile pour les traitements suivants ou les analyses qui s'appuient sur ces résultats."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 4.4 Expérimentations et comparaison"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "* Charger l'article `Beaufort` (volume 2, numéro 1365) disponible dans le dossier `data` :"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 9,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "* Beaufort, (Géog.) ville de Savoie, sur la riviere \n",
+      "d'Oron. Long. 24. 18. lat. 45. 40.\n"
+     ]
+    }
+   ],
    "source": [
-    "beaufort = load('data/volume02-1365.txt')\n",
+    "beaufort = load_txt('data/volume02-1365.txt')\n",
     "\n",
     "print(beaufort)"
    ]
@@ -1161,9 +1806,301 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 40,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<span class=\"tex2jax_ignore\"><div class=\"entities\" style=\"line-height: 2.5; direction: ltr\">* \n",
+       "<mark class=\"entity\" style=\"background: #ff9561; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n",
+       "    Beaufort\n",
+       "    <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">LOC</span>\n",
+       "</mark>\n",
+       " , ( Géog . ) ville de \n",
+       "<mark class=\"entity\" style=\"background: #ff9561; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n",
+       "    Savoie\n",
+       "    <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">LOC</span>\n",
+       "</mark>\n",
+       " , sur la riviere d' \n",
+       "<mark class=\"entity\" style=\"background: #ff9561; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n",
+       "    Oron\n",
+       "    <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">LOC</span>\n",
+       "</mark>\n",
+       " . \n",
+       "<mark class=\"entity\" style=\"background: #ddd; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n",
+       "    Long . 24 . 18 . lat . 45 . 40 .\n",
+       "    <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">MISC</span>\n",
+       "</mark>\n",
+       " </div></span>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "<span class=\"tex2jax_ignore\"><div class=\"spans\" style=\"line-height: 2.5; direction: ltr\">\n",
+       "<span style=\"font-weight: bold; display: inline-block; position: relative;\">\n",
+       "    *\n",
+       "    \n",
+       "<span style=\"background: #ddd; top: 40px; height: 4px; left: -1px; width: calc(100% + 2px); position: absolute;\">\n",
+       "</span>\n",
+       "\n",
+       "    \n",
+       "<span style=\"background: #ddd; top: 40px; height: 4px; border-top-left-radius: 3px; border-bottom-left-radius: 3px; left: -1px; width: calc(100% + 2px); position: absolute;\">\n",
+       "    <span style=\"background: #ddd; z-index: 10; color: #000; top: -0.5em; padding: 2px 3px; position: absolute; font-size: 0.6em; font-weight: bold; line-height: 1; border-radius: 3px\">\n",
+       "        MISC\n",
+       "    </span>\n",
+       "</span>\n",
+       "\n",
+       "\n",
+       "</span>\n",
+       "\n",
+       "<span style=\"font-weight: bold; display: inline-block; position: relative;\">\n",
+       "    Beaufort\n",
+       "    \n",
+       "<span style=\"background: #ddd; top: 40px; height: 4px; left: -1px; width: calc(100% + 2px); position: absolute;\">\n",
+       "</span>\n",
+       "\n",
+       "<span style=\"background: #ff9561; top: 57px; height: 4px; left: -1px; width: calc(100% + 2px); position: absolute;\">\n",
+       "</span>\n",
+       "\n",
+       "    \n",
+       "<span style=\"background: #ff9561; top: 57px; height: 4px; border-top-left-radius: 3px; border-bottom-left-radius: 3px; left: -1px; width: calc(100% + 2px); position: absolute;\">\n",
+       "    <span style=\"background: #ff9561; z-index: 10; color: #000; top: -0.5em; padding: 2px 3px; position: absolute; font-size: 0.6em; font-weight: bold; line-height: 1; border-radius: 3px\">\n",
+       "        LOC\n",
+       "    </span>\n",
+       "</span>\n",
+       "\n",
+       "\n",
+       "</span>\n",
+       ", ( Géog . ) \n",
+       "<span style=\"font-weight: bold; display: inline-block; position: relative;\">\n",
+       "    ville\n",
+       "    \n",
+       "<span style=\"background: #ff9561; top: 40px; height: 4px; left: -1px; width: calc(100% + 2px); position: absolute;\">\n",
+       "</span>\n",
+       "\n",
+       "    \n",
+       "<span style=\"background: #ff9561; top: 40px; height: 4px; border-top-left-radius: 3px; border-bottom-left-radius: 3px; left: -1px; width: calc(100% + 2px); position: absolute;\">\n",
+       "    <span style=\"background: #ff9561; z-index: 10; color: #000; top: -0.5em; padding: 2px 3px; position: absolute; font-size: 0.6em; font-weight: bold; line-height: 1; border-radius: 3px\">\n",
+       "        LOC\n",
+       "    </span>\n",
+       "</span>\n",
+       "\n",
+       "\n",
+       "</span>\n",
+       "\n",
+       "<span style=\"font-weight: bold; display: inline-block; position: relative;\">\n",
+       "    de\n",
+       "    \n",
+       "<span style=\"background: #ff9561; top: 40px; height: 4px; left: -1px; width: calc(100% + 2px); position: absolute;\">\n",
+       "</span>\n",
+       "\n",
+       "    \n",
+       "</span>\n",
+       "\n",
+       "<span style=\"font-weight: bold; display: inline-block; position: relative;\">\n",
+       "    Savoie\n",
+       "    \n",
+       "<span style=\"background: #ff9561; top: 40px; height: 4px; left: -1px; width: calc(100% + 2px); position: absolute;\">\n",
+       "</span>\n",
+       "\n",
+       "<span style=\"background: #ff9561; top: 57px; height: 4px; left: -1px; width: calc(100% + 2px); position: absolute;\">\n",
+       "</span>\n",
+       "\n",
+       "    \n",
+       "<span style=\"background: #ff9561; top: 57px; height: 4px; border-top-left-radius: 3px; border-bottom-left-radius: 3px; left: -1px; width: calc(100% + 2px); position: absolute;\">\n",
+       "    <span style=\"background: #ff9561; z-index: 10; color: #000; top: -0.5em; padding: 2px 3px; position: absolute; font-size: 0.6em; font-weight: bold; line-height: 1; border-radius: 3px\">\n",
+       "        LOC\n",
+       "    </span>\n",
+       "</span>\n",
+       "\n",
+       "\n",
+       "</span>\n",
+       ", sur \n",
+       "<span style=\"font-weight: bold; display: inline-block; position: relative;\">\n",
+       "    la\n",
+       "    \n",
+       "<span style=\"background: #ff9561; top: 40px; height: 4px; left: -1px; width: calc(100% + 2px); position: absolute;\">\n",
+       "</span>\n",
+       "\n",
+       "    \n",
+       "<span style=\"background: #ff9561; top: 40px; height: 4px; border-top-left-radius: 3px; border-bottom-left-radius: 3px; left: -1px; width: calc(100% + 2px); position: absolute;\">\n",
+       "    <span style=\"background: #ff9561; z-index: 10; color: #000; top: -0.5em; padding: 2px 3px; position: absolute; font-size: 0.6em; font-weight: bold; line-height: 1; border-radius: 3px\">\n",
+       "        LOC\n",
+       "    </span>\n",
+       "</span>\n",
+       "\n",
+       "\n",
+       "</span>\n",
+       "\n",
+       "<span style=\"font-weight: bold; display: inline-block; position: relative;\">\n",
+       "    riviere\n",
+       "    \n",
+       "<span style=\"background: #ff9561; top: 40px; height: 4px; left: -1px; width: calc(100% + 2px); position: absolute;\">\n",
+       "</span>\n",
+       "\n",
+       "    \n",
+       "</span>\n",
+       "\n",
+       "<span style=\"font-weight: bold; display: inline-block; position: relative;\">\n",
+       "    d'\n",
+       "    \n",
+       "<span style=\"background: #ff9561; top: 40px; height: 4px; left: -1px; width: calc(100% + 2px); position: absolute;\">\n",
+       "</span>\n",
+       "\n",
+       "    \n",
+       "</span>\n",
+       "\n",
+       "<span style=\"font-weight: bold; display: inline-block; position: relative;\">\n",
+       "    Oron\n",
+       "    \n",
+       "<span style=\"background: #ff9561; top: 40px; height: 4px; left: -1px; width: calc(100% + 2px); position: absolute;\">\n",
+       "</span>\n",
+       "\n",
+       "<span style=\"background: #ff9561; top: 57px; height: 4px; left: -1px; width: calc(100% + 2px); position: absolute;\">\n",
+       "</span>\n",
+       "\n",
+       "    \n",
+       "<span style=\"background: #ff9561; top: 57px; height: 4px; border-top-left-radius: 3px; border-bottom-left-radius: 3px; left: -1px; width: calc(100% + 2px); position: absolute;\">\n",
+       "    <span style=\"background: #ff9561; z-index: 10; color: #000; top: -0.5em; padding: 2px 3px; position: absolute; font-size: 0.6em; font-weight: bold; line-height: 1; border-radius: 3px\">\n",
+       "        LOC\n",
+       "    </span>\n",
+       "</span>\n",
+       "\n",
+       "\n",
+       "</span>\n",
+       ". \n",
+       "<span style=\"font-weight: bold; display: inline-block; position: relative;\">\n",
+       "    Long\n",
+       "    \n",
+       "<span style=\"background: #ddd; top: 40px; height: 4px; left: -1px; width: calc(100% + 2px); position: absolute;\">\n",
+       "</span>\n",
+       "\n",
+       "    \n",
+       "<span style=\"background: #ddd; top: 40px; height: 4px; border-top-left-radius: 3px; border-bottom-left-radius: 3px; left: -1px; width: calc(100% + 2px); position: absolute;\">\n",
+       "    <span style=\"background: #ddd; z-index: 10; color: #000; top: -0.5em; padding: 2px 3px; position: absolute; font-size: 0.6em; font-weight: bold; line-height: 1; border-radius: 3px\">\n",
+       "        MISC\n",
+       "    </span>\n",
+       "</span>\n",
+       "\n",
+       "\n",
+       "</span>\n",
+       "\n",
+       "<span style=\"font-weight: bold; display: inline-block; position: relative;\">\n",
+       "    .\n",
+       "    \n",
+       "<span style=\"background: #ddd; top: 40px; height: 4px; left: -1px; width: calc(100% + 2px); position: absolute;\">\n",
+       "</span>\n",
+       "\n",
+       "    \n",
+       "</span>\n",
+       "\n",
+       "<span style=\"font-weight: bold; display: inline-block; position: relative;\">\n",
+       "    24\n",
+       "    \n",
+       "<span style=\"background: #ddd; top: 40px; height: 4px; left: -1px; width: calc(100% + 2px); position: absolute;\">\n",
+       "</span>\n",
+       "\n",
+       "    \n",
+       "</span>\n",
+       "\n",
+       "<span style=\"font-weight: bold; display: inline-block; position: relative;\">\n",
+       "    .\n",
+       "    \n",
+       "<span style=\"background: #ddd; top: 40px; height: 4px; left: -1px; width: calc(100% + 2px); position: absolute;\">\n",
+       "</span>\n",
+       "\n",
+       "    \n",
+       "</span>\n",
+       "\n",
+       "<span style=\"font-weight: bold; display: inline-block; position: relative;\">\n",
+       "    18\n",
+       "    \n",
+       "<span style=\"background: #ddd; top: 40px; height: 4px; left: -1px; width: calc(100% + 2px); position: absolute;\">\n",
+       "</span>\n",
+       "\n",
+       "    \n",
+       "</span>\n",
+       "\n",
+       "<span style=\"font-weight: bold; display: inline-block; position: relative;\">\n",
+       "    .\n",
+       "    \n",
+       "<span style=\"background: #ddd; top: 40px; height: 4px; left: -1px; width: calc(100% + 2px); position: absolute;\">\n",
+       "</span>\n",
+       "\n",
+       "    \n",
+       "</span>\n",
+       "\n",
+       "<span style=\"font-weight: bold; display: inline-block; position: relative;\">\n",
+       "    lat\n",
+       "    \n",
+       "<span style=\"background: #ddd; top: 40px; height: 4px; left: -1px; width: calc(100% + 2px); position: absolute;\">\n",
+       "</span>\n",
+       "\n",
+       "    \n",
+       "</span>\n",
+       "\n",
+       "<span style=\"font-weight: bold; display: inline-block; position: relative;\">\n",
+       "    .\n",
+       "    \n",
+       "<span style=\"background: #ddd; top: 40px; height: 4px; left: -1px; width: calc(100% + 2px); position: absolute;\">\n",
+       "</span>\n",
+       "\n",
+       "    \n",
+       "</span>\n",
+       "\n",
+       "<span style=\"font-weight: bold; display: inline-block; position: relative;\">\n",
+       "    45\n",
+       "    \n",
+       "<span style=\"background: #ddd; top: 40px; height: 4px; left: -1px; width: calc(100% + 2px); position: absolute;\">\n",
+       "</span>\n",
+       "\n",
+       "    \n",
+       "</span>\n",
+       "\n",
+       "<span style=\"font-weight: bold; display: inline-block; position: relative;\">\n",
+       "    .\n",
+       "    \n",
+       "<span style=\"background: #ddd; top: 40px; height: 4px; left: -1px; width: calc(100% + 2px); position: absolute;\">\n",
+       "</span>\n",
+       "\n",
+       "    \n",
+       "</span>\n",
+       "\n",
+       "<span style=\"font-weight: bold; display: inline-block; position: relative;\">\n",
+       "    40\n",
+       "    \n",
+       "<span style=\"background: #ddd; top: 40px; height: 4px; left: -1px; width: calc(100% + 2px); position: absolute;\">\n",
+       "</span>\n",
+       "\n",
+       "    \n",
+       "</span>\n",
+       "\n",
+       "<span style=\"font-weight: bold; display: inline-block; position: relative;\">\n",
+       "    .\n",
+       "    \n",
+       "<span style=\"background: #ddd; top: 40px; height: 4px; left: -1px; width: calc(100% + 2px); position: absolute;\">\n",
+       "</span>\n",
+       "\n",
+       "    \n",
+       "</span>\n",
+       "</div></span>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
    "source": [
     "beaufort_perdido = geoparser(beaufort)\n",
     "displacy.render(beaufort_perdido.to_spacy_doc(), style=\"ent\", jupyter=True)\n",
@@ -1179,9 +2116,42 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 10,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<span class=\"tex2jax_ignore\"><div class=\"entities\" style=\"line-height: 2.5; direction: ltr\">* \n",
+       "<mark class=\"entity\" style=\"background: #ff9561; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n",
+       "    Beaufort\n",
+       "    <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">LOC</span>\n",
+       "</mark>\n",
+       ", (\n",
+       "<mark class=\"entity\" style=\"background: #ff9561; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n",
+       "    Géog\n",
+       "    <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">LOC</span>\n",
+       "</mark>\n",
+       ".) ville de \n",
+       "<mark class=\"entity\" style=\"background: #ff9561; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n",
+       "    Savoie\n",
+       "    <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">LOC</span>\n",
+       "</mark>\n",
+       ", sur la riviere </br>d'Oron. Long. 24. 18. \n",
+       "<mark class=\"entity\" style=\"background: #ff9561; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n",
+       "    lat\n",
+       "    <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">LOC</span>\n",
+       "</mark>\n",
+       ". 45. 40.</div></span>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
    "source": [
     "beaufort_spacy = spacy_parser(beaufort)\n",
     "displacy.render(beaufort_spacy, style=\"ent\", jupyter=True) "
@@ -1191,17 +2161,240 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "La largeur de ligne du texte brut, due à la largeur de la colonne dans l'œuvre originale, semble avoir été conservée. Essayons de «lisser» ces caractéristiques pour voir s'il est possible d'améliorer la reconnaissance."
+    "Le retour à la ligne entre `riviere` et `d'Oron` est dû à la largeur de la colonne dans l'œuvre originale. \n",
+    "Ce retour semble perturber spaCy, qui ne reconnaît pas `Oron` comme une entité nommée.\n",
+    "\n",
+    "![Article Beaufort dans l'œuvre originale](img/beaufort_originale.png)\n",
+    "\n",
+    "Pour vérifier cette hypothèse, modifions le texte en supprimant ce saut de ligne pour voir s'il est possible d'améliorer la reconnaissance."
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 1,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "# packages in environment at /usr/local/Caskroom/miniforge/base/envs/tdm-geoparsing-py39:\n",
+      "#\n",
+      "# Name                    Version                   Build  Channel\n",
+      "appnope                   0.1.3                    pypi_0    pypi\n",
+      "argon2-cffi               21.3.0                   pypi_0    pypi\n",
+      "argon2-cffi-bindings      21.2.0                   pypi_0    pypi\n",
+      "asttokens                 2.0.8                    pypi_0    pypi\n",
+      "attrs                     22.1.0                   pypi_0    pypi\n",
+      "backcall                  0.2.0                    pypi_0    pypi\n",
+      "beautifulsoup4            4.11.1                   pypi_0    pypi\n",
+      "bleach                    5.0.1                    pypi_0    pypi\n",
+      "branca                    0.5.0                    pypi_0    pypi\n",
+      "brotlipy                  0.7.0           py39h63b48b0_1004    conda-forge\n",
+      "bzip2                     1.0.8                h0d85af4_4    conda-forge\n",
+      "ca-certificates           2022.9.24            h033912b_0    conda-forge\n",
+      "catalogue                 2.0.8            py39h6e9494a_0    conda-forge\n",
+      "certifi                   2022.9.14                pypi_0    pypi\n",
+      "cffi                      1.15.1           py39hae9ecf2_0    conda-forge\n",
+      "charset-normalizer        2.1.1              pyhd8ed1ab_0    conda-forge\n",
+      "click                     8.1.3            py39h6e9494a_0    conda-forge\n",
+      "click-plugins             1.1.1                    pypi_0    pypi\n",
+      "cligj                     0.7.2                    pypi_0    pypi\n",
+      "colorama                  0.4.5              pyhd8ed1ab_0    conda-forge\n",
+      "contourpy                 1.0.5                    pypi_0    pypi\n",
+      "cryptography              37.0.4           py39h9c2a9ce_0    conda-forge\n",
+      "cycler                    0.11.0                   pypi_0    pypi\n",
+      "cymem                     2.0.6            py39hfd1d529_3    conda-forge\n",
+      "cython-blis               0.7.8            py39h15b18c7_0    conda-forge\n",
+      "dataclasses               0.8                pyhc8e2a94_3    conda-forge\n",
+      "debugpy                   1.6.3                    pypi_0    pypi\n",
+      "decorator                 5.1.1                    pypi_0    pypi\n",
+      "defusedxml                0.7.1                    pypi_0    pypi\n",
+      "entrypoints               0.4                      pypi_0    pypi\n",
+      "executing                 1.0.0                    pypi_0    pypi\n",
+      "fastjsonschema            2.16.2                   pypi_0    pypi\n",
+      "fiona                     1.8.21                   pypi_0    pypi\n",
+      "folium                    0.12.1.post1             pypi_0    pypi\n",
+      "fonttools                 4.37.3                   pypi_0    pypi\n",
+      "fr-core-news-sm           3.3.0                    pypi_0    pypi\n",
+      "geojson                   2.5.0                    pypi_0    pypi\n",
+      "geopandas                 0.11.1                   pypi_0    pypi\n",
+      "gpxpy                     1.5.0                    pypi_0    pypi\n",
+      "idna                      3.4                pyhd8ed1ab_0    conda-forge\n",
+      "importlib-metadata        4.12.0                   pypi_0    pypi\n",
+      "ipykernel                 6.15.3                   pypi_0    pypi\n",
+      "ipython                   8.5.0                    pypi_0    pypi\n",
+      "ipython-genutils          0.2.0                    pypi_0    pypi\n",
+      "ipywidgets                8.0.2                    pypi_0    pypi\n",
+      "jedi                      0.18.1                   pypi_0    pypi\n",
+      "jinja2                    3.1.2              pyhd8ed1ab_1    conda-forge\n",
+      "joblib                    1.2.0                    pypi_0    pypi\n",
+      "jsonschema                4.16.0                   pypi_0    pypi\n",
+      "jupyter                   1.0.0                    pypi_0    pypi\n",
+      "jupyter-client            7.3.5                    pypi_0    pypi\n",
+      "jupyter-console           6.4.4                    pypi_0    pypi\n",
+      "jupyter-core              4.11.1                   pypi_0    pypi\n",
+      "jupyterlab-pygments       0.2.2                    pypi_0    pypi\n",
+      "jupyterlab-widgets        3.0.3                    pypi_0    pypi\n",
+      "kiwisolver                1.4.4                    pypi_0    pypi\n",
+      "langcodes                 3.3.0              pyhd8ed1ab_0    conda-forge\n",
+      "libblas                   3.9.0           16_osx64_openblas    conda-forge\n",
+      "libcblas                  3.9.0           16_osx64_openblas    conda-forge\n",
+      "libcxx                    14.0.6               hccf4f1f_0    conda-forge\n",
+      "libffi                    3.4.2                h0d85af4_5    conda-forge\n",
+      "libgfortran               5.0.0           10_4_0_h97931a8_25    conda-forge\n",
+      "libgfortran5              11.3.0              h082f757_25    conda-forge\n",
+      "liblapack                 3.9.0           16_osx64_openblas    conda-forge\n",
+      "libopenblas               0.3.21          openmp_h429af6e_3    conda-forge\n",
+      "libsqlite                 3.39.3               ha978bb4_0    conda-forge\n",
+      "libzlib                   1.2.12               hfd90126_3    conda-forge\n",
+      "llvm-openmp               14.0.4               ha654fa7_0    conda-forge\n",
+      "lxml                      4.9.1                    pypi_0    pypi\n",
+      "markupsafe                2.1.1            py39h63b48b0_1    conda-forge\n",
+      "matplotlib                3.6.0                    pypi_0    pypi\n",
+      "matplotlib-inline         0.1.6                    pypi_0    pypi\n",
+      "mistune                   2.0.4                    pypi_0    pypi\n",
+      "munch                     2.5.0                    pypi_0    pypi\n",
+      "murmurhash                1.0.8            py39hd91caee_0    conda-forge\n",
+      "nbclient                  0.6.8                    pypi_0    pypi\n",
+      "nbconvert                 7.0.0                    pypi_0    pypi\n",
+      "nbformat                  5.5.0                    pypi_0    pypi\n",
+      "ncurses                   6.3                  h96cf925_1    conda-forge\n",
+      "nest-asyncio              1.5.5                    pypi_0    pypi\n",
+      "notebook                  6.4.12                   pypi_0    pypi\n",
+      "numpy                     1.23.3           py39h34843a6_0    conda-forge\n",
+      "openssl                   1.1.1q               hfe4f2af_0    conda-forge\n",
+      "packaging                 21.3               pyhd8ed1ab_0    conda-forge\n",
+      "pandas                    1.5.0                    pypi_0    pypi\n",
+      "pandocfilters             1.5.0                    pypi_0    pypi\n",
+      "parso                     0.8.3                    pypi_0    pypi\n",
+      "pathy                     0.6.2              pyhd8ed1ab_0    conda-forge\n",
+      "perdido                   0.1.27                   pypi_0    pypi\n",
+      "pexpect                   4.8.0                    pypi_0    pypi\n",
+      "pickleshare               0.7.5                    pypi_0    pypi\n",
+      "pillow                    9.2.0                    pypi_0    pypi\n",
+      "pip                       22.2.2             pyhd8ed1ab_0    conda-forge\n",
+      "preshed                   3.0.7            py39hd91caee_0    conda-forge\n",
+      "prometheus-client         0.14.1                   pypi_0    pypi\n",
+      "prompt-toolkit            3.0.31                   pypi_0    pypi\n",
+      "protobuf                  4.21.6                   pypi_0    pypi\n",
+      "psutil                    5.9.2                    pypi_0    pypi\n",
+      "ptyprocess                0.7.0                    pypi_0    pypi\n",
+      "pure-eval                 0.2.2                    pypi_0    pypi\n",
+      "pycparser                 2.21               pyhd8ed1ab_0    conda-forge\n",
+      "pydantic                  1.8.2                    pypi_0    pypi\n",
+      "pygments                  2.13.0                   pypi_0    pypi\n",
+      "pyopenssl                 22.0.0             pyhd8ed1ab_1    conda-forge\n",
+      "pyparsing                 3.0.9              pyhd8ed1ab_0    conda-forge\n",
+      "pyproj                    3.4.0                    pypi_0    pypi\n",
+      "pyrsistent                0.18.1                   pypi_0    pypi\n",
+      "pysocks                   1.7.1              pyha2e5f31_6    conda-forge\n",
+      "python                    3.9.13          h57e37ff_0_cpython    conda-forge\n",
+      "python-dateutil           2.8.2                    pypi_0    pypi\n",
+      "python_abi                3.9                      2_cp39    conda-forge\n",
+      "pytz                      2022.2.1                 pypi_0    pypi\n",
+      "pyzmq                     24.0.1                   pypi_0    pypi\n",
+      "qtconsole                 5.3.2                    pypi_0    pypi\n",
+      "qtpy                      2.2.0                    pypi_0    pypi\n",
+      "readline                  8.1.2                h3899abd_0    conda-forge\n",
+      "requests                  2.28.1             pyhd8ed1ab_1    conda-forge\n",
+      "scikit-learn              1.1.2                    pypi_0    pypi\n",
+      "scipy                     1.9.1                    pypi_0    pypi\n",
+      "send2trash                1.8.0                    pypi_0    pypi\n",
+      "setuptools                65.3.0             pyhd8ed1ab_1    conda-forge\n",
+      "shapely                   1.8.4                    pypi_0    pypi\n",
+      "shellingham               1.5.0              pyhd8ed1ab_0    conda-forge\n",
+      "six                       1.16.0                   pypi_0    pypi\n",
+      "smart_open                5.2.1              pyhd8ed1ab_0    conda-forge\n",
+      "soupsieve                 2.3.2.post1              pypi_0    pypi\n",
+      "spacy                     3.3.1                    pypi_0    pypi\n",
+      "spacy-legacy              3.0.10             pyhd8ed1ab_0    conda-forge\n",
+      "spacy-loggers             1.0.3              pyhd8ed1ab_0    conda-forge\n",
+      "sqlite                    3.39.3               h9ae0607_0    conda-forge\n",
+      "srsly                     2.4.4            py39hd408605_0    conda-forge\n",
+      "stack-data                0.5.0                    pypi_0    pypi\n",
+      "stanza                    1.2.3                    pypi_0    pypi\n",
+      "terminado                 0.15.0                   pypi_0    pypi\n",
+      "thinc                     8.0.17                   pypi_0    pypi\n",
+      "threadpoolctl             3.1.0                    pypi_0    pypi\n",
+      "tinycss2                  1.1.1                    pypi_0    pypi\n",
+      "tk                        8.6.12               h5dbffcc_0    conda-forge\n",
+      "torch                     1.12.1                   pypi_0    pypi\n",
+      "tornado                   6.2                      pypi_0    pypi\n",
+      "tqdm                      4.64.1             pyhd8ed1ab_0    conda-forge\n",
+      "traitlets                 5.4.0                    pypi_0    pypi\n",
+      "typer                     0.4.2              pyhd8ed1ab_0    conda-forge\n",
+      "typing-extensions         4.3.0                hd8ed1ab_0    conda-forge\n",
+      "typing_extensions         4.3.0              pyha770c72_0    conda-forge\n",
+      "tzdata                    2022c                h191b570_0    conda-forge\n",
+      "urllib3                   1.26.12                  pypi_0    pypi\n",
+      "wasabi                    0.10.1                   pypi_0    pypi\n",
+      "wcwidth                   0.2.5                    pypi_0    pypi\n",
+      "webencodings              0.5.1                    pypi_0    pypi\n",
+      "wheel                     0.37.1             pyhd8ed1ab_0    conda-forge\n",
+      "widgetsnbextension        4.0.3                    pypi_0    pypi\n",
+      "xz                        5.2.6                h775f41a_0    conda-forge\n",
+      "zipp                      3.8.1                    pypi_0    pypi\n"
+     ]
+    }
+   ],
+   "source": [
+    "!conda list"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<span class=\"tex2jax_ignore\"><div class=\"entities\" style=\"line-height: 2.5; direction: ltr\">* \n",
+       "<mark class=\"entity\" style=\"background: #ff9561; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n",
+       "    Beaufort\n",
+       "    <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">LOC</span>\n",
+       "</mark>\n",
+       ", (\n",
+       "<mark class=\"entity\" style=\"background: #ff9561; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n",
+       "    Géog\n",
+       "    <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">LOC</span>\n",
+       "</mark>\n",
+       ".) ville de \n",
+       "<mark class=\"entity\" style=\"background: #ff9561; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n",
+       "    Savoie\n",
+       "    <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">LOC</span>\n",
+       "</mark>\n",
+       ", sur la riviere d'\n",
+       "<mark class=\"entity\" style=\"background: #ff9561; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n",
+       "    Oron\n",
+       "    <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">LOC</span>\n",
+       "</mark>\n",
+       ". \n",
+       "<mark class=\"entity\" style=\"background: #ff9561; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n",
+       "    Long\n",
+       "    <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">LOC</span>\n",
+       "</mark>\n",
+       ". 24. 18. \n",
+       "<mark class=\"entity\" style=\"background: #ff9561; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n",
+       "    lat\n",
+       "    <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">LOC</span>\n",
+       "</mark>\n",
+       ". 45. 40.</div></span>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
    "source": [
     "normalized_beaufort = beaufort.replace('\\n', '')\n",
+    "\n",
     "normalized_beaufort_spacy = spacy_parser(normalized_beaufort)\n",
+    "\n",
     "displacy.render(normalized_beaufort_spacy, style=\"ent\", jupyter=True) "
    ]
   },
@@ -1784,7 +2977,7 @@
    "toc_visible": true
   },
   "kernelspec": {
-   "display_name": "Python 3",
+   "display_name": "Python 3.9.13 ('tdm-geoparsing-py39')",
    "language": "python",
    "name": "python3"
   },
@@ -1802,7 +2995,7 @@
   },
   "vscode": {
    "interpreter": {
-    "hash": "ac6d00d97418b1a9db80d25edbc60018a184534aba90cc2f485de2791198ec07"
+    "hash": "6acc50d30b16b763dee1f4b66a6227d64d7935f29ff57904338346e9b44e9e56"
    }
   }
  },
-- 
GitLab