diff --git a/Tutoriel-geoparsing.ipynb b/Tutoriel-geoparsing.ipynb index fec22156a7f761e5d74590075e0e80e480496db6..e91f397f9afcc7ecad5fac9fc639e379ef50a67d 100644 --- a/Tutoriel-geoparsing.ipynb +++ b/Tutoriel-geoparsing.ipynb @@ -121,7 +121,9 @@ "metadata": {}, "outputs": [], "source": [ - "filepath = 'data/volume01-4083.txt'" + "def load(filepath):\n", + " with open(filepath) as f:\n", + " return f.read()" ] }, { @@ -130,8 +132,7 @@ "metadata": {}, "outputs": [], "source": [ - "with open(filepath) as f:\n", - " content = f.read()" + "arques = load('data/volume01-4083.txt')" ] }, { @@ -155,7 +156,7 @@ } ], "source": [ - "print(content)" + "print(arques)" ] }, { @@ -512,14 +513,14 @@ "5 FRONTIGNAN, (Géog.) petite ville de France. au... " ] }, - "execution_count": 11, + "execution_count": 62, "metadata": {}, "output_type": "execute_result" } ], "source": [ "frontignan = data_artfl.loc[data_artfl['head'] == 'FRONTIGNAN']\n", - "frontignan " + "frontignan" ] }, { @@ -890,7 +891,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "* On utilise la variable `content` qui contient le texte chargé précédemment à partir du fichier" + "* On utilise la variable `arques` qui contient le texte chargé précédemment à partir du fichier" ] }, { @@ -907,7 +908,7 @@ } ], "source": [ - "print(content)" + "print(arques)" ] }, { @@ -923,14 +924,25 @@ "metadata": {}, "outputs": [], "source": [ - "doc = stanza_parser(content)" + "arques_stanza = stanza_parser(arques)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "* Afficher la liste des entités nommées repérées :" + "* Afficher la liste des entités nommées repérées. 
Avec Stanza, le résultat de l'analyse est un itérateur:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": {}, + "outputs": [], + "source": [ + "def show_ents(stanza_output):\n", + " for ent in stanza_output.ents:\n", + " print(ent.text, ent.type)" ] }, { @@ -951,8 +963,7 @@ } ], "source": [ - "for ent in doc.ents:\n", - " print(ent.text, ent.type)" + "show_ents(arques_stanza)" ] }, { @@ -1029,14 +1040,14 @@ "metadata": {}, "outputs": [], "source": [ - "doc = spacy_parser(content)" + "arques_spacy = spacy_parser(arques)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "* Afficher la liste des entités nommées repérées :" + "* Afficher la liste des entités nommées repérées. Les sorties de SpaCy sont dans un format similaire à celui de Stanza mais les étiquettes sont portées par l'attribut `label_` et pas `type`:" ] }, { @@ -1060,7 +1071,7 @@ } ], "source": [ - "for ent in doc.ents:\n", + "for ent in arques_spacy.ents:\n", " print(ent.text, ent.label_)" ] }, @@ -1068,7 +1079,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "* Afficher de manière graphique les entités nommées avec `displaCy` :" + "* Mais SpaCy fournit également une fonction pour effectuer un rendu plus graphique des annotations avec `displaCy` :" ] }, { @@ -1130,7 +1141,7 @@ } ], "source": [ - "displacy.render(doc, style=\"ent\", jupyter=True) " + "displacy.render(arques_spacy, style=\"ent\", jupyter=True) " ] }, { @@ -1169,7 +1180,7 @@ "metadata": {}, "outputs": [], "source": [ - "doc = geoparser(content)" + "arques_perdido = geoparser(arques)" ] }, { @@ -1198,7 +1209,7 @@ } ], "source": [ - "for ent in doc.named_entities:\n", + "for ent in arques_perdido.named_entities:\n", " print(ent.text, ent.tag)" ] }, @@ -1206,7 +1217,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "* Afficher de manière graphique les entités nommées avec `displaCy` :" + "* Afficher de manière graphique les entités nommées avec `displaCy` grâce à la méthode de 
conversion `to_spacy_doc`:" ] }, { @@ -1258,14 +1269,14 @@ } ], "source": [ - "displacy.render(doc.to_spacy_doc(), style=\"ent\", jupyter=True)" + "displacy.render(arques_perdido.to_spacy_doc(), style=\"ent\", jupyter=True)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "* Afficher de manière graphique les entités nommées étendues avec `displaCy` :" + "* Un rendu similaire mais qui permet de visualiser les inclusions (`style=\"ent\"` -> `style=\"span\"`) :" ] }, { @@ -1612,7 +1623,7 @@ } ], "source": [ - "displacy.render(doc.to_spacy_doc(), style=\"span\", jupyter=True)" + "displacy.render(arques_perdido.to_spacy_doc(), style=\"span\", jupyter=True)" ] }, { @@ -1637,11 +1648,9 @@ } ], "source": [ - "filepath = 'data/volume02-1365.txt'\n", - "with open(filepath) as f:\n", - " beaufort_article = f.read()\n", + "beaufort = load('data/volume02-1365.txt')\n", "\n", - "print(beaufort_article)" + "print(beaufort)" ] }, { @@ -1949,9 +1958,9 @@ } ], "source": [ - "doc = geoparser(beaufort_article)\n", - "displacy.render(doc.to_spacy_doc(), style=\"ent\", jupyter=True)\n", - "displacy.render(doc.to_spacy_doc(), style=\"span\", jupyter=True)" + "beaufort_perdido = geoparser(beaufort)\n", + "displacy.render(beaufort_perdido.to_spacy_doc(), style=\"ent\", jupyter=True)\n", + "displacy.render(beaufort_perdido.to_spacy_doc(), style=\"span\", jupyter=True)" ] }, { @@ -2000,14 +2009,16 @@ } ], "source": [ - "doc = spacy_parser(beaufort_article)\n", - "displacy.render(doc, style=\"ent\", jupyter=True) " + "beaufort_spacy = spacy_parser(beaufort)\n", + "displacy.render(beaufort_spacy, style=\"ent\", jupyter=True) " ] }, { "cell_type": "markdown", "metadata": {}, - "source": [] + "source": [ + "La largeur de ligne du texte brut, due à la largeur de la colonne dans l'œuvre originale, semble avoir été conservée. Essayons de «lisser» ces caractéristiques pour voir s'il est possible d'améliorer la reconnaissance." 
+ ] }, { "cell_type": "code", @@ -2058,15 +2069,17 @@ } ], "source": [ - "beaufort_norm = beaufort_article.replace('\\n', '')\n", - "doc = spacy_parser(beaufort_norm)\n", - "displacy.render(doc, style=\"ent\", jupyter=True) " + "normalized_beaufort = beaufort.replace('\\n', '')\n", + "normalized_beaufort_spacy = spacy_parser(normalized_beaufort)\n", + "displacy.render(normalized_beaufort_spacy, style=\"ent\", jupyter=True) " ] }, { "cell_type": "markdown", "metadata": {}, - "source": [] + "source": [ + "Apparemment ça n'améliore rien, mais il manque encore l'accent à «rivière»." + ] }, { "cell_type": "code", @@ -2117,16 +2130,25 @@ } ], "source": [ - "beaufort_norm = beaufort_norm.replace('riviere', 'rivière')\n", - "doc = spacy_parser(beaufort_norm)\n", - "displacy.render(doc, style=\"ent\", jupyter=True) " + "normalized_beaufort = normalized_beaufort.replace('riviere', 'rivière')\n", + "normalized_beaufort_spacy = spacy_parser(normalized_beaufort)\n", + "displacy.render(normalized_beaufort_spacy, style=\"ent\", jupyter=True) " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Cette fois l'entité étendue incluant le nom commun «rivière» a été reconnue par SpaCy, qui a pu ainsi corriger le type de l'entité nommée et se rendre compte que l'Oron était un endroit et pas une personne.\n", + "\n", + "Essayons maintenant avec Stanza." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "* Stanza" + "- Stanza" ] }, { @@ -2139,258 +2161,686 @@ "output_type": "stream", "text": [ "Beaufort LOC\n", + "Géog LOC\n", "Savoie LOC\n", "Oron LOC\n" ] } ], "source": [ - "doc = stanza_parser(beaufort_article)\n", - "for ent in doc.ents:\n", - " print(ent.text, ent.type)" + "beaufort_stanza = stanza_parser(beaufort)\n", + "show_ents(beaufort_stanza)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "## 6. 
Geoparsing / Geocoding\n", + "Stanza a directement repéré que l'Oron était un lieu mais veut, comme SpaCy, annoter «Géog» qui ne devrait pas l'être.\n", "\n", - "En complément de la tâche de reconnaissance des entités nommées la librairie `Perdido` propose également celle de résolution des toponymes, on parle alors de *Geoparsing*. Cette tâche consiste a associer à un nom de lieu des coordonnées géographiques non ambigus. De manière classique elle s'appuie sur le repérage des entités spatiales identifées lors de la reconnaissance des entités nommées et fait appel à des ressources externes de type *gazetier* (ou dictionnaires topographique) pour localiser les lieux." + "Regardons maintenant ce que l'on dit sur la même ville de Beaufort un peu plus d'un siècle plus tard, fin XIXème siècle, dans [La Grande Encyclopédie](https://www.collexpersee.eu/projet/disco-lge/) (LGE)." ] }, { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "BEAUFORT ou Beaufort-sur-Doron ou Saint-Maxime-\n", + "de-Bf.aufort. Ch.-l. de cant. du dép. de la Savoie, arr.\n", + "d’Albertville, au débouché de trois vallées dont les tor¬\n", + "rents forment le Doron de Beaufort ; 2,393 hab. Les\n", + "superbes pâturages de la vallée nourrissent de nombreux\n", + "troupeaux ; il se fait à Beaufort un commerce important de\n", + "fromages et de bestiaux. Un assez grand nombre d’habi¬\n", + "tants quittent le pays pendant l’hiver. De l’ancien château\n", + "de la Salle qu’Henri IV habita à deux reprises pendant la\n", + "guerre qu’il soutint contre le duc de Savoie, il subsiste\n", + "trois tours. 
Ancienne chapelle, reconstruite en 1841,\n", + "qui est le but d’un pèlerinage très fréquenté.\n", + "\n" + ] + } + ], "source": [ - "### 6.1 Perdido Geoparser" + "lge_beaufort = load('data/beaufort.txt')\n", + "print(lge_beaufort)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "* On re-execute Perdido sur l'exemple de l'article `ARQUES`" + "Cette fois l'article est un peu plus long et comporte des césures de lignes importantes, définissons donc une fonction pour recoller les morceaux:" ] }, { "cell_type": "code", - "execution_count": 40, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "content = \"* ARQUES, (Géog.) petite ville de France, en Normandie, au pays de Caux, sur la petite riviere d'Arques. Long. 18. 50. lat. 49. 54.\"\n", - "doc = geoparser(content)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "* En plus de pouvoir afficher la liste des entités nommées comme nous l'avons fait précédemmment, nous pouvons directement afficher la carte des lieux localisés" + "def join_lines(s):\n", + " return s.replace('¬\\n', '').replace('-\\n', '').replace('\\n', ' ')" ] }, { "cell_type": "code", - "execution_count": 41, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "<span class=\"tex2jax_ignore\"><div class=\"entities\" style=\"line-height: 2.5; direction: ltr\">* \n", - "<mark class=\"entity\" style=\"background: #ff9561; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n", - " ARQUES\n", - " <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">LOC</span>\n", - "</mark>\n", - " , ( Géog . 
) petite ville de \n", - "<mark class=\"entity\" style=\"background: #ff9561; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n", - " France\n", - " <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">LOC</span>\n", - "</mark>\n", - " , en \n", - "<mark class=\"entity\" style=\"background: #ff9561; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n", - " Normandie\n", - " <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">LOC</span>\n", - "</mark>\n", - " , au pays de \n", - "<mark class=\"entity\" style=\"background: #ff9561; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n", - " Caux\n", - " <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">LOC</span>\n", - "</mark>\n", - " , sur la petite riviere d' \n", - "<mark class=\"entity\" style=\"background: #ff9561; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n", - " Arques\n", - " <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">LOC</span>\n", - "</mark>\n", - " . \n", - "<mark class=\"entity\" style=\"background: #ddd; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n", - " Long . 18 . 50 . lat . 49 . 
54 .\n", - " <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">MISC</span>\n", - "</mark>\n", - " </div></span>" - ], - "text/plain": [ - "<IPython.core.display.HTML object>" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ - "displacy.render(doc.to_spacy_doc(), style=\"ent\", jupyter=True)" + "lge_beaufort_perdido = geoparser(join_lines(lge_beaufort))" ] }, { "cell_type": "code", - "execution_count": 42, + "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/html": [ - "<div style=\"width:100%;\"><div style=\"position:relative;width:100%;height:0;padding-bottom:60%;\"><span style=\"color:#565656\">Make this Notebook Trusted to load map: File -> Trust Notebook</span><iframe srcdoc=\"<!DOCTYPE html>\n", - "<head> \n", - " <meta http-equiv="content-type" content="text/html; charset=UTF-8" />\n", + "<span class=\"tex2jax_ignore\"><div class=\"spans\" style=\"line-height: 2.5; direction: ltr\">\n", + "<span style=\"font-weight: bold; display: inline-block; position: relative; height: 60px;\">\n", + " BEAUFORT\n", " \n", - " <script>\n", - " L_NO_TOUCH = false;\n", - " L_DISABLE_3D = false;\n", - " </script>\n", + "<span style=\"background: #ff9561; top: 40px; height: 4px; left: -1px; width: calc(100% + 2px); position: absolute;\">\n", + "</span>\n", + "\n", " \n", - " <style>html, body {width: 100%;height: 100%;margin: 0;padding: 0;}</style>\n", - " <style>#map {position:absolute;top:0;bottom:0;right:0;left:0;}</style>\n", - " <script src="https://cdn.jsdelivr.net/npm/leaflet@1.6.0/dist/leaflet.js"></script>\n", - " <script src="https://code.jquery.com/jquery-1.12.4.min.js"></script>\n", - " <script src="https://maxcdn.bootstrapcdn.com/bootstrap/3.2.0/js/bootstrap.min.js"></script>\n", - " <script src="https://cdnjs.cloudflare.com/ajax/libs/Leaflet.awesome-markers/2.0.2/leaflet.awesome-markers.js"></script>\n", - " 
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/leaflet@1.6.0/dist/leaflet.css"/>\n", - " <link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.2.0/css/bootstrap.min.css"/>\n", - " <link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.2.0/css/bootstrap-theme.min.css"/>\n", - " <link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/font-awesome/4.6.3/css/font-awesome.min.css"/>\n", - " <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/Leaflet.awesome-markers/2.0.2/leaflet.awesome-markers.css"/>\n", - " <link rel="stylesheet" href="https://cdn.jsdelivr.net/gh/python-visualization/folium/folium/templates/leaflet.awesome.rotate.min.css"/>\n", + "<span style=\"background: #ff9561; top: 40px; height: 4px; border-top-left-radius: 3px; border-bottom-left-radius: 3px; left: -1px; width: calc(100% + 2px); position: absolute;\">\n", + " <span style=\"background: #ff9561; z-index: 10; color: #000; top: -0.5em; padding: 2px 3px; position: absolute; font-size: 0.6em; font-weight: bold; line-height: 1; border-radius: 3px\">\n", + " LOC\n", + " </span>\n", + "</span>\n", + "\n", + "\n", + "</span>\n", + "ou \n", + "<span style=\"font-weight: bold; display: inline-block; position: relative; height: 60px;\">\n", + " Beaufort-sur-Doron\n", " \n", - " <meta name="viewport" content="width=device-width,\n", - " initial-scale=1.0, maximum-scale=1.0, user-scalable=no" />\n", - " <style>\n", - " #map_6dbf584961a53874bc9d540badef0612 {\n", - " position: relative;\n", - " width: 100.0%;\n", - " height: 100.0%;\n", - " left: 0.0%;\n", - " top: 0.0%;\n", - " }\n", - " </style>\n", - " \n", + "<span style=\"background: #ff9561; top: 40px; height: 4px; left: -1px; width: calc(100% + 2px); position: absolute;\">\n", + "</span>\n", + "\n", " \n", - " <style>\n", - " .foliumtooltip {\n", - " \n", - " }\n", - " .foliumtooltip table{\n", - " margin: auto;\n", - " }\n", - " .foliumtooltip tr{\n", - " text-align: left;\n", - " 
}\n", - " .foliumtooltip th{\n", - " padding: 2px; padding-right: 8px;\n", - " }\n", - " </style>\n", - " \n", - "</head>\n", - "<body> \n", + "<span style=\"background: #ff9561; top: 40px; height: 4px; border-top-left-radius: 3px; border-bottom-left-radius: 3px; left: -1px; width: calc(100% + 2px); position: absolute;\">\n", + " <span style=\"background: #ff9561; z-index: 10; color: #000; top: -0.5em; padding: 2px 3px; position: absolute; font-size: 0.6em; font-weight: bold; line-height: 1; border-radius: 3px\">\n", + " LOC\n", + " </span>\n", + "</span>\n", + "\n", + "\n", + "</span>\n", + "ou \n", + "<span style=\"font-weight: bold; display: inline-block; position: relative; height: 60px;\">\n", + " Saint-Maxime-\n", " \n", - " <div class="folium-map" id="map_6dbf584961a53874bc9d540badef0612" ></div>\n", - " \n", - "</body>\n", - "<script> \n", + "<span style=\"background: #ff9561; top: 40px; height: 4px; left: -1px; width: calc(100% + 2px); position: absolute;\">\n", + "</span>\n", + "\n", " \n", - " var map_6dbf584961a53874bc9d540badef0612 = L.map(\n", - " "map_6dbf584961a53874bc9d540badef0612",\n", - " {\n", - " center: [0, 0],\n", - " crs: L.CRS.EPSG3857,\n", - " zoom: 1,\n", - " zoomControl: true,\n", - " preferCanvas: false,\n", - " }\n", - " );\n", + "<span style=\"background: #ff9561; top: 40px; height: 4px; border-top-left-radius: 3px; border-bottom-left-radius: 3px; left: -1px; width: calc(100% + 2px); position: absolute;\">\n", + " <span style=\"background: #ff9561; z-index: 10; color: #000; top: -0.5em; padding: 2px 3px; position: absolute; font-size: 0.6em; font-weight: bold; line-height: 1; border-radius: 3px\">\n", + " LOC\n", + " </span>\n", + "</span>\n", "\n", - " \n", "\n", - " \n", + "</span>\n", + "de-Bf.aufort . 
\n", + "<span style=\"font-weight: bold; display: inline-block; position: relative; height: 60px;\">\n", + " Ch.-l\n", " \n", - " var tile_layer_2f85a522000c17f5b0d7c28778e46700 = L.tileLayer(\n", - " "https://{s}.tile.openstreetmap.org/{z}/{x}/{y}.png",\n", - " {"attribution": "Data by \\u0026copy; \\u003ca href=\\"http://openstreetmap.org\\"\\u003eOpenStreetMap\\u003c/a\\u003e, under \\u003ca href=\\"http://www.openstreetmap.org/copyright\\"\\u003eODbL\\u003c/a\\u003e.", "detectRetina": false, "maxNativeZoom": 18, "maxZoom": 18, "minZoom": 0, "noWrap": false, "opacity": 1, "subdomains": "abc", "tms": false}\n", - " ).addTo(map_6dbf584961a53874bc9d540badef0612);\n", - " \n", + "<span style=\"background: #ff9561; top: 40px; height: 4px; left: -1px; width: calc(100% + 2px); position: absolute;\">\n", + "</span>\n", + "\n", " \n", - " map_6dbf584961a53874bc9d540badef0612.fitBounds(\n", - " [[43.509654, 0.313853], [50.739664, 3.363576]],\n", - " {}\n", - " );\n", - " \n", + "<span style=\"background: #ff9561; top: 40px; height: 4px; border-top-left-radius: 3px; border-bottom-left-radius: 3px; left: -1px; width: calc(100% + 2px); position: absolute;\">\n", + " <span style=\"background: #ff9561; z-index: 10; color: #000; top: -0.5em; padding: 2px 3px; position: absolute; font-size: 0.6em; font-weight: bold; line-height: 1; border-radius: 3px\">\n", + " LOC\n", + " </span>\n", + "</span>\n", + "\n", + "\n", + "</span>\n", + ". de cant . du dép . 
de la \n", + "<span style=\"font-weight: bold; display: inline-block; position: relative; height: 60px;\">\n", + " Savoie\n", " \n", + "<span style=\"background: #ff9561; top: 40px; height: 4px; left: -1px; width: calc(100% + 2px); position: absolute;\">\n", + "</span>\n", "\n", - " function geo_json_281df9179c3992231a899246639ff237_onEachFeature(feature, layer) {\n", - " layer.on({\n", - " });\n", - " };\n", - " var geo_json_281df9179c3992231a899246639ff237 = L.geoJson(null, {\n", - " onEachFeature: geo_json_281df9179c3992231a899246639ff237_onEachFeature,\n", - " \n", - " });\n", + " \n", + "<span style=\"background: #ff9561; top: 40px; height: 4px; border-top-left-radius: 3px; border-bottom-left-radius: 3px; left: -1px; width: calc(100% + 2px); position: absolute;\">\n", + " <span style=\"background: #ff9561; z-index: 10; color: #000; top: -0.5em; padding: 2px 3px; position: absolute; font-size: 0.6em; font-weight: bold; line-height: 1; border-radius: 3px\">\n", + " LOC\n", + " </span>\n", + "</span>\n", "\n", - " function geo_json_281df9179c3992231a899246639ff237_add (data) {\n", - " geo_json_281df9179c3992231a899246639ff237\n", - " .addData(data)\n", - " .addTo(map_6dbf584961a53874bc9d540badef0612);\n", - " }\n", - " geo_json_281df9179c3992231a899246639ff237_add({"features": [{"geometry": {"coordinates": [2.306207, 50.739664], "type": "Point"}, "properties": {"country": "France", "id": "en.3", "name": "ARQUES", "source": "nominatim", "sourceName": "Arques, Saint-Omer, Pas-de-Calais, Hauts-de-France, France m\\u00e9tropolitaine, 62510, France", "type": "administrative"}, "type": "Feature"}, {"geometry": {"coordinates": [1.888334, 46.603354], "type": "Point"}, "properties": {"country": "France", "id": "en.7", "name": "France", "source": "nominatim", "sourceName": "France", "type": "administrative"}, "type": "Feature"}, {"geometry": {"coordinates": [0.313853, 49.067771], "type": "Point"}, "properties": {"country": "France", "id": "en.9", "name": "Normandie", 
"source": "nominatim", "sourceName": "Normandie, France m\\u00e9tropolitaine, France", "type": "administrative"}, "type": "Feature"}, {"geometry": {"coordinates": [3.363576, 43.509654], "type": "Point"}, "properties": {"country": "France", "id": "en.13", "name": "Caux", "source": "nominatim", "sourceName": "Caux, B\\u00e9ziers, H\\u00e9rault, Occitanie, France m\\u00e9tropolitaine, 34720, France", "type": "administrative"}, "type": "Feature"}, {"geometry": {"coordinates": [2.306207, 50.739664], "type": "Point"}, "properties": {"country": "France", "id": "en.17", "name": "Arques", "source": "nominatim", "sourceName": "Arques, Saint-Omer, Pas-de-Calais, Hauts-de-France, France m\\u00e9tropolitaine, 62510, France", "type": "administrative"}, "type": "Feature"}], "type": "FeatureCollection"});\n", "\n", - " \n", + "</span>\n", + ", arr . d' \n", + "<span style=\"font-weight: bold; display: inline-block; position: relative; height: 60px;\">\n", + " Albertville\n", " \n", - " geo_json_281df9179c3992231a899246639ff237.bindTooltip(\n", - " function(layer){\n", - " let div = L.DomUtil.create('div');\n", + "<span style=\"background: #ff9561; top: 40px; height: 4px; left: -1px; width: calc(100% + 2px); position: absolute;\">\n", + "</span>\n", + "\n", " \n", - " let handleObject = feature=>typeof(feature)=='object' ? 
JSON.stringify(feature) : feature;\n", - " let fields = ["name", "source"];\n", - " let aliases = ["name", "source"];\n", - " let table = '<table>' +\n", - " String(\n", - " fields.map(\n", - " (v,i)=>\n", - " `<tr>\n", - " <th>${aliases[i].toLocaleString()}</th>\n", - " \n", - " <td>${handleObject(layer.feature.properties[v]).toLocaleString()}</td>\n", - " </tr>`).join(''))\n", - " +'</table>';\n", - " div.innerHTML=table;\n", + "<span style=\"background: #ff9561; top: 40px; height: 4px; border-top-left-radius: 3px; border-bottom-left-radius: 3px; left: -1px; width: calc(100% + 2px); position: absolute;\">\n", + " <span style=\"background: #ff9561; z-index: 10; color: #000; top: -0.5em; padding: 2px 3px; position: absolute; font-size: 0.6em; font-weight: bold; line-height: 1; border-radius: 3px\">\n", + " LOC\n", + " </span>\n", + "</span>\n", + "\n", + "\n", + "</span>\n", + ", au débouché de trois vallées dont les torrents forment le \n", + "<span style=\"font-weight: bold; display: inline-block; position: relative; height: 60px;\">\n", + " Doron\n", " \n", - " return div\n", - " }\n", - " ,{"className": "foliumtooltip", "sticky": true});\n", - " \n", - "</script>\" style=\"position:absolute;width:100%;height:100%;left:0;top:0;border:none !important;\" allowfullscreen webkitallowfullscreen mozallowfullscreen></iframe></div></div>" - ], - "text/plain": [ - "<folium.folium.Map at 0x1393fac40>" - ] + "<span style=\"background: #ff9561; top: 40px; height: 4px; left: -1px; width: calc(100% + 2px); position: absolute;\">\n", + "</span>\n", + "\n", + " \n", + "<span style=\"background: #ff9561; top: 40px; height: 4px; border-top-left-radius: 3px; border-bottom-left-radius: 3px; left: -1px; width: calc(100% + 2px); position: absolute;\">\n", + " <span style=\"background: #ff9561; z-index: 10; color: #000; top: -0.5em; padding: 2px 3px; position: absolute; font-size: 0.6em; font-weight: bold; line-height: 1; border-radius: 3px\">\n", + " LOC\n", + " </span>\n", + 
"</span>\n", + "\n", + "\n", + "</span>\n", + "\n", + "<span style=\"font-weight: bold; display: inline-block; position: relative; height: 60px;\">\n", + " de\n", + " \n", + "<span style=\"background: #ff9561; top: 40px; height: 4px; left: -1px; width: calc(100% + 2px); position: absolute;\">\n", + "</span>\n", + "\n", + " \n", + "</span>\n", + "\n", + "<span style=\"font-weight: bold; display: inline-block; position: relative; height: 60px;\">\n", + " Beaufort\n", + " \n", + "<span style=\"background: #ff9561; top: 40px; height: 4px; left: -1px; width: calc(100% + 2px); position: absolute;\">\n", + "</span>\n", + "\n", + " \n", + "</span>\n", + "; 2,393 hab . Les superbes pâturages de la vallée nourrissent de nombreux troupeaux ; il se fait à \n", + "<span style=\"font-weight: bold; display: inline-block; position: relative; height: 60px;\">\n", + " Beaufort\n", + " \n", + "<span style=\"background: #ff9561; top: 40px; height: 4px; left: -1px; width: calc(100% + 2px); position: absolute;\">\n", + "</span>\n", + "\n", + " \n", + "<span style=\"background: #ff9561; top: 40px; height: 4px; border-top-left-radius: 3px; border-bottom-left-radius: 3px; left: -1px; width: calc(100% + 2px); position: absolute;\">\n", + " <span style=\"background: #ff9561; z-index: 10; color: #000; top: -0.5em; padding: 2px 3px; position: absolute; font-size: 0.6em; font-weight: bold; line-height: 1; border-radius: 3px\">\n", + " LOC\n", + " </span>\n", + "</span>\n", + "\n", + "\n", + "</span>\n", + "un commerce important de fromages et de bestiaux . Un assez grand nombre d' habitants quittent le pays pendant l' hiver . 
De l' \n", + "<span style=\"font-weight: bold; display: inline-block; position: relative; height: 60px;\">\n", + " ancien\n", + " \n", + "<span style=\"background: #ff9561; top: 40px; height: 4px; left: -1px; width: calc(100% + 2px); position: absolute;\">\n", + "</span>\n", + "\n", + " \n", + "<span style=\"background: #ff9561; top: 40px; height: 4px; border-top-left-radius: 3px; border-bottom-left-radius: 3px; left: -1px; width: calc(100% + 2px); position: absolute;\">\n", + " <span style=\"background: #ff9561; z-index: 10; color: #000; top: -0.5em; padding: 2px 3px; position: absolute; font-size: 0.6em; font-weight: bold; line-height: 1; border-radius: 3px\">\n", + " LOC\n", + " </span>\n", + "</span>\n", + "\n", + "\n", + "</span>\n", + "\n", + "<span style=\"font-weight: bold; display: inline-block; position: relative; height: 60px;\">\n", + " château\n", + " \n", + "<span style=\"background: #ff9561; top: 40px; height: 4px; left: -1px; width: calc(100% + 2px); position: absolute;\">\n", + "</span>\n", + "\n", + " \n", + "</span>\n", + "\n", + "<span style=\"font-weight: bold; display: inline-block; position: relative; height: 60px;\">\n", + " de\n", + " \n", + "<span style=\"background: #ff9561; top: 40px; height: 4px; left: -1px; width: calc(100% + 2px); position: absolute;\">\n", + "</span>\n", + "\n", + " \n", + "</span>\n", + "\n", + "<span style=\"font-weight: bold; display: inline-block; position: relative; height: 60px;\">\n", + " la\n", + " \n", + "<span style=\"background: #ff9561; top: 40px; height: 4px; left: -1px; width: calc(100% + 2px); position: absolute;\">\n", + "</span>\n", + "\n", + " \n", + "</span>\n", + "\n", + "<span style=\"font-weight: bold; display: inline-block; position: relative; height: 77px;\">\n", + " Salle\n", + " \n", + "<span style=\"background: #ff9561; top: 40px; height: 4px; left: -1px; width: calc(100% + 2px); position: absolute;\">\n", + "</span>\n", + "\n", + "<span style=\"background: #ff9561; top: 57px; height: 4px; 
left: -1px; width: calc(100% + 2px); position: absolute;\">\n", + "</span>\n", + "\n", + " \n", + "<span style=\"background: #ff9561; top: 57px; height: 4px; border-top-left-radius: 3px; border-bottom-left-radius: 3px; left: -1px; width: calc(100% + 2px); position: absolute;\">\n", + " <span style=\"background: #ff9561; z-index: 10; color: #000; top: -0.5em; padding: 2px 3px; position: absolute; font-size: 0.6em; font-weight: bold; line-height: 1; border-radius: 3px\">\n", + " LOC\n", + " </span>\n", + "</span>\n", + "\n", + "\n", + "</span>\n", + "qu' \n", + "<span style=\"font-weight: bold; display: inline-block; position: relative; height: 60px;\">\n", + " Henri\n", + " \n", + "<span style=\"background: #aa9cfc; top: 40px; height: 4px; left: -1px; width: calc(100% + 2px); position: absolute;\">\n", + "</span>\n", + "\n", + " \n", + "<span style=\"background: #aa9cfc; top: 40px; height: 4px; border-top-left-radius: 3px; border-bottom-left-radius: 3px; left: -1px; width: calc(100% + 2px); position: absolute;\">\n", + " <span style=\"background: #aa9cfc; z-index: 10; color: #000; top: -0.5em; padding: 2px 3px; position: absolute; font-size: 0.6em; font-weight: bold; line-height: 1; border-radius: 3px\">\n", + " PERSON\n", + " </span>\n", + "</span>\n", + "\n", + "\n", + "</span>\n", + "\n", + "<span style=\"font-weight: bold; display: inline-block; position: relative; height: 60px;\">\n", + " IV\n", + " \n", + "<span style=\"background: #aa9cfc; top: 40px; height: 4px; left: -1px; width: calc(100% + 2px); position: absolute;\">\n", + "</span>\n", + "\n", + " \n", + "</span>\n", + "habita à deux reprises pendant la guerre qu' il soutint contre \n", + "<span style=\"font-weight: bold; display: inline-block; position: relative; height: 60px;\">\n", + " le\n", + " \n", + "<span style=\"background: #aa9cfc; top: 40px; height: 4px; left: -1px; width: calc(100% + 2px); position: absolute;\">\n", + "</span>\n", + "\n", + " \n", + "<span style=\"background: #aa9cfc; top: 
40px; height: 4px; border-top-left-radius: 3px; border-bottom-left-radius: 3px; left: -1px; width: calc(100% + 2px); position: absolute;\">\n", + " <span style=\"background: #aa9cfc; z-index: 10; color: #000; top: -0.5em; padding: 2px 3px; position: absolute; font-size: 0.6em; font-weight: bold; line-height: 1; border-radius: 3px\">\n", + " PERSON\n", + " </span>\n", + "</span>\n", + "\n", + "\n", + "</span>\n", + "\n", + "<span style=\"font-weight: bold; display: inline-block; position: relative; height: 60px;\">\n", + " duc\n", + " \n", + "<span style=\"background: #aa9cfc; top: 40px; height: 4px; left: -1px; width: calc(100% + 2px); position: absolute;\">\n", + "</span>\n", + "\n", + " \n", + "</span>\n", + "\n", + "<span style=\"font-weight: bold; display: inline-block; position: relative; height: 60px;\">\n", + " de\n", + " \n", + "<span style=\"background: #aa9cfc; top: 40px; height: 4px; left: -1px; width: calc(100% + 2px); position: absolute;\">\n", + "</span>\n", + "\n", + " \n", + "</span>\n", + "\n", + "<span style=\"font-weight: bold; display: inline-block; position: relative; height: 77px;\">\n", + " Savoie\n", + " \n", + "<span style=\"background: #aa9cfc; top: 40px; height: 4px; left: -1px; width: calc(100% + 2px); position: absolute;\">\n", + "</span>\n", + "\n", + "<span style=\"background: #ff9561; top: 57px; height: 4px; left: -1px; width: calc(100% + 2px); position: absolute;\">\n", + "</span>\n", + "\n", + " \n", + "<span style=\"background: #ff9561; top: 57px; height: 4px; border-top-left-radius: 3px; border-bottom-left-radius: 3px; left: -1px; width: calc(100% + 2px); position: absolute;\">\n", + " <span style=\"background: #ff9561; z-index: 10; color: #000; top: -0.5em; padding: 2px 3px; position: absolute; font-size: 0.6em; font-weight: bold; line-height: 1; border-radius: 3px\">\n", + " LOC\n", + " </span>\n", + "</span>\n", + "\n", + "\n", + "</span>\n", + ", il subsiste trois tours . 
Ancienne chapelle , reconstruite \n", + "<span style=\"font-weight: bold; display: inline-block; position: relative; height: 60px;\">\n", + " en\n", + " \n", + "<span style=\"background: #bfe1d9; top: 40px; height: 4px; left: -1px; width: calc(100% + 2px); position: absolute;\">\n", + "</span>\n", + "\n", + " \n", + "<span style=\"background: #bfe1d9; top: 40px; height: 4px; border-top-left-radius: 3px; border-bottom-left-radius: 3px; left: -1px; width: calc(100% + 2px); position: absolute;\">\n", + " <span style=\"background: #bfe1d9; z-index: 10; color: #000; top: -0.5em; padding: 2px 3px; position: absolute; font-size: 0.6em; font-weight: bold; line-height: 1; border-radius: 3px\">\n", + " DATE\n", + " </span>\n", + "</span>\n", + "\n", + "\n", + "</span>\n", + "\n", + "<span style=\"font-weight: bold; display: inline-block; position: relative; height: 60px;\">\n", + " 1841\n", + " \n", + "<span style=\"background: #bfe1d9; top: 40px; height: 4px; left: -1px; width: calc(100% + 2px); position: absolute;\">\n", + "</span>\n", + "\n", + " \n", + "</span>\n", + ", qui est le but d' un pèlerinage très fréquenté . 
</div></span>" + ], + "text/plain": [ + "<IPython.core.display.HTML object>" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "displacy.render(lge_beaufort_perdido.to_spacy_doc(), style=\"span\", jupyter=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 140, + "metadata": {}, + "outputs": [], + "source": [ + "lge_beaufort_spacy = spacy_parser(join_lines(lge_beaufort))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<span class=\"tex2jax_ignore\"><div class=\"entities\" style=\"line-height: 2.5; direction: ltr\">\n", + "<mark class=\"entity\" style=\"background: #ddd; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n", + " BEAUFORT\n", + " <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">MISC</span>\n", + "</mark>\n", + " ou \n", + "<mark class=\"entity\" style=\"background: #ff9561; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n", + " Beaufort-sur-Doron\n", + " <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">LOC</span>\n", + "</mark>\n", + " ou Saint-Maximede-Bf.aufort. Ch.-l. de cant. du dép. de la \n", + "<mark class=\"entity\" style=\"background: #ff9561; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n", + " Savoie\n", + " <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">LOC</span>\n", + "</mark>\n", + ", arr. 
\n", + "<mark class=\"entity\" style=\"background: #ddd; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n", + " d’Albertville\n", + " <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">PER</span>\n", + "</mark>\n", + ", au débouché de trois vallées dont les torrents forment le \n", + "<mark class=\"entity\" style=\"background: #7aecec; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n", + " Doron de Beaufort\n", + " <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">ORG</span>\n", + "</mark>\n", + " ; 2,393 hab. Les superbes pâturages de la vallée nourrissent de nombreux troupeaux ; il se fait à \n", + "<mark class=\"entity\" style=\"background: #ff9561; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n", + " Beaufort\n", + " <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">LOC</span>\n", + "</mark>\n", + " un commerce important de fromages et de bestiaux. Un assez grand nombre d’habitants quittent le pays pendant l’hiver. 
De l’ancien \n", + "<mark class=\"entity\" style=\"background: #ff9561; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n", + " château de la Salle\n", + " <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">LOC</span>\n", + "</mark>\n", + " qu’Henri IV habita à deux reprises pendant la guerre qu’il soutint contre le \n", + "<mark class=\"entity\" style=\"background: #ddd; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n", + " duc de Savoie\n", + " <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">PER</span>\n", + "</mark>\n", + ", il subsiste trois tours. Ancienne chapelle, reconstruite en 1841, qui est le but d’un pèlerinage très fréquenté. </div></span>" + ], + "text/plain": [ + "<IPython.core.display.HTML object>" + ] }, - "execution_count": 42, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "displacy.render(lge_beaufort_spacy, style=\"ent\", jupyter=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 144, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "BEAUFORT LOC\n", + "Beaufort-sur-Doron LOC\n", + "Saint-Maxime-\n", + "de-Bf.aufort PER\n", + "Savoie LOC\n", + "Albertville LOC\n", + "Doron de Beaufort LOC\n", + "Beaufort LOC\n", + "château\n", + "de la Salle LOC\n", + "Henri IV PER\n", + "duc de Savoie PER\n" + ] + } + ], + "source": [ + "lge_beaufort_stanza = stanza_parser(lge_beaufort)\n", + "show_ents(lge_beaufort_stanza)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "L'analyse prend plus de temps avec Stanza mais les résultats ont l'air un peu plus précis sur cet exemple. 
Il y a également une meilleure couverture: Henri IV et 1841 sont annotés, comme avec Perdido, jusqu'à Saint-Maxime-de-Bf.aufort qui a été identifié malgré l'erreur d'OCR, bien que mal classé." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 6. Geoparsing / Geocoding\n", + "\n", + "En complément de la tâche de reconnaissance des entités nommées, la librairie `Perdido` propose également celle de résolution des toponymes, on parle alors de *Geoparsing*. Cette tâche consiste à associer à un nom de lieu des coordonnées géographiques non ambiguës. De manière classique elle s'appuie sur le repérage des entités spatiales identifiées lors de la reconnaissance des entités nommées et fait appel à des ressources externes de type *gazetier* (ou dictionnaire topographique) pour localiser les lieux." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 6.1 Perdido Geoparser" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "* Revenons à l'article `ARQUES`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "* ARQUES, (Géog.) petite ville de France, en Normandie, au pays de Caux, sur la petite riviere d'Arques. Long. 18. 50. lat. 49. 54.\n" + ] + }, + { + "data": { + "text/html": [ + "<span class=\"tex2jax_ignore\"><div class=\"entities\" style=\"line-height: 2.5; direction: ltr\">* \n", + "<mark class=\"entity\" style=\"background: #ff9561; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n", + " ARQUES\n", + " <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">LOC</span>\n", + "</mark>\n", + " , ( Géog . 
) petite ville de \n", + "<mark class=\"entity\" style=\"background: #ff9561; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n", + " France\n", + " <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">LOC</span>\n", + "</mark>\n", + " , en \n", + "<mark class=\"entity\" style=\"background: #ff9561; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n", + " Normandie\n", + " <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">LOC</span>\n", + "</mark>\n", + " , au pays de \n", + "<mark class=\"entity\" style=\"background: #ff9561; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n", + " Caux\n", + " <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">LOC</span>\n", + "</mark>\n", + " , sur la petite riviere d' \n", + "<mark class=\"entity\" style=\"background: #ff9561; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n", + " Arques\n", + " <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">LOC</span>\n", + "</mark>\n", + " . \n", + "<mark class=\"entity\" style=\"background: #ddd; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n", + " Long . 18 . 50 . lat . 49 . 
54 .\n", + " <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">MISC</span>\n", + "</mark>\n", + " </div></span>" + ], + "text/plain": [ + "<IPython.core.display.HTML object>" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "print(arques)\n", + "displacy.render(arques_perdido.to_spacy_doc(), style=\"ent\", jupyter=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "* En plus de pouvoir afficher la liste des entités nommées comme nous l'avons fait précédemmment, nous pouvons directement afficher la carte des lieux localisés" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div style=\"width:100%;\"><div style=\"position:relative;width:100%;height:0;padding-bottom:60%;\"><iframe src=\"data:text/html;charset=utf-8;base64,PCFET0NUWVBFIGh0bWw+CjxoZWFkPiAgICAKICAgIDxtZXRhIGh0dHAtZXF1aXY9ImNvbnRlbnQtdHlwZSIgY29udGVudD0idGV4dC9odG1sOyBjaGFyc2V0PVVURi04IiAvPgogICAgCiAgICAgICAgPHNjcmlwdD4KICAgICAgICAgICAgTF9OT19UT1VDSCA9IGZhbHNlOwogICAgICAgICAgICBMX0RJU0FCTEVfM0QgPSBmYWxzZTsKICAgICAgICA8L3NjcmlwdD4KICAgIAogICAgPHN0eWxlPmh0bWwsIGJvZHkge3dpZHRoOiAxMDAlO2hlaWdodDogMTAwJTttYXJnaW46IDA7cGFkZGluZzogMDt9PC9zdHlsZT4KICAgIDxzdHlsZT4jbWFwIHtwb3NpdGlvbjphYnNvbHV0ZTt0b3A6MDtib3R0b206MDtyaWdodDowO2xlZnQ6MDt9PC9zdHlsZT4KICAgIDxzY3JpcHQgc3JjPSJodHRwczovL2Nkbi5qc2RlbGl2ci5uZXQvbnBtL2xlYWZsZXRAMS42LjAvZGlzdC9sZWFmbGV0LmpzIj48L3NjcmlwdD4KICAgIDxzY3JpcHQgc3JjPSJodHRwczovL2NvZGUuanF1ZXJ5LmNvbS9qcXVlcnktMS4xMi40Lm1pbi5qcyI+PC9zY3JpcHQ+CiAgICA8c2NyaXB0IHNyYz0iaHR0cHM6Ly9tYXhjZG4uYm9vdHN0cmFwY2RuLmNvbS9ib290c3RyYXAvMy4yLjAvanMvYm9vdHN0cmFwLm1pbi5qcyI+PC9zY3JpcHQ+CiAgICA8c2NyaXB0IHNyYz0iaHR0cHM6Ly9jZG5qcy5jbG91ZGZsYXJlLmNvbS9hamF4L2xpYnMvTGVhZmxldC5hd2Vzb21lLW1hcmtlcnMvMi4wLjIvbGVhZmxldC5hd2Vzb21lLW1hcmtlcnMuanMiPjwvc2NyaXB0PgogICAgPGxpbmsgcmVsPSJzdHlsZXNoZWV0IiBocmVmP
SJodHRwczovL2Nkbi5qc2RlbGl2ci5uZXQvbnBtL2xlYWZsZXRAMS42LjAvZGlzdC9sZWFmbGV0LmNzcyIvPgogICAgPGxpbmsgcmVsPSJzdHlsZXNoZWV0IiBocmVmPSJodHRwczovL21heGNkbi5ib290c3RyYXBjZG4uY29tL2Jvb3RzdHJhcC8zLjIuMC9jc3MvYm9vdHN0cmFwLm1pbi5jc3MiLz4KICAgIDxsaW5rIHJlbD0ic3R5bGVzaGVldCIgaHJlZj0iaHR0cHM6Ly9tYXhjZG4uYm9vdHN0cmFwY2RuLmNvbS9ib290c3RyYXAvMy4yLjAvY3NzL2Jvb3RzdHJhcC10aGVtZS5taW4uY3NzIi8+CiAgICA8bGluayByZWw9InN0eWxlc2hlZXQiIGhyZWY9Imh0dHBzOi8vbWF4Y2RuLmJvb3RzdHJhcGNkbi5jb20vZm9udC1hd2Vzb21lLzQuNi4zL2Nzcy9mb250LWF3ZXNvbWUubWluLmNzcyIvPgogICAgPGxpbmsgcmVsPSJzdHlsZXNoZWV0IiBocmVmPSJodHRwczovL2NkbmpzLmNsb3VkZmxhcmUuY29tL2FqYXgvbGlicy9MZWFmbGV0LmF3ZXNvbWUtbWFya2Vycy8yLjAuMi9sZWFmbGV0LmF3ZXNvbWUtbWFya2Vycy5jc3MiLz4KICAgIDxsaW5rIHJlbD0ic3R5bGVzaGVldCIgaHJlZj0iaHR0cHM6Ly9jZG4uanNkZWxpdnIubmV0L2doL3B5dGhvbi12aXN1YWxpemF0aW9uL2ZvbGl1bS9mb2xpdW0vdGVtcGxhdGVzL2xlYWZsZXQuYXdlc29tZS5yb3RhdGUubWluLmNzcyIvPgogICAgCiAgICAgICAgICAgIDxtZXRhIG5hbWU9InZpZXdwb3J0IiBjb250ZW50PSJ3aWR0aD1kZXZpY2Utd2lkdGgsCiAgICAgICAgICAgICAgICBpbml0aWFsLXNjYWxlPTEuMCwgbWF4aW11bS1zY2FsZT0xLjAsIHVzZXItc2NhbGFibGU9bm8iIC8+CiAgICAgICAgICAgIDxzdHlsZT4KICAgICAgICAgICAgICAgICNtYXBfYjFkMjMyNTNkMzgwNDcwNGFhNDhkMWY0YjlkODEyNjAgewogICAgICAgICAgICAgICAgICAgIHBvc2l0aW9uOiByZWxhdGl2ZTsKICAgICAgICAgICAgICAgICAgICB3aWR0aDogMTAwLjAlOwogICAgICAgICAgICAgICAgICAgIGhlaWdodDogMTAwLjAlOwogICAgICAgICAgICAgICAgICAgIGxlZnQ6IDAuMCU7CiAgICAgICAgICAgICAgICAgICAgdG9wOiAwLjAlOwogICAgICAgICAgICAgICAgfQogICAgICAgICAgICA8L3N0eWxlPgogICAgICAgIAogICAgCiAgICAgICAgICAgICAgICAgICAgPHN0eWxlPgogICAgICAgICAgICAgICAgICAgICAgICAuZm9saXVtdG9vbHRpcCB7CiAgICAgICAgICAgICAgICAgICAgICAgICAgICAKICAgICAgICAgICAgICAgICAgICAgICAgfQogICAgICAgICAgICAgICAgICAgICAgIC5mb2xpdW10b29sdGlwIHRhYmxlewogICAgICAgICAgICAgICAgICAgICAgICAgICAgbWFyZ2luOiBhdXRvOwogICAgICAgICAgICAgICAgICAgICAgICB9CiAgICAgICAgICAgICAgICAgICAgICAgIC5mb2xpdW10b29sdGlwIHRyewogICAgICAgICAgICAgICAgICAgICAgICAgICAgdGV4dC1hbGlnbjogbGVmdDsKICAgICAgICAgICAgICAgICAgICAgICAgfQogICAgICAgICAgICAgICAgICAgICAgICAuZm9saXVtdG9vb
HRpcCB0aHsKICAgICAgICAgICAgICAgICAgICAgICAgICAgIHBhZGRpbmc6IDJweDsgcGFkZGluZy1yaWdodDogOHB4OwogICAgICAgICAgICAgICAgICAgICAgICB9CiAgICAgICAgICAgICAgICAgICAgPC9zdHlsZT4KICAgICAgICAgICAgCjwvaGVhZD4KPGJvZHk+ICAgIAogICAgCiAgICAgICAgICAgIDxkaXYgY2xhc3M9ImZvbGl1bS1tYXAiIGlkPSJtYXBfYjFkMjMyNTNkMzgwNDcwNGFhNDhkMWY0YjlkODEyNjAiID48L2Rpdj4KICAgICAgICAKPC9ib2R5Pgo8c2NyaXB0PiAgICAKICAgIAogICAgICAgICAgICB2YXIgbWFwX2IxZDIzMjUzZDM4MDQ3MDRhYTQ4ZDFmNGI5ZDgxMjYwID0gTC5tYXAoCiAgICAgICAgICAgICAgICAibWFwX2IxZDIzMjUzZDM4MDQ3MDRhYTQ4ZDFmNGI5ZDgxMjYwIiwKICAgICAgICAgICAgICAgIHsKICAgICAgICAgICAgICAgICAgICBjZW50ZXI6IFswLCAwXSwKICAgICAgICAgICAgICAgICAgICBjcnM6IEwuQ1JTLkVQU0czODU3LAogICAgICAgICAgICAgICAgICAgIHpvb206IDEsCiAgICAgICAgICAgICAgICAgICAgem9vbUNvbnRyb2w6IHRydWUsCiAgICAgICAgICAgICAgICAgICAgcHJlZmVyQ2FudmFzOiBmYWxzZSwKICAgICAgICAgICAgICAgIH0KICAgICAgICAgICAgKTsKCiAgICAgICAgICAgIAoKICAgICAgICAKICAgIAogICAgICAgICAgICB2YXIgdGlsZV9sYXllcl8wMjBjOWNkODhiYTU0MDIwYWJhYmI3MWE4NWM0ZTM1MyA9IEwudGlsZUxheWVyKAogICAgICAgICAgICAgICAgImh0dHBzOi8ve3N9LnRpbGUub3BlbnN0cmVldG1hcC5vcmcve3p9L3t4fS97eX0ucG5nIiwKICAgICAgICAgICAgICAgIHsiYXR0cmlidXRpb24iOiAiRGF0YSBieSBcdTAwMjZjb3B5OyBcdTAwM2NhIGhyZWY9XCJodHRwOi8vb3BlbnN0cmVldG1hcC5vcmdcIlx1MDAzZU9wZW5TdHJlZXRNYXBcdTAwM2MvYVx1MDAzZSwgdW5kZXIgXHUwMDNjYSBocmVmPVwiaHR0cDovL3d3dy5vcGVuc3RyZWV0bWFwLm9yZy9jb3B5cmlnaHRcIlx1MDAzZU9EYkxcdTAwM2MvYVx1MDAzZS4iLCAiZGV0ZWN0UmV0aW5hIjogZmFsc2UsICJtYXhOYXRpdmVab29tIjogMTgsICJtYXhab29tIjogMTgsICJtaW5ab29tIjogMCwgIm5vV3JhcCI6IGZhbHNlLCAib3BhY2l0eSI6IDEsICJzdWJkb21haW5zIjogImFiYyIsICJ0bXMiOiBmYWxzZX0KICAgICAgICAgICAgKS5hZGRUbyhtYXBfYjFkMjMyNTNkMzgwNDcwNGFhNDhkMWY0YjlkODEyNjApOwogICAgICAgIAogICAgCiAgICAgICAgICAgIG1hcF9iMWQyMzI1M2QzODA0NzA0YWE0OGQxZjRiOWQ4MTI2MC5maXRCb3VuZHMoCiAgICAgICAgICAgICAgICBbWzQzLjUwOTY1NCwgMC4zMTM4NTNdLCBbNTAuNzM5NjY0LCAzLjM2MzU3Nl1dLAogICAgICAgICAgICAgICAge30KICAgICAgICAgICAgKTsKICAgICAgICAKICAgIAoKICAgICAgICBmdW5jdGlvbiBnZW9fanNvbl8zYzgwZThhMGZkYTY0ZTkxOGM2OTI0OTBiZDU5ZDE2NF9vbkVhY2hGZWF0dXJlKGZlYXR1cmUsIGxhe
WVyKSB7CiAgICAgICAgICAgIGxheWVyLm9uKHsKICAgICAgICAgICAgfSk7CiAgICAgICAgfTsKICAgICAgICB2YXIgZ2VvX2pzb25fM2M4MGU4YTBmZGE2NGU5MThjNjkyNDkwYmQ1OWQxNjQgPSBMLmdlb0pzb24obnVsbCwgewogICAgICAgICAgICAgICAgb25FYWNoRmVhdHVyZTogZ2VvX2pzb25fM2M4MGU4YTBmZGE2NGU5MThjNjkyNDkwYmQ1OWQxNjRfb25FYWNoRmVhdHVyZSwKICAgICAgICAgICAgCiAgICAgICAgfSk7CgogICAgICAgIGZ1bmN0aW9uIGdlb19qc29uXzNjODBlOGEwZmRhNjRlOTE4YzY5MjQ5MGJkNTlkMTY0X2FkZCAoZGF0YSkgewogICAgICAgICAgICBnZW9fanNvbl8zYzgwZThhMGZkYTY0ZTkxOGM2OTI0OTBiZDU5ZDE2NAogICAgICAgICAgICAgICAgLmFkZERhdGEoZGF0YSkKICAgICAgICAgICAgICAgIC5hZGRUbyhtYXBfYjFkMjMyNTNkMzgwNDcwNGFhNDhkMWY0YjlkODEyNjApOwogICAgICAgIH0KICAgICAgICAgICAgZ2VvX2pzb25fM2M4MGU4YTBmZGE2NGU5MThjNjkyNDkwYmQ1OWQxNjRfYWRkKHsiZmVhdHVyZXMiOiBbeyJnZW9tZXRyeSI6IHsiY29vcmRpbmF0ZXMiOiBbMi4zMDYyMDcsIDUwLjczOTY2NF0sICJ0eXBlIjogIlBvaW50In0sICJwcm9wZXJ0aWVzIjogeyJjb3VudHJ5IjogIkZyYW5jZSIsICJpZCI6ICJlbi4zIiwgIm5hbWUiOiAiQVJRVUVTIiwgInNvdXJjZSI6ICJub21pbmF0aW0iLCAic291cmNlTmFtZSI6ICJBcnF1ZXMsIFNhaW50LU9tZXIsIFBhcy1kZS1DYWxhaXMsIEhhdXRzLWRlLUZyYW5jZSwgRnJhbmNlIG1cdTAwZTl0cm9wb2xpdGFpbmUsIDYyNTEwLCBGcmFuY2UiLCAidHlwZSI6ICJhZG1pbmlzdHJhdGl2ZSJ9LCAidHlwZSI6ICJGZWF0dXJlIn0sIHsiZ2VvbWV0cnkiOiB7ImNvb3JkaW5hdGVzIjogWzEuODg4MzM0LCA0Ni42MDMzNTRdLCAidHlwZSI6ICJQb2ludCJ9LCAicHJvcGVydGllcyI6IHsiY291bnRyeSI6ICJGcmFuY2UiLCAiaWQiOiAiZW4uNyIsICJuYW1lIjogIkZyYW5jZSIsICJzb3VyY2UiOiAibm9taW5hdGltIiwgInNvdXJjZU5hbWUiOiAiRnJhbmNlIiwgInR5cGUiOiAiYWRtaW5pc3RyYXRpdmUifSwgInR5cGUiOiAiRmVhdHVyZSJ9LCB7Imdlb21ldHJ5IjogeyJjb29yZGluYXRlcyI6IFswLjMxMzg1MywgNDkuMDY3NzcxXSwgInR5cGUiOiAiUG9pbnQifSwgInByb3BlcnRpZXMiOiB7ImNvdW50cnkiOiAiRnJhbmNlIiwgImlkIjogImVuLjkiLCAibmFtZSI6ICJOb3JtYW5kaWUiLCAic291cmNlIjogIm5vbWluYXRpbSIsICJzb3VyY2VOYW1lIjogIk5vcm1hbmRpZSwgRnJhbmNlIG1cdTAwZTl0cm9wb2xpdGFpbmUsIEZyYW5jZSIsICJ0eXBlIjogImFkbWluaXN0cmF0aXZlIn0sICJ0eXBlIjogIkZlYXR1cmUifSwgeyJnZW9tZXRyeSI6IHsiY29vcmRpbmF0ZXMiOiBbMy4zNjM1NzYsIDQzLjUwOTY1NF0sICJ0eXBlIjogIlBvaW50In0sICJwcm9wZXJ0aWVzIjogeyJjb3VudHJ5IjogIkZyYW5jZSIsICJpZCI6ICJlbi4xMyIsICJuYW1lIjogI
kNhdXgiLCAic291cmNlIjogIm5vbWluYXRpbSIsICJzb3VyY2VOYW1lIjogIkNhdXgsIEJcdTAwZTl6aWVycywgSFx1MDBlOXJhdWx0LCBPY2NpdGFuaWUsIEZyYW5jZSBtXHUwMGU5dHJvcG9saXRhaW5lLCAzNDcyMCwgRnJhbmNlIiwgInR5cGUiOiAiYWRtaW5pc3RyYXRpdmUifSwgInR5cGUiOiAiRmVhdHVyZSJ9LCB7Imdlb21ldHJ5IjogeyJjb29yZGluYXRlcyI6IFsyLjMwNjIwNywgNTAuNzM5NjY0XSwgInR5cGUiOiAiUG9pbnQifSwgInByb3BlcnRpZXMiOiB7ImNvdW50cnkiOiAiRnJhbmNlIiwgImlkIjogImVuLjE3IiwgIm5hbWUiOiAiQXJxdWVzIiwgInNvdXJjZSI6ICJub21pbmF0aW0iLCAic291cmNlTmFtZSI6ICJBcnF1ZXMsIFNhaW50LU9tZXIsIFBhcy1kZS1DYWxhaXMsIEhhdXRzLWRlLUZyYW5jZSwgRnJhbmNlIG1cdTAwZTl0cm9wb2xpdGFpbmUsIDYyNTEwLCBGcmFuY2UiLCAidHlwZSI6ICJhZG1pbmlzdHJhdGl2ZSJ9LCAidHlwZSI6ICJGZWF0dXJlIn1dLCAidHlwZSI6ICJGZWF0dXJlQ29sbGVjdGlvbiJ9KTsKCiAgICAgICAgCiAgICAKICAgIGdlb19qc29uXzNjODBlOGEwZmRhNjRlOTE4YzY5MjQ5MGJkNTlkMTY0LmJpbmRUb29sdGlwKAogICAgZnVuY3Rpb24obGF5ZXIpewogICAgbGV0IGRpdiA9IEwuRG9tVXRpbC5jcmVhdGUoJ2RpdicpOwogICAgCiAgICBsZXQgaGFuZGxlT2JqZWN0ID0gZmVhdHVyZT0+dHlwZW9mKGZlYXR1cmUpPT0nb2JqZWN0JyA/IEpTT04uc3RyaW5naWZ5KGZlYXR1cmUpIDogZmVhdHVyZTsKICAgIGxldCBmaWVsZHMgPSBbIm5hbWUiLCAic291cmNlIl07CiAgICBsZXQgYWxpYXNlcyA9IFsibmFtZSIsICJzb3VyY2UiXTsKICAgIGxldCB0YWJsZSA9ICc8dGFibGU+JyArCiAgICAgICAgU3RyaW5nKAogICAgICAgIGZpZWxkcy5tYXAoCiAgICAgICAgKHYsaSk9PgogICAgICAgIGA8dHI+CiAgICAgICAgICAgIDx0aD4ke2FsaWFzZXNbaV0udG9Mb2NhbGVTdHJpbmcoKX08L3RoPgogICAgICAgICAgICAKICAgICAgICAgICAgPHRkPiR7aGFuZGxlT2JqZWN0KGxheWVyLmZlYXR1cmUucHJvcGVydGllc1t2XSkudG9Mb2NhbGVTdHJpbmcoKX08L3RkPgogICAgICAgIDwvdHI+YCkuam9pbignJykpCiAgICArJzwvdGFibGU+JzsKICAgIGRpdi5pbm5lckhUTUw9dGFibGU7CiAgICAKICAgIHJldHVybiBkaXYKICAgIH0KICAgICx7ImNsYXNzTmFtZSI6ICJmb2xpdW10b29sdGlwIiwgInN0aWNreSI6IHRydWV9KTsKICAgICAgICAgICAgICAgICAgICAgCjwvc2NyaXB0Pg==\" style=\"position:absolute;width:100%;height:100%;left:0;top:0;border:none !important;\" allowfullscreen webkitallowfullscreen mozallowfullscreen></iframe></div></div>" + ], + "text/plain": [ + "<folium.folium.Map at 0x7f2cfcda8bb0>" + ] + }, + "execution_count": null, "metadata": {}, "output_type": 
"execute_result" } ], "source": [ "# afficher la carte des lieux localisés\n", - "doc.get_folium_map()" + "arques_perdido.get_folium_map()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<span class=\"tex2jax_ignore\"><div class=\"entities\" style=\"line-height: 2.5; direction: ltr\">* \n", + "<mark class=\"entity\" style=\"background: #ff9561; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n", + " ARQUES\n", + " <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">LOC</span>\n", + "</mark>\n", + " , ( Géog . ) petite ville de \n", + "<mark class=\"entity\" style=\"background: #ff9561; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n", + " France\n", + " <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">LOC</span>\n", + "</mark>\n", + " , en \n", + "<mark class=\"entity\" style=\"background: #ff9561; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n", + " Normandie\n", + " <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">LOC</span>\n", + "</mark>\n", + " , au pays de \n", + "<mark class=\"entity\" style=\"background: #ff9561; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n", + " Caux\n", + " <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">LOC</span>\n", + "</mark>\n", + " , sur la petite riviere d' \n", + "<mark class=\"entity\" style=\"background: #ff9561; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n", + " Arques\n", + " <span style=\"font-size: 0.8em; 
font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">LOC</span>\n", + "</mark>\n", + " . \n", + "<mark class=\"entity\" style=\"background: #ddd; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n", + " Long . 18 . 50 . lat . 49 . 54 .\n", + " <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">MISC</span>\n", + "</mark>\n", + " </div></span>" + ], + "text/plain": [ + "<IPython.core.display.HTML object>" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "displacy.render(arques_perdido.to_spacy_doc(), style=\"ent\", jupyter=True)" ] }, { diff --git a/data/beaufort.txt b/data/beaufort.txt new file mode 100644 index 0000000000000000000000000000000000000000..ea15483f6fef8a1e3cd0ad027c490f3993c68c01 --- /dev/null +++ b/data/beaufort.txt @@ -0,0 +1,12 @@ +BEAUFORT ou Beaufort-sur-Doron ou Saint-Maxime- +de-Bf.aufort. Ch.-l. de cant. du dép. de la Savoie, arr. +d’Albertville, au débouché de trois vallées dont les tor¬ +rents forment le Doron de Beaufort ; 2,393 hab. Les +superbes pâturages de la vallée nourrissent de nombreux +troupeaux ; il se fait à Beaufort un commerce important de +fromages et de bestiaux. Un assez grand nombre d’habi¬ +tants quittent le pays pendant l’hiver. De l’ancien château +de la Salle qu’Henri IV habita à deux reprises pendant la +guerre qu’il soutint contre le duc de Savoie, il subsiste +trois tours. Ancienne chapelle, reconstruite en 1841, +qui est le but d’un pèlerinage très fréquenté.