From da3edf34fcc99b9f7c1e58a9416c399d0473c601 Mon Sep 17 00:00:00 2001 From: Ludovic Moncla <moncla.ludovic@gmail.com> Date: Fri, 9 Sep 2022 19:09:20 +0200 Subject: [PATCH] Update Tutoriel-geoparsing.ipynb --- Tutoriel-geoparsing.ipynb | 124 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 120 insertions(+), 4 deletions(-) diff --git a/Tutoriel-geoparsing.ipynb b/Tutoriel-geoparsing.ipynb index 84bf715..81392cd 100644 --- a/Tutoriel-geoparsing.ipynb +++ b/Tutoriel-geoparsing.ipynb @@ -184,19 +184,51 @@ "\n" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "* Importer la librairie `Stanza` et télécharger le modèle pré-entrainé pour le français : " + ] + }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "import stanza\n", + "\n", + "stanza.download('fr')" + ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "stanza_parser = stanza.Pipeline(lang='fr', processors='tokenize,ner')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "doc = stanza_parser(content)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for ent in doc.ents:\n", + " print(ent.text, ent.type)" + ] }, { "cell_type": "markdown", @@ -205,19 +237,103 @@ "### 5.2 SpaCy NER" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "* Installer le modèle français pré-entrainé de `spaCy` :" + ] + }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "!python -m spacy download fr_core_news_sm" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "* Importer la librairie `spaCy` :" + ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "import spacy\n", + "from spacy import displacy" + ] + }, + { + "cell_type": 
"markdown", + "metadata": {}, + "source": [ + "* Charger le modèle français pré-entrainé de `spaCy` :" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "spacy_parser = spacy.load('fr_core_news_sm')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "* Exécuter la reconnaissance d'entités nommées :" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "doc = spacy_parser(content)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "* Afficher la liste des entités nommées repérées :" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for ent in doc.ents:\n", + " print(ent.text, ent.label_)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "* Afficher de manière graphique les entités nommées avec `displaCy` :" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "displacy.render(doc, style=\"ent\", jupyter=True) " + ] }, { "cell_type": "markdown", -- GitLab