diff --git a/Tutoriel-geoparsing.ipynb b/Tutoriel-geoparsing.ipynb index 84bf715ac4226015e0c768b60a89ee4fa64a946e..81392cdc69e47ec5edf1cdaf9c05af2a8dc930e9 100644 --- a/Tutoriel-geoparsing.ipynb +++ b/Tutoriel-geoparsing.ipynb @@ -184,19 +184,51 @@ "\n" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "* Importer la librairie `Stanza` et télécharger le modèle pré-entrainé pour le français : " + ] + }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "import stanza\n", + "\n", + "stanza.download('fr')" + ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "stanza_parser = stanza.Pipeline(lang='fr', processors='tokenize,ner')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "doc = stanza_parser(content)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for ent in doc.ents:\n", + " print(ent.text, ent.type)" + ] }, { "cell_type": "markdown", @@ -205,19 +237,103 @@ "### 5.2 SpaCy NER" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "* Installer le modèle français pré-entrainé de `spaCy` :" + ] + }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "!python -m spacy download fr_core_news_sm" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "* Importer la librairie `spaCy` et son visualiseur `displaCy` :" + ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "import spacy\n", + "from spacy import displacy" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "* Charger le modèle français pré-entrainé de `spaCy` :" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "spacy_parser = spacy.load('fr_core_news_sm')" + ] 
+ }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "* Exécuter la reconnaissance d'entités nommées :" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "doc = spacy_parser(content)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "* Afficher la liste des entités nommées repérées :" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for ent in doc.ents:\n", + " print(ent.text, ent.label_)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "* Afficher de manière graphique les entités nommées avec `displaCy` :" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "displacy.render(doc, style=\"ent\", jupyter=True) " + ] }, { "cell_type": "markdown",