From e6ada53c866daab8b385f8fdd19c6396930d4d3d Mon Sep 17 00:00:00 2001
From: Ludovic Moncla <moncla.ludovic@gmail.com>
Date: Tue, 14 Mar 2023 08:10:31 +0100
Subject: [PATCH] Update Predict.ipynb

---
 notebooks/Predict.ipynb | 316 +++++++++++++++++++++++++++++++++++++++-
 1 file changed, 315 insertions(+), 1 deletion(-)

diff --git a/notebooks/Predict.ipynb b/notebooks/Predict.ipynb
index 0da102c..57eced6 100644
--- a/notebooks/Predict.ipynb
+++ b/notebooks/Predict.ipynb
@@ -1070,10 +1070,324 @@
     },
     {
       "cell_type": "code",
-      "execution_count": null,
+      "execution_count": 22,
       "metadata": {
         "id": "7TD1mbKj_fXH"
       },
+      "outputs": [
+        {
+          "data": {
+            "text/html": [
+              "<div>\n",
+              "<style scoped>\n",
+              "    .dataframe tbody tr th:only-of-type {\n",
+              "        vertical-align: middle;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe tbody tr th {\n",
+              "        vertical-align: top;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe thead th {\n",
+              "        text-align: right;\n",
+              "    }\n",
+              "</style>\n",
+              "<table border=\"1\" class=\"dataframe\">\n",
+              "  <thead>\n",
+              "    <tr style=\"text-align: right;\">\n",
+              "      <th></th>\n",
+              "      <th>volume</th>\n",
+              "      <th>numero</th>\n",
+              "      <th>head</th>\n",
+              "      <th>author</th>\n",
+              "      <th>edda_class</th>\n",
+              "      <th>enccre_id</th>\n",
+              "      <th>enccre_class</th>\n",
+              "      <th>content</th>\n",
+              "      <th>content_without_designant</th>\n",
+              "      <th>first_paragraph</th>\n",
+              "      <th>nb_words</th>\n",
+              "      <th>super_domain</th>\n",
+              "      <th>superdomainBert</th>\n",
+              "    </tr>\n",
+              "  </thead>\n",
+              "  <tbody>\n",
+              "    <tr>\n",
+              "      <th>24</th>\n",
+              "      <td>1</td>\n",
+              "      <td>26</td>\n",
+              "      <td>A</td>\n",
+              "      <td>Diderot</td>\n",
+              "      <td>unclassified</td>\n",
+              "      <td>v1-9-0</td>\n",
+              "      <td>Géographie</td>\n",
+              "      <td>\\n* A, s. petite riviere de France, qui a sa s...</td>\n",
+              "      <td>\\n* A, s. petite riviere de France, qui a sa s...</td>\n",
+              "      <td>\\n* A, s. petite riviere de France, qui a sa s...</td>\n",
+              "      <td>15</td>\n",
+              "      <td>Unclassified</td>\n",
+              "      <td>Géographie</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>25</th>\n",
+              "      <td>1</td>\n",
+              "      <td>27</td>\n",
+              "      <td>AA</td>\n",
+              "      <td>Diderot</td>\n",
+              "      <td>unclassified</td>\n",
+              "      <td>v1-10-0</td>\n",
+              "      <td>Géographie</td>\n",
+              "      <td>\\n* AA, s. f. riviere de France, qui prend sa ...</td>\n",
+              "      <td>\\n* AA, s. f. riviere de France, qui prend sa ...</td>\n",
+              "      <td>\\n* AA, s. f. riviere de France, qui prend sa ...</td>\n",
+              "      <td>46</td>\n",
+              "      <td>Unclassified</td>\n",
+              "      <td>Géographie</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>27</th>\n",
+              "      <td>1</td>\n",
+              "      <td>29</td>\n",
+              "      <td>AACH ou ACH</td>\n",
+              "      <td>Diderot</td>\n",
+              "      <td>unclassified</td>\n",
+              "      <td>v1-12-0</td>\n",
+              "      <td>Géographie</td>\n",
+              "      <td>\\n* AACH ou ACH, s. f. petite ville d'Allemagn...</td>\n",
+              "      <td>\\n* AACH ou ACH, s. f. petite ville d'Allemagn...</td>\n",
+              "      <td>\\n* AACH ou ACH, s. f. petite ville d'Allemagn...</td>\n",
+              "      <td>24</td>\n",
+              "      <td>Unclassified</td>\n",
+              "      <td>Géographie</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>28</th>\n",
+              "      <td>1</td>\n",
+              "      <td>30</td>\n",
+              "      <td>AAHUS</td>\n",
+              "      <td>Diderot</td>\n",
+              "      <td>unclassified</td>\n",
+              "      <td>v1-13-0</td>\n",
+              "      <td>Géographie</td>\n",
+              "      <td>\\n* AAHUS, s. petite ville d'Allemagne dans le...</td>\n",
+              "      <td>\\n* AAHUS, s. petite ville d'Allemagne dans le...</td>\n",
+              "      <td>\\n* AAHUS, s. petite ville d'Allemagne dans le...</td>\n",
+              "      <td>21</td>\n",
+              "      <td>Unclassified</td>\n",
+              "      <td>Géographie</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>30</th>\n",
+              "      <td>1</td>\n",
+              "      <td>32</td>\n",
+              "      <td>AAR</td>\n",
+              "      <td>Diderot</td>\n",
+              "      <td>unclassified</td>\n",
+              "      <td>v1-15-0</td>\n",
+              "      <td>Géographie</td>\n",
+              "      <td>\\n* AAR, s. grande riviere qui a sa source pro...</td>\n",
+              "      <td>\\n* AAR, s. grande riviere qui a sa source pro...</td>\n",
+              "      <td>\\n* AAR, s. grande riviere qui a sa source pro...</td>\n",
+              "      <td>30</td>\n",
+              "      <td>Unclassified</td>\n",
+              "      <td>Géographie</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>...</th>\n",
+              "      <td>...</td>\n",
+              "      <td>...</td>\n",
+              "      <td>...</td>\n",
+              "      <td>...</td>\n",
+              "      <td>...</td>\n",
+              "      <td>...</td>\n",
+              "      <td>...</td>\n",
+              "      <td>...</td>\n",
+              "      <td>...</td>\n",
+              "      <td>...</td>\n",
+              "      <td>...</td>\n",
+              "      <td>...</td>\n",
+              "      <td>...</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>74051</th>\n",
+              "      <td>17</td>\n",
+              "      <td>3070</td>\n",
+              "      <td>ZYGRIS</td>\n",
+              "      <td>Jaucourt</td>\n",
+              "      <td>Géographie ancienne</td>\n",
+              "      <td>v17-2068-0</td>\n",
+              "      <td>Géographie</td>\n",
+              "      <td>\\nZYGRIS, (Géog. anc.) ville du nôme de Lybie\\...</td>\n",
+              "      <td>\\nZYGRIS,  ville du nôme de Lybie\\nsur la côte...</td>\n",
+              "      <td>\\nZYGRIS,  ville du nôme de Lybie\\nsur la côte...</td>\n",
+              "      <td>38</td>\n",
+              "      <td>Géographie</td>\n",
+              "      <td>Géographie</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>74054</th>\n",
+              "      <td>17</td>\n",
+              "      <td>3073</td>\n",
+              "      <td>ZYRAS</td>\n",
+              "      <td>Jaucourt</td>\n",
+              "      <td>Géographie ancienne</td>\n",
+              "      <td>v17-2071-0</td>\n",
+              "      <td>Géographie</td>\n",
+              "      <td>\\nZYRAS, (Géog. anc.) fleuve de Thrace. Pline,...</td>\n",
+              "      <td>\\nZYRAS,  fleuve de Thrace. Pline,\\nliv. IV. c...</td>\n",
+              "      <td>\\nZYRAS,  fleuve de Thrace. Pline,\\nliv. IV. c...</td>\n",
+              "      <td>28</td>\n",
+              "      <td>Géographie</td>\n",
+              "      <td>Géographie</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>74055</th>\n",
+              "      <td>17</td>\n",
+              "      <td>3074</td>\n",
+              "      <td>ZZUÉNÉ ou ZZEUENE</td>\n",
+              "      <td>Jaucourt</td>\n",
+              "      <td>Géographie ancienne</td>\n",
+              "      <td>v17-2072-0</td>\n",
+              "      <td>Géographie</td>\n",
+              "      <td>\\nZZUÉNÉ ou ZZEUENE, (Géog. anc.) ville située...</td>\n",
+              "      <td>\\nZZUÉNÉ ou ZZEUENE,  ville située\\nsur la riv...</td>\n",
+              "      <td>\\nZZUÉNÉ ou ZZEUENE,  ville située\\nsur la riv...</td>\n",
+              "      <td>149</td>\n",
+              "      <td>Géographie</td>\n",
+              "      <td>Géographie</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>74080</th>\n",
+              "      <td>17</td>\n",
+              "      <td>3099</td>\n",
+              "      <td>CABOTAGE</td>\n",
+              "      <td>Jaucourt</td>\n",
+              "      <td>Navigation</td>\n",
+              "      <td>v17-2097-0</td>\n",
+              "      <td>Marine</td>\n",
+              "      <td>\\nCABOTAGE, s. m. (Navigation.) le cabotage es...</td>\n",
+              "      <td>\\nCABOTAGE, s. m.  le cabotage est\\nune naviga...</td>\n",
+              "      <td>\\nCABOTAGE, s. m.  le cabotage est\\nune naviga...</td>\n",
+              "      <td>192</td>\n",
+              "      <td>Géographie</td>\n",
+              "      <td>Géographie</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>74165</th>\n",
+              "      <td>17</td>\n",
+              "      <td>3184</td>\n",
+              "      <td>GUAYAQUIL</td>\n",
+              "      <td>La Condamine</td>\n",
+              "      <td>Géographie</td>\n",
+              "      <td>v17-2177-0</td>\n",
+              "      <td>Géographie</td>\n",
+              "      <td>\\nGUAYAQUIL, (Géograph.) nom d'une ville &amp;\\nd'...</td>\n",
+              "      <td>\\nGUAYAQUIL,  nom d'une ville &amp;\\nd'une grande ...</td>\n",
+              "      <td>\\nGUAYAQUIL,  nom d'une ville &amp;\\nd'une grande ...</td>\n",
+              "      <td>446</td>\n",
+              "      <td>Géographie</td>\n",
+              "      <td>Géographie</td>\n",
+              "    </tr>\n",
+              "  </tbody>\n",
+              "</table>\n",
+              "<p>15383 rows × 13 columns</p>\n",
+              "</div>"
+            ],
+            "text/plain": [
+              "       volume  numero               head        author           edda_class  \\\n",
+              "24          1      26                  A       Diderot         unclassified   \n",
+              "25          1      27                 AA       Diderot         unclassified   \n",
+              "27          1      29        AACH ou ACH       Diderot         unclassified   \n",
+              "28          1      30              AAHUS       Diderot         unclassified   \n",
+              "30          1      32                AAR       Diderot         unclassified   \n",
+              "...       ...     ...                ...           ...                  ...   \n",
+              "74051      17    3070             ZYGRIS      Jaucourt  Géographie ancienne   \n",
+              "74054      17    3073              ZYRAS      Jaucourt  Géographie ancienne   \n",
+              "74055      17    3074  ZZUÉNÉ ou ZZEUENE      Jaucourt  Géographie ancienne   \n",
+              "74080      17    3099           CABOTAGE      Jaucourt           Navigation   \n",
+              "74165      17    3184          GUAYAQUIL  La Condamine           Géographie   \n",
+              "\n",
+              "        enccre_id enccre_class  \\\n",
+              "24         v1-9-0   Géographie   \n",
+              "25        v1-10-0   Géographie   \n",
+              "27        v1-12-0   Géographie   \n",
+              "28        v1-13-0   Géographie   \n",
+              "30        v1-15-0   Géographie   \n",
+              "...           ...          ...   \n",
+              "74051  v17-2068-0   Géographie   \n",
+              "74054  v17-2071-0   Géographie   \n",
+              "74055  v17-2072-0   Géographie   \n",
+              "74080  v17-2097-0       Marine   \n",
+              "74165  v17-2177-0   Géographie   \n",
+              "\n",
+              "                                                 content  \\\n",
+              "24     \\n* A, s. petite riviere de France, qui a sa s...   \n",
+              "25     \\n* AA, s. f. riviere de France, qui prend sa ...   \n",
+              "27     \\n* AACH ou ACH, s. f. petite ville d'Allemagn...   \n",
+              "28     \\n* AAHUS, s. petite ville d'Allemagne dans le...   \n",
+              "30     \\n* AAR, s. grande riviere qui a sa source pro...   \n",
+              "...                                                  ...   \n",
+              "74051  \\nZYGRIS, (Géog. anc.) ville du nôme de Lybie\\...   \n",
+              "74054  \\nZYRAS, (Géog. anc.) fleuve de Thrace. Pline,...   \n",
+              "74055  \\nZZUÉNÉ ou ZZEUENE, (Géog. anc.) ville située...   \n",
+              "74080  \\nCABOTAGE, s. m. (Navigation.) le cabotage es...   \n",
+              "74165  \\nGUAYAQUIL, (Géograph.) nom d'une ville &\\nd'...   \n",
+              "\n",
+              "                               content_without_designant  \\\n",
+              "24     \\n* A, s. petite riviere de France, qui a sa s...   \n",
+              "25     \\n* AA, s. f. riviere de France, qui prend sa ...   \n",
+              "27     \\n* AACH ou ACH, s. f. petite ville d'Allemagn...   \n",
+              "28     \\n* AAHUS, s. petite ville d'Allemagne dans le...   \n",
+              "30     \\n* AAR, s. grande riviere qui a sa source pro...   \n",
+              "...                                                  ...   \n",
+              "74051  \\nZYGRIS,  ville du nôme de Lybie\\nsur la côte...   \n",
+              "74054  \\nZYRAS,  fleuve de Thrace. Pline,\\nliv. IV. c...   \n",
+              "74055  \\nZZUÉNÉ ou ZZEUENE,  ville située\\nsur la riv...   \n",
+              "74080  \\nCABOTAGE, s. m.  le cabotage est\\nune naviga...   \n",
+              "74165  \\nGUAYAQUIL,  nom d'une ville &\\nd'une grande ...   \n",
+              "\n",
+              "                                         first_paragraph  nb_words  \\\n",
+              "24     \\n* A, s. petite riviere de France, qui a sa s...        15   \n",
+              "25     \\n* AA, s. f. riviere de France, qui prend sa ...        46   \n",
+              "27     \\n* AACH ou ACH, s. f. petite ville d'Allemagn...        24   \n",
+              "28     \\n* AAHUS, s. petite ville d'Allemagne dans le...        21   \n",
+              "30     \\n* AAR, s. grande riviere qui a sa source pro...        30   \n",
+              "...                                                  ...       ...   \n",
+              "74051  \\nZYGRIS,  ville du nôme de Lybie\\nsur la côte...        38   \n",
+              "74054  \\nZYRAS,  fleuve de Thrace. Pline,\\nliv. IV. c...        28   \n",
+              "74055  \\nZZUÉNÉ ou ZZEUENE,  ville située\\nsur la riv...       149   \n",
+              "74080  \\nCABOTAGE, s. m.  le cabotage est\\nune naviga...       192   \n",
+              "74165  \\nGUAYAQUIL,  nom d'une ville &\\nd'une grande ...       446   \n",
+              "\n",
+              "       super_domain superdomainBert  \n",
+              "24     Unclassified      Géographie  \n",
+              "25     Unclassified      Géographie  \n",
+              "27     Unclassified      Géographie  \n",
+              "28     Unclassified      Géographie  \n",
+              "30     Unclassified      Géographie  \n",
+              "...             ...             ...  \n",
+              "74051    Géographie      Géographie  \n",
+              "74054    Géographie      Géographie  \n",
+              "74055    Géographie      Géographie  \n",
+              "74080    Géographie      Géographie  \n",
+              "74165    Géographie      Géographie  \n",
+              "\n",
+              "[15383 rows x 13 columns]"
+            ]
+          },
+          "execution_count": 22,
+          "metadata": {},
+          "output_type": "execute_result"
+        }
+      ],
+      "source": [
+        "df.loc[(df['superdomainBert'] == 'Géographie')]"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
       "outputs": [],
       "source": []
     }
-- 
GitLab