From e6ada53c866daab8b385f8fdd19c6396930d4d3d Mon Sep 17 00:00:00 2001 From: Ludovic Moncla <moncla.ludovic@gmail.com> Date: Tue, 14 Mar 2023 08:10:31 +0100 Subject: [PATCH] Update Predict.ipynb --- notebooks/Predict.ipynb | 316 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 315 insertions(+), 1 deletion(-) diff --git a/notebooks/Predict.ipynb b/notebooks/Predict.ipynb index 0da102c..57eced6 100644 --- a/notebooks/Predict.ipynb +++ b/notebooks/Predict.ipynb @@ -1070,10 +1070,324 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 22, "metadata": { "id": "7TD1mbKj_fXH" }, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>volume</th>\n", + " <th>numero</th>\n", + " <th>head</th>\n", + " <th>author</th>\n", + " <th>edda_class</th>\n", + " <th>enccre_id</th>\n", + " <th>enccre_class</th>\n", + " <th>content</th>\n", + " <th>content_without_designant</th>\n", + " <th>first_paragraph</th>\n", + " <th>nb_words</th>\n", + " <th>super_domain</th>\n", + " <th>superdomainBert</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>24</th>\n", + " <td>1</td>\n", + " <td>26</td>\n", + " <td>A</td>\n", + " <td>Diderot</td>\n", + " <td>unclassified</td>\n", + " <td>v1-9-0</td>\n", + " <td>Géographie</td>\n", + " <td>\\n* A, s. petite riviere de France, qui a sa s...</td>\n", + " <td>\\n* A, s. petite riviere de France, qui a sa s...</td>\n", + " <td>\\n* A, s. petite riviere de France, qui a sa s...</td>\n", + " <td>15</td>\n", + " <td>Unclassified</td>\n", + " <td>Géographie</td>\n", + " </tr>\n", + " <tr>\n", + " <th>25</th>\n", + " <td>1</td>\n", + " <td>27</td>\n", + " <td>AA</td>\n", + " <td>Diderot</td>\n", + " <td>unclassified</td>\n", + " <td>v1-10-0</td>\n", + " <td>Géographie</td>\n", + " <td>\\n* AA, s. f. riviere de France, qui prend sa ...</td>\n", + " <td>\\n* AA, s. f. riviere de France, qui prend sa ...</td>\n", + " <td>\\n* AA, s. f. riviere de France, qui prend sa ...</td>\n", + " <td>46</td>\n", + " <td>Unclassified</td>\n", + " <td>Géographie</td>\n", + " </tr>\n", + " <tr>\n", + " <th>27</th>\n", + " <td>1</td>\n", + " <td>29</td>\n", + " <td>AACH ou ACH</td>\n", + " <td>Diderot</td>\n", + " <td>unclassified</td>\n", + " <td>v1-12-0</td>\n", + " <td>Géographie</td>\n", + " <td>\\n* AACH ou ACH, s. f. petite ville d'Allemagn...</td>\n", + " <td>\\n* AACH ou ACH, s. f. petite ville d'Allemagn...</td>\n", + " <td>\\n* AACH ou ACH, s. f. petite ville d'Allemagn...</td>\n", + " <td>24</td>\n", + " <td>Unclassified</td>\n", + " <td>Géographie</td>\n", + " </tr>\n", + " <tr>\n", + " <th>28</th>\n", + " <td>1</td>\n", + " <td>30</td>\n", + " <td>AAHUS</td>\n", + " <td>Diderot</td>\n", + " <td>unclassified</td>\n", + " <td>v1-13-0</td>\n", + " <td>Géographie</td>\n", + " <td>\\n* AAHUS, s. petite ville d'Allemagne dans le...</td>\n", + " <td>\\n* AAHUS, s. petite ville d'Allemagne dans le...</td>\n", + " <td>\\n* AAHUS, s. petite ville d'Allemagne dans le...</td>\n", + " <td>21</td>\n", + " <td>Unclassified</td>\n", + " <td>Géographie</td>\n", + " </tr>\n", + " <tr>\n", + " <th>30</th>\n", + " <td>1</td>\n", + " <td>32</td>\n", + " <td>AAR</td>\n", + " <td>Diderot</td>\n", + " <td>unclassified</td>\n", + " <td>v1-15-0</td>\n", + " <td>Géographie</td>\n", + " <td>\\n* AAR, s. grande riviere qui a sa source pro...</td>\n", + " <td>\\n* AAR, s. grande riviere qui a sa source pro...</td>\n", + " <td>\\n* AAR, s. grande riviere qui a sa source pro...</td>\n", + " <td>30</td>\n", + " <td>Unclassified</td>\n", + " <td>Géographie</td>\n", + " </tr>\n", + " <tr>\n", + " <th>...</th>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>74051</th>\n", + " <td>17</td>\n", + " <td>3070</td>\n", + " <td>ZYGRIS</td>\n", + " <td>Jaucourt</td>\n", + " <td>Géographie ancienne</td>\n", + " <td>v17-2068-0</td>\n", + " <td>Géographie</td>\n", + " <td>\\nZYGRIS, (Géog. anc.) ville du nôme de Lybie\\...</td>\n", + " <td>\\nZYGRIS, ville du nôme de Lybie\\nsur la côte...</td>\n", + " <td>\\nZYGRIS, ville du nôme de Lybie\\nsur la côte...</td>\n", + " <td>38</td>\n", + " <td>Géographie</td>\n", + " <td>Géographie</td>\n", + " </tr>\n", + " <tr>\n", + " <th>74054</th>\n", + " <td>17</td>\n", + " <td>3073</td>\n", + " <td>ZYRAS</td>\n", + " <td>Jaucourt</td>\n", + " <td>Géographie ancienne</td>\n", + " <td>v17-2071-0</td>\n", + " <td>Géographie</td>\n", + " <td>\\nZYRAS, (Géog. anc.) fleuve de Thrace. Pline,...</td>\n", + " <td>\\nZYRAS, fleuve de Thrace. Pline,\\nliv. IV. c...</td>\n", + " <td>\\nZYRAS, fleuve de Thrace. Pline,\\nliv. IV. c...</td>\n", + " <td>28</td>\n", + " <td>Géographie</td>\n", + " <td>Géographie</td>\n", + " </tr>\n", + " <tr>\n", + " <th>74055</th>\n", + " <td>17</td>\n", + " <td>3074</td>\n", + " <td>ZZUÉNÉ ou ZZEUENE</td>\n", + " <td>Jaucourt</td>\n", + " <td>Géographie ancienne</td>\n", + " <td>v17-2072-0</td>\n", + " <td>Géographie</td>\n", + " <td>\\nZZUÉNÉ ou ZZEUENE, (Géog. anc.) ville située...</td>\n", + " <td>\\nZZUÉNÉ ou ZZEUENE, ville située\\nsur la riv...</td>\n", + " <td>\\nZZUÉNÉ ou ZZEUENE, ville située\\nsur la riv...</td>\n", + " <td>149</td>\n", + " <td>Géographie</td>\n", + " <td>Géographie</td>\n", + " </tr>\n", + " <tr>\n", + " <th>74080</th>\n", + " <td>17</td>\n", + " <td>3099</td>\n", + " <td>CABOTAGE</td>\n", + " <td>Jaucourt</td>\n", + " <td>Navigation</td>\n", + " <td>v17-2097-0</td>\n", + " <td>Marine</td>\n", + " <td>\\nCABOTAGE, s. m. (Navigation.) le cabotage es...</td>\n", + " <td>\\nCABOTAGE, s. m. le cabotage est\\nune naviga...</td>\n", + " <td>\\nCABOTAGE, s. m. le cabotage est\\nune naviga...</td>\n", + " <td>192</td>\n", + " <td>Géographie</td>\n", + " <td>Géographie</td>\n", + " </tr>\n", + " <tr>\n", + " <th>74165</th>\n", + " <td>17</td>\n", + " <td>3184</td>\n", + " <td>GUAYAQUIL</td>\n", + " <td>La Condamine</td>\n", + " <td>Géographie</td>\n", + " <td>v17-2177-0</td>\n", + " <td>Géographie</td>\n", + " <td>\\nGUAYAQUIL, (Géograph.) nom d'une ville &\\nd'...</td>\n", + " <td>\\nGUAYAQUIL, nom d'une ville &\\nd'une grande ...</td>\n", + " <td>\\nGUAYAQUIL, nom d'une ville &\\nd'une grande ...</td>\n", + " <td>446</td>\n", + " <td>Géographie</td>\n", + " <td>Géographie</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "<p>15383 rows × 13 columns</p>\n", + "</div>" + ], + "text/plain": [ + " volume numero head author edda_class \\\n", + "24 1 26 A Diderot unclassified \n", + "25 1 27 AA Diderot unclassified \n", + "27 1 29 AACH ou ACH Diderot unclassified \n", + "28 1 30 AAHUS Diderot unclassified \n", + "30 1 32 AAR Diderot unclassified \n", + "... ... ... ... ... ... \n", + "74051 17 3070 ZYGRIS Jaucourt Géographie ancienne \n", + "74054 17 3073 ZYRAS Jaucourt Géographie ancienne \n", + "74055 17 3074 ZZUÉNÉ ou ZZEUENE Jaucourt Géographie ancienne \n", + "74080 17 3099 CABOTAGE Jaucourt Navigation \n", + "74165 17 3184 GUAYAQUIL La Condamine Géographie \n", + "\n", + " enccre_id enccre_class \\\n", + "24 v1-9-0 Géographie \n", + "25 v1-10-0 Géographie \n", + "27 v1-12-0 Géographie \n", + "28 v1-13-0 Géographie \n", + "30 v1-15-0 Géographie \n", + "... ... ... \n", + "74051 v17-2068-0 Géographie \n", + "74054 v17-2071-0 Géographie \n", + "74055 v17-2072-0 Géographie \n", + "74080 v17-2097-0 Marine \n", + "74165 v17-2177-0 Géographie \n", + "\n", + " content \\\n", + "24 \\n* A, s. petite riviere de France, qui a sa s... \n", + "25 \\n* AA, s. f. riviere de France, qui prend sa ... \n", + "27 \\n* AACH ou ACH, s. f. petite ville d'Allemagn... \n", + "28 \\n* AAHUS, s. petite ville d'Allemagne dans le... \n", + "30 \\n* AAR, s. grande riviere qui a sa source pro... \n", + "... ... \n", + "74051 \\nZYGRIS, (Géog. anc.) ville du nôme de Lybie\\... \n", + "74054 \\nZYRAS, (Géog. anc.) fleuve de Thrace. Pline,... \n", + "74055 \\nZZUÉNÉ ou ZZEUENE, (Géog. anc.) ville située... \n", + "74080 \\nCABOTAGE, s. m. (Navigation.) le cabotage es... \n", + "74165 \\nGUAYAQUIL, (Géograph.) nom d'une ville &\\nd'... \n", + "\n", + " content_without_designant \\\n", + "24 \\n* A, s. petite riviere de France, qui a sa s... \n", + "25 \\n* AA, s. f. riviere de France, qui prend sa ... \n", + "27 \\n* AACH ou ACH, s. f. petite ville d'Allemagn... \n", + "28 \\n* AAHUS, s. petite ville d'Allemagne dans le... \n", + "30 \\n* AAR, s. grande riviere qui a sa source pro... \n", + "... ... \n", + "74051 \\nZYGRIS, ville du nôme de Lybie\\nsur la côte... \n", + "74054 \\nZYRAS, fleuve de Thrace. Pline,\\nliv. IV. c... \n", + "74055 \\nZZUÉNÉ ou ZZEUENE, ville située\\nsur la riv... \n", + "74080 \\nCABOTAGE, s. m. le cabotage est\\nune naviga... \n", + "74165 \\nGUAYAQUIL, nom d'une ville &\\nd'une grande ... \n", + "\n", + " first_paragraph nb_words \\\n", + "24 \\n* A, s. petite riviere de France, qui a sa s... 15 \n", + "25 \\n* AA, s. f. riviere de France, qui prend sa ... 46 \n", + "27 \\n* AACH ou ACH, s. f. petite ville d'Allemagn... 24 \n", + "28 \\n* AAHUS, s. petite ville d'Allemagne dans le... 21 \n", + "30 \\n* AAR, s. grande riviere qui a sa source pro... 30 \n", + "... ... ... \n", + "74051 \\nZYGRIS, ville du nôme de Lybie\\nsur la côte... 38 \n", + "74054 \\nZYRAS, fleuve de Thrace. Pline,\\nliv. IV. c... 28 \n", + "74055 \\nZZUÉNÉ ou ZZEUENE, ville située\\nsur la riv... 149 \n", + "74080 \\nCABOTAGE, s. m. le cabotage est\\nune naviga... 192 \n", + "74165 \\nGUAYAQUIL, nom d'une ville &\\nd'une grande ... 446 \n", + "\n", + " super_domain superdomainBert \n", + "24 Unclassified Géographie \n", + "25 Unclassified Géographie \n", + "27 Unclassified Géographie \n", + "28 Unclassified Géographie \n", + "30 Unclassified Géographie \n", + "... ... ... \n", + "74051 Géographie Géographie \n", + "74054 Géographie Géographie \n", + "74055 Géographie Géographie \n", + "74080 Géographie Géographie \n", + "74165 Géographie Géographie \n", + "\n", + "[15383 rows x 13 columns]" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.loc[(df['superdomainBert'] == 'Géographie')]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], "source": [] } -- GitLab