From f13251b8ac454393aa97b03e2ab690e9acabbced Mon Sep 17 00:00:00 2001 From: Ludovic Moncla <moncla.ludovic@gmail.com> Date: Mon, 28 Nov 2022 13:29:59 +0100 Subject: [PATCH] Update Predict_LGE.ipynb --- notebooks/Predict_LGE.ipynb | 903 ++---------------------------------- 1 file changed, 36 insertions(+), 867 deletions(-) diff --git a/notebooks/Predict_LGE.ipynb b/notebooks/Predict_LGE.ipynb index b74e579..590b96f 100644 --- a/notebooks/Predict_LGE.ipynb +++ b/notebooks/Predict_LGE.ipynb @@ -92,7 +92,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": { "id": "SkErnwgMMbRj" }, @@ -120,7 +120,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -128,15 +128,7 @@ "id": "dPOU-Efhf4ui", "outputId": "121dd21e-f98c-483d-d6d1-2838f732a4e2" }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "We will use the GPU\n" - ] - } - ], + "outputs": [], "source": [ " # If there's a GPU available...\n", "if torch.cuda.is_available(): \n", @@ -295,9 +287,16 @@ "## 3. Load Data" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 3.1 LGE (Nakala)" + ] + }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -319,13 +318,13 @@ "metadata": {}, "outputs": [], "source": [ - "#input_path = \"/Users/lmoncla/Documents/Data/Corpus/LGE/Text\"\n", - "input_path = \"./Text\"" + "input_path = \"/Users/lmoncla/Documents/Data/Corpus/LGE/Text\"\n", + "#input_path = \"./Text\"" ] }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -336,129 +335,18 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "<div>\n", - "<style scoped>\n", - " .dataframe tbody tr th:only-of-type {\n", - " vertical-align: middle;\n", - " }\n", - "\n", - " .dataframe tbody tr th {\n", - " vertical-align: top;\n", - " }\n", - "\n", - " .dataframe thead th {\n", - " text-align: right;\n", - " }\n", - "</style>\n", - "<table border=\"1\" class=\"dataframe\">\n", - " <thead>\n", - " <tr style=\"text-align: right;\">\n", - " <th></th>\n", - " <th>id</th>\n", - " <th>tome</th>\n", - " <th>rank</th>\n", - " <th>domain</th>\n", - " <th>remark</th>\n", - " <th>content</th>\n", - " </tr>\n", - " </thead>\n", - " <tbody>\n", - " <tr>\n", - " <th>0</th>\n", - " <td>abrabeses-0</td>\n", - " <td>1</td>\n", - " <td>623</td>\n", - " <td>geography</td>\n", - " <td>NaN</td>\n", - " <td>ABRABESES. Village d’Espagne de la prov. de Za...</td>\n", - " </tr>\n", - " <tr>\n", - " <th>1</th>\n", - " <td>accius-0</td>\n", - " <td>1</td>\n", - " <td>1076</td>\n", - " <td>biography</td>\n", - " <td>NaN</td>\n", - " <td>ACCIUS, L. ou L. ATTIUS (170-94 av. J.-C.), po...</td>\n", - " </tr>\n", - " <tr>\n", - " <th>2</th>\n", - " <td>achenbach-2</td>\n", - " <td>1</td>\n", - " <td>1357</td>\n", - " <td>biography</td>\n", - " <td>NaN</td>\n", - " <td>ACHENBACH(Henri), administrateur prussien, né ...</td>\n", - " </tr>\n", - " <tr>\n", - " <th>3</th>\n", - " <td>acireale-0</td>\n", - " <td>1</td>\n", - " <td>1513</td>\n", - " <td>geography</td>\n", - " <td>NaN</td>\n", - " <td>ACIREALE. Yille de Sicile, de la province et d...</td>\n", - " </tr>\n", - " <tr>\n", - " <th>4</th>\n", - " <td>actée-0</td>\n", - " <td>1</td>\n", - " <td>1731</td>\n", - " <td>botany</td>\n", - " <td>NaN</td>\n", - " <td>ACTÉE(Actœa L.). Genre de plantes de la famill...</td>\n", - " </tr>\n", - " </tbody>\n", - "</table>\n", - "</div>" - ], - "text/plain": [ - " id tome rank domain remark \\\n", - "0 abrabeses-0 1 623 geography NaN \n", - "1 accius-0 1 1076 biography NaN \n", - "2 achenbach-2 1 1357 biography NaN \n", - "3 acireale-0 1 1513 geography NaN \n", - "4 actée-0 1 1731 botany NaN \n", - "\n", - " content \n", - "0 ABRABESES. Village d’Espagne de la prov. de Za... \n", - "1 ACCIUS, L. ou L. ATTIUS (170-94 av. J.-C.), po... \n", - "2 ACHENBACH(Henri), administrateur prussien, né ... \n", - "3 ACIREALE. Yille de Sicile, de la province et d... \n", - "4 ACTÉE(Actœa L.). Genre de plantes de la famill... " - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "df_LGE.head()" ] }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(310, 6)" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "df_LGE.shape" ] @@ -474,59 +362,38 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#path = \"drive/MyDrive/Classification-EDdA/\"\n", "path = \"../\"\n", "model_name = \"bert-base-multilingual-cased\"\n", - "#model_name = \"camembert-base\"\n", "model_path = path + \"models/model_\" + model_name + \"_s10000.pt\"" ] }, { "cell_type": "code", - "execution_count": 16, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Loading Bert Tokenizer...\n" - ] - } - ], + "outputs": [], "source": [ - "if model_name == 'bert-base-multilingual-cased' :\n", - " print('Loading Bert Tokenizer...')\n", - " tokenizer = BertTokenizer.from_pretrained(model_name)\n", - "elif model_name == 'camembert-base':\n", - " print('Loading Camembert Tokenizer...')\n", - " tokenizer = CamembertTokenizer.from_pretrained(model_name)" + "print('Loading Bert Tokenizer...')\n", + "tokenizer = BertTokenizer.from_pretrained(model_name)" ] }, { "cell_type": "code", - "execution_count": 17, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Token indices sequence length is longer than the specified maximum sequence length for this model (1204 > 512). Running this sequence through the model will result in indexing errors\n" - ] - } - ], + "outputs": [], "source": [ "data_loader = generate_dataloader(tokenizer, data_LGE)" ] }, { "cell_type": "code", - "execution_count": 18, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -535,7 +402,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -550,7 +417,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -561,7 +428,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -570,7 +437,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -579,714 +446,16 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "<div>\n", - "<style scoped>\n", - " .dataframe tbody tr th:only-of-type {\n", - " vertical-align: middle;\n", - " }\n", - "\n", - " .dataframe tbody tr th {\n", - " vertical-align: top;\n", - " }\n", - "\n", - " .dataframe thead th {\n", - " text-align: right;\n", - " }\n", - "</style>\n", - "<table border=\"1\" class=\"dataframe\">\n", - " <thead>\n", - " <tr style=\"text-align: right;\">\n", - " <th></th>\n", - " <th>id</th>\n", - " <th>tome</th>\n", - " <th>rank</th>\n", - " <th>domain</th>\n", - " <th>remark</th>\n", - " <th>content</th>\n", - " <th>class_bert</th>\n", - " </tr>\n", - " </thead>\n", - " <tbody>\n", - " <tr>\n", - " <th>0</th>\n", - " <td>abrabeses-0</td>\n", - " <td>1</td>\n", - " <td>623</td>\n", - " <td>geography</td>\n", - " <td>NaN</td>\n", - " <td>ABRABESES. Village d’Espagne de la prov. de Za...</td>\n", - " <td>Géographie</td>\n", - " </tr>\n", - " <tr>\n", - " <th>1</th>\n", - " <td>accius-0</td>\n", - " <td>1</td>\n", - " <td>1076</td>\n", - " <td>biography</td>\n", - " <td>NaN</td>\n", - " <td>ACCIUS, L. ou L. ATTIUS (170-94 av. J.-C.), po...</td>\n", - " <td>Belles-lettres - Poésie</td>\n", - " </tr>\n", - " <tr>\n", - " <th>2</th>\n", - " <td>achenbach-2</td>\n", - " <td>1</td>\n", - " <td>1357</td>\n", - " <td>biography</td>\n", - " <td>NaN</td>\n", - " <td>ACHENBACH(Henri), administrateur prussien, né ...</td>\n", - " <td>Histoire</td>\n", - " </tr>\n", - " <tr>\n", - " <th>3</th>\n", - " <td>acireale-0</td>\n", - " <td>1</td>\n", - " <td>1513</td>\n", - " <td>geography</td>\n", - " <td>NaN</td>\n", - " <td>ACIREALE. Yille de Sicile, de la province et d...</td>\n", - " <td>Géographie</td>\n", - " </tr>\n", - " <tr>\n", - " <th>4</th>\n", - " <td>actée-0</td>\n", - " <td>1</td>\n", - " <td>1731</td>\n", - " <td>botany</td>\n", - " <td>NaN</td>\n", - " <td>ACTÉE(Actœa L.). Genre de plantes de la famill...</td>\n", - " <td>Histoire naturelle</td>\n", - " </tr>\n", - " <tr>\n", - " <th>5</th>\n", - " <td>adulteration-0</td>\n", - " <td>1</td>\n", - " <td>2197</td>\n", - " <td>NaN</td>\n", - " <td>cross reference</td>\n", - " <td>ADULTERATION. Altération d’un médicament, d’un...</td>\n", - " <td>Chimie</td>\n", - " </tr>\n", - " <tr>\n", - " <th>6</th>\n", - " <td>aérides-0</td>\n", - " <td>1</td>\n", - " <td>2334</td>\n", - " <td>botany</td>\n", - " <td>NaN</td>\n", - " <td>AÉRIDES{Aérides Lour.). Genres de plantes de l...</td>\n", - " <td>Histoire naturelle</td>\n", - " </tr>\n", - " <tr>\n", - " <th>7</th>\n", - " <td>ager-0</td>\n", - " <td>1</td>\n", - " <td>2710</td>\n", - " <td>biography</td>\n", - " <td>NaN</td>\n", - " <td>AGERouAGERIUS (Nicolaus), médecin alsacien, né...</td>\n", - " <td>Histoire</td>\n", - " </tr>\n", - " <tr>\n", - " <th>8</th>\n", - " <td>aigu-1</td>\n", - " <td>1</td>\n", - " <td>3160</td>\n", - " <td>NaN</td>\n", - " <td>cross reference</td>\n", - " <td>AIGU1 LH E (V. Raimond d’).\\n</td>\n", - " <td>Marine</td>\n", - " </tr>\n", - " <tr>\n", - " <th>9</th>\n", - " <td>alavika-0</td>\n", - " <td>1</td>\n", - " <td>3664</td>\n", - " <td>theology</td>\n", - " <td>NaN</td>\n", - " <td>ALAVIKA« qui est d'Alava »(V. ce mot) : Bhikch...</td>\n", - " <td>Religion</td>\n", - " </tr>\n", - " <tr>\n", - " <th>10</th>\n", - " <td>allassac-0</td>\n", - " <td>2</td>\n", - " <td>755</td>\n", - " <td>geography</td>\n", - " <td>NaN</td>\n", - " <td>ALLASSAC. Com. du dép. de la Corrèze, arr. de ...</td>\n", - " <td>Géographie</td>\n", - " </tr>\n", - " <tr>\n", - " <th>11</th>\n", - " <td>allegretto-0</td>\n", - " <td>2</td>\n", - " <td>786</td>\n", - " <td>NaN</td>\n", - " <td>cross reference</td>\n", - " <td>ALLEGRETTO(V. Allegro).\\n</td>\n", - " <td>Musique</td>\n", - " </tr>\n", - " <tr>\n", - " <th>12</th>\n", - " <td>alleuze-0</td>\n", - " <td>2</td>\n", - " <td>908</td>\n", - " <td>geography</td>\n", - " <td>NaN</td>\n", - " <td>ALLEUZE. Com. du dép. du Cantal, arr. et cant....</td>\n", - " <td>Géographie</td>\n", - " </tr>\n", - " <tr>\n", - " <th>13</th>\n", - " <td>alliat-0</td>\n", - " <td>2</td>\n", - " <td>933</td>\n", - " <td>geography</td>\n", - " <td>NaN</td>\n", - " <td>ALLIAT. Com. du dép. de l’Ariège, arr. de Foix...</td>\n", - " <td>Géographie</td>\n", - " </tr>\n", - " <tr>\n", - " <th>14</th>\n", - " <td>amanty-0</td>\n", - " <td>2</td>\n", - " <td>1651</td>\n", - " <td>geography</td>\n", - " <td>NaN</td>\n", - " <td>AMANTY. Corn, du dép. de la Meuse, arr. de Com...</td>\n", - " <td>Géographie</td>\n", - " </tr>\n", - " <tr>\n", - " <th>15</th>\n", - " <td>âmasserah-0</td>\n", - " <td>2</td>\n", - " <td>1701</td>\n", - " <td>geography</td>\n", - " <td>explicit domain</td>\n", - " <td>ÂMASSERAH, AMASR1 ou AMASRAH (Géogr.). Ville d...</td>\n", - " <td>Géographie</td>\n", - " </tr>\n", - " <tr>\n", - " <th>16</th>\n", - " <td>a-118</td>\n", - " <td>2</td>\n", - " <td>2971</td>\n", - " <td>history</td>\n", - " <td>NaN</td>\n", - " <td>AN Cl LIA. Boucliers sacrés des Romains, au no...</td>\n", - " <td>Antiquité</td>\n", - " </tr>\n", - " <tr>\n", - " <th>17</th>\n", - " <td>androclès-0</td>\n", - " <td>2</td>\n", - " <td>3261</td>\n", - " <td>mythology</td>\n", - " <td>explicit domain</td>\n", - " <td>ANDROCLÈS(Myth.), un fils d’Eole qui régna sur...</td>\n", - " <td>Antiquité</td>\n", - " </tr>\n", - " <tr>\n", - " <th>18</th>\n", - " <td>anfouson-0</td>\n", - " <td>2</td>\n", - " <td>3394</td>\n", - " <td>zoology</td>\n", - " <td>NaN</td>\n", - " <td>ANFOUSON. Nom donné à Nice au Néron brun\\n(V. ...</td>\n", - " <td>Histoire naturelle</td>\n", - " </tr>\n", - " <tr>\n", - " <th>19</th>\n", - " <td>anicet-bourgeois-0</td>\n", - " <td>2</td>\n", - " <td>3717</td>\n", - " <td>biography</td>\n", - " <td>NaN</td>\n", - " <td>ANICET-BOURGEOIS(Auguste Anicet, connu sous le...</td>\n", - " <td>Belles-lettres - Poésie</td>\n", - " </tr>\n", - " <tr>\n", - " <th>20</th>\n", - " <td>anomalistique-0</td>\n", - " <td>3</td>\n", - " <td>238</td>\n", - " <td>astronomy</td>\n", - " <td>explicit domain</td>\n", - " <td>ANOMALISTIQUE(Astron.). On appelle révolution\\...</td>\n", - " <td>Physique - [Sciences physico-mathématiques]</td>\n", - " </tr>\n", - " <tr>\n", - " <th>21</th>\n", - " <td>anostostome-0</td>\n", - " <td>3</td>\n", - " <td>298</td>\n", - " <td>zoology</td>\n", - " <td>NaN</td>\n", - " <td>ANOSTOSTOME(Anostostoma Gray). Genre d’insecte...</td>\n", - " <td>Histoire naturelle</td>\n", - " </tr>\n", - " <tr>\n", - " <th>22</th>\n", - " <td>anthoxanthème-0</td>\n", - " <td>3</td>\n", - " <td>571</td>\n", - " <td>chemistry</td>\n", - " <td>NaN</td>\n", - " <td>ANTHOXANTHÈME. L’un des deux principes coloran...</td>\n", - " <td>Pharmacie</td>\n", - " </tr>\n", - " <tr>\n", - " <th>23</th>\n", - " <td>aod-0</td>\n", - " <td>3</td>\n", - " <td>1024</td>\n", - " <td>theology</td>\n", - " <td>NaN</td>\n", - " <td>AOD, plus exactement Ehoud. personnage des com...</td>\n", - " <td>Histoire</td>\n", - " </tr>\n", - " <tr>\n", - " <th>24</th>\n", - " <td>aphellan-0</td>\n", - " <td>3</td>\n", - " <td>1177</td>\n", - " <td>astronomy</td>\n", - " <td>NaN</td>\n", - " <td>APHELLAN(Astron.). Un des noms de l’étoile a2 ...</td>\n", - " <td>Physique - [Sciences physico-mathématiques]</td>\n", - " </tr>\n", - " <tr>\n", - " <th>25</th>\n", - " <td>appelle-0</td>\n", - " <td>3</td>\n", - " <td>1494</td>\n", - " <td>geography</td>\n", - " <td>NaN</td>\n", - " <td>APPELLE. Com. du dép. du Tarn, arr. de Lavaux,...</td>\n", - " <td>Géographie</td>\n", - " </tr>\n", - " <tr>\n", - " <th>26</th>\n", - " <td>aragona-1</td>\n", - " <td>3</td>\n", - " <td>1841</td>\n", - " <td>biography</td>\n", - " <td>NaN</td>\n", - " <td>ARAGONA, cardinal d’origine sicilienne, né en ...</td>\n", - " <td>Religion</td>\n", - " </tr>\n", - " <tr>\n", - " <th>27</th>\n", - " <td>araujuzon-0</td>\n", - " <td>3</td>\n", - " <td>1940</td>\n", - " <td>geography</td>\n", - " <td>NaN</td>\n", - " <td>ARAUJUZON. Com. du dép. des Basses-Pyrénées, a...</td>\n", - " <td>Géographie</td>\n", - " </tr>\n", - " <tr>\n", - " <th>28</th>\n", - " <td>ardant-0</td>\n", - " <td>3</td>\n", - " <td>2421</td>\n", - " <td>biography</td>\n", - " <td>NaN</td>\n", - " <td>ARDANT(Paul-Joseph), général français, né en 1...</td>\n", - " <td>Militaire (Art) - Guerre - Arme</td>\n", - " </tr>\n", - " <tr>\n", - " <th>29</th>\n", - " <td>ariano-0</td>\n", - " <td>3</td>\n", - " <td>2839</td>\n", - " <td>geography</td>\n", - " <td>NaN</td>\n", - " <td>ARIANOdi Puglia. Ville de la prov. de principa...</td>\n", - " <td>Géographie</td>\n", - " </tr>\n", - " <tr>\n", - " <th>30</th>\n", - " <td>athabaska-0</td>\n", - " <td>4</td>\n", - " <td>1118</td>\n", - " <td>anthropology</td>\n", - " <td>NaN</td>\n", - " <td>ATHABASKA. Col, rivière, lac, territoire et fa...</td>\n", - " <td>Géographie</td>\n", - " </tr>\n", - " <tr>\n", - " <th>31</th>\n", - " <td>aslonnes-0</td>\n", - " <td>4</td>\n", - " <td>446</td>\n", - " <td>geography</td>\n", - " <td>NaN</td>\n", - " <td>ASLONNES, corn, du dép. de la Vienne, arr. de ...</td>\n", - " <td>Géographie</td>\n", - " </tr>\n", - " <tr>\n", - " <th>32</th>\n", - " <td>astr0rh1za-0</td>\n", - " <td>4</td>\n", - " <td>992</td>\n", - " <td>zoology</td>\n", - " <td>explicit domain</td>\n", - " <td>ASTR0RH1ZA(Zool.).Genre deForaminifèresimperfo...</td>\n", - " <td>Histoire naturelle</td>\n", - " </tr>\n", - " <tr>\n", - " <th>33</th>\n", - " <td>atthidographes-0</td>\n", - " <td>4</td>\n", - " <td>1397</td>\n", - " <td>NaN</td>\n", - " <td>cross reference</td>\n", - " <td>ATTHIDOGRAPHES(V. Atthide).\\n</td>\n", - " <td>Géographie</td>\n", - " </tr>\n", - " <tr>\n", - " <th>34</th>\n", - " <td>aubery-2</td>\n", - " <td>4</td>\n", - " <td>1577</td>\n", - " <td>biography</td>\n", - " <td>NaN</td>\n", - " <td>AUBERY(Antoine;, historien français, né le .18...</td>\n", - " <td>Histoire</td>\n", - " </tr>\n", - " <tr>\n", - " <th>35</th>\n", - " <td>aula-0</td>\n", - " <td>4</td>\n", - " <td>1992</td>\n", - " <td>history</td>\n", - " <td>NaN</td>\n", - " <td>AULA. Mot latin signifiant cour, lieu découver...</td>\n", - " <td>Architecture</td>\n", - " </tr>\n", - " <tr>\n", - " <th>36</th>\n", - " <td>au-113</td>\n", - " <td>4</td>\n", - " <td>2112</td>\n", - " <td>botany</td>\n", - " <td>explicit domain</td>\n", - " <td>AUNÉE (bot.). L'Aunée, Grande Année, Année off...</td>\n", - " <td>Histoire naturelle</td>\n", - " </tr>\n", - " <tr>\n", - " <th>37</th>\n", - " <td>auriol-4</td>\n", - " <td>4</td>\n", - " <td>2224</td>\n", - " <td>NaN</td>\n", - " <td>cross reference</td>\n", - " <td>AURIOL. Nom donné à Marseille au Maquereau (V....</td>\n", - " <td>Histoire naturelle</td>\n", - " </tr>\n", - " <tr>\n", - " <th>38</th>\n", - " <td>ave-lalleniant-0</td>\n", - " <td>4</td>\n", - " <td>2739</td>\n", - " <td>biography</td>\n", - " <td>NaN</td>\n", - " <td>AVE-LALLENIANT(Robert-Christian-Barthold), méd...</td>\n", - " <td>Histoire</td>\n", - " </tr>\n", - " <tr>\n", - " <th>39</th>\n", - " <td>badin-2</td>\n", - " <td>4</td>\n", - " <td>3857</td>\n", - " <td>biography</td>\n", - " <td>NaN</td>\n", - " <td>BADIN(Pierre-Adolphe), peintre français, né à ...</td>\n", - " <td>Arts et métiers</td>\n", - " </tr>\n", - " <tr>\n", - " <th>40</th>\n", - " <td>baizieux-0</td>\n", - " <td>5</td>\n", - " <td>133</td>\n", - " <td>geography</td>\n", - " <td>NaN</td>\n", - " <td>BAIZIEUX(Bacium, Basium). Com. du dép. de la\\n...</td>\n", - " <td>Géographie</td>\n", - " </tr>\n", - " <tr>\n", - " <th>41</th>\n", - " <td>balsam1te-0</td>\n", - " <td>5</td>\n", - " <td>677</td>\n", - " <td>botany</td>\n", - " <td>explicit domain</td>\n", - " <td>BALSAM1TE(Bot.) (Balsamita Desf.). Genre de Co...</td>\n", - " <td>Histoire naturelle</td>\n", - " </tr>\n", - " <tr>\n", - " <th>42</th>\n", - " <td>balze-0</td>\n", - " <td>5</td>\n", - " <td>757</td>\n", - " <td>navy</td>\n", - " <td>explicit domain</td>\n", - " <td>BALZE(Mar.). Radeau delà côte occidentale de l...</td>\n", - " <td>Marine</td>\n", - " </tr>\n", - " <tr>\n", - " <th>43</th>\n", - " <td>bande-2</td>\n", - " <td>5</td>\n", - " <td>880</td>\n", - " <td>history</td>\n", - " <td>NaN</td>\n", - " <td>BANDE(Ordre delà ) ou de l’ECHARPE.Ordre milita...</td>\n", - " <td>Histoire</td>\n", - " </tr>\n", - " <tr>\n", - " <th>44</th>\n", - " <td>barbosa-5</td>\n", - " <td>5</td>\n", - " <td>1580</td>\n", - " <td>biography</td>\n", - " <td>NaN</td>\n", - " <td>BARBOSA(Antonio), jésuite et orientaliste port...</td>\n", - " <td>Religion</td>\n", - " </tr>\n", - " <tr>\n", - " <th>45</th>\n", - " <td>bati-0</td>\n", - " <td>5</td>\n", - " <td>2955</td>\n", - " <td>architecture</td>\n", - " <td>NaN</td>\n", - " <td>BATIÈRE. Toit en forme de bât se terminant à c...</td>\n", - " <td>Architecture</td>\n", - " </tr>\n", - " <tr>\n", - " <th>46</th>\n", - " <td>baveuse-0</td>\n", - " <td>5</td>\n", - " <td>3457</td>\n", - " <td>zoology</td>\n", - " <td>explicit domain</td>\n", - " <td>BAVEUSE(Zool.). Nom vulgaire par lequel les\\np...</td>\n", - " <td>Histoire naturelle</td>\n", - " </tr>\n", - " <tr>\n", - " <th>47</th>\n", - " <td>beard-2</td>\n", - " <td>5</td>\n", - " <td>3728</td>\n", - " <td>biography</td>\n", - " <td>NaN</td>\n", - " <td>BEARD(James-Henry), peintre américain contempo...</td>\n", - " <td>Beaux-arts</td>\n", - " </tr>\n", - " <tr>\n", - " <th>48</th>\n", - " <td>beaufort-4</td>\n", - " <td>5</td>\n", - " <td>3838</td>\n", - " <td>geography</td>\n", - " <td>NaN</td>\n", - " <td>BEAUFORT. Com. du dép. de la Meuse, arr. de Mo...</td>\n", - " <td>Géographie</td>\n", - " </tr>\n", - " <tr>\n", - " <th>49</th>\n", - " <td>beaumont-26</td>\n", - " <td>5</td>\n", - " <td>4018</td>\n", - " <td>biography</td>\n", - " <td>NaN</td>\n", - " <td>BEAUMONT(J.-G. Leprevôt de), secrétaire du cle...</td>\n", - " <td>Histoire</td>\n", - " </tr>\n", - " </tbody>\n", - "</table>\n", - "</div>" - ], - "text/plain": [ - " id tome rank domain remark \\\n", - "0 abrabeses-0 1 623 geography NaN \n", - "1 accius-0 1 1076 biography NaN \n", - "2 achenbach-2 1 1357 biography NaN \n", - "3 acireale-0 1 1513 geography NaN \n", - "4 actée-0 1 1731 botany NaN \n", - "5 adulteration-0 1 2197 NaN cross reference \n", - "6 aérides-0 1 2334 botany NaN \n", - "7 ager-0 1 2710 biography NaN \n", - "8 aigu-1 1 3160 NaN cross reference \n", - "9 alavika-0 1 3664 theology NaN \n", - "10 allassac-0 2 755 geography NaN \n", - "11 allegretto-0 2 786 NaN cross reference \n", - "12 alleuze-0 2 908 geography NaN \n", - "13 alliat-0 2 933 geography NaN \n", - "14 amanty-0 2 1651 geography NaN \n", - "15 âmasserah-0 2 1701 geography explicit domain \n", - "16 a-118 2 2971 history NaN \n", - "17 androclès-0 2 3261 mythology explicit domain \n", - "18 anfouson-0 2 3394 zoology NaN \n", - "19 anicet-bourgeois-0 2 3717 biography NaN \n", - "20 anomalistique-0 3 238 astronomy explicit domain \n", - "21 anostostome-0 3 298 zoology NaN \n", - "22 anthoxanthème-0 3 571 chemistry NaN \n", - "23 aod-0 3 1024 theology NaN \n", - "24 aphellan-0 3 1177 astronomy NaN \n", - "25 appelle-0 3 1494 geography NaN \n", - "26 aragona-1 3 1841 biography NaN \n", - "27 araujuzon-0 3 1940 geography NaN \n", - "28 ardant-0 3 2421 biography NaN \n", - "29 ariano-0 3 2839 geography NaN \n", - "30 athabaska-0 4 1118 anthropology NaN \n", - "31 aslonnes-0 4 446 geography NaN \n", - "32 astr0rh1za-0 4 992 zoology explicit domain \n", - "33 atthidographes-0 4 1397 NaN cross reference \n", - "34 aubery-2 4 1577 biography NaN \n", - "35 aula-0 4 1992 history NaN \n", - "36 au-113 4 2112 botany explicit domain \n", - "37 auriol-4 4 2224 NaN cross reference \n", - "38 ave-lalleniant-0 4 2739 biography NaN \n", - "39 badin-2 4 3857 biography NaN \n", - "40 baizieux-0 5 133 geography NaN \n", - "41 balsam1te-0 5 677 botany explicit domain \n", - "42 balze-0 5 757 navy explicit domain \n", - "43 bande-2 5 880 history NaN \n", - "44 barbosa-5 5 1580 biography NaN \n", - "45 bati-0 5 2955 architecture NaN \n", - "46 baveuse-0 5 3457 zoology explicit domain \n", - "47 beard-2 5 3728 biography NaN \n", - "48 beaufort-4 5 3838 geography NaN \n", - "49 beaumont-26 5 4018 biography NaN \n", - "\n", - " content \\\n", - "0 ABRABESES. Village d’Espagne de la prov. de Za... \n", - "1 ACCIUS, L. ou L. ATTIUS (170-94 av. J.-C.), po... \n", - "2 ACHENBACH(Henri), administrateur prussien, né ... \n", - "3 ACIREALE. Yille de Sicile, de la province et d... \n", - "4 ACTÉE(Actœa L.). Genre de plantes de la famill... \n", - "5 ADULTERATION. Altération d’un médicament, d’un... \n", - "6 AÉRIDES{Aérides Lour.). Genres de plantes de l... \n", - "7 AGERouAGERIUS (Nicolaus), médecin alsacien, né... \n", - "8 AIGU1 LH E (V. Raimond d’).\\n \n", - "9 ALAVIKA« qui est d'Alava »(V. ce mot) : Bhikch... \n", - "10 ALLASSAC. Com. du dép. de la Corrèze, arr. de ... \n", - "11 ALLEGRETTO(V. Allegro).\\n \n", - "12 ALLEUZE. Com. du dép. du Cantal, arr. et cant.... \n", - "13 ALLIAT. Com. du dép. de l’Ariège, arr. de Foix... \n", - "14 AMANTY. Corn, du dép. de la Meuse, arr. de Com... \n", - "15 ÂMASSERAH, AMASR1 ou AMASRAH (Géogr.). Ville d... \n", - "16 AN Cl LIA. Boucliers sacrés des Romains, au no... \n", - "17 ANDROCLÈS(Myth.), un fils d’Eole qui régna sur... \n", - "18 ANFOUSON. Nom donné à Nice au Néron brun\\n(V. ... \n", - "19 ANICET-BOURGEOIS(Auguste Anicet, connu sous le... \n", - "20 ANOMALISTIQUE(Astron.). On appelle révolution\\... \n", - "21 ANOSTOSTOME(Anostostoma Gray). Genre d’insecte... \n", - "22 ANTHOXANTHÈME. L’un des deux principes coloran... \n", - "23 AOD, plus exactement Ehoud. personnage des com... \n", - "24 APHELLAN(Astron.). Un des noms de l’étoile a2 ... \n", - "25 APPELLE. Com. du dép. du Tarn, arr. de Lavaux,... \n", - "26 ARAGONA, cardinal d’origine sicilienne, né en ... \n", - "27 ARAUJUZON. Com. du dép. des Basses-Pyrénées, a... \n", - "28 ARDANT(Paul-Joseph), général français, né en 1... \n", - "29 ARIANOdi Puglia. Ville de la prov. de principa... \n", - "30 ATHABASKA. Col, rivière, lac, territoire et fa... \n", - "31 ASLONNES, corn, du dép. de la Vienne, arr. de ... \n", - "32 ASTR0RH1ZA(Zool.).Genre deForaminifèresimperfo... \n", - "33 ATTHIDOGRAPHES(V. Atthide).\\n \n", - "34 AUBERY(Antoine;, historien français, né le .18... \n", - "35 AULA. Mot latin signifiant cour, lieu découver... \n", - "36 AUNÉE (bot.). L'Aunée, Grande Année, Année off... \n", - "37 AURIOL. Nom donné à Marseille au Maquereau (V.... \n", - "38 AVE-LALLENIANT(Robert-Christian-Barthold), méd... \n", - "39 BADIN(Pierre-Adolphe), peintre français, né à ... \n", - "40 BAIZIEUX(Bacium, Basium). Com. du dép. de la\\n... \n", - "41 BALSAM1TE(Bot.) (Balsamita Desf.). Genre de Co... \n", - "42 BALZE(Mar.). Radeau delà côte occidentale de l... \n", - "43 BANDE(Ordre delà ) ou de l’ECHARPE.Ordre milita... \n", - "44 BARBOSA(Antonio), jésuite et orientaliste port... \n", - "45 BATIÈRE. Toit en forme de bât se terminant à c... \n", - "46 BAVEUSE(Zool.). Nom vulgaire par lequel les\\np... \n", - "47 BEARD(James-Henry), peintre américain contempo... \n", - "48 BEAUFORT. Com. du dép. de la Meuse, arr. de Mo... \n", - "49 BEAUMONT(J.-G. Leprevôt de), secrétaire du cle... \n", - "\n", - " class_bert \n", - "0 Géographie \n", - "1 Belles-lettres - Poésie \n", - "2 Histoire \n", - "3 Géographie \n", - "4 Histoire naturelle \n", - "5 Chimie \n", - "6 Histoire naturelle \n", - "7 Histoire \n", - "8 Marine \n", - "9 Religion \n", - "10 Géographie \n", - "11 Musique \n", - "12 Géographie \n", - "13 Géographie \n", - "14 Géographie \n", - "15 Géographie \n", - "16 Antiquité \n", - "17 Antiquité \n", - "18 Histoire naturelle \n", - "19 Belles-lettres - Poésie \n", - "20 Physique - [Sciences physico-mathématiques] \n", - "21 Histoire naturelle \n", - "22 Pharmacie \n", - "23 Histoire \n", - "24 Physique - [Sciences physico-mathématiques] \n", - "25 Géographie \n", - "26 Religion \n", - "27 Géographie \n", - "28 Militaire (Art) - Guerre - Arme \n", - "29 Géographie \n", - "30 Géographie \n", - "31 Géographie \n", - "32 Histoire naturelle \n", - "33 Géographie \n", - "34 Histoire \n", - "35 Architecture \n", - "36 Histoire naturelle \n", - "37 Histoire naturelle \n", - "38 Histoire \n", - "39 Arts et métiers \n", - "40 Géographie \n", - "41 Histoire naturelle \n", - "42 Marine \n", - "43 Histoire \n", - "44 Religion \n", - "45 Architecture \n", - "46 Histoire naturelle \n", - "47 Beaux-arts \n", - "48 Géographie \n", - "49 Histoire " - ] - }, - "execution_count": 26, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "df_LGE.head(50)" ] }, { "cell_type": "code", - "execution_count": 27, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -1315,7 +484,7 @@ "provenance": [] }, "kernelspec": { - "display_name": "Python 3.8.5", + "display_name": "Python 3.9.13 ('geode-classification-py39')", "language": "python", "name": "python3" }, @@ -1329,11 +498,11 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.5" + "version": "3.9.13" }, "vscode": { "interpreter": { - "hash": "5a66862d1e699d22749b730d4d12326d6986b018faa2bf0b5fca0506fffc064f" + "hash": "16fac9c2d845f8e1f8c6fffffe3d3a0def61c7e42da17a08d00f279ad4dea797" } }, "widgets": { -- GitLab