diff --git a/notebooks/Predict.ipynb b/notebooks/Predict.ipynb index 76e7e0ac071aed40627043e8e26de73460c8d9b4..371c8083d7797ce2795f0742909ec6a5701db9a3 100644 --- a/notebooks/Predict.ipynb +++ b/notebooks/Predict.ipynb @@ -4,7 +4,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "#\n", + "# BERT Predict classification\n", "\n", "## 1. Setup the environment\n", "\n", @@ -148,7 +148,7 @@ "import pandas as pd \n", "import numpy as np\n", "\n", - "from transformers import BertTokenizer, BertForSequenceClassification, BertConfig, CamembertTokenizer, CamembertForSequenceClassification\n", + "from transformers import BertTokenizer, BertForSequenceClassification, CamembertTokenizer, CamembertForSequenceClassification\n", "from torch.utils.data import TensorDataset, DataLoader, SequentialSampler" ] }, @@ -880,7 +880,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 26, "metadata": {}, "outputs": [ { @@ -964,43 +964,642 @@ " <td>ACTÉE(Actœa L.). Genre de plantes de la famill...</td>\n", " <td>Histoire naturelle</td>\n", " </tr>\n", + " <tr>\n", + " <th>5</th>\n", + " <td>adulteration-0</td>\n", + " <td>1</td>\n", + " <td>2197</td>\n", + " <td>NaN</td>\n", + " <td>cross reference</td>\n", + " <td>ADULTERATION. Altération d’un médicament, d’un...</td>\n", + " <td>Chimie</td>\n", + " </tr>\n", + " <tr>\n", + " <th>6</th>\n", + " <td>aérides-0</td>\n", + " <td>1</td>\n", + " <td>2334</td>\n", + " <td>botany</td>\n", + " <td>NaN</td>\n", + " <td>AÉRIDES{Aérides Lour.). Genres de plantes de l...</td>\n", + " <td>Histoire naturelle</td>\n", + " </tr>\n", + " <tr>\n", + " <th>7</th>\n", + " <td>ager-0</td>\n", + " <td>1</td>\n", + " <td>2710</td>\n", + " <td>biography</td>\n", + " <td>NaN</td>\n", + " <td>AGERouAGERIUS (Nicolaus), médecin alsacien, né...</td>\n", + " <td>Histoire</td>\n", + " </tr>\n", + " <tr>\n", + " <th>8</th>\n", + " <td>aigu-1</td>\n", + " <td>1</td>\n", + " <td>3160</td>\n", + " <td>NaN</td>\n", + " <td>cross reference</td>\n", + " <td>AIGU1 LH E (V. Raimond d’).\\n</td>\n", + " <td>Marine</td>\n", + " </tr>\n", + " <tr>\n", + " <th>9</th>\n", + " <td>alavika-0</td>\n", + " <td>1</td>\n", + " <td>3664</td>\n", + " <td>theology</td>\n", + " <td>NaN</td>\n", + " <td>ALAVIKA« qui est d'Alava »(V. ce mot) : Bhikch...</td>\n", + " <td>Religion</td>\n", + " </tr>\n", + " <tr>\n", + " <th>10</th>\n", + " <td>allassac-0</td>\n", + " <td>2</td>\n", + " <td>755</td>\n", + " <td>geography</td>\n", + " <td>NaN</td>\n", + " <td>ALLASSAC. Com. du dép. de la Corrèze, arr. de ...</td>\n", + " <td>Géographie</td>\n", + " </tr>\n", + " <tr>\n", + " <th>11</th>\n", + " <td>allegretto-0</td>\n", + " <td>2</td>\n", + " <td>786</td>\n", + " <td>NaN</td>\n", + " <td>cross reference</td>\n", + " <td>ALLEGRETTO(V. Allegro).\\n</td>\n", + " <td>Musique</td>\n", + " </tr>\n", + " <tr>\n", + " <th>12</th>\n", + " <td>alleuze-0</td>\n", + " <td>2</td>\n", + " <td>908</td>\n", + " <td>geography</td>\n", + " <td>NaN</td>\n", + " <td>ALLEUZE. Com. du dép. du Cantal, arr. et cant....</td>\n", + " <td>Géographie</td>\n", + " </tr>\n", + " <tr>\n", + " <th>13</th>\n", + " <td>alliat-0</td>\n", + " <td>2</td>\n", + " <td>933</td>\n", + " <td>geography</td>\n", + " <td>NaN</td>\n", + " <td>ALLIAT. Com. du dép. de l’Ariège, arr. de Foix...</td>\n", + " <td>Géographie</td>\n", + " </tr>\n", + " <tr>\n", + " <th>14</th>\n", + " <td>amanty-0</td>\n", + " <td>2</td>\n", + " <td>1651</td>\n", + " <td>geography</td>\n", + " <td>NaN</td>\n", + " <td>AMANTY. Corn, du dép. de la Meuse, arr. de Com...</td>\n", + " <td>Géographie</td>\n", + " </tr>\n", + " <tr>\n", + " <th>15</th>\n", + " <td>âmasserah-0</td>\n", + " <td>2</td>\n", + " <td>1701</td>\n", + " <td>geography</td>\n", + " <td>explicit domain</td>\n", + " <td>ÂMASSERAH, AMASR1 ou AMASRAH (Géogr.). Ville d...</td>\n", + " <td>Géographie</td>\n", + " </tr>\n", + " <tr>\n", + " <th>16</th>\n", + " <td>a-118</td>\n", + " <td>2</td>\n", + " <td>2971</td>\n", + " <td>history</td>\n", + " <td>NaN</td>\n", + " <td>AN Cl LIA. Boucliers sacrés des Romains, au no...</td>\n", + " <td>Antiquité</td>\n", + " </tr>\n", + " <tr>\n", + " <th>17</th>\n", + " <td>androclès-0</td>\n", + " <td>2</td>\n", + " <td>3261</td>\n", + " <td>mythology</td>\n", + " <td>explicit domain</td>\n", + " <td>ANDROCLÈS(Myth.), un fils d’Eole qui régna sur...</td>\n", + " <td>Antiquité</td>\n", + " </tr>\n", + " <tr>\n", + " <th>18</th>\n", + " <td>anfouson-0</td>\n", + " <td>2</td>\n", + " <td>3394</td>\n", + " <td>zoology</td>\n", + " <td>NaN</td>\n", + " <td>ANFOUSON. Nom donné à Nice au Néron brun\\n(V. ...</td>\n", + " <td>Histoire naturelle</td>\n", + " </tr>\n", + " <tr>\n", + " <th>19</th>\n", + " <td>anicet-bourgeois-0</td>\n", + " <td>2</td>\n", + " <td>3717</td>\n", + " <td>biography</td>\n", + " <td>NaN</td>\n", + " <td>ANICET-BOURGEOIS(Auguste Anicet, connu sous le...</td>\n", + " <td>Belles-lettres - Poésie</td>\n", + " </tr>\n", + " <tr>\n", + " <th>20</th>\n", + " <td>anomalistique-0</td>\n", + " <td>3</td>\n", + " <td>238</td>\n", + " <td>astronomy</td>\n", + " <td>explicit domain</td>\n", + " <td>ANOMALISTIQUE(Astron.). On appelle révolution\\...</td>\n", + " <td>Physique - [Sciences physico-mathématiques]</td>\n", + " </tr>\n", + " <tr>\n", + " <th>21</th>\n", + " <td>anostostome-0</td>\n", + " <td>3</td>\n", + " <td>298</td>\n", + " <td>zoology</td>\n", + " <td>NaN</td>\n", + " <td>ANOSTOSTOME(Anostostoma Gray). Genre d’insecte...</td>\n", + " <td>Histoire naturelle</td>\n", + " </tr>\n", + " <tr>\n", + " <th>22</th>\n", + " <td>anthoxanthème-0</td>\n", + " <td>3</td>\n", + " <td>571</td>\n", + " <td>chemistry</td>\n", + " <td>NaN</td>\n", + " <td>ANTHOXANTHÈME. L’un des deux principes coloran...</td>\n", + " <td>Pharmacie</td>\n", + " </tr>\n", + " <tr>\n", + " <th>23</th>\n", + " <td>aod-0</td>\n", + " <td>3</td>\n", + " <td>1024</td>\n", + " <td>theology</td>\n", + " <td>NaN</td>\n", + " <td>AOD, plus exactement Ehoud. personnage des com...</td>\n", + " <td>Histoire</td>\n", + " </tr>\n", + " <tr>\n", + " <th>24</th>\n", + " <td>aphellan-0</td>\n", + " <td>3</td>\n", + " <td>1177</td>\n", + " <td>astronomy</td>\n", + " <td>NaN</td>\n", + " <td>APHELLAN(Astron.). Un des noms de l’étoile a2 ...</td>\n", + " <td>Physique - [Sciences physico-mathématiques]</td>\n", + " </tr>\n", + " <tr>\n", + " <th>25</th>\n", + " <td>appelle-0</td>\n", + " <td>3</td>\n", + " <td>1494</td>\n", + " <td>geography</td>\n", + " <td>NaN</td>\n", + " <td>APPELLE. Com. du dép. du Tarn, arr. de Lavaux,...</td>\n", + " <td>Géographie</td>\n", + " </tr>\n", + " <tr>\n", + " <th>26</th>\n", + " <td>aragona-1</td>\n", + " <td>3</td>\n", + " <td>1841</td>\n", + " <td>biography</td>\n", + " <td>NaN</td>\n", + " <td>ARAGONA, cardinal d’origine sicilienne, né en ...</td>\n", + " <td>Religion</td>\n", + " </tr>\n", + " <tr>\n", + " <th>27</th>\n", + " <td>araujuzon-0</td>\n", + " <td>3</td>\n", + " <td>1940</td>\n", + " <td>geography</td>\n", + " <td>NaN</td>\n", + " <td>ARAUJUZON. Com. du dép. des Basses-Pyrénées, a...</td>\n", + " <td>Géographie</td>\n", + " </tr>\n", + " <tr>\n", + " <th>28</th>\n", + " <td>ardant-0</td>\n", + " <td>3</td>\n", + " <td>2421</td>\n", + " <td>biography</td>\n", + " <td>NaN</td>\n", + " <td>ARDANT(Paul-Joseph), général français, né en 1...</td>\n", + " <td>Militaire (Art) - Guerre - Arme</td>\n", + " </tr>\n", + " <tr>\n", + " <th>29</th>\n", + " <td>ariano-0</td>\n", + " <td>3</td>\n", + " <td>2839</td>\n", + " <td>geography</td>\n", + " <td>NaN</td>\n", + " <td>ARIANOdi Puglia. Ville de la prov. de principa...</td>\n", + " <td>Géographie</td>\n", + " </tr>\n", + " <tr>\n", + " <th>30</th>\n", + " <td>athabaska-0</td>\n", + " <td>4</td>\n", + " <td>1118</td>\n", + " <td>anthropology</td>\n", + " <td>NaN</td>\n", + " <td>ATHABASKA. Col, rivière, lac, territoire et fa...</td>\n", + " <td>Géographie</td>\n", + " </tr>\n", + " <tr>\n", + " <th>31</th>\n", + " <td>aslonnes-0</td>\n", + " <td>4</td>\n", + " <td>446</td>\n", + " <td>geography</td>\n", + " <td>NaN</td>\n", + " <td>ASLONNES, corn, du dép. de la Vienne, arr. de ...</td>\n", + " <td>Géographie</td>\n", + " </tr>\n", + " <tr>\n", + " <th>32</th>\n", + " <td>astr0rh1za-0</td>\n", + " <td>4</td>\n", + " <td>992</td>\n", + " <td>zoology</td>\n", + " <td>explicit domain</td>\n", + " <td>ASTR0RH1ZA(Zool.).Genre deForaminifèresimperfo...</td>\n", + " <td>Histoire naturelle</td>\n", + " </tr>\n", + " <tr>\n", + " <th>33</th>\n", + " <td>atthidographes-0</td>\n", + " <td>4</td>\n", + " <td>1397</td>\n", + " <td>NaN</td>\n", + " <td>cross reference</td>\n", + " <td>ATTHIDOGRAPHES(V. Atthide).\\n</td>\n", + " <td>Géographie</td>\n", + " </tr>\n", + " <tr>\n", + " <th>34</th>\n", + " <td>aubery-2</td>\n", + " <td>4</td>\n", + " <td>1577</td>\n", + " <td>biography</td>\n", + " <td>NaN</td>\n", + " <td>AUBERY(Antoine;, historien français, né le .18...</td>\n", + " <td>Histoire</td>\n", + " </tr>\n", + " <tr>\n", + " <th>35</th>\n", + " <td>aula-0</td>\n", + " <td>4</td>\n", + " <td>1992</td>\n", + " <td>history</td>\n", + " <td>NaN</td>\n", + " <td>AULA. Mot latin signifiant cour, lieu découver...</td>\n", + " <td>Architecture</td>\n", + " </tr>\n", + " <tr>\n", + " <th>36</th>\n", + " <td>au-113</td>\n", + " <td>4</td>\n", + " <td>2112</td>\n", + " <td>botany</td>\n", + " <td>explicit domain</td>\n", + " <td>AUNÉE (bot.). L'Aunée, Grande Année, Année off...</td>\n", + " <td>Histoire naturelle</td>\n", + " </tr>\n", + " <tr>\n", + " <th>37</th>\n", + " <td>auriol-4</td>\n", + " <td>4</td>\n", + " <td>2224</td>\n", + " <td>NaN</td>\n", + " <td>cross reference</td>\n", + " <td>AURIOL. Nom donné à Marseille au Maquereau (V....</td>\n", + " <td>Histoire naturelle</td>\n", + " </tr>\n", + " <tr>\n", + " <th>38</th>\n", + " <td>ave-lalleniant-0</td>\n", + " <td>4</td>\n", + " <td>2739</td>\n", + " <td>biography</td>\n", + " <td>NaN</td>\n", + " <td>AVE-LALLENIANT(Robert-Christian-Barthold), méd...</td>\n", + " <td>Histoire</td>\n", + " </tr>\n", + " <tr>\n", + " <th>39</th>\n", + " <td>badin-2</td>\n", + " <td>4</td>\n", + " <td>3857</td>\n", + " <td>biography</td>\n", + " <td>NaN</td>\n", + " <td>BADIN(Pierre-Adolphe), peintre français, né à ...</td>\n", + " <td>Arts et métiers</td>\n", + " </tr>\n", + " <tr>\n", + " <th>40</th>\n", + " <td>baizieux-0</td>\n", + " <td>5</td>\n", + " <td>133</td>\n", + " <td>geography</td>\n", + " <td>NaN</td>\n", + " <td>BAIZIEUX(Bacium, Basium). Com. du dép. de la\\n...</td>\n", + " <td>Géographie</td>\n", + " </tr>\n", + " <tr>\n", + " <th>41</th>\n", + " <td>balsam1te-0</td>\n", + " <td>5</td>\n", + " <td>677</td>\n", + " <td>botany</td>\n", + " <td>explicit domain</td>\n", + " <td>BALSAM1TE(Bot.) (Balsamita Desf.). Genre de Co...</td>\n", + " <td>Histoire naturelle</td>\n", + " </tr>\n", + " <tr>\n", + " <th>42</th>\n", + " <td>balze-0</td>\n", + " <td>5</td>\n", + " <td>757</td>\n", + " <td>navy</td>\n", + " <td>explicit domain</td>\n", + " <td>BALZE(Mar.). Radeau delà côte occidentale de l...</td>\n", + " <td>Marine</td>\n", + " </tr>\n", + " <tr>\n", + " <th>43</th>\n", + " <td>bande-2</td>\n", + " <td>5</td>\n", + " <td>880</td>\n", + " <td>history</td>\n", + " <td>NaN</td>\n", + " <td>BANDE(Ordre delà ) ou de l’ECHARPE.Ordre milita...</td>\n", + " <td>Histoire</td>\n", + " </tr>\n", + " <tr>\n", + " <th>44</th>\n", + " <td>barbosa-5</td>\n", + " <td>5</td>\n", + " <td>1580</td>\n", + " <td>biography</td>\n", + " <td>NaN</td>\n", + " <td>BARBOSA(Antonio), jésuite et orientaliste port...</td>\n", + " <td>Religion</td>\n", + " </tr>\n", + " <tr>\n", + " <th>45</th>\n", + " <td>bati-0</td>\n", + " <td>5</td>\n", + " <td>2955</td>\n", + " <td>architecture</td>\n", + " <td>NaN</td>\n", + " <td>BATIÈRE. Toit en forme de bât se terminant à c...</td>\n", + " <td>Architecture</td>\n", + " </tr>\n", + " <tr>\n", + " <th>46</th>\n", + " <td>baveuse-0</td>\n", + " <td>5</td>\n", + " <td>3457</td>\n", + " <td>zoology</td>\n", + " <td>explicit domain</td>\n", + " <td>BAVEUSE(Zool.). Nom vulgaire par lequel les\\np...</td>\n", + " <td>Histoire naturelle</td>\n", + " </tr>\n", + " <tr>\n", + " <th>47</th>\n", + " <td>beard-2</td>\n", + " <td>5</td>\n", + " <td>3728</td>\n", + " <td>biography</td>\n", + " <td>NaN</td>\n", + " <td>BEARD(James-Henry), peintre américain contempo...</td>\n", + " <td>Beaux-arts</td>\n", + " </tr>\n", + " <tr>\n", + " <th>48</th>\n", + " <td>beaufort-4</td>\n", + " <td>5</td>\n", + " <td>3838</td>\n", + " <td>geography</td>\n", + " <td>NaN</td>\n", + " <td>BEAUFORT. Com. du dép. de la Meuse, arr. de Mo...</td>\n", + " <td>Géographie</td>\n", + " </tr>\n", + " <tr>\n", + " <th>49</th>\n", + " <td>beaumont-26</td>\n", + " <td>5</td>\n", + " <td>4018</td>\n", + " <td>biography</td>\n", + " <td>NaN</td>\n", + " <td>BEAUMONT(J.-G. Leprevôt de), secrétaire du cle...</td>\n", + " <td>Histoire</td>\n", + " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ - " id tome rank domain remark \\\n", - "0 abrabeses-0 1 623 geography NaN \n", - "1 accius-0 1 1076 biography NaN \n", - "2 achenbach-2 1 1357 biography NaN \n", - "3 acireale-0 1 1513 geography NaN \n", - "4 actée-0 1 1731 botany NaN \n", + " id tome rank domain remark \\\n", + "0 abrabeses-0 1 623 geography NaN \n", + "1 accius-0 1 1076 biography NaN \n", + "2 achenbach-2 1 1357 biography NaN \n", + "3 acireale-0 1 1513 geography NaN \n", + "4 actée-0 1 1731 botany NaN \n", + "5 adulteration-0 1 2197 NaN cross reference \n", + "6 aérides-0 1 2334 botany NaN \n", + "7 ager-0 1 2710 biography NaN \n", + "8 aigu-1 1 3160 NaN cross reference \n", + "9 alavika-0 1 3664 theology NaN \n", + "10 allassac-0 2 755 geography NaN \n", + "11 allegretto-0 2 786 NaN cross reference \n", + "12 alleuze-0 2 908 geography NaN \n", + "13 alliat-0 2 933 geography NaN \n", + "14 amanty-0 2 1651 geography NaN \n", + "15 âmasserah-0 2 1701 geography explicit domain \n", + "16 a-118 2 2971 history NaN \n", + "17 androclès-0 2 3261 mythology explicit domain \n", + "18 anfouson-0 2 3394 zoology NaN \n", + "19 anicet-bourgeois-0 2 3717 biography NaN \n", + "20 anomalistique-0 3 238 astronomy explicit domain \n", + "21 anostostome-0 3 298 zoology NaN \n", + "22 anthoxanthème-0 3 571 chemistry NaN \n", + "23 aod-0 3 1024 theology NaN \n", + "24 aphellan-0 3 1177 astronomy NaN \n", + "25 appelle-0 3 1494 geography NaN \n", + "26 aragona-1 3 1841 biography NaN \n", + "27 araujuzon-0 3 1940 geography NaN \n", + "28 ardant-0 3 2421 biography NaN \n", + "29 ariano-0 3 2839 geography NaN \n", + "30 athabaska-0 4 1118 anthropology NaN \n", + "31 aslonnes-0 4 446 geography NaN \n", + "32 astr0rh1za-0 4 992 zoology explicit domain \n", + "33 atthidographes-0 4 1397 NaN cross reference \n", + "34 aubery-2 4 1577 biography NaN \n", + "35 aula-0 4 1992 history NaN \n", + "36 au-113 4 2112 botany explicit domain \n", + "37 auriol-4 4 2224 NaN cross reference \n", + "38 ave-lalleniant-0 4 2739 biography NaN \n", + "39 badin-2 4 3857 biography NaN \n", + "40 baizieux-0 5 133 geography NaN \n", + "41 balsam1te-0 5 677 botany explicit domain \n", + "42 balze-0 5 757 navy explicit domain \n", + "43 bande-2 5 880 history NaN \n", + "44 barbosa-5 5 1580 biography NaN \n", + "45 bati-0 5 2955 architecture NaN \n", + "46 baveuse-0 5 3457 zoology explicit domain \n", + "47 beard-2 5 3728 biography NaN \n", + "48 beaufort-4 5 3838 geography NaN \n", + "49 beaumont-26 5 4018 biography NaN \n", "\n", - " content class_bert \n", - "0 ABRABESES. Village d’Espagne de la prov. de Za... Géographie \n", - "1 ACCIUS, L. ou L. ATTIUS (170-94 av. J.-C.), po... Belles-lettres - Poésie \n", - "2 ACHENBACH(Henri), administrateur prussien, né ... Histoire \n", - "3 ACIREALE. Yille de Sicile, de la province et d... Géographie \n", - "4 ACTÉE(Actœa L.). Genre de plantes de la famill... Histoire naturelle " + " content \\\n", + "0 ABRABESES. Village d’Espagne de la prov. de Za... \n", + "1 ACCIUS, L. ou L. ATTIUS (170-94 av. J.-C.), po... \n", + "2 ACHENBACH(Henri), administrateur prussien, né ... \n", + "3 ACIREALE. Yille de Sicile, de la province et d... \n", + "4 ACTÉE(Actœa L.). Genre de plantes de la famill... \n", + "5 ADULTERATION. Altération d’un médicament, d’un... \n", + "6 AÉRIDES{Aérides Lour.). Genres de plantes de l... \n", + "7 AGERouAGERIUS (Nicolaus), médecin alsacien, né... \n", + "8 AIGU1 LH E (V. Raimond d’).\\n \n", + "9 ALAVIKA« qui est d'Alava »(V. ce mot) : Bhikch... \n", + "10 ALLASSAC. Com. du dép. de la Corrèze, arr. de ... \n", + "11 ALLEGRETTO(V. Allegro).\\n \n", + "12 ALLEUZE. Com. du dép. du Cantal, arr. et cant.... \n", + "13 ALLIAT. Com. du dép. de l’Ariège, arr. de Foix... \n", + "14 AMANTY. Corn, du dép. de la Meuse, arr. de Com... \n", + "15 ÂMASSERAH, AMASR1 ou AMASRAH (Géogr.). Ville d... \n", + "16 AN Cl LIA. Boucliers sacrés des Romains, au no... \n", + "17 ANDROCLÈS(Myth.), un fils d’Eole qui régna sur... \n", + "18 ANFOUSON. Nom donné à Nice au Néron brun\\n(V. ... \n", + "19 ANICET-BOURGEOIS(Auguste Anicet, connu sous le... \n", + "20 ANOMALISTIQUE(Astron.). On appelle révolution\\... \n", + "21 ANOSTOSTOME(Anostostoma Gray). Genre d’insecte... \n", + "22 ANTHOXANTHÈME. L’un des deux principes coloran... \n", + "23 AOD, plus exactement Ehoud. personnage des com... \n", + "24 APHELLAN(Astron.). Un des noms de l’étoile a2 ... \n", + "25 APPELLE. Com. du dép. du Tarn, arr. de Lavaux,... \n", + "26 ARAGONA, cardinal d’origine sicilienne, né en ... \n", + "27 ARAUJUZON. Com. du dép. des Basses-Pyrénées, a... \n", + "28 ARDANT(Paul-Joseph), général français, né en 1... \n", + "29 ARIANOdi Puglia. Ville de la prov. de principa... \n", + "30 ATHABASKA. Col, rivière, lac, territoire et fa... \n", + "31 ASLONNES, corn, du dép. de la Vienne, arr. de ... \n", + "32 ASTR0RH1ZA(Zool.).Genre deForaminifèresimperfo... \n", + "33 ATTHIDOGRAPHES(V. Atthide).\\n \n", + "34 AUBERY(Antoine;, historien français, né le .18... \n", + "35 AULA. Mot latin signifiant cour, lieu découver... \n", + "36 AUNÉE (bot.). L'Aunée, Grande Année, Année off... \n", + "37 AURIOL. Nom donné à Marseille au Maquereau (V.... \n", + "38 AVE-LALLENIANT(Robert-Christian-Barthold), méd... \n", + "39 BADIN(Pierre-Adolphe), peintre français, né à ... \n", + "40 BAIZIEUX(Bacium, Basium). Com. du dép. de la\\n... \n", + "41 BALSAM1TE(Bot.) (Balsamita Desf.). Genre de Co... \n", + "42 BALZE(Mar.). Radeau delà côte occidentale de l... \n", + "43 BANDE(Ordre delà ) ou de l’ECHARPE.Ordre milita... \n", + "44 BARBOSA(Antonio), jésuite et orientaliste port... \n", + "45 BATIÈRE. Toit en forme de bât se terminant à c... \n", + "46 BAVEUSE(Zool.). Nom vulgaire par lequel les\\np... \n", + "47 BEARD(James-Henry), peintre américain contempo... \n", + "48 BEAUFORT. Com. du dép. de la Meuse, arr. de Mo... \n", + "49 BEAUMONT(J.-G. Leprevôt de), secrétaire du cle... \n", + "\n", + " class_bert \n", + "0 Géographie \n", + "1 Belles-lettres - Poésie \n", + "2 Histoire \n", + "3 Géographie \n", + "4 Histoire naturelle \n", + "5 Chimie \n", + "6 Histoire naturelle \n", + "7 Histoire \n", + "8 Marine \n", + "9 Religion \n", + "10 Géographie \n", + "11 Musique \n", + "12 Géographie \n", + "13 Géographie \n", + "14 Géographie \n", + "15 Géographie \n", + "16 Antiquité \n", + "17 Antiquité \n", + "18 Histoire naturelle \n", + "19 Belles-lettres - Poésie \n", + "20 Physique - [Sciences physico-mathématiques] \n", + "21 Histoire naturelle \n", + "22 Pharmacie \n", + "23 Histoire \n", + "24 Physique - [Sciences physico-mathématiques] \n", + "25 Géographie \n", + "26 Religion \n", + "27 Géographie \n", + "28 Militaire (Art) - Guerre - Arme \n", + "29 Géographie \n", + "30 Géographie \n", + "31 Géographie \n", + "32 Histoire naturelle \n", + "33 Géographie \n", + "34 Histoire \n", + "35 Architecture \n", + "36 Histoire naturelle \n", + "37 Histoire naturelle \n", + "38 Histoire \n", + "39 Arts et métiers \n", + "40 Géographie \n", + "41 Histoire naturelle \n", + "42 Marine \n", + "43 Histoire \n", + "44 Religion \n", + "45 Architecture \n", + "46 Histoire naturelle \n", + "47 Beaux-arts \n", + "48 Géographie \n", + "49 Histoire " ] }, - "execution_count": 25, + "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "df_LGE.head()" + "df_LGE.head(50)" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 27, "metadata": {}, "outputs": [], "source": [ "df_LGE.to_csv(path + \"reports/classification_LGE.tsv\", sep=\"\\t\")" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": {