From c33bee9cde56ea9fc00a75acae1640802974a08a Mon Sep 17 00:00:00 2001
From: lmoncla <moncla.ludovic@gmail.com>
Date: Thu, 6 Jan 2022 10:38:26 +0100
Subject: [PATCH] add notebooks

---
 .gitignore                                    |    1 +
 notebooks/CorpusTEI_EDdA_to_dataframe.ipynb   | 5646 +++++++++++++++++
 .../EDdA_Classification_BertFineTuning.ipynb  | 4421 +++++++++++++
 .../EDdA_Classification_ClassicModels.ipynb   |  861 +++
 .../EDdA_Classification_DeepLearning.ipynb    | 1351 ++++
 .../EDdA_Classification_DeepLearning_2.ipynb  | 1349 ++++
 ...ssification_Generate_ConfusionMatrix.ipynb | 1181 ++++
 7 files changed, 14810 insertions(+)
 create mode 100644 notebooks/CorpusTEI_EDdA_to_dataframe.ipynb
 create mode 100644 notebooks/EDdA_Classification_BertFineTuning.ipynb
 create mode 100644 notebooks/EDdA_Classification_ClassicModels.ipynb
 create mode 100644 notebooks/EDdA_Classification_DeepLearning.ipynb
 create mode 100644 notebooks/EDdA_Classification_DeepLearning_2.ipynb
 create mode 100644 notebooks/EDdA_Classification_Generate_ConfusionMatrix.ipynb

diff --git a/.gitignore b/.gitignore
index 71a9a39..03156e2 100644
--- a/.gitignore
+++ b/.gitignore
@@ -12,3 +12,4 @@ dataframe_with_domaine_enccre.csv
 dataframe_with_normClass_artfl.csv
 *.pkl
 .DS_Store
+.DS_Store
diff --git a/notebooks/CorpusTEI_EDdA_to_dataframe.ipynb b/notebooks/CorpusTEI_EDdA_to_dataframe.ipynb
new file mode 100644
index 0000000..a1321a2
--- /dev/null
+++ b/notebooks/CorpusTEI_EDdA_to_dataframe.ipynb
@@ -0,0 +1,5646 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "metallic-shelf",
+   "metadata": {},
+   "source": [
+    "# Préparation du corpus EDdA pour la classification en domaine"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "designing-advice",
+   "metadata": {},
+   "source": [
+    "## Preparing data"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "floppy-fleet",
+   "metadata": {},
+   "source": [
+    "### Import des librairies"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "id": "appreciated-victim",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "from bs4 import BeautifulSoup\n",
+    "import pandas as pd\n",
+    "import numpy as np\n",
+    "\n",
+    "import urllib, json\n",
+    "from urllib.request import urlopen"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "framed-fossil",
+   "metadata": {},
+   "source": [
+    "### Parsing des articles TEI"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "suburban-honduras",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "input_path = \"/Users/lmoncla/Documents/Data/Corpus/EDDA/Alice/EDdA/\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "scenic-vermont",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Volume :  11\n",
+      "Volume :  16\n",
+      "Volume :  17\n",
+      "Volume :  10\n",
+      "Volume :  5\n",
+      "Volume :  2\n",
+      "Volume :  3\n",
+      "Volume :  4\n",
+      "Volume :  15\n",
+      "Volume :  12\n",
+      "Volume :  13\n",
+      "Volume :  14\n",
+      "Volume :  1\n",
+      "Volume :  6\n",
+      "Volume :  8\n",
+      "Volume :  9\n",
+      "Volume :  7\n"
+     ]
+    }
+   ],
+   "source": [
+    "# récupération dans une liste des métadonnées (volume, numéro, nom de l'article, classe et auteur) à partir des fichiers TEI\n",
+    "data = []\n",
+    "\n",
+    "for tome in os.listdir(input_path):\n",
+    "    volume = tome[1:]\n",
+    "    print(\"Volume : \", volume)\n",
+    "    \n",
+    "    for article in os.listdir(input_path + tome +\"/\"):\n",
+    "        #print(\"Article : \", article[7:-4])\n",
+    "        numero = article[7:-4]\n",
+    "        extension = article[-4:]\n",
+    "        if extension == '.tei':\n",
+    "\n",
+    "            try:\n",
+    "                soup = BeautifulSoup(open(input_path+tome+\"/\"+article))\n",
+    "\n",
+    "                head = soup.find(type=\"head\")\n",
+    "                author = soup.find(type=\"author\")\n",
+    "                normclass = soup.find(type=\"normclass\")\n",
+    "                classEDdA = soup.find(type=\"class\")\n",
+    "                \n",
+    "                #print(volume, numero, head.get('value'), normclass.get('value'), author.get('value'))\n",
+    "                data.append([int(volume), int(numero), head.get('value').strip(), normclass.get('value').strip(), classEDdA.get('value').strip(), author.get('value').strip()])\n",
+    "            \n",
+    "            except AttributeError as e:\n",
+    "                #print('Volume : ', volume, ' Numéro : ', numero)\n",
+    "                #print('Error : ' + str(e))\n",
+    "                pass"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "excess-waterproof",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# transformation de la liste en dataframe\n",
+    "df = pd.DataFrame(data, columns=['volume', 'numero', 'head', 'normClass', 'classEDdA', 'author'])\n",
+    "df = df.sort_values(['volume', 'numero']).reset_index(drop = True)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "blocked-reading",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>volume</th>\n",
+       "      <th>numero</th>\n",
+       "      <th>head</th>\n",
+       "      <th>normClass</th>\n",
+       "      <th>classEDdA</th>\n",
+       "      <th>author</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>45529</th>\n",
+       "      <td>11</td>\n",
+       "      <td>2501</td>\n",
+       "      <td>OPICIENS, les</td>\n",
+       "      <td>Géographie ancienne</td>\n",
+       "      <td>Géog. anc.</td>\n",
+       "      <td>unsigned</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>63464</th>\n",
+       "      <td>15</td>\n",
+       "      <td>1971</td>\n",
+       "      <td>SOUSA, Province de, ou Souse</td>\n",
+       "      <td>Géographie moderne</td>\n",
+       "      <td>Géog. mod.</td>\n",
+       "      <td>unsigned</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>38896</th>\n",
+       "      <td>9</td>\n",
+       "      <td>4159</td>\n",
+       "      <td>Maison</td>\n",
+       "      <td>Histoire moderne</td>\n",
+       "      <td>Hist. mod.</td>\n",
+       "      <td>unsigned</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>52378</th>\n",
+       "      <td>13</td>\n",
+       "      <td>522</td>\n",
+       "      <td>PORTO-FERRAIO</td>\n",
+       "      <td>Géographie moderne</td>\n",
+       "      <td>Geog. mod.</td>\n",
+       "      <td>unsigned</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>62792</th>\n",
+       "      <td>15</td>\n",
+       "      <td>1299</td>\n",
+       "      <td>SNOWDON-HILLS</td>\n",
+       "      <td>Géographie moderne</td>\n",
+       "      <td>Géog. mod.</td>\n",
+       "      <td>Jaucourt</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>54108</th>\n",
+       "      <td>13</td>\n",
+       "      <td>2252</td>\n",
+       "      <td>PULO-WAY</td>\n",
+       "      <td>Géographie moderne</td>\n",
+       "      <td>Geog. mod.</td>\n",
+       "      <td>Jaucourt</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>62965</th>\n",
+       "      <td>15</td>\n",
+       "      <td>1472</td>\n",
+       "      <td>Solide</td>\n",
+       "      <td>Anatomie</td>\n",
+       "      <td>en Anatomie</td>\n",
+       "      <td>unsigned</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>61463</th>\n",
+       "      <td>14</td>\n",
+       "      <td>5167</td>\n",
+       "      <td>SÉMI-PÉLAGIANISME</td>\n",
+       "      <td>Histoire ecclésiastique</td>\n",
+       "      <td>Hist. eccles.</td>\n",
+       "      <td>Jaucourt</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>29174</th>\n",
+       "      <td>7</td>\n",
+       "      <td>1711</td>\n",
+       "      <td>GAS</td>\n",
+       "      <td>Chimie</td>\n",
+       "      <td>Chim.</td>\n",
+       "      <td>Venel</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>21200</th>\n",
+       "      <td>4</td>\n",
+       "      <td>5290</td>\n",
+       "      <td>Divin, emplâtre divin, emplastrum divinum</td>\n",
+       "      <td>Pharmacie</td>\n",
+       "      <td>Pharmac.</td>\n",
+       "      <td>unsigned</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "       volume  numero                                       head  \\\n",
+       "45529      11    2501                              OPICIENS, les   \n",
+       "63464      15    1971               SOUSA, Province de, ou Souse   \n",
+       "38896       9    4159                                     Maison   \n",
+       "52378      13     522                              PORTO-FERRAIO   \n",
+       "62792      15    1299                              SNOWDON-HILLS   \n",
+       "54108      13    2252                                   PULO-WAY   \n",
+       "62965      15    1472                                     Solide   \n",
+       "61463      14    5167                          SÉMI-PÉLAGIANISME   \n",
+       "29174       7    1711                                        GAS   \n",
+       "21200       4    5290  Divin, emplâtre divin, emplastrum divinum   \n",
+       "\n",
+       "                     normClass      classEDdA    author  \n",
+       "45529      Géographie ancienne     Géog. anc.  unsigned  \n",
+       "63464       Géographie moderne     Géog. mod.  unsigned  \n",
+       "38896         Histoire moderne     Hist. mod.  unsigned  \n",
+       "52378       Géographie moderne     Geog. mod.  unsigned  \n",
+       "62792       Géographie moderne     Géog. mod.  Jaucourt  \n",
+       "54108       Géographie moderne     Geog. mod.  Jaucourt  \n",
+       "62965                 Anatomie    en Anatomie  unsigned  \n",
+       "61463  Histoire ecclésiastique  Hist. eccles.  Jaucourt  \n",
+       "29174                   Chimie          Chim.     Venel  \n",
+       "21200                Pharmacie       Pharmac.  unsigned  "
+      ]
+     },
+     "execution_count": 5,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# affichage aléatoire de 50 lignes du dataframe\n",
+    "df.sample(10)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "expired-click",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "74190"
+      ]
+     },
+     "execution_count": 6,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# nombre d'articles dans le dataframe\n",
+    "len(df)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "considered-adjustment",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>volume</th>\n",
+       "      <th>numero</th>\n",
+       "      <th>head</th>\n",
+       "      <th>classEDdA</th>\n",
+       "      <th>author</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>normClass</th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th></th>\n",
+       "      <td>44</td>\n",
+       "      <td>44</td>\n",
+       "      <td>44</td>\n",
+       "      <td>44</td>\n",
+       "      <td>44</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>17</td>\n",
+       "      <td>17</td>\n",
+       "      <td>17</td>\n",
+       "      <td>17</td>\n",
+       "      <td>17</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Abus des langues</th>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Accord de sons</th>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Acoustique</th>\n",
+       "      <td>6</td>\n",
+       "      <td>6</td>\n",
+       "      <td>6</td>\n",
+       "      <td>6</td>\n",
+       "      <td>6</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                  volume  numero  head  classEDdA  author\n",
+       "normClass                                                \n",
+       "                      44      44    44         44      44\n",
+       "0                     17      17    17         17      17\n",
+       "Abus des langues       1       1     1          1       1\n",
+       "Accord de sons         1       1     1          1       1\n",
+       "Acoustique             6       6     6          6       6"
+      ]
+     },
+     "execution_count": 7,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# On regroupe les lignes du dataframe en fonction du normclass\n",
+    "classes = df.groupby(['normClass']).count()\n",
+    "classes.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "instructional-variation",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "2908"
+      ]
+     },
+     "execution_count": 8,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Nombre de classes \n",
+    "len(classes)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "handmade-contest",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "12685"
+      ]
+     },
+     "execution_count": 9,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# nombre d'articles 'unclassified'\n",
+    "len(df.loc[df['normClass']==\"unclassified\",:])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "id": "crude-olympus",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "1614"
+      ]
+     },
+     "execution_count": 10,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# nombre de classes avec un seul article\n",
+    "len(classes.loc[classes['volume']==1])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "id": "sized-barrier",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "2656"
+      ]
+     },
+     "execution_count": 11,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# nombre de classes avec moins de 20 articles\n",
+    "len(classes.loc[classes['volume']<20])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "indian-selection",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "markdown",
+   "id": "weighted-hanging",
+   "metadata": {},
+   "source": [
+    "### Enregistrement"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "id": "stainless-stewart",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# enregistrement du résultat du groupby\n",
+    "classes['volume'].to_csv('classesEDdA.tsv',sep='\\t',header=False)  "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "hearing-olive",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# enregistrement du dataframe (permet de ne pas reparser tous les fichiers TEI pour recharger ce dataframe)\n",
+    "df.to_csv('EDdA_dataframe_orginal.tsv',sep='\\t',index=False)  "
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "stuck-courage",
+   "metadata": {},
+   "source": [
+    "### Lecture"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 143,
+   "id": "thick-destiny",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df = pd.read_csv('EDdA_dataframe_orginal.tsv', sep='\\t')  "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 144,
+   "id": "typical-munich",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>volume</th>\n",
+       "      <th>numero</th>\n",
+       "      <th>head</th>\n",
+       "      <th>normClass</th>\n",
+       "      <th>classEDdA</th>\n",
+       "      <th>author</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>Title Page</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>unsigned</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>1</td>\n",
+       "      <td>2</td>\n",
+       "      <td>A MONSEIGNEUR LE COMTE D'ARGENSON</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>Diderot &amp; d'Alembert</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>1</td>\n",
+       "      <td>3</td>\n",
+       "      <td>DISCOURS PRÉLIMINAIRE DES EDITEURS</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>d'Alembert</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>1</td>\n",
+       "      <td>5</td>\n",
+       "      <td>A, a &amp; a</td>\n",
+       "      <td>Grammaire</td>\n",
+       "      <td>ordre Encyclopéd. Entend. Science de l'homme, ...</td>\n",
+       "      <td>Dumarsais5</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>1</td>\n",
+       "      <td>6</td>\n",
+       "      <td>A</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>Dumarsais5</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   volume  numero                                head     normClass  \\\n",
+       "0       1       1                          Title Page  unclassified   \n",
+       "1       1       2   A MONSEIGNEUR LE COMTE D'ARGENSON  unclassified   \n",
+       "2       1       3  DISCOURS PRÉLIMINAIRE DES EDITEURS  unclassified   \n",
+       "3       1       5                            A, a & a     Grammaire   \n",
+       "4       1       6                                   A  unclassified   \n",
+       "\n",
+       "                                           classEDdA                author  \n",
+       "0                                       unclassified              unsigned  \n",
+       "1                                       unclassified  Diderot & d'Alembert  \n",
+       "2                                       unclassified            d'Alembert  \n",
+       "3  ordre Encyclopéd. Entend. Science de l'homme, ...            Dumarsais5  \n",
+       "4                                       unclassified            Dumarsais5  "
+      ]
+     },
+     "execution_count": 144,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "baking-command",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "individual-protection",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "\n",
+    "domaines_regroupes = {}\n",
+    "domaines_regroupes['Agriculture - Economie rustique'] = ['Agriculture', 'Economie rustique', 'Fontainier', 'Graines', 'Jardinage', 'Moulin', 'Sucre', 'Tabac', 'Vigne', 'Vin']\n",
+    "domaines_regroupes['Anatomie'] = ['Anatomie', 'Economie animale']\n",
+    "domaines_regroupes['Antiquité'] = ['Antiquité', 'Iconologie', 'Mythologie']\n",
+    "domaines_regroupes['Architecture'] = ['Architecture', 'Carreleur', 'Carrier', 'Coupe des pierres', 'Couvreur', 'Décoration', 'Maçonnerie']\n",
+    "domaines_regroupes['Arts et métiers'] = ['Arts et métiers', 'Arts mécaniques', 'Manufacture']\n",
+    "domaines_regroupes['Beaux-arts'] = ['Beaux-arts', 'Dessin', 'Gravue', 'Peinture', 'Sculpture']\n",
+    "domaines_regroupes['Belles-lettres - Poésie'] = ['Belles-lettres', 'Eloquence', 'Littérature', 'Poésie', 'Rhétorique']\n",
+    "domaines_regroupes['Blason'] = ['Blason']\n",
+    "domaines_regroupes['Caractères'] = ['Caractères', 'Ecriture']\n",
+    "domaines_regroupes['Chasse'] = ['Chasse', 'Fauconnerie', 'Oisellerie', 'Vénerie']\n",
+    "domaines_regroupes['Chimie'] = ['Alchimie', 'Chimie', 'Docimasie']\n",
+    "domaines_regroupes['Commerce'] = ['Commerce', 'Marchand', 'Voiturier']\n",
+    "domaines_regroupes['Droit - Jurisprudence'] = ['Chancellerie', 'Corporation', 'Douane', 'Droit', 'Eaux et Forêts', 'Finance', 'Jurisprudence', 'Palais']\n",
+    "domaines_regroupes['Economie domestique'] = ['Cuisine','Economie domestique']\n",
+    "#domaines_regroupes['Géographie'] = ['Géographie', 'Géographie Histoire naturelle', 'Géographie ancienne', 'Géographie des Arabes', 'Géographie du moyen âge',\n",
+    "#                                   'Géographie ecclésiastique', 'Géographie historique', 'Géographie maritime ancienne', 'Géographie des Romains', 'Géographie morderne',\n",
+    "#                                   'Géographie naturelle', 'Géographie physique', 'Géographie sacrée', 'Géographie sainte', 'Géographie transcendante', 'Géographie transcendantee']\n",
+    "domaines_regroupes['Géographie'] = ['Géographie', 'Topographie']\n",
+    "domaines_regroupes['Grammaire'] = ['Grammaire', 'Langues', 'Synonymes']\n",
+    "domaines_regroupes['Histoire'] = ['Calendrier','Chevalerie','Chronologie','Coutumes','Généalogie','Histoire','Inscriptions','Inventions', 'Voyage']\n",
+    "domaines_regroupes['Histoire naturelle'] = ['Botanique','Conchyliologie','Fossiles','Histoire naturelle', 'Ichtyologie','Insectologie','Ophiologie','Ornithologie','Zoologie']\n",
+    "domaines_regroupes['Jeu'] = ['Jeu']\n",
+    "domaines_regroupes['Maréchage - Manège'] = ['Maréchage', 'Manège']\n",
+    "domaines_regroupes['Marine'] = ['Galère','Marine', 'Navigation', 'Rivière']\n",
+    "domaines_regroupes['Mathématiques'] = ['Algèbre','Analyse des hasards', 'Arithmétique', 'Arpentage','Géométrie', 'Mathématiques', 'Trigonométrie']\n",
+    "domaines_regroupes['Médailles'] = ['Médailles','Numismatique']\n",
+    "domaines_regroupes['Médecine - Chirurgie'] = ['Chirurgie', 'Diète', 'Gymnastique', 'Maladie', 'Matière médicale', 'Médecine', 'Pathologie', 'Physiologie', 'Séméiotique', 'Thérapeutique']\n",
+    "domaines_regroupes['Mesure'] = ['Balancier', 'Jaugeage', 'Mesure', 'Poids']\n",
+    "domaines_regroupes[\"Métiers\"] = ['Boucherie', 'Boulangerie', 'Brasserie', 'Charcuterie', 'Confiserie', 'Distillation', 'Epicerie', 'Pâtisserie', 'Rôtisserie', 'Vinaigrier']\n",
+    "domaines_regroupes[\"Métiers\"] += ['Bois', 'Boissellerie', 'Charpenterie', 'Charronnage', 'Coffretier', 'Ebénisterie', 'Formier', 'Layeterie', 'Menuiserie', 'Tonnelier', 'Vannerie']\n",
+    "domaines_regroupes[\"Métiers\"] += ['Bourrelier', 'Boyaudier', 'Cardier', 'Chamoiseur', 'Corroierie', 'Cuir', 'Gainier', 'Hongroyeur', 'Maroquinier', 'Mégisserie', 'Parcheminerie', 'Peausserie', 'Pelleterie', 'Sellier', 'Tannerie']\n",
+    "domaines_regroupes[\"Métiers\"] += ['Aiguilletier-Epinglier', 'Ardoiserie', 'Argent', \"Batteur d'or\", 'Bijouterie', 'Bimblotier', 'Chaînetier', 'Chaudronnerie', 'Ciselure', 'Cloche', 'Cloutier', 'Coutellerie', 'Cuivre', 'Diamantaire', 'Dorure', 'Eperonnier', 'Fer']\n",
+    "domaines_regroupes[\"Métiers\"] += ['Ferblanterie', 'Fonderie', 'Forge', 'Fourbisseur', 'Glaces', 'Joaillier', 'Lapidaire', 'Lunetier', 'Marbrier', 'Maréchal-grossier', 'Métal', 'Metteur en oeuvre', 'Miroiterie', 'Or', 'Orfèvrerie']\n",
+    "domaines_regroupes[\"Métiers\"] += ['Pierres', 'Plomberie', \"Potier d'étain\", 'Serrurerie', 'Taillanderie', \"Tireur d'or\", 'Verrerie', 'Vitrerie']\n",
+    "domaines_regroupes[\"Métiers\"] += ['Cartier', 'Cartonnier', 'Imprimerie', 'Librairie', 'Marbreur de papier', 'Papeterie', 'Reliure']\n",
+    "domaines_regroupes[\"Métiers\"] += ['Bas au métier', 'Blanchissage des toiles', 'Blondier', 'Bonneterie', 'Bottier', 'Bourserie', 'Boutonnier', 'Broderie', 'Cardeur', 'Ceinturier', 'Chapellerie', 'Cordonnerie','Coton', 'Couture', 'Découpeur', 'Dentelle', 'Draperie']\n",
+    "domaines_regroupes[\"Métiers\"] += ['Etoffe', 'Fil', 'Friseur', 'Ganterie', 'Gazier', 'Laine', 'Lingerie', 'Mode', 'Ourdissage', 'Passementerie', 'Perruquier', 'Plumasserie', 'Rubanerie', 'Soierie', 'Tailleur', 'Tapisserie', 'Teinturerie', 'Tisserand', 'Toilerie', 'Tonderie de drap']\n",
+    "domaines_regroupes[\"Métiers\"] += ['Amidonnier', 'Blanchisserie de cire', 'Chandelier', 'Cirerie', 'Corderie', 'Emailleur', 'Eventailliste', 'Faïencier', 'Filassier', 'Fleuriste', 'Horlogerie', 'Marqueterie', 'Métiers peu attestés', 'Parfumeur', 'Paumier', 'Poterie']\n",
+    "domaines_regroupes[\"Métiers\"] += ['Salpêtrerie', 'Savonnerie', 'Sel', 'Tabatière', 'Tabletier-Cornetier', 'Tourneur', 'Vergetier', 'Vernisseur']\n",
+    "domaines_regroupes['Militaire (Art) - Guerre - Arme'] = ['Armurerie', 'Artificier', 'Artillerie', 'Canon','Escrime','Fortification','Guerre','Milice','Militaire']\n",
+    "domaines_regroupes['Minéralogie'] = ['Lithologie','Métallurgie','Minéralogie']\n",
+    "domaines_regroupes['Monnaie'] = ['Monnaie']\n",
+    "domaines_regroupes['Musique'] = ['Danse', 'Lutherie','Musique','Orgue', 'Voix']\n",
+    "domaines_regroupes['Pêche'] = ['Pêche']\n",
+    "domaines_regroupes['Pharmacie'] = ['Drogues', 'Pharmacie']\n",
+    "domaines_regroupes['Philosophie'] = ['Education', 'Logique', 'Métaphysique', 'Morale', 'Philologie','Philosophie', 'Sciences']\n",
+    "domaines_regroupes['Physique - [Sciences physico-mathématiques]'] = ['Acoustique', 'Astrologie', 'Astronomie', 'Cosmographie-Cosmologie', 'Gnomonique', 'Hydraulique', 'Mécanique', 'Optique', 'Perspective', 'Physique', 'Science microscopique']\n",
+    "domaines_regroupes['Politique'] = ['Economie', 'Gouvernement', 'Police', 'Politique']\n",
+    "domaines_regroupes['Religion'] = ['Critique sacrée', 'Culte', 'Eglise', 'Histoire ecclésiastique', 'Idolâtrie', 'Religion', 'Théologie']\n",
+    "domaines_regroupes['Spectacle'] = ['Opéra','Spectacle', 'Théâtre']\n",
+    "domaines_regroupes['Superstition'] = ['Divination', 'Magie', 'Superstition']"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "variable-instrument",
+   "metadata": {},
+   "source": [
+    "### Récupération correspondance EDdA / ENCCRE"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "south-equation",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df_correspondances = pd.read_csv(\"/Users/lmoncla/Nextcloud-LIRIS/GEODE/GEODE - Partage consortium/Classification domaines EDdA/correspondances_ARTFL-ENCCRE.csv\") \n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "id": "protecting-incentive",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>path</th>\n",
+       "      <th>entreeid</th>\n",
+       "      <th>tome</th>\n",
+       "      <th>article</th>\n",
+       "      <th>adresse</th>\n",
+       "      <th>entree</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>T1/article5</td>\n",
+       "      <td>v1-1-0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>5</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>T1/article6</td>\n",
+       "      <td>v1-1-1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>6</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>T1/article7</td>\n",
+       "      <td>v1-1-2</td>\n",
+       "      <td>1</td>\n",
+       "      <td>7</td>\n",
+       "      <td>1</td>\n",
+       "      <td>2</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>T1/article8</td>\n",
+       "      <td>v1-1-3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>8</td>\n",
+       "      <td>1</td>\n",
+       "      <td>3</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>T1/article9</td>\n",
+       "      <td>v1-1-4</td>\n",
+       "      <td>1</td>\n",
+       "      <td>9</td>\n",
+       "      <td>1</td>\n",
+       "      <td>4</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "          path entreeid  tome  article  adresse  entree\n",
+       "0  T1/article5   v1-1-0     1        5        1       0\n",
+       "1  T1/article6   v1-1-1     1        6        1       1\n",
+       "2  T1/article7   v1-1-2     1        7        1       2\n",
+       "3  T1/article8   v1-1-3     1        8        1       3\n",
+       "4  T1/article9   v1-1-4     1        9        1       4"
+      ]
+     },
+     "execution_count": 14,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df_correspondances.head()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "continuous-feedback",
+   "metadata": {},
+   "source": [
+    "### Test récupération données ENCCRE"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "id": "spread-feature",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import urllib, json\n",
+    "from urllib.request import urlopen\n",
+    "\n",
+    "json_url = urlopen(\"http://enccre.academie-sciences.fr/icefront/api/article/v1-544-0\")\n",
+    "data = json.loads(json_url.read())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "id": "facial-syndicate",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "'géographie'"
+      ]
+     },
+     "execution_count": 16,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "data['annotations']['constit'][0]['domgen'][0]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "removed-nickel",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def get_key(val):\n",
+    "    for key, value in domaines_regroupes.items():\n",
+    "        for v in value:\n",
+    "            v = v.replace(\" \", \"\")\n",
+    "            if val == v.lower():\n",
+    "                return key\n",
+    " \n",
+    "    return None\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "id": "nuclear-murder",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Histoire naturelle\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(get_key(\"histoirenaturelle\"))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "placed-homework",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "markdown",
+   "id": "extraordinary-settlement",
+   "metadata": {},
+   "source": [
+    "### Ajout des colonnes domaines, texte, etc."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "id": "pursuant-camel",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def getDomaineEnccre(volume, numero, classEDDA):\n",
+    "    #print(volume, ' ', numero)\n",
+    "\n",
+    "    domaine = \"\"\n",
+    "    ensemble_domaine = \"\"\n",
+    "    entreeid = \"\"\n",
+    "    \n",
+    "    try :  \n",
+    "        #entreeid = df_correspondances.loc[(df_correspondances['tome']==volume) & (df_correspondances['article']==numero)]['entreeid'][0]\n",
+    "        d = df_correspondances.loc[(df_correspondances['tome']==volume) & (df_correspondances['article']==numero)].reset_index(drop=True)\n",
+    "        entreeid = d['entreeid'][0]\n",
+    "\n",
+    "        json_url = urlopen(\"http://enccre.academie-sciences.fr/icefront/api/article/\" + entreeid)\n",
+    "        data = json.loads(json_url.read())\n",
+    "        #print(data['annotations']['constit'][0]['domgen'][0])\n",
+    "        \n",
+    "        \n",
+    "        try :  \n",
+    "            \n",
+    "            # changer pour avoir tous les noms\n",
+    "            domaine = data['annotations']['constit'][0]['domgen'][0]\n",
+    "            ensemble_domaine = get_key(domaine)\n",
+    "\n",
+    "\n",
+    "\n",
+    "            \n",
+    "            '''\n",
+    "            for constit in data['annotations']['constit']:\n",
+    "                \n",
+    "                domaine = constit['domgen'][0]\n",
+    "                print(domaine)\n",
+    "\n",
+    "                for domgen in constit['domgen']:  \n",
+    "                    domaine_multi += domgen + \";\"\n",
+    "                    ensemble = get_key(domgen)\n",
+    "                    if ensemble:\n",
+    "                        ensemble_domaine_multi.append(ensemble)\n",
+    "                \n",
+    "            #print(domaine)\n",
+    "            '''\n",
+    "        except KeyError:\n",
+    "            pass\n",
+    "     \n",
+    "    except KeyError:\n",
+    "        pass\n",
+    "       \n",
+    "    try :\n",
+    "        if volume < 10:\n",
+    "            txt_file = \"/Users/lmoncla/Documents/Data/Corpus/EDDA/articles_all/all_txt/volume0\"+str(volume)+\"-\"+str(numero)+\".txt\"\n",
+    "        else :\n",
+    "            txt_file = \"/Users/lmoncla/Documents/Data/Corpus/EDDA/articles_all/all_txt/volume\"+str(volume)+\"-\"+str(numero)+\".txt\"\n",
+    "\n",
+    "        txtContent = open(txt_file, \"r\").read()\n",
+    "        \n",
+    "        classEDDA = str(classEDDA)\n",
+    "        \n",
+    "        #supprime le désignant du texte\n",
+    "        classEDDA_with_brcts = '('+ classEDDA +')'\n",
+    "        txtContentWithoutClass = txtContent.replace(classEDDA_with_brcts, \"\")\n",
+    "        txtContentWithoutClass = txtContent.replace(classEDDA, \"\")\n",
+    "        \n",
+    "        firstParagraph = txtContentWithoutClass.split('\\n \\n')[0]\n",
+    "        \n",
+    "    except FileNotFoundError:\n",
+    "        txtContent = \"\"\n",
+    "        txtContentWithoutClass = \"\"\n",
+    "        firstParagraph = \"\"\n",
+    "        \n",
+    "    #ensemble_domaine_multi = ';'.join(list(set(ensemble_domaine)))\n",
+    "    \n",
+    "    #print(entreeid, domaine, ensemble_domaine, txtContent, txtContentWithoutClass, firstParagraph)\n",
+    "    \n",
+    "    return pd.Series([entreeid, domaine, ensemble_domaine, txtContent, txtContentWithoutClass, firstParagraph])\n",
+    "        \n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 72,
+   "id": "timely-inspection",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df = df.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 133,
+   "id": "natural-spanking",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>volume</th>\n",
+       "      <th>numero</th>\n",
+       "      <th>head</th>\n",
+       "      <th>normClass</th>\n",
+       "      <th>classEDdA</th>\n",
+       "      <th>author</th>\n",
+       "      <th>id_enccre</th>\n",
+       "      <th>domaine_enccre</th>\n",
+       "      <th>ensemble_domaine_enccre</th>\n",
+       "      <th>content</th>\n",
+       "      <th>contentWithoutClass</th>\n",
+       "      <th>firstParagraph</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>Title Page</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>unsigned</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>2</td>\n",
+       "      <td>3</td>\n",
+       "      <td>4</td>\n",
+       "      <td>5</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>1</td>\n",
+       "      <td>2</td>\n",
+       "      <td>A MONSEIGNEUR LE COMTE D'ARGENSON</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>Diderot &amp; d'Alembert</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>2</td>\n",
+       "      <td>3</td>\n",
+       "      <td>4</td>\n",
+       "      <td>5</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>1</td>\n",
+       "      <td>3</td>\n",
+       "      <td>DISCOURS PRÉLIMINAIRE DES EDITEURS</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>d'Alembert</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>2</td>\n",
+       "      <td>3</td>\n",
+       "      <td>4</td>\n",
+       "      <td>5</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>1</td>\n",
+       "      <td>5</td>\n",
+       "      <td>A, a &amp; a</td>\n",
+       "      <td>Grammaire</td>\n",
+       "      <td>ordre Encyclopéd. Entend. Science de l'homme, ...</td>\n",
+       "      <td>Dumarsais5</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>2</td>\n",
+       "      <td>3</td>\n",
+       "      <td>4</td>\n",
+       "      <td>5</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>1</td>\n",
+       "      <td>6</td>\n",
+       "      <td>A</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>Dumarsais5</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>2</td>\n",
+       "      <td>3</td>\n",
+       "      <td>4</td>\n",
+       "      <td>5</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   volume  numero                                head     normClass  \\\n",
+       "0       1       1                          Title Page  unclassified   \n",
+       "1       1       2   A MONSEIGNEUR LE COMTE D'ARGENSON  unclassified   \n",
+       "2       1       3  DISCOURS PRÉLIMINAIRE DES EDITEURS  unclassified   \n",
+       "3       1       5                            A, a & a     Grammaire   \n",
+       "4       1       6                                   A  unclassified   \n",
+       "\n",
+       "                                           classEDdA                author  \\\n",
+       "0                                       unclassified              unsigned   \n",
+       "1                                       unclassified  Diderot & d'Alembert   \n",
+       "2                                       unclassified            d'Alembert   \n",
+       "3  ordre Encyclopéd. Entend. Science de l'homme, ...            Dumarsais5   \n",
+       "4                                       unclassified            Dumarsais5   \n",
+       "\n",
+       "   id_enccre  domaine_enccre  ensemble_domaine_enccre  content  \\\n",
+       "0          0               1                        2        3   \n",
+       "1          0               1                        2        3   \n",
+       "2          0               1                        2        3   \n",
+       "3          0               1                        2        3   \n",
+       "4          0               1                        2        3   \n",
+       "\n",
+       "   contentWithoutClass  firstParagraph  \n",
+       "0                    4               5  \n",
+       "1                    4               5  \n",
+       "2                    4               5  \n",
+       "3                    4               5  \n",
+       "4                    4               5  "
+      ]
+     },
+     "execution_count": 133,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df"
+   ]
+  },
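+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "hypothetical-apply-domains",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# A minimal sketch of how the ENCCRE columns displayed in the next cell could be\n",
+    "# filled in: apply getDomaineEnccre row by row. The column names are assumed from\n",
+    "# the dataframe shown below; each row triggers an HTTP request to the ENCCRE API,\n",
+    "# so this is slow on the full corpus.\n",
+    "df[['id_enccre', 'domaine_enccre', 'ensemble_domaine_enccre',\n",
+    "    'content', 'contentWithoutClass', 'firstParagraph']] = df.apply(\n",
+    "    lambda row: getDomaineEnccre(row['volume'], row['numero'], row['classEDdA']), axis=1)"
+   ]
+  },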
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "id": "christian-advice",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>volume</th>\n",
+       "      <th>numero</th>\n",
+       "      <th>head</th>\n",
+       "      <th>normClass</th>\n",
+       "      <th>classEDdA</th>\n",
+       "      <th>author</th>\n",
+       "      <th>id_enccre</th>\n",
+       "      <th>domaine_enccre</th>\n",
+       "      <th>ensemble_domaine_enccre</th>\n",
+       "      <th>content</th>\n",
+       "      <th>contentWithoutClass</th>\n",
+       "      <th>firstParagraph</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>Title Page</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>unsigned</td>\n",
+       "      <td></td>\n",
+       "      <td></td>\n",
+       "      <td></td>\n",
+       "      <td>ENCYCLOPÉDIE,\\nDICTIONNAIRE RAISONNÉ\\nDES SCIE...</td>\n",
+       "      <td>ENCYCLOPÉDIE,\\nDICTIONNAIRE RAISONNÉ\\nDES SCIE...</td>\n",
+       "      <td>ENCYCLOPÉDIE,\\nDICTIONNAIRE RAISONNÉ\\nDES SCIE...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>1</td>\n",
+       "      <td>2</td>\n",
+       "      <td>A MONSEIGNEUR LE COMTE D'ARGENSON</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>Diderot &amp; d'Alembert</td>\n",
+       "      <td></td>\n",
+       "      <td></td>\n",
+       "      <td></td>\n",
+       "      <td>A MONSEIGNEUR\\nLE COMTE D'ARGENSON,\\nMINISTRE\\...</td>\n",
+       "      <td>A MONSEIGNEUR\\nLE COMTE D'ARGENSON,\\nMINISTRE\\...</td>\n",
+       "      <td>A MONSEIGNEUR\\nLE COMTE D'ARGENSON,\\nMINISTRE\\...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>1</td>\n",
+       "      <td>3</td>\n",
+       "      <td>DISCOURS PRÉLIMINAIRE DES EDITEURS</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>d'Alembert</td>\n",
+       "      <td></td>\n",
+       "      <td></td>\n",
+       "      <td></td>\n",
+       "      <td>DISCOURS PRÉLIMINAIRE\\nDES EDITEURS.\\nL'Encycl...</td>\n",
+       "      <td>DISCOURS PRÉLIMINAIRE\\nDES EDITEURS.\\nL'Encycl...</td>\n",
+       "      <td>DISCOURS PRÉLIMINAIRE\\nDES EDITEURS.\\nL'Encycl...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>1</td>\n",
+       "      <td>5</td>\n",
+       "      <td>A, a &amp; a</td>\n",
+       "      <td>Grammaire</td>\n",
+       "      <td>ordre Encyclopéd. Entend. Science de l'homme, ...</td>\n",
+       "      <td>Dumarsais5</td>\n",
+       "      <td>v1-1-0</td>\n",
+       "      <td>grammaire</td>\n",
+       "      <td>Grammaire</td>\n",
+       "      <td>A, a &amp; a s.m. (ordre Encyclopéd.\\nEntend. Scie...</td>\n",
+       "      <td>A, a &amp; a s.m. (ordre Encyclopéd.\\nEntend. Scie...</td>\n",
+       "      <td>A, a &amp; a s.m. (ordre Encyclopéd.\\nEntend. Scie...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>1</td>\n",
+       "      <td>6</td>\n",
+       "      <td>A</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>Dumarsais5</td>\n",
+       "      <td>v1-1-1</td>\n",
+       "      <td>grammaire</td>\n",
+       "      <td>Grammaire</td>\n",
+       "      <td>A, mot, est 1. la troisieme personne du présen...</td>\n",
+       "      <td>A, mot, est 1. la troisieme personne du présen...</td>\n",
+       "      <td>A, mot, est 1. la troisieme personne du présen...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>1</td>\n",
+       "      <td>7</td>\n",
+       "      <td>A</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>Dumarsais</td>\n",
+       "      <td>v1-1-2</td>\n",
+       "      <td>grammaire</td>\n",
+       "      <td>Grammaire</td>\n",
+       "      <td>A, préposition vient du latin à, à dextris, à ...</td>\n",
+       "      <td>A, préposition vient du latin à, à dextris, à ...</td>\n",
+       "      <td>A, préposition vient du latin à, à dextris, à ...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>6</th>\n",
+       "      <td>1</td>\n",
+       "      <td>8</td>\n",
+       "      <td>A</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>Mallet</td>\n",
+       "      <td>v1-1-3</td>\n",
+       "      <td></td>\n",
+       "      <td></td>\n",
+       "      <td>A, étoit une lettre numérale parmi les Anciens...</td>\n",
+       "      <td>A, étoit une lettre numérale parmi les Anciens...</td>\n",
+       "      <td>A, étoit une lettre numérale parmi les Anciens...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>7</th>\n",
+       "      <td>1</td>\n",
+       "      <td>9</td>\n",
+       "      <td>A, lettre symbolique</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>Mallet</td>\n",
+       "      <td>v1-1-4</td>\n",
+       "      <td></td>\n",
+       "      <td></td>\n",
+       "      <td>A, lettre symbolique, étoit un hiéroglyphe che...</td>\n",
+       "      <td>A, lettre symbolique, étoit un hiéroglyphe che...</td>\n",
+       "      <td>A, lettre symbolique, étoit un hiéroglyphe che...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>8</th>\n",
+       "      <td>1</td>\n",
+       "      <td>10</td>\n",
+       "      <td>A, numismatique ou monétaire</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>Mallet</td>\n",
+       "      <td>v1-1-5</td>\n",
+       "      <td>numismatique</td>\n",
+       "      <td>Médailles</td>\n",
+       "      <td>A, numismatique ou monétaire, sur le revers de...</td>\n",
+       "      <td>A, numismatique ou monétaire, sur le revers de...</td>\n",
+       "      <td>A, numismatique ou monétaire, sur le revers de...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>9</th>\n",
+       "      <td>1</td>\n",
+       "      <td>11</td>\n",
+       "      <td>A, lapidaire</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>Mallet</td>\n",
+       "      <td>v1-1-6</td>\n",
+       "      <td>inscriptions</td>\n",
+       "      <td>Histoire</td>\n",
+       "      <td>A, lapidaire, dans les anciennes inscriptions ...</td>\n",
+       "      <td>A, lapidaire, dans les anciennes inscriptions ...</td>\n",
+       "      <td>A, lapidaire, dans les anciennes inscriptions ...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>10</th>\n",
+       "      <td>1</td>\n",
+       "      <td>12</td>\n",
+       "      <td>A, lettre de suffrage</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>Mallet</td>\n",
+       "      <td>v1-1-7</td>\n",
+       "      <td></td>\n",
+       "      <td></td>\n",
+       "      <td>A, lettre de suffrage ; les Romains se servoie...</td>\n",
+       "      <td>A, lettre de suffrage ; les Romains se servoie...</td>\n",
+       "      <td>A, lettre de suffrage ; les Romains se servoie...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>11</th>\n",
+       "      <td>1</td>\n",
+       "      <td>13</td>\n",
+       "      <td>A, signe d'absolution</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>Mallet5</td>\n",
+       "      <td>v1-1-8</td>\n",
+       "      <td></td>\n",
+       "      <td></td>\n",
+       "      <td>A, signe d'absolution, chez les Romains dans l...</td>\n",
+       "      <td>A, signe d'absolution, chez les Romains dans l...</td>\n",
+       "      <td>A, signe d'absolution, chez les Romains dans l...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>12</th>\n",
+       "      <td>1</td>\n",
+       "      <td>14</td>\n",
+       "      <td>A cognitionibus</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>Diderot</td>\n",
+       "      <td>v1-2-0</td>\n",
+       "      <td></td>\n",
+       "      <td></td>\n",
+       "      <td>* A cognitionibus. Scorpus fameux Agitateur du...</td>\n",
+       "      <td>* A cognitionibus. Scorpus fameux Agitateur du...</td>\n",
+       "      <td>* A cognitionibus. Scorpus fameux Agitateur du...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>13</th>\n",
+       "      <td>1</td>\n",
+       "      <td>15</td>\n",
+       "      <td>A curâ amicorum</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>Diderot</td>\n",
+       "      <td>v1-3-0</td>\n",
+       "      <td></td>\n",
+       "      <td></td>\n",
+       "      <td>* A curâ amicorum. On lit dans quelques inscri...</td>\n",
+       "      <td>* A curâ amicorum. On lit dans quelques inscri...</td>\n",
+       "      <td>* A curâ amicorum. On lit dans quelques inscri...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>14</th>\n",
+       "      <td>1</td>\n",
+       "      <td>16</td>\n",
+       "      <td>A</td>\n",
+       "      <td>Ecrivains modernes</td>\n",
+       "      <td>dans les Ecrivains modernes</td>\n",
+       "      <td>Mallet</td>\n",
+       "      <td>v1-4-0</td>\n",
+       "      <td>caractères</td>\n",
+       "      <td>Caractères</td>\n",
+       "      <td>A, dans les Ecrivains modernes, veut dire auss...</td>\n",
+       "      <td>A, , veut dire aussi\\nl'an, comme A. D. anno D...</td>\n",
+       "      <td>A, , veut dire aussi\\nl'an, comme A. D. anno D...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>15</th>\n",
+       "      <td>1</td>\n",
+       "      <td>17</td>\n",
+       "      <td>A</td>\n",
+       "      <td>Calendrier Julien</td>\n",
+       "      <td>dans le calendrier Julien</td>\n",
+       "      <td>Mallet</td>\n",
+       "      <td>v1-4-1</td>\n",
+       "      <td>calendrier</td>\n",
+       "      <td>Histoire</td>\n",
+       "      <td>A, dans le calendrier Julien, est aussi la pre...</td>\n",
+       "      <td>A, , est aussi la premiere\\ndes sept lettres d...</td>\n",
+       "      <td>A, , est aussi la premiere\\ndes sept lettres d...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>16</th>\n",
+       "      <td>1</td>\n",
+       "      <td>18</td>\n",
+       "      <td>A. D.</td>\n",
+       "      <td>pending</td>\n",
+       "      <td>épistolaire</td>\n",
+       "      <td>Mallet</td>\n",
+       "      <td>v1-4-2</td>\n",
+       "      <td></td>\n",
+       "      <td></td>\n",
+       "      <td>A. D. épistolaire ; ces deux caracteres dans l...</td>\n",
+       "      <td>A. D.  ; ces deux caracteres dans les\\nLettres...</td>\n",
+       "      <td>A. D.  ; ces deux caracteres dans les\\nLettres...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>17</th>\n",
+       "      <td>1</td>\n",
+       "      <td>19</td>\n",
+       "      <td>A</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>Diderot</td>\n",
+       "      <td>v1-4-3</td>\n",
+       "      <td>logique</td>\n",
+       "      <td>Philosophie</td>\n",
+       "      <td>* A désigne une proposition générale affirmati...</td>\n",
+       "      <td>* A désigne une proposition générale affirmati...</td>\n",
+       "      <td>* A désigne une proposition générale affirmati...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>18</th>\n",
+       "      <td>1</td>\n",
+       "      <td>20</td>\n",
+       "      <td>A, signe des passions</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>Diderot</td>\n",
+       "      <td>v1-4-4</td>\n",
+       "      <td></td>\n",
+       "      <td></td>\n",
+       "      <td>* A, signe des passions ; selon certains Auteu...</td>\n",
+       "      <td>* A, signe des passions ; selon certains Auteu...</td>\n",
+       "      <td>* A, signe des passions ; selon certains Auteu...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>19</th>\n",
+       "      <td>1</td>\n",
+       "      <td>21</td>\n",
+       "      <td>A</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>unsigned</td>\n",
+       "      <td>v1-4-5</td>\n",
+       "      <td></td>\n",
+       "      <td></td>\n",
+       "      <td>A, est aussi une abbréviation dont on se sert ...</td>\n",
+       "      <td>A, est aussi une abbréviation dont on se sert ...</td>\n",
+       "      <td>A, est aussi une abbréviation dont on se sert ...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>20</th>\n",
+       "      <td>1</td>\n",
+       "      <td>22</td>\n",
+       "      <td>A A A</td>\n",
+       "      <td>Chimie</td>\n",
+       "      <td>Chimistes</td>\n",
+       "      <td>Malouin5</td>\n",
+       "      <td>v1-5-0</td>\n",
+       "      <td>chimie</td>\n",
+       "      <td>Chimie</td>\n",
+       "      <td>A A A, chez les Chimistes, signifie une amalga...</td>\n",
+       "      <td>A A A, chez les , signifie une amalgame,\\nou l...</td>\n",
+       "      <td>A A A, chez les , signifie une amalgame,\\nou l...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>21</th>\n",
+       "      <td>1</td>\n",
+       "      <td>23</td>\n",
+       "      <td>A, ā, ou ā ā</td>\n",
+       "      <td>Médecine</td>\n",
+       "      <td>Medecine</td>\n",
+       "      <td>Vandenesse</td>\n",
+       "      <td>v1-6-0</td>\n",
+       "      <td>médecine</td>\n",
+       "      <td>Médecine - Chirurgie</td>\n",
+       "      <td>A, ā, ou ā ā; on se sert de cette abbréviation...</td>\n",
+       "      <td>A, ā, ou ā ā; on se sert de cette abbréviation...</td>\n",
+       "      <td>A, ā, ou ā ā; on se sert de cette abbréviation...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>22</th>\n",
+       "      <td>1</td>\n",
+       "      <td>24</td>\n",
+       "      <td>A</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>Mallet</td>\n",
+       "      <td>v1-7-0</td>\n",
+       "      <td>commerce</td>\n",
+       "      <td>Commerce</td>\n",
+       "      <td>A. Les Marchands Négocians, Banquiers, &amp; Teneu...</td>\n",
+       "      <td>A. Les Marchands Négocians, Banquiers, &amp; Teneu...</td>\n",
+       "      <td>A. Les Marchands Négocians, Banquiers, &amp; Teneu...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>23</th>\n",
+       "      <td>1</td>\n",
+       "      <td>25</td>\n",
+       "      <td>A</td>\n",
+       "      <td>pending</td>\n",
+       "      <td>caractere alphabétique</td>\n",
+       "      <td>Diderot</td>\n",
+       "      <td>v1-8-0</td>\n",
+       "      <td>ecriture</td>\n",
+       "      <td>Caractères</td>\n",
+       "      <td>* A, caractere alphabétique. Après avoir donné...</td>\n",
+       "      <td>* A, . Après avoir donné les\\ndifférentes sign...</td>\n",
+       "      <td>* A, . Après avoir donné les\\ndifférentes sign...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>24</th>\n",
+       "      <td>1</td>\n",
+       "      <td>26</td>\n",
+       "      <td>A</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>Diderot</td>\n",
+       "      <td>v1-9-0</td>\n",
+       "      <td>géographie</td>\n",
+       "      <td>Géographie</td>\n",
+       "      <td>* A, s. petite riviere de France, qui a sa sou...</td>\n",
+       "      <td>* A, s. petite riviere de France, qui a sa sou...</td>\n",
+       "      <td>* A, s. petite riviere de France, qui a sa sou...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>25</th>\n",
+       "      <td>1</td>\n",
+       "      <td>27</td>\n",
+       "      <td>AA</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>Diderot</td>\n",
+       "      <td>v1-10-0</td>\n",
+       "      <td>géographie</td>\n",
+       "      <td>Géographie</td>\n",
+       "      <td>* AA, s. f. riviere de France, qui prend sa so...</td>\n",
+       "      <td>* AA, s. f. riviere de France, qui prend sa so...</td>\n",
+       "      <td>* AA, s. f. riviere de France, qui prend sa so...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>26</th>\n",
+       "      <td>1</td>\n",
+       "      <td>28</td>\n",
+       "      <td>AABAM</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>Malouin</td>\n",
+       "      <td>v1-11-0</td>\n",
+       "      <td>alchimie</td>\n",
+       "      <td>Chimie</td>\n",
+       "      <td>AABAM, s. m. Quelques Alchimistes se sont serv...</td>\n",
+       "      <td>AABAM, s. m. Quelques Alchimistes se sont serv...</td>\n",
+       "      <td>AABAM, s. m. Quelques Alchimistes se sont serv...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>27</th>\n",
+       "      <td>1</td>\n",
+       "      <td>29</td>\n",
+       "      <td>AACH ou ACH</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>Diderot</td>\n",
+       "      <td>v1-12-0</td>\n",
+       "      <td>géographie</td>\n",
+       "      <td>Géographie</td>\n",
+       "      <td>* AACH ou ACH, s. f. petite ville d'Allemagne\\...</td>\n",
+       "      <td>* AACH ou ACH, s. f. petite ville d'Allemagne\\...</td>\n",
+       "      <td>* AACH ou ACH, s. f. petite ville d'Allemagne\\...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>28</th>\n",
+       "      <td>1</td>\n",
+       "      <td>30</td>\n",
+       "      <td>AAHUS</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>Diderot</td>\n",
+       "      <td>v1-13-0</td>\n",
+       "      <td>géographie</td>\n",
+       "      <td>Géographie</td>\n",
+       "      <td>* AAHUS, s. petite ville d'Allemagne dans le c...</td>\n",
+       "      <td>* AAHUS, s. petite ville d'Allemagne dans le c...</td>\n",
+       "      <td>* AAHUS, s. petite ville d'Allemagne dans le c...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>29</th>\n",
+       "      <td>1</td>\n",
+       "      <td>31</td>\n",
+       "      <td>AAM</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>Diderot</td>\n",
+       "      <td>v1-14-0</td>\n",
+       "      <td></td>\n",
+       "      <td></td>\n",
+       "      <td>* AAM, s. mesure des Liquides, en usage à Amst...</td>\n",
+       "      <td>* AAM, s. mesure des Liquides, en usage à Amst...</td>\n",
+       "      <td>* AAM, s. mesure des Liquides, en usage à Amst...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>30</th>\n",
+       "      <td>1</td>\n",
+       "      <td>32</td>\n",
+       "      <td>AAR</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>Diderot</td>\n",
+       "      <td>v1-15-0</td>\n",
+       "      <td>géographie</td>\n",
+       "      <td>Géographie</td>\n",
+       "      <td>* AAR, s. grande riviere qui a sa source proch...</td>\n",
+       "      <td>* AAR, s. grande riviere qui a sa source proch...</td>\n",
+       "      <td>* AAR, s. grande riviere qui a sa source proch...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>31</th>\n",
+       "      <td>1</td>\n",
+       "      <td>33</td>\n",
+       "      <td>Aar</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>Diderot</td>\n",
+       "      <td>v1-15-1</td>\n",
+       "      <td>géographie</td>\n",
+       "      <td>Géographie</td>\n",
+       "      <td>* Aar, s. riviere d'Allemagne qui a sa source ...</td>\n",
+       "      <td>* Aar, s. riviere d'Allemagne qui a sa source ...</td>\n",
+       "      <td>* Aar, s. riviere d'Allemagne qui a sa source ...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>32</th>\n",
+       "      <td>1</td>\n",
+       "      <td>34</td>\n",
+       "      <td>AA ou AAS</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>Diderot</td>\n",
+       "      <td>v1-16-0</td>\n",
+       "      <td>géographie</td>\n",
+       "      <td>Géographie</td>\n",
+       "      <td>* AA ou AAS, s. ou Fontaine des Arquebusades. ...</td>\n",
+       "      <td>* AA ou AAS, s. ou Fontaine des Arquebusades. ...</td>\n",
+       "      <td>* AA ou AAS, s. ou Fontaine des Arquebusades. ...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>33</th>\n",
+       "      <td>1</td>\n",
+       "      <td>35</td>\n",
+       "      <td>AAS ou AASA</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>Diderot</td>\n",
+       "      <td>v1-17-0</td>\n",
+       "      <td>géographie</td>\n",
+       "      <td>Géographie</td>\n",
+       "      <td>* AAS ou AASA, Fort de Norwege dans le Baillia...</td>\n",
+       "      <td>* AAS ou AASA, Fort de Norwege dans le Baillia...</td>\n",
+       "      <td>* AAS ou AASA, Fort de Norwege dans le Baillia...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>34</th>\n",
+       "      <td>1</td>\n",
+       "      <td>36</td>\n",
+       "      <td>AB</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>Mallet</td>\n",
+       "      <td>v1-18-0</td>\n",
+       "      <td></td>\n",
+       "      <td></td>\n",
+       "      <td>AB, s. m. onzieme mois de l'année civile des H...</td>\n",
+       "      <td>AB, s. m. onzieme mois de l'année civile des H...</td>\n",
+       "      <td>AB, s. m. onzieme mois de l'année civile des H...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>35</th>\n",
+       "      <td>1</td>\n",
+       "      <td>37</td>\n",
+       "      <td>AB</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>Mallet</td>\n",
+       "      <td>v1-19-0</td>\n",
+       "      <td>calendrier</td>\n",
+       "      <td>Histoire</td>\n",
+       "      <td>AB, s. m. en Langue Syriaque est le nom du der...</td>\n",
+       "      <td>AB, s. m. en Langue Syriaque est le nom du der...</td>\n",
+       "      <td>AB, s. m. en Langue Syriaque est le nom du der...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>36</th>\n",
+       "      <td>1</td>\n",
+       "      <td>38</td>\n",
+       "      <td>AB</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>Mallet</td>\n",
+       "      <td>v1-20-0</td>\n",
+       "      <td></td>\n",
+       "      <td></td>\n",
+       "      <td>AB, s.m. en hébreu signifie pere ; d'où les Ch...</td>\n",
+       "      <td>AB, s.m. en hébreu signifie pere ; d'où les Ch...</td>\n",
+       "      <td>AB, s.m. en hébreu signifie pere ; d'où les Ch...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>37</th>\n",
+       "      <td>1</td>\n",
+       "      <td>39</td>\n",
+       "      <td>ABA</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>Diderot</td>\n",
+       "      <td>v1-21-0</td>\n",
+       "      <td>géographie</td>\n",
+       "      <td>Géographie</td>\n",
+       "      <td>*ABA, s. ville de la Phocide, bâtie par les Ab...</td>\n",
+       "      <td>*ABA, s. ville de la Phocide, bâtie par les Ab...</td>\n",
+       "      <td>*ABA, s. ville de la Phocide, bâtie par les Ab...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>38</th>\n",
+       "      <td>1</td>\n",
+       "      <td>40</td>\n",
+       "      <td>ABACA</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>Diderot</td>\n",
+       "      <td>v1-22-0</td>\n",
+       "      <td></td>\n",
+       "      <td></td>\n",
+       "      <td>* ABACA, s. Il ne paroît pas qu'on sache bien ...</td>\n",
+       "      <td>* ABACA, s. Il ne paroît pas qu'on sache bien ...</td>\n",
+       "      <td>* ABACA, s. Il ne paroît pas qu'on sache bien ...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>39</th>\n",
+       "      <td>1</td>\n",
+       "      <td>41</td>\n",
+       "      <td>ABACH</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>Diderot</td>\n",
+       "      <td>v1-23-0</td>\n",
+       "      <td>géographie</td>\n",
+       "      <td>Géographie</td>\n",
+       "      <td>* ABACH, s. petite ville d'Allemagne dans la b...</td>\n",
+       "      <td>* ABACH, s. petite ville d'Allemagne dans la b...</td>\n",
+       "      <td>* ABACH, s. petite ville d'Allemagne dans la b...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>40</th>\n",
+       "      <td>1</td>\n",
+       "      <td>42</td>\n",
+       "      <td>ABACO</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>d'Alembert</td>\n",
+       "      <td>v1-24-0</td>\n",
+       "      <td>arithmétique</td>\n",
+       "      <td>Mathématiques</td>\n",
+       "      <td>ABACO, s. m. Quelques anciens Auteurs se serve...</td>\n",
+       "      <td>ABACO, s. m. Quelques anciens Auteurs se serve...</td>\n",
+       "      <td>ABACO, s. m. Quelques anciens Auteurs se serve...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>41</th>\n",
+       "      <td>1</td>\n",
+       "      <td>43</td>\n",
+       "      <td>ABACOA</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>Diderot</td>\n",
+       "      <td>v1-25-0</td>\n",
+       "      <td>géographie</td>\n",
+       "      <td>Géographie</td>\n",
+       "      <td>* ABACOA, s. Isle de l'Amérique septentrionale...</td>\n",
+       "      <td>* ABACOA, s. Isle de l'Amérique septentrionale...</td>\n",
+       "      <td>* ABACOA, s. Isle de l'Amérique septentrionale...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>42</th>\n",
+       "      <td>1</td>\n",
+       "      <td>44</td>\n",
+       "      <td>ABACOT</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>Diderot</td>\n",
+       "      <td>v1-26-0</td>\n",
+       "      <td></td>\n",
+       "      <td></td>\n",
+       "      <td>* ABACOT, s. m. nom de l'ancienne parure dè\\nt...</td>\n",
+       "      <td>* ABACOT, s. m. nom de l'ancienne parure dè\\nt...</td>\n",
+       "      <td>* ABACOT, s. m. nom de l'ancienne parure dè\\nt...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>43</th>\n",
+       "      <td>1</td>\n",
+       "      <td>45</td>\n",
+       "      <td>ABADA</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>Diderot</td>\n",
+       "      <td>v1-27-0</td>\n",
+       "      <td>histoirenaturelle</td>\n",
+       "      <td>Histoire naturelle</td>\n",
+       "      <td>* ABADA, s. m. c'est, dit-on, un animal qui\\ns...</td>\n",
+       "      <td>* ABADA, s. m. c'est, dit-on, un animal qui\\ns...</td>\n",
+       "      <td>* ABADA, s. m. c'est, dit-on, un animal qui\\ns...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>44</th>\n",
+       "      <td>1</td>\n",
+       "      <td>46</td>\n",
+       "      <td>ABADDON</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>Diderot</td>\n",
+       "      <td>v1-28-0</td>\n",
+       "      <td></td>\n",
+       "      <td></td>\n",
+       "      <td>* ABADDON, s. m. vient d'abad, perdre. C'est\\n...</td>\n",
+       "      <td>* ABADDON, s. m. vient d'abad, perdre. C'est\\n...</td>\n",
+       "      <td>* ABADDON, s. m. vient d'abad, perdre. C'est\\n...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>45</th>\n",
+       "      <td>1</td>\n",
+       "      <td>47</td>\n",
+       "      <td>ABADIR ou ABADDIR</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>Mallet</td>\n",
+       "      <td>v1-29-0</td>\n",
+       "      <td></td>\n",
+       "      <td></td>\n",
+       "      <td>ABADIR ou ABADDIR, s. m. mot composé \\nde deux...</td>\n",
+       "      <td>ABADIR ou ABADDIR, s. m. mot composé \\nde deux...</td>\n",
+       "      <td>ABADIR ou ABADDIR, s. m. mot composé \\nde deux...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>46</th>\n",
+       "      <td>1</td>\n",
+       "      <td>48</td>\n",
+       "      <td>ABACUZ</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>Diderot &amp; Toussaint</td>\n",
+       "      <td>v1-30-0</td>\n",
+       "      <td></td>\n",
+       "      <td></td>\n",
+       "      <td>* ABACUZ, s. m. pris adject. ce sont les biens...</td>\n",
+       "      <td>* ABACUZ, s. m. pris adject. ce sont les biens...</td>\n",
+       "      <td>* ABACUZ, s. m. pris adject. ce sont les biens...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>47</th>\n",
+       "      <td>1</td>\n",
+       "      <td>49</td>\n",
+       "      <td>ABAJOUR</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>Blondel</td>\n",
+       "      <td>v1-31-0</td>\n",
+       "      <td>architecture</td>\n",
+       "      <td>Architecture</td>\n",
+       "      <td>ABAJOUR, s. m. nom que les Architectes donnent...</td>\n",
+       "      <td>ABAJOUR, s. m. nom que les Architectes donnent...</td>\n",
+       "      <td>ABAJOUR, s. m. nom que les Architectes donnent...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>48</th>\n",
+       "      <td>1</td>\n",
+       "      <td>50</td>\n",
+       "      <td>ABAISIR</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>Malouin</td>\n",
+       "      <td>v1-32-0</td>\n",
+       "      <td>alchimie</td>\n",
+       "      <td>Chimie</td>\n",
+       "      <td>ABAISIR, s.m. Quelques Alchimistes se sont ser...</td>\n",
+       "      <td>ABAISIR, s.m. Quelques Alchimistes se sont ser...</td>\n",
+       "      <td>ABAISIR, s.m. Quelques Alchimistes se sont ser...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>49</th>\n",
+       "      <td>1</td>\n",
+       "      <td>51</td>\n",
+       "      <td>ABAISSE</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>Diderot</td>\n",
+       "      <td>v1-33-0</td>\n",
+       "      <td></td>\n",
+       "      <td></td>\n",
+       "      <td>* ABAISSE, s. f. c'est le nom que les Pâtissie...</td>\n",
+       "      <td>* ABAISSE, s. f. c'est le nom que les Pâtissie...</td>\n",
+       "      <td>* ABAISSE, s. f. c'est le nom que les Pâtissie...</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "    volume  numero                                head           normClass  \\\n",
+       "0        1       1                          Title Page        unclassified   \n",
+       "1        1       2   A MONSEIGNEUR LE COMTE D'ARGENSON        unclassified   \n",
+       "2        1       3  DISCOURS PRÉLIMINAIRE DES EDITEURS        unclassified   \n",
+       "3        1       5                            A, a & a           Grammaire   \n",
+       "4        1       6                                   A        unclassified   \n",
+       "5        1       7                                   A        unclassified   \n",
+       "6        1       8                                   A        unclassified   \n",
+       "7        1       9                A, lettre symbolique        unclassified   \n",
+       "8        1      10        A, numismatique ou monétaire        unclassified   \n",
+       "9        1      11                        A, lapidaire        unclassified   \n",
+       "10       1      12               A, lettre de suffrage        unclassified   \n",
+       "11       1      13               A, signe d'absolution        unclassified   \n",
+       "12       1      14                     A cognitionibus        unclassified   \n",
+       "13       1      15                     A curâ amicorum        unclassified   \n",
+       "14       1      16                                   A  Ecrivains modernes   \n",
+       "15       1      17                                   A   Calendrier Julien   \n",
+       "16       1      18                               A. D.             pending   \n",
+       "17       1      19                                   A        unclassified   \n",
+       "18       1      20               A, signe des passions        unclassified   \n",
+       "19       1      21                                   A        unclassified   \n",
+       "20       1      22                               A A A              Chimie   \n",
+       "21       1      23                        A, ā, ou ā ā            Médecine   \n",
+       "22       1      24                                   A        unclassified   \n",
+       "23       1      25                                   A             pending   \n",
+       "24       1      26                                   A        unclassified   \n",
+       "25       1      27                                  AA        unclassified   \n",
+       "26       1      28                               AABAM        unclassified   \n",
+       "27       1      29                         AACH ou ACH        unclassified   \n",
+       "28       1      30                               AAHUS        unclassified   \n",
+       "29       1      31                                 AAM        unclassified   \n",
+       "30       1      32                                 AAR        unclassified   \n",
+       "31       1      33                                 Aar        unclassified   \n",
+       "32       1      34                           AA ou AAS        unclassified   \n",
+       "33       1      35                         AAS ou AASA        unclassified   \n",
+       "34       1      36                                  AB        unclassified   \n",
+       "35       1      37                                  AB        unclassified   \n",
+       "36       1      38                                  AB        unclassified   \n",
+       "37       1      39                                 ABA        unclassified   \n",
+       "38       1      40                               ABACA        unclassified   \n",
+       "39       1      41                               ABACH        unclassified   \n",
+       "40       1      42                               ABACO        unclassified   \n",
+       "41       1      43                              ABACOA        unclassified   \n",
+       "42       1      44                              ABACOT        unclassified   \n",
+       "43       1      45                               ABADA        unclassified   \n",
+       "44       1      46                             ABADDON        unclassified   \n",
+       "45       1      47                   ABADIR ou ABADDIR        unclassified   \n",
+       "46       1      48                              ABACUZ        unclassified   \n",
+       "47       1      49                             ABAJOUR        unclassified   \n",
+       "48       1      50                             ABAISIR        unclassified   \n",
+       "49       1      51                             ABAISSE        unclassified   \n",
+       "\n",
+       "                                            classEDdA                author  \\\n",
+       "0                                        unclassified              unsigned   \n",
+       "1                                        unclassified  Diderot & d'Alembert   \n",
+       "2                                        unclassified            d'Alembert   \n",
+       "3   ordre Encyclopéd. Entend. Science de l'homme, ...            Dumarsais5   \n",
+       "4                                        unclassified            Dumarsais5   \n",
+       "5                                        unclassified             Dumarsais   \n",
+       "6                                        unclassified                Mallet   \n",
+       "7                                        unclassified                Mallet   \n",
+       "8                                        unclassified                Mallet   \n",
+       "9                                        unclassified                Mallet   \n",
+       "10                                       unclassified                Mallet   \n",
+       "11                                       unclassified               Mallet5   \n",
+       "12                                       unclassified               Diderot   \n",
+       "13                                       unclassified               Diderot   \n",
+       "14                        dans les Ecrivains modernes                Mallet   \n",
+       "15                          dans le calendrier Julien                Mallet   \n",
+       "16                                        épistolaire                Mallet   \n",
+       "17                                       unclassified               Diderot   \n",
+       "18                                       unclassified               Diderot   \n",
+       "19                                       unclassified              unsigned   \n",
+       "20                                          Chimistes              Malouin5   \n",
+       "21                                           Medecine            Vandenesse   \n",
+       "22                                       unclassified                Mallet   \n",
+       "23                             caractere alphabétique               Diderot   \n",
+       "24                                       unclassified               Diderot   \n",
+       "25                                       unclassified               Diderot   \n",
+       "26                                       unclassified               Malouin   \n",
+       "27                                       unclassified               Diderot   \n",
+       "28                                       unclassified               Diderot   \n",
+       "29                                       unclassified               Diderot   \n",
+       "30                                       unclassified               Diderot   \n",
+       "31                                       unclassified               Diderot   \n",
+       "32                                       unclassified               Diderot   \n",
+       "33                                       unclassified               Diderot   \n",
+       "34                                       unclassified                Mallet   \n",
+       "35                                       unclassified                Mallet   \n",
+       "36                                       unclassified                Mallet   \n",
+       "37                                       unclassified               Diderot   \n",
+       "38                                       unclassified               Diderot   \n",
+       "39                                       unclassified               Diderot   \n",
+       "40                                       unclassified            d'Alembert   \n",
+       "41                                       unclassified               Diderot   \n",
+       "42                                       unclassified               Diderot   \n",
+       "43                                       unclassified               Diderot   \n",
+       "44                                       unclassified               Diderot   \n",
+       "45                                       unclassified                Mallet   \n",
+       "46                                       unclassified   Diderot & Toussaint   \n",
+       "47                                       unclassified               Blondel   \n",
+       "48                                       unclassified               Malouin   \n",
+       "49                                       unclassified               Diderot   \n",
+       "\n",
+       "   id_enccre     domaine_enccre ensemble_domaine_enccre  \\\n",
+       "0                                                         \n",
+       "1                                                         \n",
+       "2                                                         \n",
+       "3     v1-1-0          grammaire               Grammaire   \n",
+       "4     v1-1-1          grammaire               Grammaire   \n",
+       "5     v1-1-2          grammaire               Grammaire   \n",
+       "6     v1-1-3                                              \n",
+       "7     v1-1-4                                              \n",
+       "8     v1-1-5       numismatique               Médailles   \n",
+       "9     v1-1-6       inscriptions                Histoire   \n",
+       "10    v1-1-7                                              \n",
+       "11    v1-1-8                                              \n",
+       "12    v1-2-0                                              \n",
+       "13    v1-3-0                                              \n",
+       "14    v1-4-0         caractères              Caractères   \n",
+       "15    v1-4-1         calendrier                Histoire   \n",
+       "16    v1-4-2                                              \n",
+       "17    v1-4-3            logique             Philosophie   \n",
+       "18    v1-4-4                                              \n",
+       "19    v1-4-5                                              \n",
+       "20    v1-5-0             chimie                  Chimie   \n",
+       "21    v1-6-0           médecine    Médecine - Chirurgie   \n",
+       "22    v1-7-0           commerce                Commerce   \n",
+       "23    v1-8-0           ecriture              Caractères   \n",
+       "24    v1-9-0         géographie              Géographie   \n",
+       "25   v1-10-0         géographie              Géographie   \n",
+       "26   v1-11-0           alchimie                  Chimie   \n",
+       "27   v1-12-0         géographie              Géographie   \n",
+       "28   v1-13-0         géographie              Géographie   \n",
+       "29   v1-14-0                                              \n",
+       "30   v1-15-0         géographie              Géographie   \n",
+       "31   v1-15-1         géographie              Géographie   \n",
+       "32   v1-16-0         géographie              Géographie   \n",
+       "33   v1-17-0         géographie              Géographie   \n",
+       "34   v1-18-0                                              \n",
+       "35   v1-19-0         calendrier                Histoire   \n",
+       "36   v1-20-0                                              \n",
+       "37   v1-21-0         géographie              Géographie   \n",
+       "38   v1-22-0                                              \n",
+       "39   v1-23-0         géographie              Géographie   \n",
+       "40   v1-24-0       arithmétique           Mathématiques   \n",
+       "41   v1-25-0         géographie              Géographie   \n",
+       "42   v1-26-0                                              \n",
+       "43   v1-27-0  histoirenaturelle      Histoire naturelle   \n",
+       "44   v1-28-0                                              \n",
+       "45   v1-29-0                                              \n",
+       "46   v1-30-0                                              \n",
+       "47   v1-31-0       architecture            Architecture   \n",
+       "48   v1-32-0           alchimie                  Chimie   \n",
+       "49   v1-33-0                                              \n",
+       "\n",
+       "                                              content  \\\n",
+       "0   ENCYCLOPÉDIE,\\nDICTIONNAIRE RAISONNÉ\\nDES SCIE...   \n",
+       "1   A MONSEIGNEUR\\nLE COMTE D'ARGENSON,\\nMINISTRE\\...   \n",
+       "2   DISCOURS PRÉLIMINAIRE\\nDES EDITEURS.\\nL'Encycl...   \n",
+       "3   A, a & a s.m. (ordre Encyclopéd.\\nEntend. Scie...   \n",
+       "4   A, mot, est 1. la troisieme personne du présen...   \n",
+       "5   A, préposition vient du latin à, à dextris, à ...   \n",
+       "6   A, étoit une lettre numérale parmi les Anciens...   \n",
+       "7   A, lettre symbolique, étoit un hiéroglyphe che...   \n",
+       "8   A, numismatique ou monétaire, sur le revers de...   \n",
+       "9   A, lapidaire, dans les anciennes inscriptions ...   \n",
+       "10  A, lettre de suffrage ; les Romains se servoie...   \n",
+       "11  A, signe d'absolution, chez les Romains dans l...   \n",
+       "12  * A cognitionibus. Scorpus fameux Agitateur du...   \n",
+       "13  * A curâ amicorum. On lit dans quelques inscri...   \n",
+       "14  A, dans les Ecrivains modernes, veut dire auss...   \n",
+       "15  A, dans le calendrier Julien, est aussi la pre...   \n",
+       "16  A. D. épistolaire ; ces deux caracteres dans l...   \n",
+       "17  * A désigne une proposition générale affirmati...   \n",
+       "18  * A, signe des passions ; selon certains Auteu...   \n",
+       "19  A, est aussi une abbréviation dont on se sert ...   \n",
+       "20  A A A, chez les Chimistes, signifie une amalga...   \n",
+       "21  A, ā, ou ā ā; on se sert de cette abbréviation...   \n",
+       "22  A. Les Marchands Négocians, Banquiers, & Teneu...   \n",
+       "23  * A, caractere alphabétique. Après avoir donné...   \n",
+       "24  * A, s. petite riviere de France, qui a sa sou...   \n",
+       "25  * AA, s. f. riviere de France, qui prend sa so...   \n",
+       "26  AABAM, s. m. Quelques Alchimistes se sont serv...   \n",
+       "27  * AACH ou ACH, s. f. petite ville d'Allemagne\\...   \n",
+       "28  * AAHUS, s. petite ville d'Allemagne dans le c...   \n",
+       "29  * AAM, s. mesure des Liquides, en usage à Amst...   \n",
+       "30  * AAR, s. grande riviere qui a sa source proch...   \n",
+       "31  * Aar, s. riviere d'Allemagne qui a sa source ...   \n",
+       "32  * AA ou AAS, s. ou Fontaine des Arquebusades. ...   \n",
+       "33  * AAS ou AASA, Fort de Norwege dans le Baillia...   \n",
+       "34  AB, s. m. onzieme mois de l'année civile des H...   \n",
+       "35  AB, s. m. en Langue Syriaque est le nom du der...   \n",
+       "36  AB, s.m. en hébreu signifie pere ; d'où les Ch...   \n",
+       "37  *ABA, s. ville de la Phocide, bâtie par les Ab...   \n",
+       "38  * ABACA, s. Il ne paroît pas qu'on sache bien ...   \n",
+       "39  * ABACH, s. petite ville d'Allemagne dans la b...   \n",
+       "40  ABACO, s. m. Quelques anciens Auteurs se serve...   \n",
+       "41  * ABACOA, s. Isle de l'Amérique septentrionale...   \n",
+       "42  * ABACOT, s. m. nom de l'ancienne parure dè\\nt...   \n",
+       "43  * ABADA, s. m. c'est, dit-on, un animal qui\\ns...   \n",
+       "44  * ABADDON, s. m. vient d'abad, perdre. C'est\\n...   \n",
+       "45  ABADIR ou ABADDIR, s. m. mot composé \\nde deux...   \n",
+       "46  * ABACUZ, s. m. pris adject. ce sont les biens...   \n",
+       "47  ABAJOUR, s. m. nom que les Architectes donnent...   \n",
+       "48  ABAISIR, s.m. Quelques Alchimistes se sont ser...   \n",
+       "49  * ABAISSE, s. f. c'est le nom que les Pâtissie...   \n",
+       "\n",
+       "                                  contentWithoutClass  \\\n",
+       "0   ENCYCLOPÉDIE,\\nDICTIONNAIRE RAISONNÉ\\nDES SCIE...   \n",
+       "1   A MONSEIGNEUR\\nLE COMTE D'ARGENSON,\\nMINISTRE\\...   \n",
+       "2   DISCOURS PRÉLIMINAIRE\\nDES EDITEURS.\\nL'Encycl...   \n",
+       "3   A, a & a s.m. (ordre Encyclopéd.\\nEntend. Scie...   \n",
+       "4   A, mot, est 1. la troisieme personne du présen...   \n",
+       "5   A, préposition vient du latin à, à dextris, à ...   \n",
+       "6   A, étoit une lettre numérale parmi les Anciens...   \n",
+       "7   A, lettre symbolique, étoit un hiéroglyphe che...   \n",
+       "8   A, numismatique ou monétaire, sur le revers de...   \n",
+       "9   A, lapidaire, dans les anciennes inscriptions ...   \n",
+       "10  A, lettre de suffrage ; les Romains se servoie...   \n",
+       "11  A, signe d'absolution, chez les Romains dans l...   \n",
+       "12  * A cognitionibus. Scorpus fameux Agitateur du...   \n",
+       "13  * A curâ amicorum. On lit dans quelques inscri...   \n",
+       "14  A, , veut dire aussi\\nl'an, comme A. D. anno D...   \n",
+       "15  A, , est aussi la premiere\\ndes sept lettres d...   \n",
+       "16  A. D.  ; ces deux caracteres dans les\\nLettres...   \n",
+       "17  * A désigne une proposition générale affirmati...   \n",
+       "18  * A, signe des passions ; selon certains Auteu...   \n",
+       "19  A, est aussi une abbréviation dont on se sert ...   \n",
+       "20  A A A, chez les , signifie une amalgame,\\nou l...   \n",
+       "21  A, ā, ou ā ā; on se sert de cette abbréviation...   \n",
+       "22  A. Les Marchands Négocians, Banquiers, & Teneu...   \n",
+       "23  * A, . Après avoir donné les\\ndifférentes sign...   \n",
+       "24  * A, s. petite riviere de France, qui a sa sou...   \n",
+       "25  * AA, s. f. riviere de France, qui prend sa so...   \n",
+       "26  AABAM, s. m. Quelques Alchimistes se sont serv...   \n",
+       "27  * AACH ou ACH, s. f. petite ville d'Allemagne\\...   \n",
+       "28  * AAHUS, s. petite ville d'Allemagne dans le c...   \n",
+       "29  * AAM, s. mesure des Liquides, en usage à Amst...   \n",
+       "30  * AAR, s. grande riviere qui a sa source proch...   \n",
+       "31  * Aar, s. riviere d'Allemagne qui a sa source ...   \n",
+       "32  * AA ou AAS, s. ou Fontaine des Arquebusades. ...   \n",
+       "33  * AAS ou AASA, Fort de Norwege dans le Baillia...   \n",
+       "34  AB, s. m. onzieme mois de l'année civile des H...   \n",
+       "35  AB, s. m. en Langue Syriaque est le nom du der...   \n",
+       "36  AB, s.m. en hébreu signifie pere ; d'où les Ch...   \n",
+       "37  *ABA, s. ville de la Phocide, bâtie par les Ab...   \n",
+       "38  * ABACA, s. Il ne paroît pas qu'on sache bien ...   \n",
+       "39  * ABACH, s. petite ville d'Allemagne dans la b...   \n",
+       "40  ABACO, s. m. Quelques anciens Auteurs se serve...   \n",
+       "41  * ABACOA, s. Isle de l'Amérique septentrionale...   \n",
+       "42  * ABACOT, s. m. nom de l'ancienne parure dè\\nt...   \n",
+       "43  * ABADA, s. m. c'est, dit-on, un animal qui\\ns...   \n",
+       "44  * ABADDON, s. m. vient d'abad, perdre. C'est\\n...   \n",
+       "45  ABADIR ou ABADDIR, s. m. mot composé \\nde deux...   \n",
+       "46  * ABACUZ, s. m. pris adject. ce sont les biens...   \n",
+       "47  ABAJOUR, s. m. nom que les Architectes donnent...   \n",
+       "48  ABAISIR, s.m. Quelques Alchimistes se sont ser...   \n",
+       "49  * ABAISSE, s. f. c'est le nom que les Pâtissie...   \n",
+       "\n",
+       "                                       firstParagraph  \n",
+       "0   ENCYCLOPÉDIE,\\nDICTIONNAIRE RAISONNÉ\\nDES SCIE...  \n",
+       "1   A MONSEIGNEUR\\nLE COMTE D'ARGENSON,\\nMINISTRE\\...  \n",
+       "2   DISCOURS PRÉLIMINAIRE\\nDES EDITEURS.\\nL'Encycl...  \n",
+       "3   A, a & a s.m. (ordre Encyclopéd.\\nEntend. Scie...  \n",
+       "4   A, mot, est 1. la troisieme personne du présen...  \n",
+       "5   A, préposition vient du latin à, à dextris, à ...  \n",
+       "6   A, étoit une lettre numérale parmi les Anciens...  \n",
+       "7   A, lettre symbolique, étoit un hiéroglyphe che...  \n",
+       "8   A, numismatique ou monétaire, sur le revers de...  \n",
+       "9   A, lapidaire, dans les anciennes inscriptions ...  \n",
+       "10  A, lettre de suffrage ; les Romains se servoie...  \n",
+       "11  A, signe d'absolution, chez les Romains dans l...  \n",
+       "12  * A cognitionibus. Scorpus fameux Agitateur du...  \n",
+       "13  * A curâ amicorum. On lit dans quelques inscri...  \n",
+       "14  A, , veut dire aussi\\nl'an, comme A. D. anno D...  \n",
+       "15  A, , est aussi la premiere\\ndes sept lettres d...  \n",
+       "16  A. D.  ; ces deux caracteres dans les\\nLettres...  \n",
+       "17  * A désigne une proposition générale affirmati...  \n",
+       "18  * A, signe des passions ; selon certains Auteu...  \n",
+       "19  A, est aussi une abbréviation dont on se sert ...  \n",
+       "20  A A A, chez les , signifie une amalgame,\\nou l...  \n",
+       "21  A, ā, ou ā ā; on se sert de cette abbréviation...  \n",
+       "22  A. Les Marchands Négocians, Banquiers, & Teneu...  \n",
+       "23  * A, . Après avoir donné les\\ndifférentes sign...  \n",
+       "24  * A, s. petite riviere de France, qui a sa sou...  \n",
+       "25  * AA, s. f. riviere de France, qui prend sa so...  \n",
+       "26  AABAM, s. m. Quelques Alchimistes se sont serv...  \n",
+       "27  * AACH ou ACH, s. f. petite ville d'Allemagne\\...  \n",
+       "28  * AAHUS, s. petite ville d'Allemagne dans le c...  \n",
+       "29  * AAM, s. mesure des Liquides, en usage à Amst...  \n",
+       "30  * AAR, s. grande riviere qui a sa source proch...  \n",
+       "31  * Aar, s. riviere d'Allemagne qui a sa source ...  \n",
+       "32  * AA ou AAS, s. ou Fontaine des Arquebusades. ...  \n",
+       "33  * AAS ou AASA, Fort de Norwege dans le Baillia...  \n",
+       "34  AB, s. m. onzieme mois de l'année civile des H...  \n",
+       "35  AB, s. m. en Langue Syriaque est le nom du der...  \n",
+       "36  AB, s.m. en hébreu signifie pere ; d'où les Ch...  \n",
+       "37  *ABA, s. ville de la Phocide, bâtie par les Ab...  \n",
+       "38  * ABACA, s. Il ne paroît pas qu'on sache bien ...  \n",
+       "39  * ABACH, s. petite ville d'Allemagne dans la b...  \n",
+       "40  ABACO, s. m. Quelques anciens Auteurs se serve...  \n",
+       "41  * ABACOA, s. Isle de l'Amérique septentrionale...  \n",
+       "42  * ABACOT, s. m. nom de l'ancienne parure dè\\nt...  \n",
+       "43  * ABADA, s. m. c'est, dit-on, un animal qui\\ns...  \n",
+       "44  * ABADDON, s. m. vient d'abad, perdre. C'est\\n...  \n",
+       "45  ABADIR ou ABADDIR, s. m. mot composé \\nde deux...  \n",
+       "46  * ABACUZ, s. m. pris adject. ce sont les biens...  \n",
+       "47  ABAJOUR, s. m. nom que les Architectes donnent...  \n",
+       "48  ABAISIR, s.m. Quelques Alchimistes se sont ser...  \n",
+       "49  * ABAISSE, s. f. c'est le nom que les Pâtissie...  "
+      ]
+     },
+     "execution_count": 20,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# apply getDomaineEnccre to each article and unpack the returned fields into new columns\n",
+    "df['id_enccre'], df['domaine_enccre'], df['ensemble_domaine_enccre'], df['content'], df['contentWithoutClass'], df['firstParagraph'] = df.apply(lambda row: getDomaineEnccre(row.volume, row.numero, row.classEDdA), axis=1).T.values\n",
+    "df.head(50)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "daily-office",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# number of articles not classified by ENCCRE (based on the automatic matching)\n",
+    "len(df.loc[(df['domaine_enccre']==\"\")])"
+   ]
+  },
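+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "enccre-unclassified-share-sketch",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Illustrative sketch (not part of the original notebook): the same count expressed\n",
+    "# as a percentage of all articles, assuming df is the full dataframe built above.\n",
+    "round(100 * len(df.loc[df['domaine_enccre'] == '']) / len(df), 2)"
+   ]
+  },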
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "suited-methodology",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# number of articles not classified by ARTFL\n",
+    "len(df.loc[(df['normClass']==\"unclassified\")])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "special-investigation",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# number of distinct ENCCRE classes (domains)\n",
+    "\n",
+    "classes_enccre = df.groupby(['domaine_enccre']).count()\n",
+    "classes_enccre.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 94,
+   "id": "legendary-independence",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "312"
+      ]
+     },
+     "execution_count": 94,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "len(classes_enccre)"
+   ]
+  },
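+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "enccre-domain-distribution-sketch",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Illustrative sketch (not part of the original notebook): distribution of articles\n",
+    "# over the grouped ENCCRE domains (ensemble_domaine_enccre) built by the apply above.\n",
+    "df['ensemble_domaine_enccre'].value_counts().head(10)"
+   ]
+  },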
+  {
+   "cell_type": "code",
+   "execution_count": 37,
+   "id": "lonely-efficiency",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>volume</th>\n",
+       "      <th>numero</th>\n",
+       "      <th>head</th>\n",
+       "      <th>normClass</th>\n",
+       "      <th>classEDdA</th>\n",
+       "      <th>author</th>\n",
+       "      <th>id_enccre</th>\n",
+       "      <th>domaine_enccre</th>\n",
+       "      <th>ensemble_domaine_enccre</th>\n",
+       "      <th>content</th>\n",
+       "      <th>contentWithoutClass</th>\n",
+       "      <th>firstParagraph</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>Title Page</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>unsigned</td>\n",
+       "      <td></td>\n",
+       "      <td></td>\n",
+       "      <td></td>\n",
+       "      <td>\\nM. DCC. L I.\\nAVEC APPROBATION ET PRIVILEGE ...</td>\n",
+       "      <td>\\nM. DCC. L I.\\nAVEC APPROBATION ET PRIVILEGE ...</td>\n",
+       "      <td>\\nM. DCC. L I.\\nAVEC APPROBATION ET PRIVILEGE ...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>1</td>\n",
+       "      <td>2</td>\n",
+       "      <td>A MONSEIGNEUR LE COMTE D'ARGENSON</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>Diderot &amp; d'Alembert</td>\n",
+       "      <td></td>\n",
+       "      <td></td>\n",
+       "      <td></td>\n",
+       "      <td>\\nDIDEROT &amp; D'ALEMBERT.\\n</td>\n",
+       "      <td>\\nDIDEROT &amp; D'ALEMBERT.\\n</td>\n",
+       "      <td>\\nDIDEROT &amp; D'ALEMBERT.\\n</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>1</td>\n",
+       "      <td>3</td>\n",
+       "      <td>DISCOURS PRÉLIMINAIRE DES EDITEURS</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>d'Alembert</td>\n",
+       "      <td></td>\n",
+       "      <td></td>\n",
+       "      <td></td>\n",
+       "      <td>\\nVoilà dans son ordre naturel, &amp; sans démembr...</td>\n",
+       "      <td>\\nVoilà dans son ordre naturel, &amp; sans démembr...</td>\n",
+       "      <td>\\nVoilà dans son ordre naturel, &amp; sans démembr...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>1</td>\n",
+       "      <td>5</td>\n",
+       "      <td>A, a &amp; a</td>\n",
+       "      <td>Grammaire</td>\n",
+       "      <td>ordre Encyclopéd. Entend. Science de l'homme, ...</td>\n",
+       "      <td>Dumarsais5</td>\n",
+       "      <td>v1-1-0</td>\n",
+       "      <td>grammaire;</td>\n",
+       "      <td>Grammaire</td>\n",
+       "      <td>\\n3. On dit de quelqu'un qui n'a rien fait, ri...</td>\n",
+       "      <td>\\n3. On dit de quelqu'un qui n'a rien fait, ri...</td>\n",
+       "      <td>\\n3. On dit de quelqu'un qui n'a rien fait, ri...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>1</td>\n",
+       "      <td>6</td>\n",
+       "      <td>A</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>Dumarsais5</td>\n",
+       "      <td>v1-1-1</td>\n",
+       "      <td>grammaire;</td>\n",
+       "      <td>Grammaire</td>\n",
+       "      <td>\\n2. A, comme mot, est aussi une préposition, ...</td>\n",
+       "      <td>\\n2. A, comme mot, est aussi une préposition, ...</td>\n",
+       "      <td>\\n2. A, comme mot, est aussi une préposition, ...</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   volume  numero                                head     normClass  \\\n",
+       "0       1       1                          Title Page  unclassified   \n",
+       "1       1       2   A MONSEIGNEUR LE COMTE D'ARGENSON  unclassified   \n",
+       "2       1       3  DISCOURS PRÉLIMINAIRE DES EDITEURS  unclassified   \n",
+       "3       1       5                            A, a & a     Grammaire   \n",
+       "4       1       6                                   A  unclassified   \n",
+       "\n",
+       "                                           classEDdA                author  \\\n",
+       "0                                       unclassified              unsigned   \n",
+       "1                                       unclassified  Diderot & d'Alembert   \n",
+       "2                                       unclassified            d'Alembert   \n",
+       "3  ordre Encyclopéd. Entend. Science de l'homme, ...            Dumarsais5   \n",
+       "4                                       unclassified            Dumarsais5   \n",
+       "\n",
+       "  id_enccre domaine_enccre ensemble_domaine_enccre  \\\n",
+       "0                                                    \n",
+       "1                                                    \n",
+       "2                                                    \n",
+       "3    v1-1-0     grammaire;               Grammaire   \n",
+       "4    v1-1-1     grammaire;               Grammaire   \n",
+       "\n",
+       "                                             content  \\\n",
+       "0  \\nM. DCC. L I.\\nAVEC APPROBATION ET PRIVILEGE ...   \n",
+       "1                          \\nDIDEROT & D'ALEMBERT.\\n   \n",
+       "2  \\nVoilà dans son ordre naturel, & sans démembr...   \n",
+       "3  \\n3. On dit de quelqu'un qui n'a rien fait, ri...   \n",
+       "4  \\n2. A, comme mot, est aussi une préposition, ...   \n",
+       "\n",
+       "                                 contentWithoutClass  \\\n",
+       "0  \\nM. DCC. L I.\\nAVEC APPROBATION ET PRIVILEGE ...   \n",
+       "1                          \\nDIDEROT & D'ALEMBERT.\\n   \n",
+       "2  \\nVoilà dans son ordre naturel, & sans démembr...   \n",
+       "3  \\n3. On dit de quelqu'un qui n'a rien fait, ri...   \n",
+       "4  \\n2. A, comme mot, est aussi une préposition, ...   \n",
+       "\n",
+       "                                      firstParagraph  \n",
+       "0  \\nM. DCC. L I.\\nAVEC APPROBATION ET PRIVILEGE ...  \n",
+       "1                          \\nDIDEROT & D'ALEMBERT.\\n  \n",
+       "2  \\nVoilà dans son ordre naturel, & sans démembr...  \n",
+       "3  \\n3. On dit de quelqu'un qui n'a rien fait, ri...  \n",
+       "4  \\n2. A, comme mot, est aussi une préposition, ...  "
+      ]
+     },
+     "execution_count": 37,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 146,
+   "id": "fourth-involvement",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# save the dataframe to a TSV file\n",
+    "df.to_csv('EDdA_dataframe_withContent.tsv', sep='\\t', index=False)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 152,
+   "id": "framed-sodium",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# reload the dataframe from the TSV file\n",
+    "df = pd.read_csv('EDdA_dataframe_withContent.tsv', sep='\\t')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "comparable-envelope",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "id": "tutorial-savannah",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "74190"
+      ]
+     },
+     "execution_count": 21,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "len(df)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 27,
+   "id": "minus-waterproof",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# drop articles with missing content or missing class information\n",
+    "df.dropna(subset=['content', 'contentWithoutClass', 'firstParagraph', 'ensemble_domaine_enccre', 'domaine_enccre', 'normClass'], inplace=True)\n"
+   ]
+  },
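+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "missing-values-check-sketch",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Illustrative sketch (not part of the original notebook): double-check that no\n",
+    "# missing values remain in the columns kept for classification after the dropna above.\n",
+    "df[['content', 'contentWithoutClass', 'firstParagraph', 'ensemble_domaine_enccre', 'domaine_enccre', 'normClass']].isna().sum()"
+   ]
+  },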
+  {
+   "cell_type": "code",
+   "execution_count": 155,
+   "id": "scenic-sugar",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "61673"
+      ]
+     },
+     "execution_count": 155,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "len(df)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 156,
+   "id": "unavailable-indiana",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>volume</th>\n",
+       "      <th>numero</th>\n",
+       "      <th>head</th>\n",
+       "      <th>normClass</th>\n",
+       "      <th>classEDdA</th>\n",
+       "      <th>author</th>\n",
+       "      <th>id_enccre</th>\n",
+       "      <th>domaine_enccre</th>\n",
+       "      <th>ensemble_domaine_enccre</th>\n",
+       "      <th>content</th>\n",
+       "      <th>contentWithoutClass</th>\n",
+       "      <th>firstParagraph</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>1</td>\n",
+       "      <td>5</td>\n",
+       "      <td>A, a &amp; a</td>\n",
+       "      <td>Grammaire</td>\n",
+       "      <td>ordre Encyclopéd. Entend. Science de l'homme, ...</td>\n",
+       "      <td>Dumarsais5</td>\n",
+       "      <td>v1-1-0</td>\n",
+       "      <td>grammaire</td>\n",
+       "      <td>Grammaire</td>\n",
+       "      <td>\\n3. On dit de quelqu'un qui n'a rien fait, ri...</td>\n",
+       "      <td>\\n3. On dit de quelqu'un qui n'a rien fait, ri...</td>\n",
+       "      <td>\\n3. On dit de quelqu'un qui n'a rien fait, ri...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>1</td>\n",
+       "      <td>6</td>\n",
+       "      <td>A</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>Dumarsais5</td>\n",
+       "      <td>v1-1-1</td>\n",
+       "      <td>grammaire</td>\n",
+       "      <td>Grammaire</td>\n",
+       "      <td>\\n2. A, comme mot, est aussi une préposition, ...</td>\n",
+       "      <td>\\n2. A, comme mot, est aussi une préposition, ...</td>\n",
+       "      <td>\\n2. A, comme mot, est aussi une préposition, ...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>1</td>\n",
+       "      <td>7</td>\n",
+       "      <td>A</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>Dumarsais</td>\n",
+       "      <td>v1-1-2</td>\n",
+       "      <td>grammaire</td>\n",
+       "      <td>Grammaire</td>\n",
+       "      <td>\\nEn terme de Grammaire, &amp; sur-tout de Grammai...</td>\n",
+       "      <td>\\nEn terme de Grammaire, &amp; sur-tout de Grammai...</td>\n",
+       "      <td>\\nEn terme de Grammaire, &amp; sur-tout de Grammai...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>8</th>\n",
+       "      <td>1</td>\n",
+       "      <td>10</td>\n",
+       "      <td>A, numismatique ou monétaire</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>Mallet</td>\n",
+       "      <td>v1-1-5</td>\n",
+       "      <td>numismatique</td>\n",
+       "      <td>Médailles</td>\n",
+       "      <td>\\nA, numismatique ou monétaire, sur le revers ...</td>\n",
+       "      <td>\\nA, numismatique ou monétaire, sur le revers ...</td>\n",
+       "      <td>\\nA, numismatique ou monétaire, sur le revers ...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>9</th>\n",
+       "      <td>1</td>\n",
+       "      <td>11</td>\n",
+       "      <td>A, lapidaire</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>Mallet</td>\n",
+       "      <td>v1-1-6</td>\n",
+       "      <td>inscriptions</td>\n",
+       "      <td>Histoire</td>\n",
+       "      <td>\\nA, lapidaire, dans les anciennes inscription...</td>\n",
+       "      <td>\\nA, lapidaire, dans les anciennes inscription...</td>\n",
+       "      <td>\\nA, lapidaire, dans les anciennes inscription...</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   volume  numero                          head     normClass  \\\n",
+       "3       1       5                      A, a & a     Grammaire   \n",
+       "4       1       6                             A  unclassified   \n",
+       "5       1       7                             A  unclassified   \n",
+       "8       1      10  A, numismatique ou monétaire  unclassified   \n",
+       "9       1      11                  A, lapidaire  unclassified   \n",
+       "\n",
+       "                                           classEDdA      author id_enccre  \\\n",
+       "3  ordre Encyclopéd. Entend. Science de l'homme, ...  Dumarsais5    v1-1-0   \n",
+       "4                                       unclassified  Dumarsais5    v1-1-1   \n",
+       "5                                       unclassified   Dumarsais    v1-1-2   \n",
+       "8                                       unclassified      Mallet    v1-1-5   \n",
+       "9                                       unclassified      Mallet    v1-1-6   \n",
+       "\n",
+       "  domaine_enccre ensemble_domaine_enccre  \\\n",
+       "3      grammaire               Grammaire   \n",
+       "4      grammaire               Grammaire   \n",
+       "5      grammaire               Grammaire   \n",
+       "8   numismatique               Médailles   \n",
+       "9   inscriptions                Histoire   \n",
+       "\n",
+       "                                             content  \\\n",
+       "3  \\n3. On dit de quelqu'un qui n'a rien fait, ri...   \n",
+       "4  \\n2. A, comme mot, est aussi une préposition, ...   \n",
+       "5  \\nEn terme de Grammaire, & sur-tout de Grammai...   \n",
+       "8  \\nA, numismatique ou monétaire, sur le revers ...   \n",
+       "9  \\nA, lapidaire, dans les anciennes inscription...   \n",
+       "\n",
+       "                                 contentWithoutClass  \\\n",
+       "3  \\n3. On dit de quelqu'un qui n'a rien fait, ri...   \n",
+       "4  \\n2. A, comme mot, est aussi une préposition, ...   \n",
+       "5  \\nEn terme de Grammaire, & sur-tout de Grammai...   \n",
+       "8  \\nA, numismatique ou monétaire, sur le revers ...   \n",
+       "9  \\nA, lapidaire, dans les anciennes inscription...   \n",
+       "\n",
+       "                                      firstParagraph  \n",
+       "3  \\n3. On dit de quelqu'un qui n'a rien fait, ri...  \n",
+       "4  \\n2. A, comme mot, est aussi une préposition, ...  \n",
+       "5  \\nEn terme de Grammaire, & sur-tout de Grammai...  \n",
+       "8  \\nA, numismatique ou monétaire, sur le revers ...  \n",
+       "9  \\nA, lapidaire, dans les anciennes inscription...  "
+      ]
+     },
+     "execution_count": 156,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 22,
+   "id": "ahead-pendant",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def addNbWord(content):\n",
+    "    return len(content.split(' '))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 23,
+   "id": "hearing-backup",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df['nb_word'] = df.apply(lambda row: addNbWord(row.content), axis=1).T.values\n"
+   ]
+  },
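+  {
+   "cell_type": "markdown",
+   "id": "word-count-distribution-note",
+   "metadata": {},
+   "source": [
+    "Optional sketch: look at the distribution of `nb_word` to see how restrictive the 15-word threshold used below is."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "word-count-distribution",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# summary statistics of article lengths (in words)\n",
+    "df['nb_word'].describe()"
+   ]
+  },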
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "suffering-athletics",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df.loc[(df['nb_word']>=15)]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 24,
+   "id": "suffering-puppy",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df_unclassified = df.loc[(df['normClass']==\"unclassified\")]\n",
+    "df_classified = df.loc[(df['normClass']!=\"unclassified\")]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 25,
+   "id": "disturbed-constitution",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "12685\n",
+      "61505\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(len(df_unclassified))\n",
+    "print(len(df_classified))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 26,
+   "id": "seasonal-suspect",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# enregistrement du dataframe dans un fichier tsv\n",
+    "df.to_csv('EDdA_dataframe_withContent.tsv',sep='\\t',index=False)  "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 182,
+   "id": "opposed-binding",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "######\n",
+    "df = pd.read_csv('EDdA_dataframe_withContent.tsv', sep='\\t')  "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 185,
+   "id": "banner-beijing",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>volume</th>\n",
+       "      <th>numero</th>\n",
+       "      <th>head</th>\n",
+       "      <th>normClass</th>\n",
+       "      <th>classEDdA</th>\n",
+       "      <th>author</th>\n",
+       "      <th>id_enccre</th>\n",
+       "      <th>domaine_enccre</th>\n",
+       "      <th>ensemble_domaine_enccre</th>\n",
+       "      <th>content</th>\n",
+       "      <th>contentWithoutClass</th>\n",
+       "      <th>firstParagraph</th>\n",
+       "      <th>nb_word</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>1</td>\n",
+       "      <td>5</td>\n",
+       "      <td>A, a &amp; a</td>\n",
+       "      <td>Grammaire</td>\n",
+       "      <td>ordre Encyclopéd. Entend. Science de l'homme, ...</td>\n",
+       "      <td>Dumarsais5</td>\n",
+       "      <td>v1-1-0</td>\n",
+       "      <td>grammaire</td>\n",
+       "      <td>Grammaire</td>\n",
+       "      <td>\\n3. On dit de quelqu'un qui n'a rien fait, ri...</td>\n",
+       "      <td>\\n3. On dit de quelqu'un qui n'a rien fait, ri...</td>\n",
+       "      <td>\\n3. On dit de quelqu'un qui n'a rien fait, ri...</td>\n",
+       "      <td>38</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>1</td>\n",
+       "      <td>6</td>\n",
+       "      <td>A</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>Dumarsais5</td>\n",
+       "      <td>v1-1-1</td>\n",
+       "      <td>grammaire</td>\n",
+       "      <td>Grammaire</td>\n",
+       "      <td>\\n2. A, comme mot, est aussi une préposition, ...</td>\n",
+       "      <td>\\n2. A, comme mot, est aussi une préposition, ...</td>\n",
+       "      <td>\\n2. A, comme mot, est aussi une préposition, ...</td>\n",
+       "      <td>18</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>1</td>\n",
+       "      <td>7</td>\n",
+       "      <td>A</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>Dumarsais</td>\n",
+       "      <td>v1-1-2</td>\n",
+       "      <td>grammaire</td>\n",
+       "      <td>Grammaire</td>\n",
+       "      <td>\\nEn terme de Grammaire, &amp; sur-tout de Grammai...</td>\n",
+       "      <td>\\nEn terme de Grammaire, &amp; sur-tout de Grammai...</td>\n",
+       "      <td>\\nEn terme de Grammaire, &amp; sur-tout de Grammai...</td>\n",
+       "      <td>24</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>1</td>\n",
+       "      <td>10</td>\n",
+       "      <td>A, numismatique ou monétaire</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>Mallet</td>\n",
+       "      <td>v1-1-5</td>\n",
+       "      <td>numismatique</td>\n",
+       "      <td>Médailles</td>\n",
+       "      <td>\\nA, numismatique ou monétaire, sur le revers ...</td>\n",
+       "      <td>\\nA, numismatique ou monétaire, sur le revers ...</td>\n",
+       "      <td>\\nA, numismatique ou monétaire, sur le revers ...</td>\n",
+       "      <td>112</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>1</td>\n",
+       "      <td>11</td>\n",
+       "      <td>A, lapidaire</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>Mallet</td>\n",
+       "      <td>v1-1-6</td>\n",
+       "      <td>inscriptions</td>\n",
+       "      <td>Histoire</td>\n",
+       "      <td>\\nA, lapidaire, dans les anciennes inscription...</td>\n",
+       "      <td>\\nA, lapidaire, dans les anciennes inscription...</td>\n",
+       "      <td>\\nA, lapidaire, dans les anciennes inscription...</td>\n",
+       "      <td>80</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   volume  numero                          head     normClass  \\\n",
+       "0       1       5                      A, a & a     Grammaire   \n",
+       "1       1       6                             A  unclassified   \n",
+       "2       1       7                             A  unclassified   \n",
+       "3       1      10  A, numismatique ou monétaire  unclassified   \n",
+       "4       1      11                  A, lapidaire  unclassified   \n",
+       "\n",
+       "                                           classEDdA      author id_enccre  \\\n",
+       "0  ordre Encyclopéd. Entend. Science de l'homme, ...  Dumarsais5    v1-1-0   \n",
+       "1                                       unclassified  Dumarsais5    v1-1-1   \n",
+       "2                                       unclassified   Dumarsais    v1-1-2   \n",
+       "3                                       unclassified      Mallet    v1-1-5   \n",
+       "4                                       unclassified      Mallet    v1-1-6   \n",
+       "\n",
+       "  domaine_enccre ensemble_domaine_enccre  \\\n",
+       "0      grammaire               Grammaire   \n",
+       "1      grammaire               Grammaire   \n",
+       "2      grammaire               Grammaire   \n",
+       "3   numismatique               Médailles   \n",
+       "4   inscriptions                Histoire   \n",
+       "\n",
+       "                                             content  \\\n",
+       "0  \\n3. On dit de quelqu'un qui n'a rien fait, ri...   \n",
+       "1  \\n2. A, comme mot, est aussi une préposition, ...   \n",
+       "2  \\nEn terme de Grammaire, & sur-tout de Grammai...   \n",
+       "3  \\nA, numismatique ou monétaire, sur le revers ...   \n",
+       "4  \\nA, lapidaire, dans les anciennes inscription...   \n",
+       "\n",
+       "                                 contentWithoutClass  \\\n",
+       "0  \\n3. On dit de quelqu'un qui n'a rien fait, ri...   \n",
+       "1  \\n2. A, comme mot, est aussi une préposition, ...   \n",
+       "2  \\nEn terme de Grammaire, & sur-tout de Grammai...   \n",
+       "3  \\nA, numismatique ou monétaire, sur le revers ...   \n",
+       "4  \\nA, lapidaire, dans les anciennes inscription...   \n",
+       "\n",
+       "                                      firstParagraph  nb_word  \n",
+       "0  \\n3. On dit de quelqu'un qui n'a rien fait, ri...       38  \n",
+       "1  \\n2. A, comme mot, est aussi une préposition, ...       18  \n",
+       "2  \\nEn terme de Grammaire, & sur-tout de Grammai...       24  \n",
+       "3  \\nA, numismatique ou monétaire, sur le revers ...      112  \n",
+       "4  \\nA, lapidaire, dans les anciennes inscription...       80  "
+      ]
+     },
+     "execution_count": 185,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 183,
+   "id": "classical-receipt",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "7837"
+      ]
+     },
+     "execution_count": 183,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "len(df.loc[(df['nb_word']<=15)])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 184,
+   "id": "featured-tennis",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "1695"
+      ]
+     },
+     "execution_count": 184,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "len(df.loc[(df['nb_word']<=15) & (df['ensemble_domaine_enccre']==\"Géographie\")])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 175,
+   "id": "expanded-tunnel",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df_old = pd.read_csv('EDdA_dataframe_withContent_old.tsv', sep='\\t')  "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 176,
+   "id": "valid-manor",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>volume</th>\n",
+       "      <th>numero</th>\n",
+       "      <th>head</th>\n",
+       "      <th>author</th>\n",
+       "      <th>normClass_artfl</th>\n",
+       "      <th>id_enccre</th>\n",
+       "      <th>domaine_enccre</th>\n",
+       "      <th>ensemble_domaine_enccre</th>\n",
+       "      <th>content</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>Title Page</td>\n",
+       "      <td>unsigned</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>\\nM. DCC. L I.\\nAVEC APPROBATION ET PRIVILEGE ...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>1</td>\n",
+       "      <td>2</td>\n",
+       "      <td>A MONSEIGNEUR LE COMTE D'ARGENSON</td>\n",
+       "      <td>Diderot &amp; d'Alembert</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>\\nDIDEROT &amp; D'ALEMBERT.\\n</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>1</td>\n",
+       "      <td>3</td>\n",
+       "      <td>DISCOURS PRÉLIMINAIRE DES EDITEURS</td>\n",
+       "      <td>d'Alembert</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>\\nVoilà dans son ordre naturel, &amp; sans démembr...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>1</td>\n",
+       "      <td>5</td>\n",
+       "      <td>A, a &amp; a</td>\n",
+       "      <td>Dumarsais5</td>\n",
+       "      <td>Grammaire</td>\n",
+       "      <td>v1-1-0</td>\n",
+       "      <td>grammaire;</td>\n",
+       "      <td>Grammaire</td>\n",
+       "      <td>\\n3. On dit de quelqu'un qui n'a rien fait, ri...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>1</td>\n",
+       "      <td>6</td>\n",
+       "      <td>A</td>\n",
+       "      <td>Dumarsais5</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>v1-1-1</td>\n",
+       "      <td>grammaire;</td>\n",
+       "      <td>Grammaire</td>\n",
+       "      <td>\\n2. A, comme mot, est aussi une préposition, ...</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   volume  numero                                head                author  \\\n",
+       "0       1       1                          Title Page              unsigned   \n",
+       "1       1       2   A MONSEIGNEUR LE COMTE D'ARGENSON  Diderot & d'Alembert   \n",
+       "2       1       3  DISCOURS PRÉLIMINAIRE DES EDITEURS            d'Alembert   \n",
+       "3       1       5                            A, a & a            Dumarsais5   \n",
+       "4       1       6                                   A            Dumarsais5   \n",
+       "\n",
+       "  normClass_artfl id_enccre domaine_enccre ensemble_domaine_enccre  \\\n",
+       "0    unclassified       NaN            NaN                     NaN   \n",
+       "1    unclassified       NaN            NaN                     NaN   \n",
+       "2    unclassified       NaN            NaN                     NaN   \n",
+       "3       Grammaire    v1-1-0     grammaire;               Grammaire   \n",
+       "4    unclassified    v1-1-1     grammaire;               Grammaire   \n",
+       "\n",
+       "                                             content  \n",
+       "0  \\nM. DCC. L I.\\nAVEC APPROBATION ET PRIVILEGE ...  \n",
+       "1                          \\nDIDEROT & D'ALEMBERT.\\n  \n",
+       "2  \\nVoilà dans son ordre naturel, & sans démembr...  \n",
+       "3  \\n3. On dit de quelqu'un qui n'a rien fait, ri...  \n",
+       "4  \\n2. A, comme mot, est aussi une préposition, ...  "
+      ]
+     },
+     "execution_count": 176,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df_old.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 179,
+   "id": "focused-bulgarian",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "3654\n"
+     ]
+    }
+   ],
+   "source": [
+    "def countDomaine(domaine):\n",
+    "    return str(domaine).count(';')\n",
+    "\n",
+    "df_old['nb_domaine'] = df_old.apply(lambda row: countDomaine(row.ensemble_domaine_enccre), axis=1).T.values\n",
+    "\n",
+    "print(len(df_old.loc[(df_old['nb_domaine']>0)]))"
+   ]
+  },
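+  {
+   "cell_type": "markdown",
+   "id": "multi-domain-preview-note",
+   "metadata": {},
+   "source": [
+    "Optional sketch, reusing the `nb_domaine` column computed above: preview a few articles whose ENCCRE classification lists several domains."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "multi-domain-preview",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# articles assigned to more than one ENCCRE domain\n",
+    "df_old.loc[df_old['nb_domaine'] > 0, ['head', 'ensemble_domaine_enccre']].head()"
+   ]
+  },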
+  {
+   "cell_type": "code",
+   "execution_count": 30,
+   "id": "endless-cathedral",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(66056, 13)"
+      ]
+     },
+     "execution_count": 30,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 28,
+   "id": "corrected-batman",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df = df.loc[(df['nb_word']>=15)]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 29,
+   "id": "documentary-prince",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(66056, 13)"
+      ]
+     },
+     "execution_count": 29,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 31,
+   "id": "opened-november",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn.model_selection import train_test_split\n",
+    "\n",
+    "\n",
+    "\n",
+    "train_x, validation_x, train_y, validation_y = train_test_split(df, df[\"ensemble_domaine_enccre\"], test_size=0.2, random_state=42, stratify = df[\"ensemble_domaine_enccre\"] )\n",
+    "\n",
+    "train, test_x, train_labels, test_y = train_test_split(train_x, train_x[\"ensemble_domaine_enccre\"], test_size=0.3, random_state=42, stratify = train_x[\"ensemble_domaine_enccre\"] )\n",
+    "\n",
+    "\n",
+    "\n"
+   ]
+  },
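+  {
+   "cell_type": "markdown",
+   "id": "split-proportions-note",
+   "metadata": {},
+   "source": [
+    "The two successive stratified splits give roughly 56% / 24% / 20% of the filtered corpus for train / test / validation (0.8 × 0.7, 0.8 × 0.3 and 0.2). A quick sketch to verify the proportions:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "split-proportions",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# check the relative sizes of the three sets\n",
+    "total = len(train) + len(test_x) + len(validation_x)\n",
+    "print(len(train) / total, len(test_x) / total, len(validation_x) / total)"
+   ]
+  },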
+  {
+   "cell_type": "code",
+   "execution_count": 32,
+   "id": "noticed-evanescence",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(36990, 13)"
+      ]
+     },
+     "execution_count": 32,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "train.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 33,
+   "id": "welcome-homework",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(13212, 13)"
+      ]
+     },
+     "execution_count": 33,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "validation_x.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 34,
+   "id": "returning-george",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(15854, 13)"
+      ]
+     },
+     "execution_count": 34,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "test_x.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 35,
+   "id": "thorough-senator",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>volume</th>\n",
+       "      <th>numero</th>\n",
+       "      <th>head</th>\n",
+       "      <th>normClass</th>\n",
+       "      <th>classEDdA</th>\n",
+       "      <th>author</th>\n",
+       "      <th>id_enccre</th>\n",
+       "      <th>domaine_enccre</th>\n",
+       "      <th>ensemble_domaine_enccre</th>\n",
+       "      <th>content</th>\n",
+       "      <th>contentWithoutClass</th>\n",
+       "      <th>firstParagraph</th>\n",
+       "      <th>nb_word</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>46001</th>\n",
+       "      <td>11</td>\n",
+       "      <td>2973</td>\n",
+       "      <td>ORNIS</td>\n",
+       "      <td>Commerce</td>\n",
+       "      <td>Comm.</td>\n",
+       "      <td>unsigned</td>\n",
+       "      <td>v11-1767-0</td>\n",
+       "      <td>commerce</td>\n",
+       "      <td>Commerce</td>\n",
+       "      <td>ORNIS, s. m. toile des Indes, (Comm.) sortes d...</td>\n",
+       "      <td>ORNIS, s. m. toile des Indes, () sortes de\\nto...</td>\n",
+       "      <td>ORNIS, s. m. toile des Indes, () sortes de\\nto...</td>\n",
+       "      <td>45</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>15442</th>\n",
+       "      <td>3</td>\n",
+       "      <td>3525</td>\n",
+       "      <td>COMPRENDRE</td>\n",
+       "      <td>Philosophie</td>\n",
+       "      <td>terme de Philosophie,</td>\n",
+       "      <td>Diderot</td>\n",
+       "      <td>v3-1722-0</td>\n",
+       "      <td></td>\n",
+       "      <td></td>\n",
+       "      <td>* COMPRENDRE, v. act. terme de Philosophie,\\nc...</td>\n",
+       "      <td>* COMPRENDRE, v. act. \\nc'est appercevoir la l...</td>\n",
+       "      <td>* COMPRENDRE, v. act. \\nc'est appercevoir la l...</td>\n",
+       "      <td>92</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2558</th>\n",
+       "      <td>1</td>\n",
+       "      <td>2560</td>\n",
+       "      <td>ANCRE</td>\n",
+       "      <td>Marine</td>\n",
+       "      <td>Marine</td>\n",
+       "      <td>d'Alembert &amp; Diderot</td>\n",
+       "      <td>v1-1865-0</td>\n",
+       "      <td>marine</td>\n",
+       "      <td>Marine</td>\n",
+       "      <td>ANCRE, s. f. (Marine.) est un instrument de fe...</td>\n",
+       "      <td>ANCRE, s. f. (.) est un instrument de fer\\nABC...</td>\n",
+       "      <td>ANCRE, s. f. (.) est un instrument de fer\\nABC...</td>\n",
+       "      <td>3327</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>70433</th>\n",
+       "      <td>16</td>\n",
+       "      <td>4241</td>\n",
+       "      <td>VAKEBARO</td>\n",
+       "      <td>Géographie moderne</td>\n",
+       "      <td>Géog. mod.</td>\n",
+       "      <td>unsigned</td>\n",
+       "      <td>v16-2587-0</td>\n",
+       "      <td>géographie</td>\n",
+       "      <td>Géographie</td>\n",
+       "      <td>VAKEBARO, (Géog. mod.) vallée du royaume\\nd'Es...</td>\n",
+       "      <td>VAKEBARO, () vallée du royaume\\nd'Espagne dans...</td>\n",
+       "      <td>VAKEBARO, () vallée du royaume\\nd'Espagne dans...</td>\n",
+       "      <td>34</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>34129</th>\n",
+       "      <td>8</td>\n",
+       "      <td>3281</td>\n",
+       "      <td>INSPECTEUR</td>\n",
+       "      <td>Histoire ancienne</td>\n",
+       "      <td>Hist. anc.</td>\n",
+       "      <td>unsigned</td>\n",
+       "      <td>v8-2533-0</td>\n",
+       "      <td>histoire</td>\n",
+       "      <td>Histoire</td>\n",
+       "      <td>INSPECTEUR, s. m. inspector ; (Hist. anc.) cel...</td>\n",
+       "      <td>INSPECTEUR, s. m. inspector ; () celui \\nà qui...</td>\n",
+       "      <td>INSPECTEUR, s. m. inspector ; () celui \\nà qui...</td>\n",
+       "      <td>102</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "       volume  numero        head           normClass              classEDdA  \\\n",
+       "46001      11    2973       ORNIS            Commerce                  Comm.   \n",
+       "15442       3    3525  COMPRENDRE         Philosophie  terme de Philosophie,   \n",
+       "2558        1    2560       ANCRE              Marine                 Marine   \n",
+       "70433      16    4241    VAKEBARO  Géographie moderne             Géog. mod.   \n",
+       "34129       8    3281  INSPECTEUR   Histoire ancienne             Hist. anc.   \n",
+       "\n",
+       "                     author   id_enccre domaine_enccre  \\\n",
+       "46001              unsigned  v11-1767-0       commerce   \n",
+       "15442               Diderot   v3-1722-0                  \n",
+       "2558   d'Alembert & Diderot   v1-1865-0         marine   \n",
+       "70433              unsigned  v16-2587-0     géographie   \n",
+       "34129              unsigned   v8-2533-0       histoire   \n",
+       "\n",
+       "      ensemble_domaine_enccre  \\\n",
+       "46001                Commerce   \n",
+       "15442                           \n",
+       "2558                   Marine   \n",
+       "70433              Géographie   \n",
+       "34129                Histoire   \n",
+       "\n",
+       "                                                 content  \\\n",
+       "46001  ORNIS, s. m. toile des Indes, (Comm.) sortes d...   \n",
+       "15442  * COMPRENDRE, v. act. terme de Philosophie,\\nc...   \n",
+       "2558   ANCRE, s. f. (Marine.) est un instrument de fe...   \n",
+       "70433  VAKEBARO, (Géog. mod.) vallée du royaume\\nd'Es...   \n",
+       "34129  INSPECTEUR, s. m. inspector ; (Hist. anc.) cel...   \n",
+       "\n",
+       "                                     contentWithoutClass  \\\n",
+       "46001  ORNIS, s. m. toile des Indes, () sortes de\\nto...   \n",
+       "15442  * COMPRENDRE, v. act. \\nc'est appercevoir la l...   \n",
+       "2558   ANCRE, s. f. (.) est un instrument de fer\\nABC...   \n",
+       "70433  VAKEBARO, () vallée du royaume\\nd'Espagne dans...   \n",
+       "34129  INSPECTEUR, s. m. inspector ; () celui \\nà qui...   \n",
+       "\n",
+       "                                          firstParagraph  nb_word  \n",
+       "46001  ORNIS, s. m. toile des Indes, () sortes de\\nto...       45  \n",
+       "15442  * COMPRENDRE, v. act. \\nc'est appercevoir la l...       92  \n",
+       "2558   ANCRE, s. f. (.) est un instrument de fer\\nABC...     3327  \n",
+       "70433  VAKEBARO, () vallée du royaume\\nd'Espagne dans...       34  \n",
+       "34129  INSPECTEUR, s. m. inspector ; () celui \\nà qui...      102  "
+      ]
+     },
+     "execution_count": 35,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "test_x.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 36,
+   "id": "hearing-moses",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "train.to_csv('training_set.tsv',sep='\\t',index=False) \n",
+    "validation_x.to_csv('validation_set.tsv',sep='\\t',index=False)  \n",
+    "test_x.to_csv('test_set.tsv',sep='\\t',index=False)  "
+   ]
+  },
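+  {
+   "cell_type": "markdown",
+   "id": "reload-check-note",
+   "metadata": {},
+   "source": [
+    "Optional sketch: reload one of the saved files to make sure the tsv round-trip preserves the expected number of rows and columns."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "reload-check",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# the shape should match train.shape above\n",
+    "pd.read_csv('training_set.tsv', sep='\\t').shape"
+   ]
+  },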
+  {
+   "cell_type": "code",
+   "execution_count": 37,
+   "id": "exterior-praise",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>ensemble_domaine_enccre</th>\n",
+       "      <th>counts</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td></td>\n",
+       "      <td>10053</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>Agriculture - Economie rustique</td>\n",
+       "      <td>1077</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>Anatomie</td>\n",
+       "      <td>1021</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>Antiquité</td>\n",
+       "      <td>1336</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>Architecture</td>\n",
+       "      <td>1357</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>Arts et métiers</td>\n",
+       "      <td>550</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>6</th>\n",
+       "      <td>Beaux-arts</td>\n",
+       "      <td>427</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>7</th>\n",
+       "      <td>Belles-lettres - Poésie</td>\n",
+       "      <td>1026</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>8</th>\n",
+       "      <td>Blason</td>\n",
+       "      <td>526</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>9</th>\n",
+       "      <td>Caractères</td>\n",
+       "      <td>113</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>10</th>\n",
+       "      <td>Chasse</td>\n",
+       "      <td>516</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>11</th>\n",
+       "      <td>Chimie</td>\n",
+       "      <td>478</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>12</th>\n",
+       "      <td>Commerce</td>\n",
+       "      <td>1823</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>13</th>\n",
+       "      <td>Droit - Jurisprudence</td>\n",
+       "      <td>6052</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>14</th>\n",
+       "      <td>Economie domestique</td>\n",
+       "      <td>131</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>15</th>\n",
+       "      <td>Grammaire</td>\n",
+       "      <td>2397</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>16</th>\n",
+       "      <td>Géographie</td>\n",
+       "      <td>11959</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>17</th>\n",
+       "      <td>Histoire</td>\n",
+       "      <td>3025</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>18</th>\n",
+       "      <td>Histoire naturelle</td>\n",
+       "      <td>4707</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>19</th>\n",
+       "      <td>Jeu</td>\n",
+       "      <td>279</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>20</th>\n",
+       "      <td>Marine</td>\n",
+       "      <td>1893</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>21</th>\n",
+       "      <td>Maréchage - Manège</td>\n",
+       "      <td>494</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>22</th>\n",
+       "      <td>Mathématiques</td>\n",
+       "      <td>681</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>23</th>\n",
+       "      <td>Mesure</td>\n",
+       "      <td>179</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>24</th>\n",
+       "      <td>Militaire (Art) - Guerre - Arme</td>\n",
+       "      <td>1265</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>25</th>\n",
+       "      <td>Minéralogie</td>\n",
+       "      <td>109</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>26</th>\n",
+       "      <td>Monnaie</td>\n",
+       "      <td>309</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>27</th>\n",
+       "      <td>Musique</td>\n",
+       "      <td>681</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>28</th>\n",
+       "      <td>Médailles</td>\n",
+       "      <td>116</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>29</th>\n",
+       "      <td>Médecine - Chirurgie</td>\n",
+       "      <td>2227</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>30</th>\n",
+       "      <td>Métiers</td>\n",
+       "      <td>5083</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>31</th>\n",
+       "      <td>Pharmacie</td>\n",
+       "      <td>311</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>32</th>\n",
+       "      <td>Philosophie</td>\n",
+       "      <td>478</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>33</th>\n",
+       "      <td>Physique - [Sciences physico-mathématiques]</td>\n",
+       "      <td>1286</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>34</th>\n",
+       "      <td>Politique</td>\n",
+       "      <td>114</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>35</th>\n",
+       "      <td>Pêche</td>\n",
+       "      <td>199</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>36</th>\n",
+       "      <td>Religion</td>\n",
+       "      <td>1623</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>37</th>\n",
+       "      <td>Spectacle</td>\n",
+       "      <td>47</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>38</th>\n",
+       "      <td>Superstition</td>\n",
+       "      <td>108</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                        ensemble_domaine_enccre  counts\n",
+       "0                                                 10053\n",
+       "1               Agriculture - Economie rustique    1077\n",
+       "2                                      Anatomie    1021\n",
+       "3                                     Antiquité    1336\n",
+       "4                                  Architecture    1357\n",
+       "5                               Arts et métiers     550\n",
+       "6                                    Beaux-arts     427\n",
+       "7                       Belles-lettres - Poésie    1026\n",
+       "8                                        Blason     526\n",
+       "9                                    Caractères     113\n",
+       "10                                       Chasse     516\n",
+       "11                                       Chimie     478\n",
+       "12                                     Commerce    1823\n",
+       "13                        Droit - Jurisprudence    6052\n",
+       "14                          Economie domestique     131\n",
+       "15                                    Grammaire    2397\n",
+       "16                                   Géographie   11959\n",
+       "17                                     Histoire    3025\n",
+       "18                           Histoire naturelle    4707\n",
+       "19                                          Jeu     279\n",
+       "20                                       Marine    1893\n",
+       "21                           Maréchage - Manège     494\n",
+       "22                                Mathématiques     681\n",
+       "23                                       Mesure     179\n",
+       "24              Militaire (Art) - Guerre - Arme    1265\n",
+       "25                                  Minéralogie     109\n",
+       "26                                      Monnaie     309\n",
+       "27                                      Musique     681\n",
+       "28                                    Médailles     116\n",
+       "29                         Médecine - Chirurgie    2227\n",
+       "30                                      Métiers    5083\n",
+       "31                                    Pharmacie     311\n",
+       "32                                  Philosophie     478\n",
+       "33  Physique - [Sciences physico-mathématiques]    1286\n",
+       "34                                    Politique     114\n",
+       "35                                        Pêche     199\n",
+       "36                                     Religion    1623\n",
+       "37                                    Spectacle      47\n",
+       "38                                 Superstition     108"
+      ]
+     },
+     "execution_count": 37,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df.groupby(['ensemble_domaine_enccre']).size().reset_index(name='counts')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 38,
+   "id": "unable-agenda",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>ensemble_domaine_enccre</th>\n",
+       "      <th>counts</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td></td>\n",
+       "      <td>5629</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>Agriculture - Economie rustique</td>\n",
+       "      <td>603</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>Anatomie</td>\n",
+       "      <td>572</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>Antiquité</td>\n",
+       "      <td>748</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>Architecture</td>\n",
+       "      <td>760</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>Arts et métiers</td>\n",
+       "      <td>308</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>6</th>\n",
+       "      <td>Beaux-arts</td>\n",
+       "      <td>239</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>7</th>\n",
+       "      <td>Belles-lettres - Poésie</td>\n",
+       "      <td>575</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>8</th>\n",
+       "      <td>Blason</td>\n",
+       "      <td>295</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>9</th>\n",
+       "      <td>Caractères</td>\n",
+       "      <td>63</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>10</th>\n",
+       "      <td>Chasse</td>\n",
+       "      <td>289</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>11</th>\n",
+       "      <td>Chimie</td>\n",
+       "      <td>267</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>12</th>\n",
+       "      <td>Commerce</td>\n",
+       "      <td>1021</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>13</th>\n",
+       "      <td>Droit - Jurisprudence</td>\n",
+       "      <td>3389</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>14</th>\n",
+       "      <td>Economie domestique</td>\n",
+       "      <td>74</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>15</th>\n",
+       "      <td>Grammaire</td>\n",
+       "      <td>1343</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>16</th>\n",
+       "      <td>Géographie</td>\n",
+       "      <td>6697</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>17</th>\n",
+       "      <td>Histoire</td>\n",
+       "      <td>1694</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>18</th>\n",
+       "      <td>Histoire naturelle</td>\n",
+       "      <td>2636</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>19</th>\n",
+       "      <td>Jeu</td>\n",
+       "      <td>156</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>20</th>\n",
+       "      <td>Marine</td>\n",
+       "      <td>1060</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>21</th>\n",
+       "      <td>Maréchage - Manège</td>\n",
+       "      <td>277</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>22</th>\n",
+       "      <td>Mathématiques</td>\n",
+       "      <td>381</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>23</th>\n",
+       "      <td>Mesure</td>\n",
+       "      <td>100</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>24</th>\n",
+       "      <td>Militaire (Art) - Guerre - Arme</td>\n",
+       "      <td>708</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>25</th>\n",
+       "      <td>Minéralogie</td>\n",
+       "      <td>61</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>26</th>\n",
+       "      <td>Monnaie</td>\n",
+       "      <td>173</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>27</th>\n",
+       "      <td>Musique</td>\n",
+       "      <td>382</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>28</th>\n",
+       "      <td>Médailles</td>\n",
+       "      <td>65</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>29</th>\n",
+       "      <td>Médecine - Chirurgie</td>\n",
+       "      <td>1247</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>30</th>\n",
+       "      <td>Métiers</td>\n",
+       "      <td>2846</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>31</th>\n",
+       "      <td>Pharmacie</td>\n",
+       "      <td>174</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>32</th>\n",
+       "      <td>Philosophie</td>\n",
+       "      <td>267</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>33</th>\n",
+       "      <td>Physique - [Sciences physico-mathématiques]</td>\n",
+       "      <td>720</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>34</th>\n",
+       "      <td>Politique</td>\n",
+       "      <td>64</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>35</th>\n",
+       "      <td>Pêche</td>\n",
+       "      <td>111</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>36</th>\n",
+       "      <td>Religion</td>\n",
+       "      <td>909</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>37</th>\n",
+       "      <td>Spectacle</td>\n",
+       "      <td>27</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>38</th>\n",
+       "      <td>Superstition</td>\n",
+       "      <td>60</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                        ensemble_domaine_enccre  counts\n",
+       "0                                                  5629\n",
+       "1               Agriculture - Economie rustique     603\n",
+       "2                                      Anatomie     572\n",
+       "3                                     Antiquité     748\n",
+       "4                                  Architecture     760\n",
+       "5                               Arts et métiers     308\n",
+       "6                                    Beaux-arts     239\n",
+       "7                       Belles-lettres - Poésie     575\n",
+       "8                                        Blason     295\n",
+       "9                                    Caractères      63\n",
+       "10                                       Chasse     289\n",
+       "11                                       Chimie     267\n",
+       "12                                     Commerce    1021\n",
+       "13                        Droit - Jurisprudence    3389\n",
+       "14                          Economie domestique      74\n",
+       "15                                    Grammaire    1343\n",
+       "16                                   Géographie    6697\n",
+       "17                                     Histoire    1694\n",
+       "18                           Histoire naturelle    2636\n",
+       "19                                          Jeu     156\n",
+       "20                                       Marine    1060\n",
+       "21                           Maréchage - Manège     277\n",
+       "22                                Mathématiques     381\n",
+       "23                                       Mesure     100\n",
+       "24              Militaire (Art) - Guerre - Arme     708\n",
+       "25                                  Minéralogie      61\n",
+       "26                                      Monnaie     173\n",
+       "27                                      Musique     382\n",
+       "28                                    Médailles      65\n",
+       "29                         Médecine - Chirurgie    1247\n",
+       "30                                      Métiers    2846\n",
+       "31                                    Pharmacie     174\n",
+       "32                                  Philosophie     267\n",
+       "33  Physique - [Sciences physico-mathématiques]     720\n",
+       "34                                    Politique      64\n",
+       "35                                        Pêche     111\n",
+       "36                                     Religion     909\n",
+       "37                                    Spectacle      27\n",
+       "38                                 Superstition      60"
+      ]
+     },
+     "execution_count": 38,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "train.groupby(['ensemble_domaine_enccre']).size().reset_index(name='counts')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 208,
+   "id": "potential-friday",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>ensemble_domaine_enccre</th>\n",
+       "      <th>counts</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>Agriculture - Economie rustique</td>\n",
+       "      <td>212</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>Anatomie</td>\n",
+       "      <td>187</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>Antiquité</td>\n",
+       "      <td>263</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>Architecture</td>\n",
+       "      <td>265</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>Arts et métiers</td>\n",
+       "      <td>108</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>Beaux-arts</td>\n",
+       "      <td>84</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>6</th>\n",
+       "      <td>Belles-lettres - Poésie</td>\n",
+       "      <td>195</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>7</th>\n",
+       "      <td>Blason</td>\n",
+       "      <td>87</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>8</th>\n",
+       "      <td>Caractères</td>\n",
+       "      <td>22</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>9</th>\n",
+       "      <td>Chasse</td>\n",
+       "      <td>102</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>10</th>\n",
+       "      <td>Chimie</td>\n",
+       "      <td>94</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>11</th>\n",
+       "      <td>Commerce</td>\n",
+       "      <td>361</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>12</th>\n",
+       "      <td>Droit - Jurisprudence</td>\n",
+       "      <td>1181</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>13</th>\n",
+       "      <td>Economie domestique</td>\n",
+       "      <td>26</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>14</th>\n",
+       "      <td>Grammaire</td>\n",
+       "      <td>466</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>15</th>\n",
+       "      <td>Géographie</td>\n",
+       "      <td>2368</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>16</th>\n",
+       "      <td>Histoire</td>\n",
+       "      <td>592</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>17</th>\n",
+       "      <td>Histoire naturelle</td>\n",
+       "      <td>931</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>18</th>\n",
+       "      <td>Jeu</td>\n",
+       "      <td>54</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>19</th>\n",
+       "      <td>Marine</td>\n",
+       "      <td>363</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>20</th>\n",
+       "      <td>Maréchage - Manège</td>\n",
+       "      <td>97</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>21</th>\n",
+       "      <td>Mathématiques</td>\n",
+       "      <td>126</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>22</th>\n",
+       "      <td>Mesure</td>\n",
+       "      <td>35</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>23</th>\n",
+       "      <td>Militaire (Art) - Guerre - Arme</td>\n",
+       "      <td>247</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>24</th>\n",
+       "      <td>Minéralogie</td>\n",
+       "      <td>21</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>25</th>\n",
+       "      <td>Monnaie</td>\n",
+       "      <td>61</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>26</th>\n",
+       "      <td>Musique</td>\n",
+       "      <td>133</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>27</th>\n",
+       "      <td>Médailles</td>\n",
+       "      <td>23</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>28</th>\n",
+       "      <td>Médecine - Chirurgie</td>\n",
+       "      <td>428</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>29</th>\n",
+       "      <td>Métiers</td>\n",
+       "      <td>1006</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>30</th>\n",
+       "      <td>Pharmacie</td>\n",
+       "      <td>59</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>31</th>\n",
+       "      <td>Philosophie</td>\n",
+       "      <td>93</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>32</th>\n",
+       "      <td>Physique - [Sciences physico-mathématiques]</td>\n",
+       "      <td>247</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>33</th>\n",
+       "      <td>Politique</td>\n",
+       "      <td>22</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>34</th>\n",
+       "      <td>Pêche</td>\n",
+       "      <td>39</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>35</th>\n",
+       "      <td>Religion</td>\n",
+       "      <td>319</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>36</th>\n",
+       "      <td>Spectacle</td>\n",
+       "      <td>9</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>37</th>\n",
+       "      <td>Superstition</td>\n",
+       "      <td>21</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                        ensemble_domaine_enccre  counts\n",
+       "0               Agriculture - Economie rustique     212\n",
+       "1                                      Anatomie     187\n",
+       "2                                     Antiquité     263\n",
+       "3                                  Architecture     265\n",
+       "4                               Arts et métiers     108\n",
+       "5                                    Beaux-arts      84\n",
+       "6                       Belles-lettres - Poésie     195\n",
+       "7                                        Blason      87\n",
+       "8                                    Caractères      22\n",
+       "9                                        Chasse     102\n",
+       "10                                       Chimie      94\n",
+       "11                                     Commerce     361\n",
+       "12                        Droit - Jurisprudence    1181\n",
+       "13                          Economie domestique      26\n",
+       "14                                    Grammaire     466\n",
+       "15                                   Géographie    2368\n",
+       "16                                     Histoire     592\n",
+       "17                           Histoire naturelle     931\n",
+       "18                                          Jeu      54\n",
+       "19                                       Marine     363\n",
+       "20                           Maréchage - Manège      97\n",
+       "21                                Mathématiques     126\n",
+       "22                                       Mesure      35\n",
+       "23              Militaire (Art) - Guerre - Arme     247\n",
+       "24                                  Minéralogie      21\n",
+       "25                                      Monnaie      61\n",
+       "26                                      Musique     133\n",
+       "27                                    Médailles      23\n",
+       "28                         Médecine - Chirurgie     428\n",
+       "29                                      Métiers    1006\n",
+       "30                                    Pharmacie      59\n",
+       "31                                  Philosophie      93\n",
+       "32  Physique - [Sciences physico-mathématiques]     247\n",
+       "33                                    Politique      22\n",
+       "34                                        Pêche      39\n",
+       "35                                     Religion     319\n",
+       "36                                    Spectacle       9\n",
+       "37                                 Superstition      21"
+      ]
+     },
+     "execution_count": 208,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "validation_x.groupby(['ensemble_domaine_enccre']).size().reset_index(name='counts')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 209,
+   "id": "fatty-pharmacy",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>ensemble_domaine_enccre</th>\n",
+       "      <th>counts</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>Agriculture - Economie rustique</td>\n",
+       "      <td>254</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>Anatomie</td>\n",
+       "      <td>224</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>Antiquité</td>\n",
+       "      <td>316</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>Architecture</td>\n",
+       "      <td>318</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>Arts et métiers</td>\n",
+       "      <td>129</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>Beaux-arts</td>\n",
+       "      <td>100</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>6</th>\n",
+       "      <td>Belles-lettres - Poésie</td>\n",
+       "      <td>235</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>7</th>\n",
+       "      <td>Blason</td>\n",
+       "      <td>105</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>8</th>\n",
+       "      <td>Caractères</td>\n",
+       "      <td>27</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>9</th>\n",
+       "      <td>Chasse</td>\n",
+       "      <td>122</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>10</th>\n",
+       "      <td>Chimie</td>\n",
+       "      <td>112</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>11</th>\n",
+       "      <td>Commerce</td>\n",
+       "      <td>433</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>12</th>\n",
+       "      <td>Droit - Jurisprudence</td>\n",
+       "      <td>1417</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>13</th>\n",
+       "      <td>Economie domestique</td>\n",
+       "      <td>31</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>14</th>\n",
+       "      <td>Grammaire</td>\n",
+       "      <td>560</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>15</th>\n",
+       "      <td>Géographie</td>\n",
+       "      <td>2842</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>16</th>\n",
+       "      <td>Histoire</td>\n",
+       "      <td>711</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>17</th>\n",
+       "      <td>Histoire naturelle</td>\n",
+       "      <td>1118</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>18</th>\n",
+       "      <td>Jeu</td>\n",
+       "      <td>65</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>19</th>\n",
+       "      <td>Marine</td>\n",
+       "      <td>435</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>20</th>\n",
+       "      <td>Maréchage - Manège</td>\n",
+       "      <td>116</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>21</th>\n",
+       "      <td>Mathématiques</td>\n",
+       "      <td>151</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>22</th>\n",
+       "      <td>Mesure</td>\n",
+       "      <td>42</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>23</th>\n",
+       "      <td>Militaire (Art) - Guerre - Arme</td>\n",
+       "      <td>296</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>24</th>\n",
+       "      <td>Minéralogie</td>\n",
+       "      <td>26</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>25</th>\n",
+       "      <td>Monnaie</td>\n",
+       "      <td>73</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>26</th>\n",
+       "      <td>Musique</td>\n",
+       "      <td>160</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>27</th>\n",
+       "      <td>Médailles</td>\n",
+       "      <td>28</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>28</th>\n",
+       "      <td>Médecine - Chirurgie</td>\n",
+       "      <td>513</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>29</th>\n",
+       "      <td>Métiers</td>\n",
+       "      <td>1207</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>30</th>\n",
+       "      <td>Pharmacie</td>\n",
+       "      <td>71</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>31</th>\n",
+       "      <td>Philosophie</td>\n",
+       "      <td>112</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>32</th>\n",
+       "      <td>Physique - [Sciences physico-mathématiques]</td>\n",
+       "      <td>296</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>33</th>\n",
+       "      <td>Politique</td>\n",
+       "      <td>26</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>34</th>\n",
+       "      <td>Pêche</td>\n",
+       "      <td>47</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>35</th>\n",
+       "      <td>Religion</td>\n",
+       "      <td>383</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>36</th>\n",
+       "      <td>Spectacle</td>\n",
+       "      <td>11</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>37</th>\n",
+       "      <td>Superstition</td>\n",
+       "      <td>25</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                        ensemble_domaine_enccre  counts\n",
+       "0               Agriculture - Economie rustique     254\n",
+       "1                                      Anatomie     224\n",
+       "2                                     Antiquité     316\n",
+       "3                                  Architecture     318\n",
+       "4                               Arts et métiers     129\n",
+       "5                                    Beaux-arts     100\n",
+       "6                       Belles-lettres - Poésie     235\n",
+       "7                                        Blason     105\n",
+       "8                                    Caractères      27\n",
+       "9                                        Chasse     122\n",
+       "10                                       Chimie     112\n",
+       "11                                     Commerce     433\n",
+       "12                        Droit - Jurisprudence    1417\n",
+       "13                          Economie domestique      31\n",
+       "14                                    Grammaire     560\n",
+       "15                                   Géographie    2842\n",
+       "16                                     Histoire     711\n",
+       "17                           Histoire naturelle    1118\n",
+       "18                                          Jeu      65\n",
+       "19                                       Marine     435\n",
+       "20                           Maréchage - Manège     116\n",
+       "21                                Mathématiques     151\n",
+       "22                                       Mesure      42\n",
+       "23              Militaire (Art) - Guerre - Arme     296\n",
+       "24                                  Minéralogie      26\n",
+       "25                                      Monnaie      73\n",
+       "26                                      Musique     160\n",
+       "27                                    Médailles      28\n",
+       "28                         Médecine - Chirurgie     513\n",
+       "29                                      Métiers    1207\n",
+       "30                                    Pharmacie      71\n",
+       "31                                  Philosophie     112\n",
+       "32  Physique - [Sciences physico-mathématiques]     296\n",
+       "33                                    Politique      26\n",
+       "34                                        Pêche      47\n",
+       "35                                     Religion     383\n",
+       "36                                    Spectacle      11\n",
+       "37                                 Superstition      25"
+      ]
+     },
+     "execution_count": 209,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "test_x.groupby(['ensemble_domaine_enccre']).size().reset_index(name='counts')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "id": "consistent-checklist",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "## ajout dune colonne avec toutes les classes enccre\n",
+    "\n",
+    "def getDomaineEnccre2(volume, numero):\n",
+    "    #print(volume, ' ', numero)\n",
+    "\n",
+    "    ensemble_domaine = \"\"\n",
+    "\n",
+    "    try :  \n",
+    "        #entreeid = df_correspondances.loc[(df_correspondances['tome']==volume) & (df_correspondances['article']==numero)]['entreeid'][0]\n",
+    "        d = df_correspondances.loc[(df_correspondances['tome']==volume) & (df_correspondances['article']==numero)].reset_index(drop=True)\n",
+    "        entreeid = d['entreeid'][0]\n",
+    "\n",
+    "        json_url = urlopen(\"http://enccre.academie-sciences.fr/icefront/api/article/\" + entreeid)\n",
+    "        data = json.loads(json_url.read())\n",
+    "        #print(data['annotations']['constit'][0]['domgen'][0])\n",
+    "        cpt = 0\n",
+    "        try :  \n",
+    "            \n",
+    "            # changer pour avoir tous les noms\n",
+    "            for dom in data['annotations']['constit'][0]['domgen']:\n",
+    "                val = get_key(dom)\n",
+    "                if val is not None:\n",
+    "                    if cpt > 0:\n",
+    "                        ensemble_domaine += '|'\n",
+    "                    ensemble_domaine += get_key(dom)\n",
+    "                    cpt += 1\n",
+    "\n",
+    "            \n",
+    "            #print(ensemble_domaine)\n",
+    "\n",
+    "        except KeyError:\n",
+    "            pass\n",
+    "     \n",
+    "    except KeyError:\n",
+    "        pass\n",
+    "       \n",
+    "    \n",
+    "        \n",
+    "    #ensemble_domaine_multi = ';'.join(list(set(ensemble_domaine)))\n",
+    "    \n",
+    "    #print(entreeid, domaine, ensemble_domaine, txtContent, txtContentWithoutClass, firstParagraph)\n",
+    "    \n",
+    "    return ensemble_domaine"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "coral-level",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>volume</th>\n",
+       "      <th>numero</th>\n",
+       "      <th>head</th>\n",
+       "      <th>normClass</th>\n",
+       "      <th>classEDdA</th>\n",
+       "      <th>author</th>\n",
+       "      <th>id_enccre</th>\n",
+       "      <th>domaine_enccre</th>\n",
+       "      <th>ensemble_domaine_enccre</th>\n",
+       "      <th>content</th>\n",
+       "      <th>contentWithoutClass</th>\n",
+       "      <th>firstParagraph</th>\n",
+       "      <th>nb_word</th>\n",
+       "      <th>classification</th>\n",
+       "      <th>class_is_true</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>1</td>\n",
+       "      <td>5</td>\n",
+       "      <td>A, a &amp; a</td>\n",
+       "      <td>Grammaire</td>\n",
+       "      <td>ordre Encyclopéd. Entend. Science de l'homme, ...</td>\n",
+       "      <td>Dumarsais5</td>\n",
+       "      <td>v1-1-0</td>\n",
+       "      <td>grammaire</td>\n",
+       "      <td>Grammaire</td>\n",
+       "      <td>A, a &amp; a s.m. (ordre Encyclopéd.\\nEntend. Scie...</td>\n",
+       "      <td>A, a &amp; a s.m. (ordre Encyclopéd.\\nEntend. Scie...</td>\n",
+       "      <td>A, a &amp; a s.m. (ordre Encyclopéd.\\nEntend. Scie...</td>\n",
+       "      <td>711</td>\n",
+       "      <td>Grammaire</td>\n",
+       "      <td>True</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>1</td>\n",
+       "      <td>6</td>\n",
+       "      <td>A</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>Dumarsais5</td>\n",
+       "      <td>v1-1-1</td>\n",
+       "      <td>grammaire</td>\n",
+       "      <td>Grammaire</td>\n",
+       "      <td>A, mot, est 1. la troisieme personne du présen...</td>\n",
+       "      <td>A, mot, est 1. la troisieme personne du présen...</td>\n",
+       "      <td>A, mot, est 1. la troisieme personne du présen...</td>\n",
+       "      <td>238</td>\n",
+       "      <td>Grammaire</td>\n",
+       "      <td>True</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>1</td>\n",
+       "      <td>7</td>\n",
+       "      <td>A</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>Dumarsais</td>\n",
+       "      <td>v1-1-2</td>\n",
+       "      <td>grammaire</td>\n",
+       "      <td>Grammaire</td>\n",
+       "      <td>A, préposition vient du latin à, à dextris, à ...</td>\n",
+       "      <td>A, préposition vient du latin à, à dextris, à ...</td>\n",
+       "      <td>A, préposition vient du latin à, à dextris, à ...</td>\n",
+       "      <td>1980</td>\n",
+       "      <td>Grammaire</td>\n",
+       "      <td>True</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>1</td>\n",
+       "      <td>10</td>\n",
+       "      <td>A, numismatique ou monétaire</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>Mallet</td>\n",
+       "      <td>v1-1-5</td>\n",
+       "      <td>numismatique</td>\n",
+       "      <td>Médailles</td>\n",
+       "      <td>A, numismatique ou monétaire, sur le revers de...</td>\n",
+       "      <td>A, numismatique ou monétaire, sur le revers de...</td>\n",
+       "      <td>A, numismatique ou monétaire, sur le revers de...</td>\n",
+       "      <td>112</td>\n",
+       "      <td>Médailles</td>\n",
+       "      <td>True</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>1</td>\n",
+       "      <td>11</td>\n",
+       "      <td>A, lapidaire</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>Mallet</td>\n",
+       "      <td>v1-1-6</td>\n",
+       "      <td>inscriptions</td>\n",
+       "      <td>Histoire</td>\n",
+       "      <td>A, lapidaire, dans les anciennes inscriptions ...</td>\n",
+       "      <td>A, lapidaire, dans les anciennes inscriptions ...</td>\n",
+       "      <td>A, lapidaire, dans les anciennes inscriptions ...</td>\n",
+       "      <td>80</td>\n",
+       "      <td>Histoire</td>\n",
+       "      <td>True</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   volume  numero                          head     normClass  \\\n",
+       "0       1       5                      A, a & a     Grammaire   \n",
+       "1       1       6                             A  unclassified   \n",
+       "2       1       7                             A  unclassified   \n",
+       "3       1      10  A, numismatique ou monétaire  unclassified   \n",
+       "4       1      11                  A, lapidaire  unclassified   \n",
+       "\n",
+       "                                           classEDdA      author id_enccre  \\\n",
+       "0  ordre Encyclopéd. Entend. Science de l'homme, ...  Dumarsais5    v1-1-0   \n",
+       "1                                       unclassified  Dumarsais5    v1-1-1   \n",
+       "2                                       unclassified   Dumarsais    v1-1-2   \n",
+       "3                                       unclassified      Mallet    v1-1-5   \n",
+       "4                                       unclassified      Mallet    v1-1-6   \n",
+       "\n",
+       "  domaine_enccre ensemble_domaine_enccre  \\\n",
+       "0      grammaire               Grammaire   \n",
+       "1      grammaire               Grammaire   \n",
+       "2      grammaire               Grammaire   \n",
+       "3   numismatique               Médailles   \n",
+       "4   inscriptions                Histoire   \n",
+       "\n",
+       "                                             content  \\\n",
+       "0  A, a & a s.m. (ordre Encyclopéd.\\nEntend. Scie...   \n",
+       "1  A, mot, est 1. la troisieme personne du présen...   \n",
+       "2  A, préposition vient du latin à, à dextris, à ...   \n",
+       "3  A, numismatique ou monétaire, sur le revers de...   \n",
+       "4  A, lapidaire, dans les anciennes inscriptions ...   \n",
+       "\n",
+       "                                 contentWithoutClass  \\\n",
+       "0  A, a & a s.m. (ordre Encyclopéd.\\nEntend. Scie...   \n",
+       "1  A, mot, est 1. la troisieme personne du présen...   \n",
+       "2  A, préposition vient du latin à, à dextris, à ...   \n",
+       "3  A, numismatique ou monétaire, sur le revers de...   \n",
+       "4  A, lapidaire, dans les anciennes inscriptions ...   \n",
+       "\n",
+       "                                      firstParagraph  nb_word classification  \\\n",
+       "0  A, a & a s.m. (ordre Encyclopéd.\\nEntend. Scie...      711      Grammaire   \n",
+       "1  A, mot, est 1. la troisieme personne du présen...      238      Grammaire   \n",
+       "2  A, préposition vient du latin à, à dextris, à ...     1980      Grammaire   \n",
+       "3  A, numismatique ou monétaire, sur le revers de...      112      Médailles   \n",
+       "4  A, lapidaire, dans les anciennes inscriptions ...       80       Histoire   \n",
+       "\n",
+       "   class_is_true  \n",
+       "0           True  \n",
+       "1           True  \n",
+       "2           True  \n",
+       "3           True  \n",
+       "4           True  "
+      ]
+     },
+     "execution_count": 9,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "import pandas as pd\n",
+    "filepath = '/Users/lmoncla/Nextcloud-LIRIS/GEODE/GEODE - Partage consortium/Classification domaines EDdA/results_classification/result_classification_sgdtfidf_21.11.24.csv'\n",
+    "df = pd.read_csv(filepath)\n",
+    "\n",
+    "df.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "id": "enormous-longer",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    " df['ensembles_domaine_enccre'] = df.apply(lambda row: getDomaineEnccre2(row.volume, row.numero), axis=1)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 22,
+   "id": "incorporated-commons",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>volume</th>\n",
+       "      <th>numero</th>\n",
+       "      <th>head</th>\n",
+       "      <th>normClass</th>\n",
+       "      <th>classEDdA</th>\n",
+       "      <th>author</th>\n",
+       "      <th>id_enccre</th>\n",
+       "      <th>domaine_enccre</th>\n",
+       "      <th>ensemble_domaine_enccre</th>\n",
+       "      <th>content</th>\n",
+       "      <th>contentWithoutClass</th>\n",
+       "      <th>firstParagraph</th>\n",
+       "      <th>nb_word</th>\n",
+       "      <th>classification</th>\n",
+       "      <th>class_is_true</th>\n",
+       "      <th>ensembles_domaine_enccre</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>1</td>\n",
+       "      <td>5</td>\n",
+       "      <td>A, a &amp; a</td>\n",
+       "      <td>Grammaire</td>\n",
+       "      <td>ordre Encyclopéd. Entend. Science de l'homme, ...</td>\n",
+       "      <td>Dumarsais5</td>\n",
+       "      <td>v1-1-0</td>\n",
+       "      <td>grammaire</td>\n",
+       "      <td>Grammaire</td>\n",
+       "      <td>A, a &amp; a s.m. (ordre Encyclopéd.\\nEntend. Scie...</td>\n",
+       "      <td>A, a &amp; a s.m. (ordre Encyclopéd.\\nEntend. Scie...</td>\n",
+       "      <td>A, a &amp; a s.m. (ordre Encyclopéd.\\nEntend. Scie...</td>\n",
+       "      <td>711</td>\n",
+       "      <td>Grammaire</td>\n",
+       "      <td>True</td>\n",
+       "      <td>Grammaire</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>1</td>\n",
+       "      <td>6</td>\n",
+       "      <td>A</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>Dumarsais5</td>\n",
+       "      <td>v1-1-1</td>\n",
+       "      <td>grammaire</td>\n",
+       "      <td>Grammaire</td>\n",
+       "      <td>A, mot, est 1. la troisieme personne du présen...</td>\n",
+       "      <td>A, mot, est 1. la troisieme personne du présen...</td>\n",
+       "      <td>A, mot, est 1. la troisieme personne du présen...</td>\n",
+       "      <td>238</td>\n",
+       "      <td>Grammaire</td>\n",
+       "      <td>True</td>\n",
+       "      <td>Grammaire</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>1</td>\n",
+       "      <td>7</td>\n",
+       "      <td>A</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>Dumarsais</td>\n",
+       "      <td>v1-1-2</td>\n",
+       "      <td>grammaire</td>\n",
+       "      <td>Grammaire</td>\n",
+       "      <td>A, préposition vient du latin à, à dextris, à ...</td>\n",
+       "      <td>A, préposition vient du latin à, à dextris, à ...</td>\n",
+       "      <td>A, préposition vient du latin à, à dextris, à ...</td>\n",
+       "      <td>1980</td>\n",
+       "      <td>Grammaire</td>\n",
+       "      <td>True</td>\n",
+       "      <td>Grammaire</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>1</td>\n",
+       "      <td>10</td>\n",
+       "      <td>A, numismatique ou monétaire</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>Mallet</td>\n",
+       "      <td>v1-1-5</td>\n",
+       "      <td>numismatique</td>\n",
+       "      <td>Médailles</td>\n",
+       "      <td>A, numismatique ou monétaire, sur le revers de...</td>\n",
+       "      <td>A, numismatique ou monétaire, sur le revers de...</td>\n",
+       "      <td>A, numismatique ou monétaire, sur le revers de...</td>\n",
+       "      <td>112</td>\n",
+       "      <td>Médailles</td>\n",
+       "      <td>True</td>\n",
+       "      <td>Médailles|Monnaie</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>1</td>\n",
+       "      <td>11</td>\n",
+       "      <td>A, lapidaire</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>Mallet</td>\n",
+       "      <td>v1-1-6</td>\n",
+       "      <td>inscriptions</td>\n",
+       "      <td>Histoire</td>\n",
+       "      <td>A, lapidaire, dans les anciennes inscriptions ...</td>\n",
+       "      <td>A, lapidaire, dans les anciennes inscriptions ...</td>\n",
+       "      <td>A, lapidaire, dans les anciennes inscriptions ...</td>\n",
+       "      <td>80</td>\n",
+       "      <td>Histoire</td>\n",
+       "      <td>True</td>\n",
+       "      <td>Histoire</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   volume  numero                          head     normClass  \\\n",
+       "0       1       5                      A, a & a     Grammaire   \n",
+       "1       1       6                             A  unclassified   \n",
+       "2       1       7                             A  unclassified   \n",
+       "3       1      10  A, numismatique ou monétaire  unclassified   \n",
+       "4       1      11                  A, lapidaire  unclassified   \n",
+       "\n",
+       "                                           classEDdA      author id_enccre  \\\n",
+       "0  ordre Encyclopéd. Entend. Science de l'homme, ...  Dumarsais5    v1-1-0   \n",
+       "1                                       unclassified  Dumarsais5    v1-1-1   \n",
+       "2                                       unclassified   Dumarsais    v1-1-2   \n",
+       "3                                       unclassified      Mallet    v1-1-5   \n",
+       "4                                       unclassified      Mallet    v1-1-6   \n",
+       "\n",
+       "  domaine_enccre ensemble_domaine_enccre  \\\n",
+       "0      grammaire               Grammaire   \n",
+       "1      grammaire               Grammaire   \n",
+       "2      grammaire               Grammaire   \n",
+       "3   numismatique               Médailles   \n",
+       "4   inscriptions                Histoire   \n",
+       "\n",
+       "                                             content  \\\n",
+       "0  A, a & a s.m. (ordre Encyclopéd.\\nEntend. Scie...   \n",
+       "1  A, mot, est 1. la troisieme personne du présen...   \n",
+       "2  A, préposition vient du latin à, à dextris, à ...   \n",
+       "3  A, numismatique ou monétaire, sur le revers de...   \n",
+       "4  A, lapidaire, dans les anciennes inscriptions ...   \n",
+       "\n",
+       "                                 contentWithoutClass  \\\n",
+       "0  A, a & a s.m. (ordre Encyclopéd.\\nEntend. Scie...   \n",
+       "1  A, mot, est 1. la troisieme personne du présen...   \n",
+       "2  A, préposition vient du latin à, à dextris, à ...   \n",
+       "3  A, numismatique ou monétaire, sur le revers de...   \n",
+       "4  A, lapidaire, dans les anciennes inscriptions ...   \n",
+       "\n",
+       "                                      firstParagraph  nb_word classification  \\\n",
+       "0  A, a & a s.m. (ordre Encyclopéd.\\nEntend. Scie...      711      Grammaire   \n",
+       "1  A, mot, est 1. la troisieme personne du présen...      238      Grammaire   \n",
+       "2  A, préposition vient du latin à, à dextris, à ...     1980      Grammaire   \n",
+       "3  A, numismatique ou monétaire, sur le revers de...      112      Médailles   \n",
+       "4  A, lapidaire, dans les anciennes inscriptions ...       80       Histoire   \n",
+       "\n",
+       "   class_is_true ensembles_domaine_enccre  \n",
+       "0           True                Grammaire  \n",
+       "1           True                Grammaire  \n",
+       "2           True                Grammaire  \n",
+       "3           True        Médailles|Monnaie  \n",
+       "4           True                 Histoire  "
+      ]
+     },
+     "execution_count": 22,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 23,
+   "id": "pleasant-throat",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# enregistrement du dataframe dans un fichier tsv\n",
+    "df.to_csv('/Users/lmoncla/Nextcloud-LIRIS/GEODE/GEODE - Partage consortium/Classification domaines EDdA/results_classification/result_classification_sgdtfidf_21.11.25.csv',index=False)  "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 24,
+   "id": "small-shore",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>volume</th>\n",
+       "      <th>numero</th>\n",
+       "      <th>head</th>\n",
+       "      <th>normClass</th>\n",
+       "      <th>classEDdA</th>\n",
+       "      <th>author</th>\n",
+       "      <th>id_enccre</th>\n",
+       "      <th>domaine_enccre</th>\n",
+       "      <th>ensemble_domaine_enccre</th>\n",
+       "      <th>content</th>\n",
+       "      <th>contentWithoutClass</th>\n",
+       "      <th>firstParagraph</th>\n",
+       "      <th>nb_word</th>\n",
+       "      <th>classification</th>\n",
+       "      <th>class_is_true</th>\n",
+       "      <th>ensembles_domaine_enccre</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>1</td>\n",
+       "      <td>5</td>\n",
+       "      <td>A, a &amp; a</td>\n",
+       "      <td>Grammaire</td>\n",
+       "      <td>ordre Encyclopéd. Entend. Science de l'homme, ...</td>\n",
+       "      <td>Dumarsais5</td>\n",
+       "      <td>v1-1-0</td>\n",
+       "      <td>grammaire</td>\n",
+       "      <td>Grammaire</td>\n",
+       "      <td>A, a &amp; a s.m. (ordre Encyclopéd.\\nEntend. Scie...</td>\n",
+       "      <td>A, a &amp; a s.m. (ordre Encyclopéd.\\nEntend. Scie...</td>\n",
+       "      <td>A, a &amp; a s.m. (ordre Encyclopéd.\\nEntend. Scie...</td>\n",
+       "      <td>711</td>\n",
+       "      <td>Grammaire</td>\n",
+       "      <td>True</td>\n",
+       "      <td>Grammaire</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>1</td>\n",
+       "      <td>6</td>\n",
+       "      <td>A</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>Dumarsais5</td>\n",
+       "      <td>v1-1-1</td>\n",
+       "      <td>grammaire</td>\n",
+       "      <td>Grammaire</td>\n",
+       "      <td>A, mot, est 1. la troisieme personne du présen...</td>\n",
+       "      <td>A, mot, est 1. la troisieme personne du présen...</td>\n",
+       "      <td>A, mot, est 1. la troisieme personne du présen...</td>\n",
+       "      <td>238</td>\n",
+       "      <td>Grammaire</td>\n",
+       "      <td>True</td>\n",
+       "      <td>Grammaire</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>1</td>\n",
+       "      <td>7</td>\n",
+       "      <td>A</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>Dumarsais</td>\n",
+       "      <td>v1-1-2</td>\n",
+       "      <td>grammaire</td>\n",
+       "      <td>Grammaire</td>\n",
+       "      <td>A, préposition vient du latin à, à dextris, à ...</td>\n",
+       "      <td>A, préposition vient du latin à, à dextris, à ...</td>\n",
+       "      <td>A, préposition vient du latin à, à dextris, à ...</td>\n",
+       "      <td>1980</td>\n",
+       "      <td>Grammaire</td>\n",
+       "      <td>True</td>\n",
+       "      <td>Grammaire</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>1</td>\n",
+       "      <td>10</td>\n",
+       "      <td>A, numismatique ou monétaire</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>Mallet</td>\n",
+       "      <td>v1-1-5</td>\n",
+       "      <td>numismatique</td>\n",
+       "      <td>Médailles</td>\n",
+       "      <td>A, numismatique ou monétaire, sur le revers de...</td>\n",
+       "      <td>A, numismatique ou monétaire, sur le revers de...</td>\n",
+       "      <td>A, numismatique ou monétaire, sur le revers de...</td>\n",
+       "      <td>112</td>\n",
+       "      <td>Médailles</td>\n",
+       "      <td>True</td>\n",
+       "      <td>Médailles|Monnaie</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>1</td>\n",
+       "      <td>11</td>\n",
+       "      <td>A, lapidaire</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>unclassified</td>\n",
+       "      <td>Mallet</td>\n",
+       "      <td>v1-1-6</td>\n",
+       "      <td>inscriptions</td>\n",
+       "      <td>Histoire</td>\n",
+       "      <td>A, lapidaire, dans les anciennes inscriptions ...</td>\n",
+       "      <td>A, lapidaire, dans les anciennes inscriptions ...</td>\n",
+       "      <td>A, lapidaire, dans les anciennes inscriptions ...</td>\n",
+       "      <td>80</td>\n",
+       "      <td>Histoire</td>\n",
+       "      <td>True</td>\n",
+       "      <td>Histoire</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   volume  numero                          head     normClass  \\\n",
+       "0       1       5                      A, a & a     Grammaire   \n",
+       "1       1       6                             A  unclassified   \n",
+       "2       1       7                             A  unclassified   \n",
+       "3       1      10  A, numismatique ou monétaire  unclassified   \n",
+       "4       1      11                  A, lapidaire  unclassified   \n",
+       "\n",
+       "                                           classEDdA      author id_enccre  \\\n",
+       "0  ordre Encyclopéd. Entend. Science de l'homme, ...  Dumarsais5    v1-1-0   \n",
+       "1                                       unclassified  Dumarsais5    v1-1-1   \n",
+       "2                                       unclassified   Dumarsais    v1-1-2   \n",
+       "3                                       unclassified      Mallet    v1-1-5   \n",
+       "4                                       unclassified      Mallet    v1-1-6   \n",
+       "\n",
+       "  domaine_enccre ensemble_domaine_enccre  \\\n",
+       "0      grammaire               Grammaire   \n",
+       "1      grammaire               Grammaire   \n",
+       "2      grammaire               Grammaire   \n",
+       "3   numismatique               Médailles   \n",
+       "4   inscriptions                Histoire   \n",
+       "\n",
+       "                                             content  \\\n",
+       "0  A, a & a s.m. (ordre Encyclopéd.\\nEntend. Scie...   \n",
+       "1  A, mot, est 1. la troisieme personne du présen...   \n",
+       "2  A, préposition vient du latin à, à dextris, à ...   \n",
+       "3  A, numismatique ou monétaire, sur le revers de...   \n",
+       "4  A, lapidaire, dans les anciennes inscriptions ...   \n",
+       "\n",
+       "                                 contentWithoutClass  \\\n",
+       "0  A, a & a s.m. (ordre Encyclopéd.\\nEntend. Scie...   \n",
+       "1  A, mot, est 1. la troisieme personne du présen...   \n",
+       "2  A, préposition vient du latin à, à dextris, à ...   \n",
+       "3  A, numismatique ou monétaire, sur le revers de...   \n",
+       "4  A, lapidaire, dans les anciennes inscriptions ...   \n",
+       "\n",
+       "                                      firstParagraph  nb_word classification  \\\n",
+       "0  A, a & a s.m. (ordre Encyclopéd.\\nEntend. Scie...      711      Grammaire   \n",
+       "1  A, mot, est 1. la troisieme personne du présen...      238      Grammaire   \n",
+       "2  A, préposition vient du latin à, à dextris, à ...     1980      Grammaire   \n",
+       "3  A, numismatique ou monétaire, sur le revers de...      112      Médailles   \n",
+       "4  A, lapidaire, dans les anciennes inscriptions ...       80       Histoire   \n",
+       "\n",
+       "   class_is_true ensembles_domaine_enccre  \n",
+       "0           True                Grammaire  \n",
+       "1           True                Grammaire  \n",
+       "2           True                Grammaire  \n",
+       "3           True        Médailles|Monnaie  \n",
+       "4           True                 Histoire  "
+      ]
+     },
+     "execution_count": 24,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df.head()"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.10"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/notebooks/EDdA_Classification_BertFineTuning.ipynb b/notebooks/EDdA_Classification_BertFineTuning.ipynb
new file mode 100644
index 0000000..dc0830e
--- /dev/null
+++ b/notebooks/EDdA_Classification_BertFineTuning.ipynb
@@ -0,0 +1,4421 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "4YCMlsNwOWs0"
+   },
+   "source": [
+    "# BERT fine-tuning for EDdA classification"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "Pz9VDIXUON97"
+   },
+   "source": [
+    "## Configuration"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "ouU5usvXg4PA"
+   },
+   "outputs": [],
+   "source": [
+    "train_path = 'training_set.tsv'\n",
+    "validation_path = 'validation_set.tsv'\n",
+    "test_path =  'test_set.tsv'\n",
+    "\n",
+    "columnText = 'contentWithoutClass'\n",
+    "columnClass = 'ensemble_domaine_enccre'\n",
+    "\n",
+    "minOfInstancePerClass = 0\n",
+    "maxOfInstancePerClass = 10000\n",
+    "\n",
+    "#model_chosen = \"bert\"\n",
+    "model_chosen = \"camembert\"\n",
+    "\n",
+    "batch_size = 8  # 16 or 32 recommended\n",
+    "max_len = 512"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "6xdYI9moOQSv"
+   },
+   "source": [
+    "## Setup colab environment"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "WF0qFN_g3ekz",
+    "outputId": "445ffd96-843b-4ff1-a24d-c110964a63e4"
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Your runtime has 27.3 gigabytes of available RAM\n",
+      "\n",
+      "You are using a high-RAM runtime!\n"
+     ]
+    }
+   ],
+   "source": [
+    "from psutil import virtual_memory\n",
+    "ram_gb = virtual_memory().total / 1e9\n",
+    "print('Your runtime has {:.1f} gigabytes of available RAM\\n'.format(ram_gb))\n",
+    "\n",
+    "if ram_gb < 20:\n",
+    "  print('Not using a high-RAM runtime')\n",
+    "else:\n",
+    "  print('You are using a high-RAM runtime!')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "vL0S-s9Uofvn",
+    "outputId": "415b7bf1-d3fd-42b6-ee03-13601c953a4f"
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Mounted at /content/drive\n"
+     ]
+    }
+   ],
+   "source": [
+    "from google.colab import drive\n",
+    "drive.mount('/content/drive')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "8hzEGHl7gmzk"
+   },
+   "source": [
+    "## Setup GPU"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "dPOU-Efhf4ui",
+    "outputId": "fc873e0c-1254-4928-c8e9-e3eb093acc64"
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "There are 1 GPU(s) available.\n",
+      "We will use the GPU: Tesla P100-PCIE-16GB\n"
+     ]
+    }
+   ],
+   "source": [
+    "import torch\n",
+    "\n",
+    "# If there's a GPU available...\n",
+    "if torch.cuda.is_available():    \n",
+    "\n",
+    "    # Tell PyTorch to use the GPU.    \n",
+    "    device = torch.device(\"cuda\")\n",
+    "\n",
+    "    print('There are %d GPU(s) available.' % torch.cuda.device_count())\n",
+    "\n",
+    "    print('We will use the GPU:', torch.cuda.get_device_name(0))\n",
+    "\n",
+    "# If not...\n",
+    "else:\n",
+    "    print('No GPU available, using the CPU instead.')\n",
+    "    device = torch.device(\"cpu\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "Jr-S9yYIgGkA"
+   },
+   "source": [
+    "## Install packages"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "pwmZ5bBvgGNh",
+    "outputId": "e92404c6-af38-4bd8-8c99-20ec6b545b3f"
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Collecting transformers==4.10.3\n",
+      "  Downloading transformers-4.10.3-py3-none-any.whl (2.8 MB)\n",
+      "\u001b[K     |████████████████████████████████| 2.8 MB 5.0 MB/s \n",
+      "\u001b[?25hCollecting tokenizers<0.11,>=0.10.1\n",
+      "  Downloading tokenizers-0.10.3-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (3.3 MB)\n",
+      "\u001b[K     |████████████████████████████████| 3.3 MB 38.8 MB/s \n",
+      "\u001b[?25hCollecting pyyaml>=5.1\n",
+      "  Downloading PyYAML-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (596 kB)\n",
+      "\u001b[K     |████████████████████████████████| 596 kB 58.6 MB/s \n",
+      "\u001b[?25hRequirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.7/dist-packages (from transformers==4.10.3) (2019.12.20)\n",
+      "Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.7/dist-packages (from transformers==4.10.3) (4.62.3)\n",
+      "Requirement already satisfied: requests in /usr/local/lib/python3.7/dist-packages (from transformers==4.10.3) (2.23.0)\n",
+      "Collecting huggingface-hub>=0.0.12\n",
+      "  Downloading huggingface_hub-0.2.1-py3-none-any.whl (61 kB)\n",
+      "\u001b[K     |████████████████████████████████| 61 kB 486 kB/s \n",
+      "\u001b[?25hRequirement already satisfied: filelock in /usr/local/lib/python3.7/dist-packages (from transformers==4.10.3) (3.4.0)\n",
+      "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.7/dist-packages (from transformers==4.10.3) (1.19.5)\n",
+      "Collecting sacremoses\n",
+      "  Downloading sacremoses-0.0.46-py3-none-any.whl (895 kB)\n",
+      "\u001b[K     |████████████████████████████████| 895 kB 43.3 MB/s \n",
+      "\u001b[?25hRequirement already satisfied: packaging in /usr/local/lib/python3.7/dist-packages (from transformers==4.10.3) (21.3)\n",
+      "Requirement already satisfied: importlib-metadata in /usr/local/lib/python3.7/dist-packages (from transformers==4.10.3) (4.8.2)\n",
+      "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.7/dist-packages (from huggingface-hub>=0.0.12->transformers==4.10.3) (3.10.0.2)\n",
+      "Requirement already satisfied: pyparsing!=3.0.5,>=2.0.2 in /usr/local/lib/python3.7/dist-packages (from packaging->transformers==4.10.3) (3.0.6)\n",
+      "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.7/dist-packages (from importlib-metadata->transformers==4.10.3) (3.6.0)\n",
+      "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests->transformers==4.10.3) (2.10)\n",
+      "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests->transformers==4.10.3) (1.24.3)\n",
+      "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests->transformers==4.10.3) (2021.10.8)\n",
+      "Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests->transformers==4.10.3) (3.0.4)\n",
+      "Requirement already satisfied: click in /usr/local/lib/python3.7/dist-packages (from sacremoses->transformers==4.10.3) (7.1.2)\n",
+      "Requirement already satisfied: six in /usr/local/lib/python3.7/dist-packages (from sacremoses->transformers==4.10.3) (1.15.0)\n",
+      "Requirement already satisfied: joblib in /usr/local/lib/python3.7/dist-packages (from sacremoses->transformers==4.10.3) (1.1.0)\n",
+      "Installing collected packages: pyyaml, tokenizers, sacremoses, huggingface-hub, transformers\n",
+      "  Attempting uninstall: pyyaml\n",
+      "    Found existing installation: PyYAML 3.13\n",
+      "    Uninstalling PyYAML-3.13:\n",
+      "      Successfully uninstalled PyYAML-3.13\n",
+      "Successfully installed huggingface-hub-0.2.1 pyyaml-6.0 sacremoses-0.0.46 tokenizers-0.10.3 transformers-4.10.3\n",
+      "Collecting sentencepiece\n",
+      "  Downloading sentencepiece-0.1.96-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.2 MB)\n",
+      "\u001b[K     |████████████████████████████████| 1.2 MB 5.1 MB/s \n",
+      "\u001b[?25hInstalling collected packages: sentencepiece\n",
+      "Successfully installed sentencepiece-0.1.96\n"
+     ]
+    }
+   ],
+   "source": [
+    "!pip install transformers==4.10.3\n",
+    "!pip install sentencepiece"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "wSqbrupGMc1M"
+   },
+   "source": [
+    "## Import libraries"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "SkErnwgMMbRj"
+   },
+   "outputs": [],
+   "source": [
+    "import pandas as pd \n",
+    "import numpy as np\n",
+    "import csv\n",
+    "from sklearn import preprocessing\n",
+    "from sklearn.model_selection import train_test_split\n",
+    "from sklearn.metrics import *\n",
+    "\n",
+    "from transformers import BertTokenizer, CamembertTokenizer, BertForSequenceClassification, AdamW, BertConfig, CamembertForSequenceClassification\n",
+    "import torch\n",
+    "from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler\n",
+    "from transformers import get_linear_schedule_with_warmup\n",
+    "\n",
+    "import time\n",
+    "import datetime\n",
+    "\n",
+    "import random\n",
+    "\n",
+    "import matplotlib.pyplot as plt\n",
+    "from sklearn.metrics import plot_confusion_matrix\n",
+    "from sklearn.metrics import confusion_matrix\n",
+    "from sklearn.metrics import classification_report\n",
+    "import seaborn as sns"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "12SA-qPFgsVo"
+   },
+   "source": [
+    "## Utility functions"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "WkIVcabUgxIl"
+   },
+   "outputs": [],
+   "source": [
+    "def create_dict(df, classColumnName):\n",
+    "  return dict(df[classColumnName].value_counts())\n",
+    "\n",
+    "\n",
+    "def remove_weak_classes(df, classColumnName, threshold):\n",
+    "  # keep only the rows whose class has at least `threshold` instances\n",
+    "  dictOfClassInstances = create_dict(df, classColumnName)\n",
+    "  keys = [k for k, v in dictOfClassInstances.items() if v >= threshold]\n",
+    "  return df[df[classColumnName].isin(keys)]\n",
+    "\n",
+    "\n",
+    "def resample_classes(df, classColumnName, numberOfInstances):\n",
+    "  # sample at most numberOfInstances rows per class\n",
+    "  replace = False  # sample without replacement\n",
+    "  fn = lambda obj: obj.loc[np.random.choice(obj.index, numberOfInstances if len(obj) > numberOfInstances else len(obj), replace),:]\n",
+    "  return df.groupby(classColumnName, as_index=False).apply(fn)\n",
+    "    \n",
+    "\n",
+    "# Function to calculate the accuracy of our predictions vs labels\n",
+    "def flat_accuracy(preds, labels):\n",
+    "  pred_flat = np.argmax(preds, axis=1).flatten()\n",
+    "  labels_flat = labels.flatten()\n",
+    "  return np.sum(pred_flat == labels_flat) / len(labels_flat) \n",
+    "\n",
+    "\n",
+    "def format_time(elapsed):\n",
+    "  '''\n",
+    "  Takes a time in seconds and returns a string hh:mm:ss\n",
+    "  '''\n",
+    "  # Round to the nearest second.\n",
+    "  elapsed_rounded = int(round((elapsed)))\n",
+    "\n",
+    "  # Format as hh:mm:ss\n",
+    "  return str(datetime.timedelta(seconds=elapsed_rounded))"
+   ]
+  },
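+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Quick sanity check for `resample_classes` on a hypothetical toy DataFrame (the labels and texts below are made up for illustration, not EDdA data): classes larger than `numberOfInstances` are capped, smaller classes are kept as-is."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Hypothetical toy example: 10 rows of class 'A', 3 rows of class 'B'\n",
+    "demo = pd.DataFrame({'label': ['A'] * 10 + ['B'] * 3,\n",
+    "                     'text': ['doc %d' % i for i in range(13)]})\n",
+    "\n",
+    "# Cap each class at 5 instances: 'A' is downsampled to 5, 'B' keeps its 3 rows\n",
+    "capped = resample_classes(demo, 'label', 5)\n",
+    "print(capped['label'].value_counts())"
+   ]
+  },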
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "c5QKcXulhNJ-"
+   },
+   "source": [
+    "## Load Data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "jdCdUVOTZrqh"
+   },
+   "outputs": [],
+   "source": [
+    "!wget https://projet.liris.cnrs.fr/geode/EDdA-Classification/datasets/training_set.tsv\n",
+    "!wget https://projet.liris.cnrs.fr/geode/EDdA-Classification/datasets/validation_set.tsv\n",
+    "!wget https://projet.liris.cnrs.fr/geode/EDdA-Classification/datasets/test_set.tsv"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "9d1IxD_bLEvp"
+   },
+   "source": [
+    "## Parameters"
+   ]
+  },
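+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The cells below use a few configuration variables (`train_path`, `validation_path`, `columnClass`, `columnText`, `maxOfInstancePerClass`, `model_chosen`, `max_len`, `batch_size`) that do not appear to be defined elsewhere. The next cell is a minimal configuration sketch: the paths follow from the `wget` calls above, but the column names and numeric values are assumptions to adapt to the actual dataset."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Minimal configuration sketch -- column names and numeric values are assumptions.\n",
+    "train_path = \"training_set.tsv\"         # downloaded by the wget cell above\n",
+    "validation_path = \"validation_set.tsv\"  # downloaded by the wget cell above\n",
+    "\n",
+    "columnClass = \"class\"      # placeholder: column holding the domain label\n",
+    "columnText = \"content\"     # placeholder: column holding the article text\n",
+    "\n",
+    "minOfInstancePerClass = 50     # placeholder threshold for remove_weak_classes\n",
+    "maxOfInstancePerClass = 1500   # placeholder cap for resample_classes\n",
+    "\n",
+    "model_chosen = \"camembert\"  # 'bert' or 'camembert'; the outputs below come from a CamemBERT run\n",
+    "max_len = 512               # truncation/padding length (model maximum)\n",
+    "batch_size = 16             # 16 or 32, as recommended for BERT fine-tuning"
+   ]
+  },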
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "5u1acjunhoxe"
+   },
+   "outputs": [],
+   "source": [
+    "df_train = pd.read_csv(train_path, sep=\"\\t\")\n",
+    "df_train = resample_classes(df_train, columnClass, maxOfInstancePerClass)\n",
+    "\n",
+    "df_validation = pd.read_csv(validation_path, sep=\"\\t\")\n",
+    "df_validation = resample_classes(df_validation, columnClass, maxOfInstancePerClass)\n",
+    "\n",
+    "#df_train = remove_weak_classes(df, columnClass, minOfInstancePerClass)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "zj3JDoJNfx1f",
+    "outputId": "59262e3f-5fe0-49f5-bb55-8586653498ab"
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "(30650, 13)\n",
+      "(10947, 13)\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(df_train.shape)\n",
+    "print(df_validation.shape)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "zrjZvs2dhzAy"
+   },
+   "outputs": [],
+   "source": [
+    "y_train  = df_train[columnClass]\n",
+    "y_validation = df_validation[columnClass]\n",
+    "numberOfClasses = y_train.nunique()\n",
+    "\n",
+    "encoder = preprocessing.LabelEncoder()\n",
+    "\n",
+    "y_train = encoder.fit_transform(y_train)\n",
+    "# reuse the mapping fitted on the training labels for the validation labels\n",
+    "y_validation = encoder.transform(y_validation)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "u9AxxaA_h1CM"
+   },
+   "outputs": [],
+   "source": [
+    "#train_x, test_x, train_y, test_y = train_test_split(df, y, test_size=0.33, random_state=42, stratify = y )\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "Xt_PhH_6h1_3"
+   },
+   "outputs": [],
+   "source": [
+    "sentences_train = df_train[columnText].values\n",
+    "labels_train = y_train.tolist()\n",
+    "\n",
+    "sentences_validation = df_validation[columnText].values\n",
+    "labels_validation = y_validation.tolist()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "Dq_KF5WAsbpC",
+    "outputId": "ba91b953-abcb-4bed-a5c5-9e429e68239a"
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([\"\\nESTAMPEUR, s. m. en , est une\\nsorte de pilon de bois, surmonté d'un manche d'environ \\ndeux piés & demi. On s'en sert pour estamper\\nles formes où l'on veut faire des vergeoises. Voyez\\nVergeoise & Estamper.\\n\",\n",
+       "       \"\\nOn doit ébourgeonner les vignes, alors ce mot doit\\ns'entendre autrement que pour les arbres fruitiers:\\non ébourgeonne les vignes. non-seulement quand on\\nsupprime les bourgeons surnuméraires, mais encore\\nquand on arrête par-en-haut les bourgeons. Il en est\\nde même quand on détache en cassant les faux bourgeons \\nqui poussent d'ordinaire à chaque noeud à\\ncôté des yeux, à commencer par le bas. (K)\\n\",\n",
+       "       \"\\nBois mort en pié, s'il est pourri sur pié, sans\\nsubstance, & bon seulement à brûler.\\n\",\n",
+       "       ...,\n",
+       "       \"\\nIl y a une hydatoscopie naturelle & permise ; elle\\nconsiste à prévoir & à prédire les orages & les tempêtes \\nsur certains signes qu'on remarque dans la mer,\\ndans l'air, & dans les nuages. Voyez Tems & Ouragans. Dict. de Trévoux.\\n\",\n",
+       "       \"\\nMÉTÉOROMANCIE, s.f. () divination par\\nles météores ; & comme les météores ignés sont ceux\\nqui jettent le plus de crainte parmi les hommes, la\\nmétéoromancie désigne proprement la divination par\\nle tonnerre & les éclairs. Cette espece de divination\\npassa des Toscans aux Romains, sons rien perdre de\\nce qu'elle avoit de frivole. Seneque nous apprend\\nque deux auteurs graves, & qui avoient exercé des\\n\\nmagistratures, écrivoient à Rome sur cette matiere.\\nIl semble même que l'un d'eux l'épuisa entierement,\\ncar il donnoit une liste exacte des différentes especes\\nde tonnerres. Il circonstancioit & leurs noms & les\\nprognostics qui s'en pouvoient tirer ; le tout avec un\\nair de confiance plus surprenant encore que les choses\\nqu'il rapportoit. On eût dit, tant cette matiere météorologique lui étoit familiere, qu'il comptoit les tableaux \\nde sa galerie, ou qu'il faisoit la description\\ndes fleurs de son jardin. La plus ancienne maladie,\\nla plus invétérée, la plus incurable du genre humain,\\nc'est l'envie de connoître ce qui doit arriver.\\nNi le voile obscur qui nous cache notre destinée, ni\\nl'expérience journaliere, ni une infinité de tentatives \\nmalheureuses, n'ont pû guerir les hommes. Hé!\\nse dépréviennent-ils jamais d'une erreur agréablement \\nreçue? Nous sommes sur ce point aussi crédules\\nque nos ancêtres ; nous prêtons comme eux l'oreille\\nà toutes les impostures flatteuses. Pour avoir trompé\\ncent fois, elles n'ont point perdu le droit funeste de\\ntromper encore. (D. J.)\\n\",\n",
+       "       \"\\nPENTACLE, s. m. () c'est le nom que la\\nmagie des exorcismes donne à un sceau imprimé ou\\nsur du parchemin vierge fait de peau de bouc, ou\\nsur quelque métal, or, argent, cuivre, étain, plomb,\\n&c. On ne peut faire aucune opération magique pour\\nexorciser les esprits, sans avoir ce sceau qui contient\\nles noms de Dieu. Le pentacle se fait en renfermant\\nun triangle dans deux cercles : on lit dans ce triangle \\nces trois mots ; formatio, reformatio, transformatio. A côté du triangle est le mot agla, qui est très puissant \\npour arrêter la malice des esprits. Il faut que\\nla peau sur laquelle on applique le sceau soit exorcisée \\n& bénite. On exorcise aussi l'encre & la plume,\\ndont on se sert pour écrire les mots dont on vient de\\nparler. Après cela on encense le pentacle ; on l'enferme \\ntrois jours & trois nuits dans un vase bien net ;\\nenfin, on le met dans un linge ou dans un livre que\\nl'on parfume & que l'on exorcise. Voilà les fadaises\\nqu'on lit dans le livre intitulé Encheiridion Leonis papae, ouvrage misérable, qui n'a servi qu'à gâter davantage \\nles esprits crédules & portés à la superstitition.\\n(D. J.)\\n\"],\n",
+       "      dtype=object)"
+      ]
+     },
+     "execution_count": 41,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "sentences_train"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "Gs4Agx_5h43M"
+   },
+   "source": [
+    "# Model\n",
+    "## Tokenisation & Input Formatting"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "YZ5PhEYZiCEA"
+   },
+   "outputs": [],
+   "source": [
+    "if model_chosen == \"bert\":\n",
+    "  tokeniser_bert = 'bert-base-multilingual-cased'\n",
+    "  model_bert = 'bert-base-multilingual-cased'\n",
+    "elif model_chosen == \"camembert\":\n",
+    "  tokeniser_bert = 'camembert-base'\n",
+    "  model_bert = 'camembert-base'\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 131,
+     "referenced_widgets": [
+      "06c6e7721b68449a9f3619ffdf18dfeb",
+      "5ec6a851b16c4339b51acb6129935f13",
+      "fd39a852133144e2b4aed474b204451f",
+      "0143df420df444e9aac5c8b39c342021",
+      "c61b6474b55948cb91a598e6b9aa10d2",
+      "a0d9ceaa8d3a4876ae65d877687bcf50",
+      "aa6ea92757df47eda1e41603cb109e79",
+      "41558bfcc0464711916c2d96337bef66",
+      "fdf05cea504c42f793f9c06e58ef995b",
+      "044fc1f96f8347ddb4a79d31edf32174",
+      "cf0d3320e06546789b5d5a2021dbc3ad",
+      "fba1d1d5c83b40659295a3457d74cb4e",
+      "f7224a1b831d459594852eece9f05543",
+      "185ae5ef7be646b797467086ad7d3a82",
+      "3ceaa994a3814d3c85e2051e37397342",
+      "e674e279b13b41fda3df3a6c89f5fcb1",
+      "3203783f58e54b0e856ab84503bf0d3c",
+      "0214f74b229a4232a9edf3cab751b90d",
+      "152afcb9245c416fae0fde257fa25e2e",
+      "fb3a174c597b47c7a527517004ba5f54",
+      "75073a0f673345728871dfb0346e7c1b",
+      "db8c94b4ed724f859d1ae8c153b01110",
+      "6a29c1c28ceb415f91ec55512da981c5",
+      "5879fadf430646f6af41b1a9b14864ff",
+      "340241453dab4db88043d372aaa88c2e",
+      "27e18e1fa3884c0fb0339764e0397990",
+      "2af1124092684f8bafab311cbe9bf22c",
+      "95a3332ba4634d1c930a7021eacce230",
+      "d53488432f8544de863210d9e8ee4e48",
+      "4422e64029184ba4ba30eecfdf2b4306",
+      "1d97e83c703f4071b9176ba7bf57cddf",
+      "17bf94188b844f649642d9c6e6a20373",
+      "d3aaecd7a6e34cc8918a689ac6299746"
+     ]
+    },
+    "id": "C4bigx_3ibuN",
+    "outputId": "b8cef3f8-7a6c-47d1-9d37-7b3b6d08f00b"
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Loading CamemBERT tokenizer...\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "06c6e7721b68449a9f3619ffdf18dfeb",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Downloading:   0%|          | 0.00/811k [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "fba1d1d5c83b40659295a3457d74cb4e",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Downloading:   0%|          | 0.00/1.40M [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "6a29c1c28ceb415f91ec55512da981c5",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Downloading:   0%|          | 0.00/508 [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "# Load the BERT tokenizer.\n",
+    "if model_chosen == \"bert\":\n",
+    "  print('Loading BERT tokenizer...')\n",
+    "  tokenizer = BertTokenizer.from_pretrained(tokeniser_bert)\n",
+    "elif model_chosen == \"camembert\":\n",
+    "  print('Loading CamemBERT tokenizer...')\n",
+    "  tokenizer = CamembertTokenizer.from_pretrained(tokeniser_bert)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "5hNod5X9jDZN",
+    "outputId": "93b6e633-afb7-4bcc-be00-44388f801d64"
+   },
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Token indices sequence length is longer than the specified maximum sequence length for this model (1263 > 512). Running this sequence through the model will result in indexing errors\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Tokenize all of the sentences and map the tokens to their word IDs.\n",
+    "input_ids_train = []\n",
+    "\n",
+    "# For every sentence...\n",
+    "for sent in sentences_train:\n",
+    "    # `encode` will:\n",
+    "    #   (1) Tokenize the sentence.\n",
+    "    #   (2) Prepend the `[CLS]` token to the start.\n",
+    "    #   (3) Append the `[SEP]` token to the end.\n",
+    "    #   (4) Map tokens to their IDs.\n",
+    "    encoded_sent_train = tokenizer.encode(\n",
+    "                        str(sent),                      # Sentence to encode.\n",
+    "                        add_special_tokens = True, # Add '[CLS]' and '[SEP]'\n",
+    "\n",
+    "                        # This function also supports truncation and conversion\n",
+    "                        # to pytorch tensors, but I need to do padding, so I\n",
+    "                        # can't use these features.\n",
+    "                        #max_length = 128,          # Truncate all sentences.\n",
+    "                        #return_tensors = 'pt',     # Return pytorch tensors.\n",
+    "                   )\n",
+    "    \n",
+    "    # Add the encoded sentence to the list.\n",
+    "    input_ids_train.append(encoded_sent_train)\n",
+    "\n",
+    "input_ids_validation = []\n",
+    "for sent in sentences_validation:\n",
+    "    # `encode` will:\n",
+    "    #   (1) Tokenize the sentence.\n",
+    "    #   (2) Prepend the `[CLS]` token to the start.\n",
+    "    #   (3) Append the `[SEP]` token to the end.\n",
+    "    #   (4) Map tokens to their IDs.\n",
+    "    encoded_sent_validation = tokenizer.encode(\n",
+    "                        str(sent),                      # Sentence to encode.\n",
+    "                        add_special_tokens = True, # Add '[CLS]' and '[SEP]'\n",
+    "\n",
+    "                        # This function also supports truncation and conversion\n",
+    "                        # to pytorch tensors, but I need to do padding, so I\n",
+    "                        # can't use these features.\n",
+    "                        #max_length = 128,          # Truncate all sentences.\n",
+    "                        #return_tensors = 'pt',     # Return pytorch tensors.\n",
+    "                   )\n",
+    "    \n",
+    "    # Add the encoded sentence to the list.\n",
+    "    input_ids_validation.append(encoded_sent_validation)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "W9EWv5JvjGH3",
+    "outputId": "32cd417d-9a40-4086-d900-b81982407667"
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Max sentence length train:  2253\n",
+      "Max sentence length validation:  3067\n"
+     ]
+    }
+   ],
+   "source": [
+    "print('Max sentence length train: ', max([len(sen) for sen in input_ids_train]))\n",
+    "print('Max sentence length validation: ', max([len(sen) for sen in input_ids_validation])) "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "xh1TQJyvjOx5"
+   },
+   "outputs": [],
+   "source": [
+    "\n",
+    "padded_train = []\n",
+    "for i in input_ids_train:\n",
+    "\n",
+    "  if len(i) > max_len:\n",
+    "    padded_train.extend([i[:max_len]])\n",
+    "  else:\n",
+    "    padded_train.extend([i + [0] * (max_len - len(i))])\n",
+    "\n",
+    "\n",
+    "padded_train = input_ids_train = np.array(padded_train)\n",
+    "\n",
+    "\n",
+    "padded_validation = []\n",
+    "for i in input_ids_validation:\n",
+    "\n",
+    "  if len(i) > max_len:\n",
+    "    padded_validation.extend([i[:max_len]])\n",
+    "  else:\n",
+    "    padded_validation.extend([i + [0] * (max_len - len(i))])\n",
+    "\n",
+    "\n",
+    "padded_validation = input_ids_validation = np.array(padded_validation)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "ZiwY6gn0jUkD"
+   },
+   "outputs": [],
+   "source": [
+    " # Create attention masks\n",
+    "attention_masks_train = []\n",
+    "\n",
+    "# For each sentence...\n",
+    "for sent in padded_train:\n",
+    "    \n",
+    "    # Create the attention mask.\n",
+    "    #   - If a token ID is 0, then it's padding, set the mask to 0.\n",
+    "    #   - If a token ID is > 0, then it's a real token, set the mask to 1.\n",
+    "    att_mask = [int(token_id > 0) for token_id in sent]\n",
+    "    \n",
+    "    # Store the attention mask for this sentence.\n",
+    "    attention_masks_train.append(att_mask)\n",
+    "\n",
+    "\n",
+    "attention_masks_validation = []\n",
+    "\n",
+    "# For each sentence...\n",
+    "for sent in padded_validation:\n",
+    "    \n",
+    "    # Create the attention mask.\n",
+    "    #   - If a token ID is 0, then it's padding, set the mask to 0.\n",
+    "    #   - If a token ID is > 0, then it's a real token, set the mask to 1.\n",
+    "    att_mask = [int(token_id > 0) for token_id in sent]\n",
+    "    \n",
+    "    # Store the attention mask for this sentence.\n",
+    "    attention_masks_validation.append(att_mask)\n"
+   ]
+  },
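+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Side note: the manual truncation, padding and attention-mask construction above can also be delegated to the tokenizer itself. The cell below is only a sketch of that equivalent formulation (assuming the transformers 4.x tokenizer `__call__` API with `truncation`, `padding` and `return_tensors`); it is not used by the rest of the notebook."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Alternative sketch (not used below): let the tokenizer truncate, pad and\n",
+    "# build the attention masks in a single call.\n",
+    "encoded_train = tokenizer([str(s) for s in sentences_train],\n",
+    "                          add_special_tokens=True,\n",
+    "                          truncation=True,\n",
+    "                          padding='max_length',\n",
+    "                          max_length=max_len,\n",
+    "                          return_tensors='pt')\n",
+    "\n",
+    "# encoded_train['input_ids'] and encoded_train['attention_mask'] play the same\n",
+    "# role as padded_train and attention_masks_train computed manually above."
+   ]
+  },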
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "oBTR5AfAjXJe"
+   },
+   "outputs": [],
+   "source": [
+    "# Use 70% for training and 30% for validation.\n",
+    "#train_inputs, validation_inputs, train_labels, validation_labels = train_test_split(padded, labels, \n",
+    "#                                                            random_state=2018, test_size=0.3, stratify = labels)\n",
+    "# Do the same for the masks.\n",
+    "#train_masks, validation_masks, _, _ = train_test_split(attention_masks, labels,\n",
+    "#                                             random_state=2018, test_size=0.3, stratify = labels)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "b9Mw5kq3jhTb"
+   },
+   "outputs": [],
+   "source": [
+    "# Convert all inputs and labels into torch tensors, the required datatype \n",
+    "# for my model.\n",
+    "train_inputs = torch.tensor(padded_train)\n",
+    "validation_inputs = torch.tensor(padded_validation)\n",
+    "\n",
+    "train_labels = torch.tensor(labels_train)\n",
+    "validation_labels = torch.tensor(labels_validation)\n",
+    "\n",
+    "train_masks = torch.tensor(attention_masks_train)\n",
+    "validation_masks = torch.tensor(attention_masks_validation)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "UfFWzbENjnkw"
+   },
+   "outputs": [],
+   "source": [
+    "# The DataLoader needs to know the batch size for training, so I specify it here.\n",
+    "# For fine-tuning BERT on a specific task, the authors recommend a batch size of\n",
+    "# 16 or 32.\n",
+    "\n",
+    "# Create the DataLoader for training set.\n",
+    "train_data = TensorDataset(train_inputs, train_masks, train_labels)\n",
+    "train_sampler = RandomSampler(train_data)\n",
+    "train_dataloader = DataLoader(train_data, sampler=train_sampler, batch_size=batch_size)\n",
+    "\n",
+    "# Create the DataLoader for validation set.\n",
+    "validation_data = TensorDataset(validation_inputs, validation_masks, validation_labels)\n",
+    "validation_sampler = SequentialSampler(validation_data)\n",
+    "validation_dataloader = DataLoader(validation_data, sampler=validation_sampler, batch_size=batch_size)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "x45JNGqhkUn2"
+   },
+   "source": [
+    "## Training"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 1000,
+     "referenced_widgets": [
+      "4873cc6c9e1d493c9a67d6536e4367a6",
+      "12aa3280d3284c07ac12e2fe842b40b0",
+      "1bcdb04d16dd4f9e9d86938e1d2def02",
+      "b5f86071b23c40bf9c96f74c613c2729",
+      "27a20a17123744948e0c1dbf49b51b27",
+      "f470af786c1c4d049de4f0a7f373379f",
+      "00bd66a81aad4cd7a10df4a67b52b14e",
+      "a5efb634a95c42a7abfaaf61e1c2c928",
+      "600e627de1f0403595f701381dc3b164",
+      "f3b7527bd4d04c81936d8392decee3ac",
+      "885f91c34b9c422889df8b556aad8ec0"
+     ]
+    },
+    "id": "C7M2Er1ajsTf",
+    "outputId": "2c3f467d-ab09-4f8f-d464-a4e738333587"
+   },
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "4873cc6c9e1d493c9a67d6536e4367a6",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Downloading:   0%|          | 0.00/445M [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Some weights of the model checkpoint at camembert-base were not used when initializing CamembertForSequenceClassification: ['lm_head.dense.weight', 'roberta.pooler.dense.bias', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.bias', 'roberta.pooler.dense.weight', 'lm_head.decoder.weight', 'lm_head.layer_norm.bias']\n",
+      "- This IS expected if you are initializing CamembertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
+      "- This IS NOT expected if you are initializing CamembertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
+      "Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at camembert-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.bias']\n",
+      "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "CamembertForSequenceClassification(\n",
+       "  (roberta): RobertaModel(\n",
+       "    (embeddings): RobertaEmbeddings(\n",
+       "      (word_embeddings): Embedding(32005, 768, padding_idx=1)\n",
+       "      (position_embeddings): Embedding(514, 768, padding_idx=1)\n",
+       "      (token_type_embeddings): Embedding(1, 768)\n",
+       "      (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n",
+       "      (dropout): Dropout(p=0.1, inplace=False)\n",
+       "    )\n",
+       "    (encoder): RobertaEncoder(\n",
+       "      (layer): ModuleList(\n",
+       "        (0): RobertaLayer(\n",
+       "          (attention): RobertaAttention(\n",
+       "            (self): RobertaSelfAttention(\n",
+       "              (query): Linear(in_features=768, out_features=768, bias=True)\n",
+       "              (key): Linear(in_features=768, out_features=768, bias=True)\n",
+       "              (value): Linear(in_features=768, out_features=768, bias=True)\n",
+       "              (dropout): Dropout(p=0.1, inplace=False)\n",
+       "            )\n",
+       "            (output): RobertaSelfOutput(\n",
+       "              (dense): Linear(in_features=768, out_features=768, bias=True)\n",
+       "              (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n",
+       "              (dropout): Dropout(p=0.1, inplace=False)\n",
+       "            )\n",
+       "          )\n",
+       "          (intermediate): RobertaIntermediate(\n",
+       "            (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
+       "          )\n",
+       "          (output): RobertaOutput(\n",
+       "            (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
+       "            (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n",
+       "            (dropout): Dropout(p=0.1, inplace=False)\n",
+       "          )\n",
+       "        )\n",
+       "        (1): RobertaLayer(\n",
+       "          (attention): RobertaAttention(\n",
+       "            (self): RobertaSelfAttention(\n",
+       "              (query): Linear(in_features=768, out_features=768, bias=True)\n",
+       "              (key): Linear(in_features=768, out_features=768, bias=True)\n",
+       "              (value): Linear(in_features=768, out_features=768, bias=True)\n",
+       "              (dropout): Dropout(p=0.1, inplace=False)\n",
+       "            )\n",
+       "            (output): RobertaSelfOutput(\n",
+       "              (dense): Linear(in_features=768, out_features=768, bias=True)\n",
+       "              (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n",
+       "              (dropout): Dropout(p=0.1, inplace=False)\n",
+       "            )\n",
+       "          )\n",
+       "          (intermediate): RobertaIntermediate(\n",
+       "            (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
+       "          )\n",
+       "          (output): RobertaOutput(\n",
+       "            (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
+       "            (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n",
+       "            (dropout): Dropout(p=0.1, inplace=False)\n",
+       "          )\n",
+       "        )\n",
+       "        (2): RobertaLayer(\n",
+       "          (attention): RobertaAttention(\n",
+       "            (self): RobertaSelfAttention(\n",
+       "              (query): Linear(in_features=768, out_features=768, bias=True)\n",
+       "              (key): Linear(in_features=768, out_features=768, bias=True)\n",
+       "              (value): Linear(in_features=768, out_features=768, bias=True)\n",
+       "              (dropout): Dropout(p=0.1, inplace=False)\n",
+       "            )\n",
+       "            (output): RobertaSelfOutput(\n",
+       "              (dense): Linear(in_features=768, out_features=768, bias=True)\n",
+       "              (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n",
+       "              (dropout): Dropout(p=0.1, inplace=False)\n",
+       "            )\n",
+       "          )\n",
+       "          (intermediate): RobertaIntermediate(\n",
+       "            (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
+       "          )\n",
+       "          (output): RobertaOutput(\n",
+       "            (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
+       "            (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n",
+       "            (dropout): Dropout(p=0.1, inplace=False)\n",
+       "          )\n",
+       "        )\n",
+       "        (3): RobertaLayer(\n",
+       "          (attention): RobertaAttention(\n",
+       "            (self): RobertaSelfAttention(\n",
+       "              (query): Linear(in_features=768, out_features=768, bias=True)\n",
+       "              (key): Linear(in_features=768, out_features=768, bias=True)\n",
+       "              (value): Linear(in_features=768, out_features=768, bias=True)\n",
+       "              (dropout): Dropout(p=0.1, inplace=False)\n",
+       "            )\n",
+       "            (output): RobertaSelfOutput(\n",
+       "              (dense): Linear(in_features=768, out_features=768, bias=True)\n",
+       "              (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n",
+       "              (dropout): Dropout(p=0.1, inplace=False)\n",
+       "            )\n",
+       "          )\n",
+       "          (intermediate): RobertaIntermediate(\n",
+       "            (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
+       "          )\n",
+       "          (output): RobertaOutput(\n",
+       "            (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
+       "            (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n",
+       "            (dropout): Dropout(p=0.1, inplace=False)\n",
+       "          )\n",
+       "        )\n",
+       "        (4): RobertaLayer(\n",
+       "          (attention): RobertaAttention(\n",
+       "            (self): RobertaSelfAttention(\n",
+       "              (query): Linear(in_features=768, out_features=768, bias=True)\n",
+       "              (key): Linear(in_features=768, out_features=768, bias=True)\n",
+       "              (value): Linear(in_features=768, out_features=768, bias=True)\n",
+       "              (dropout): Dropout(p=0.1, inplace=False)\n",
+       "            )\n",
+       "            (output): RobertaSelfOutput(\n",
+       "              (dense): Linear(in_features=768, out_features=768, bias=True)\n",
+       "              (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n",
+       "              (dropout): Dropout(p=0.1, inplace=False)\n",
+       "            )\n",
+       "          )\n",
+       "          (intermediate): RobertaIntermediate(\n",
+       "            (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
+       "          )\n",
+       "          (output): RobertaOutput(\n",
+       "            (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
+       "            (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n",
+       "            (dropout): Dropout(p=0.1, inplace=False)\n",
+       "          )\n",
+       "        )\n",
+       "        (5): RobertaLayer(\n",
+       "          (attention): RobertaAttention(\n",
+       "            (self): RobertaSelfAttention(\n",
+       "              (query): Linear(in_features=768, out_features=768, bias=True)\n",
+       "              (key): Linear(in_features=768, out_features=768, bias=True)\n",
+       "              (value): Linear(in_features=768, out_features=768, bias=True)\n",
+       "              (dropout): Dropout(p=0.1, inplace=False)\n",
+       "            )\n",
+       "            (output): RobertaSelfOutput(\n",
+       "              (dense): Linear(in_features=768, out_features=768, bias=True)\n",
+       "              (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n",
+       "              (dropout): Dropout(p=0.1, inplace=False)\n",
+       "            )\n",
+       "          )\n",
+       "          (intermediate): RobertaIntermediate(\n",
+       "            (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
+       "          )\n",
+       "          (output): RobertaOutput(\n",
+       "            (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
+       "            (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n",
+       "            (dropout): Dropout(p=0.1, inplace=False)\n",
+       "          )\n",
+       "        )\n",
+       "        (6): RobertaLayer(\n",
+       "          (attention): RobertaAttention(\n",
+       "            (self): RobertaSelfAttention(\n",
+       "              (query): Linear(in_features=768, out_features=768, bias=True)\n",
+       "              (key): Linear(in_features=768, out_features=768, bias=True)\n",
+       "              (value): Linear(in_features=768, out_features=768, bias=True)\n",
+       "              (dropout): Dropout(p=0.1, inplace=False)\n",
+       "            )\n",
+       "            (output): RobertaSelfOutput(\n",
+       "              (dense): Linear(in_features=768, out_features=768, bias=True)\n",
+       "              (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n",
+       "              (dropout): Dropout(p=0.1, inplace=False)\n",
+       "            )\n",
+       "          )\n",
+       "          (intermediate): RobertaIntermediate(\n",
+       "            (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
+       "          )\n",
+       "          (output): RobertaOutput(\n",
+       "            (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
+       "            (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n",
+       "            (dropout): Dropout(p=0.1, inplace=False)\n",
+       "          )\n",
+       "        )\n",
+       "        (7): RobertaLayer(\n",
+       "          (attention): RobertaAttention(\n",
+       "            (self): RobertaSelfAttention(\n",
+       "              (query): Linear(in_features=768, out_features=768, bias=True)\n",
+       "              (key): Linear(in_features=768, out_features=768, bias=True)\n",
+       "              (value): Linear(in_features=768, out_features=768, bias=True)\n",
+       "              (dropout): Dropout(p=0.1, inplace=False)\n",
+       "            )\n",
+       "            (output): RobertaSelfOutput(\n",
+       "              (dense): Linear(in_features=768, out_features=768, bias=True)\n",
+       "              (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n",
+       "              (dropout): Dropout(p=0.1, inplace=False)\n",
+       "            )\n",
+       "          )\n",
+       "          (intermediate): RobertaIntermediate(\n",
+       "            (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
+       "          )\n",
+       "          (output): RobertaOutput(\n",
+       "            (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
+       "            (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n",
+       "            (dropout): Dropout(p=0.1, inplace=False)\n",
+       "          )\n",
+       "        )\n",
+       "        (8): RobertaLayer(\n",
+       "          (attention): RobertaAttention(\n",
+       "            (self): RobertaSelfAttention(\n",
+       "              (query): Linear(in_features=768, out_features=768, bias=True)\n",
+       "              (key): Linear(in_features=768, out_features=768, bias=True)\n",
+       "              (value): Linear(in_features=768, out_features=768, bias=True)\n",
+       "              (dropout): Dropout(p=0.1, inplace=False)\n",
+       "            )\n",
+       "            (output): RobertaSelfOutput(\n",
+       "              (dense): Linear(in_features=768, out_features=768, bias=True)\n",
+       "              (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n",
+       "              (dropout): Dropout(p=0.1, inplace=False)\n",
+       "            )\n",
+       "          )\n",
+       "          (intermediate): RobertaIntermediate(\n",
+       "            (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
+       "          )\n",
+       "          (output): RobertaOutput(\n",
+       "            (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
+       "            (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n",
+       "            (dropout): Dropout(p=0.1, inplace=False)\n",
+       "          )\n",
+       "        )\n",
+       "        (9): RobertaLayer(\n",
+       "          (attention): RobertaAttention(\n",
+       "            (self): RobertaSelfAttention(\n",
+       "              (query): Linear(in_features=768, out_features=768, bias=True)\n",
+       "              (key): Linear(in_features=768, out_features=768, bias=True)\n",
+       "              (value): Linear(in_features=768, out_features=768, bias=True)\n",
+       "              (dropout): Dropout(p=0.1, inplace=False)\n",
+       "            )\n",
+       "            (output): RobertaSelfOutput(\n",
+       "              (dense): Linear(in_features=768, out_features=768, bias=True)\n",
+       "              (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n",
+       "              (dropout): Dropout(p=0.1, inplace=False)\n",
+       "            )\n",
+       "          )\n",
+       "          (intermediate): RobertaIntermediate(\n",
+       "            (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
+       "          )\n",
+       "          (output): RobertaOutput(\n",
+       "            (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
+       "            (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n",
+       "            (dropout): Dropout(p=0.1, inplace=False)\n",
+       "          )\n",
+       "        )\n",
+       "        (10): RobertaLayer(\n",
+       "          (attention): RobertaAttention(\n",
+       "            (self): RobertaSelfAttention(\n",
+       "              (query): Linear(in_features=768, out_features=768, bias=True)\n",
+       "              (key): Linear(in_features=768, out_features=768, bias=True)\n",
+       "              (value): Linear(in_features=768, out_features=768, bias=True)\n",
+       "              (dropout): Dropout(p=0.1, inplace=False)\n",
+       "            )\n",
+       "            (output): RobertaSelfOutput(\n",
+       "              (dense): Linear(in_features=768, out_features=768, bias=True)\n",
+       "              (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n",
+       "              (dropout): Dropout(p=0.1, inplace=False)\n",
+       "            )\n",
+       "          )\n",
+       "          (intermediate): RobertaIntermediate(\n",
+       "            (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
+       "          )\n",
+       "          (output): RobertaOutput(\n",
+       "            (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
+       "            (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n",
+       "            (dropout): Dropout(p=0.1, inplace=False)\n",
+       "          )\n",
+       "        )\n",
+       "        (11): RobertaLayer(\n",
+       "          (attention): RobertaAttention(\n",
+       "            (self): RobertaSelfAttention(\n",
+       "              (query): Linear(in_features=768, out_features=768, bias=True)\n",
+       "              (key): Linear(in_features=768, out_features=768, bias=True)\n",
+       "              (value): Linear(in_features=768, out_features=768, bias=True)\n",
+       "              (dropout): Dropout(p=0.1, inplace=False)\n",
+       "            )\n",
+       "            (output): RobertaSelfOutput(\n",
+       "              (dense): Linear(in_features=768, out_features=768, bias=True)\n",
+       "              (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n",
+       "              (dropout): Dropout(p=0.1, inplace=False)\n",
+       "            )\n",
+       "          )\n",
+       "          (intermediate): RobertaIntermediate(\n",
+       "            (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
+       "          )\n",
+       "          (output): RobertaOutput(\n",
+       "            (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
+       "            (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n",
+       "            (dropout): Dropout(p=0.1, inplace=False)\n",
+       "          )\n",
+       "        )\n",
+       "      )\n",
+       "    )\n",
+       "  )\n",
+       "  (classifier): RobertaClassificationHead(\n",
+       "    (dense): Linear(in_features=768, out_features=768, bias=True)\n",
+       "    (dropout): Dropout(p=0.1, inplace=False)\n",
+       "    (out_proj): Linear(in_features=768, out_features=38, bias=True)\n",
+       "  )\n",
+       ")"
+      ]
+     },
+     "execution_count": 51,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Load the pretrained sequence classification model (BERT or CamemBERT) with a\n",
+    "# single linear classification layer on top.\n",
+    "\n",
+    "#model = CamembertForSequenceClassification.from_pretrained(\n",
+    "if model_chosen == \"bert\":\n",
+    "  model = BertForSequenceClassification.from_pretrained(\n",
+    "      model_bert, # the 12-layer multilingual cased BERT model selected above\n",
+    "      num_labels = numberOfClasses, # one output label per EDdA domain class\n",
+    "      output_attentions = False, # Whether the model returns attention weights.\n",
+    "      output_hidden_states = False, # Whether the model returns all hidden-states.\n",
+    "  )\n",
+    "elif model_chosen == \"camembert\":\n",
+    "  model = CamembertForSequenceClassification.from_pretrained(\n",
+    "      model_bert, # the CamemBERT base model selected above\n",
+    "      num_labels = numberOfClasses, # one output label per EDdA domain class\n",
+    "      output_attentions = False, # Whether the model returns attention weights.\n",
+    "      output_hidden_states = False, # Whether the model returns all hidden-states.\n",
+    "  )\n",
+    "\n",
+    "# Tell pytorch to run this model on the GPU.\n",
+    "model.cuda()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "xd_cG-8pj4Iw"
+   },
+   "outputs": [],
+   "source": [
+    "# Note: AdamW is a class from the huggingface library (as opposed to pytorch).\n",
+    "# The 'W' stands for the 'weight decay fix'.\n",
+    "optimizer = AdamW(model.parameters(),\n",
+    "                  lr = 2e-5, # args.learning_rate - default is 5e-5, our notebook had 2e-5\n",
+    "                  eps = 1e-8 # args.adam_epsilon  - default is 1e-8.\n",
+    "                )"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "65G-uHuLj4_6"
+   },
+   "outputs": [],
+   "source": [
+    "# Number of training epochs (authors recommend between 2 and 4)\n",
+    "epochs = 4\n",
+    "\n",
+    "# Total number of training steps is number of batches * number of epochs.\n",
+    "total_steps = len(train_dataloader) * epochs\n",
+    "\n",
+    "# Create the learning rate scheduler.\n",
+    "scheduler = get_linear_schedule_with_warmup(optimizer, \n",
+    "                                            num_warmup_steps = 0, # Default value in run_glue.py\n",
+    "                                            num_training_steps = total_steps)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "background_save": true,
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "SbHBbYpwkKaA",
+    "outputId": "49f7f5f4-716d-44c2-e299-505086a89061"
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "======== Epoch 1 / 4 ========\n",
+      "Training...\n",
+      "  Batch    40  of  2,642.    Elapsed: 0:00:18.\n",
+      "  Batch    80  of  2,642.    Elapsed: 0:00:36.\n",
+      "  Batch   120  of  2,642.    Elapsed: 0:00:55.\n",
+      "  Batch   160  of  2,642.    Elapsed: 0:01:13.\n",
+      "  Batch   200  of  2,642.    Elapsed: 0:01:31.\n",
+      "  Batch   240  of  2,642.    Elapsed: 0:01:49.\n",
+      "  Batch   280  of  2,642.    Elapsed: 0:02:08.\n",
+      "  Batch   320  of  2,642.    Elapsed: 0:02:26.\n",
+      "  Batch   360  of  2,642.    Elapsed: 0:02:44.\n",
+      "  Batch   400  of  2,642.    Elapsed: 0:03:02.\n",
+      "  Batch   440  of  2,642.    Elapsed: 0:03:20.\n",
+      "  Batch   480  of  2,642.    Elapsed: 0:03:39.\n",
+      "  Batch   520  of  2,642.    Elapsed: 0:03:57.\n",
+      "  Batch   560  of  2,642.    Elapsed: 0:04:15.\n",
+      "  Batch   600  of  2,642.    Elapsed: 0:04:33.\n",
+      "  Batch   640  of  2,642.    Elapsed: 0:04:51.\n",
+      "  Batch   680  of  2,642.    Elapsed: 0:05:10.\n",
+      "  Batch   720  of  2,642.    Elapsed: 0:05:28.\n",
+      "  Batch   760  of  2,642.    Elapsed: 0:05:46.\n",
+      "  Batch   800  of  2,642.    Elapsed: 0:06:04.\n",
+      "  Batch   840  of  2,642.    Elapsed: 0:06:22.\n",
+      "  Batch   880  of  2,642.    Elapsed: 0:06:41.\n",
+      "  Batch   920  of  2,642.    Elapsed: 0:06:59.\n",
+      "  Batch   960  of  2,642.    Elapsed: 0:07:17.\n",
+      "  Batch 1,000  of  2,642.    Elapsed: 0:07:35.\n",
+      "  Batch 1,040  of  2,642.    Elapsed: 0:07:54.\n",
+      "  Batch 1,080  of  2,642.    Elapsed: 0:08:12.\n",
+      "  Batch 1,120  of  2,642.    Elapsed: 0:08:30.\n",
+      "  Batch 1,160  of  2,642.    Elapsed: 0:08:48.\n",
+      "  Batch 1,200  of  2,642.    Elapsed: 0:09:06.\n",
+      "  Batch 1,240  of  2,642.    Elapsed: 0:09:25.\n",
+      "  Batch 1,280  of  2,642.    Elapsed: 0:09:43.\n",
+      "  Batch 1,320  of  2,642.    Elapsed: 0:10:01.\n",
+      "  Batch 1,360  of  2,642.    Elapsed: 0:10:19.\n",
+      "  Batch 1,400  of  2,642.    Elapsed: 0:10:37.\n",
+      "  Batch 1,440  of  2,642.    Elapsed: 0:10:56.\n",
+      "  Batch 1,480  of  2,642.    Elapsed: 0:11:14.\n",
+      "  Batch 1,520  of  2,642.    Elapsed: 0:11:32.\n",
+      "  Batch 1,560  of  2,642.    Elapsed: 0:11:50.\n",
+      "  Batch 1,600  of  2,642.    Elapsed: 0:12:08.\n",
+      "  Batch 1,640  of  2,642.    Elapsed: 0:12:27.\n",
+      "  Batch 1,680  of  2,642.    Elapsed: 0:12:45.\n",
+      "  Batch 1,720  of  2,642.    Elapsed: 0:13:03.\n",
+      "  Batch 1,760  of  2,642.    Elapsed: 0:13:21.\n",
+      "  Batch 1,800  of  2,642.    Elapsed: 0:13:39.\n",
+      "  Batch 1,840  of  2,642.    Elapsed: 0:13:58.\n",
+      "  Batch 1,880  of  2,642.    Elapsed: 0:14:16.\n",
+      "  Batch 1,920  of  2,642.    Elapsed: 0:14:34.\n",
+      "  Batch 1,960  of  2,642.    Elapsed: 0:14:52.\n",
+      "  Batch 2,000  of  2,642.    Elapsed: 0:15:11.\n",
+      "  Batch 2,040  of  2,642.    Elapsed: 0:15:29.\n",
+      "  Batch 2,080  of  2,642.    Elapsed: 0:15:47.\n",
+      "  Batch 2,120  of  2,642.    Elapsed: 0:16:05.\n",
+      "  Batch 2,160  of  2,642.    Elapsed: 0:16:23.\n",
+      "  Batch 2,200  of  2,642.    Elapsed: 0:16:42.\n",
+      "  Batch 2,240  of  2,642.    Elapsed: 0:17:00.\n",
+      "  Batch 2,280  of  2,642.    Elapsed: 0:17:18.\n",
+      "  Batch 2,320  of  2,642.    Elapsed: 0:17:36.\n",
+      "  Batch 2,360  of  2,642.    Elapsed: 0:17:54.\n",
+      "  Batch 2,400  of  2,642.    Elapsed: 0:18:13.\n",
+      "  Batch 2,440  of  2,642.    Elapsed: 0:18:31.\n",
+      "  Batch 2,480  of  2,642.    Elapsed: 0:18:49.\n",
+      "  Batch 2,520  of  2,642.    Elapsed: 0:19:07.\n",
+      "  Batch 2,560  of  2,642.    Elapsed: 0:19:26.\n",
+      "  Batch 2,600  of  2,642.    Elapsed: 0:19:44.\n",
+      "  Batch 2,640  of  2,642.    Elapsed: 0:20:02.\n",
+      "\n",
+      "  Average training loss: 2.04\n",
+      "  Training epoch took: 0:20:03\n",
+      "\n",
+      "Running Validation...\n",
+      "  Accuracy: 0.75\n",
+      "  Validation took: 0:03:09\n",
+      "\n",
+      "======== Epoch 2 / 4 ========\n",
+      "Training...\n",
+      "  Batch    40  of  2,642.    Elapsed: 0:00:18.\n",
+      "  Batch    80  of  2,642.    Elapsed: 0:00:36.\n",
+      "  Batch   120  of  2,642.    Elapsed: 0:00:55.\n",
+      "  Batch   160  of  2,642.    Elapsed: 0:01:13.\n",
+      "  Batch   200  of  2,642.    Elapsed: 0:01:31.\n",
+      "  Batch   240  of  2,642.    Elapsed: 0:01:49.\n",
+      "  Batch   280  of  2,642.    Elapsed: 0:02:07.\n",
+      "  Batch   320  of  2,642.    Elapsed: 0:02:26.\n",
+      "  Batch   360  of  2,642.    Elapsed: 0:02:44.\n",
+      "  Batch   400  of  2,642.    Elapsed: 0:03:02.\n",
+      "  Batch   440  of  2,642.    Elapsed: 0:03:20.\n",
+      "  Batch   480  of  2,642.    Elapsed: 0:03:38.\n",
+      "  Batch   520  of  2,642.    Elapsed: 0:03:57.\n",
+      "  Batch   560  of  2,642.    Elapsed: 0:04:15.\n",
+      "  Batch   600  of  2,642.    Elapsed: 0:04:33.\n",
+      "  Batch   640  of  2,642.    Elapsed: 0:04:51.\n",
+      "  Batch   680  of  2,642.    Elapsed: 0:05:10.\n",
+      "  Batch   720  of  2,642.    Elapsed: 0:05:28.\n",
+      "  Batch   760  of  2,642.    Elapsed: 0:05:46.\n",
+      "  Batch   800  of  2,642.    Elapsed: 0:06:04.\n",
+      "  Batch   840  of  2,642.    Elapsed: 0:06:22.\n",
+      "  Batch   880  of  2,642.    Elapsed: 0:06:41.\n",
+      "  Batch   920  of  2,642.    Elapsed: 0:06:59.\n",
+      "  Batch   960  of  2,642.    Elapsed: 0:07:17.\n",
+      "  Batch 1,000  of  2,642.    Elapsed: 0:07:35.\n",
+      "  Batch 1,040  of  2,642.    Elapsed: 0:07:53.\n",
+      "  Batch 1,080  of  2,642.    Elapsed: 0:08:12.\n",
+      "  Batch 1,120  of  2,642.    Elapsed: 0:08:30.\n",
+      "  Batch 1,160  of  2,642.    Elapsed: 0:08:48.\n",
+      "  Batch 1,200  of  2,642.    Elapsed: 0:09:06.\n",
+      "  Batch 1,240  of  2,642.    Elapsed: 0:09:24.\n",
+      "  Batch 1,280  of  2,642.    Elapsed: 0:09:43.\n",
+      "  Batch 1,320  of  2,642.    Elapsed: 0:10:01.\n",
+      "  Batch 1,360  of  2,642.    Elapsed: 0:10:19.\n",
+      "  Batch 1,400  of  2,642.    Elapsed: 0:10:37.\n",
+      "  Batch 1,440  of  2,642.    Elapsed: 0:10:55.\n",
+      "  Batch 1,480  of  2,642.    Elapsed: 0:11:14.\n",
+      "  Batch 1,520  of  2,642.    Elapsed: 0:11:32.\n",
+      "  Batch 1,560  of  2,642.    Elapsed: 0:11:50.\n",
+      "  Batch 1,600  of  2,642.    Elapsed: 0:12:08.\n",
+      "  Batch 1,640  of  2,642.    Elapsed: 0:12:27.\n",
+      "  Batch 1,680  of  2,642.    Elapsed: 0:12:45.\n",
+      "  Batch 1,720  of  2,642.    Elapsed: 0:13:03.\n",
+      "  Batch 1,760  of  2,642.    Elapsed: 0:13:21.\n",
+      "  Batch 1,800  of  2,642.    Elapsed: 0:13:39.\n",
+      "  Batch 1,840  of  2,642.    Elapsed: 0:13:58.\n",
+      "  Batch 1,880  of  2,642.    Elapsed: 0:14:16.\n",
+      "  Batch 1,920  of  2,642.    Elapsed: 0:14:34.\n",
+      "  Batch 1,960  of  2,642.    Elapsed: 0:14:52.\n",
+      "  Batch 2,000  of  2,642.    Elapsed: 0:15:10.\n",
+      "  Batch 2,040  of  2,642.    Elapsed: 0:15:29.\n",
+      "  Batch 2,080  of  2,642.    Elapsed: 0:15:47.\n",
+      "  Batch 2,120  of  2,642.    Elapsed: 0:16:05.\n",
+      "  Batch 2,160  of  2,642.    Elapsed: 0:16:23.\n",
+      "  Batch 2,200  of  2,642.    Elapsed: 0:16:41.\n",
+      "  Batch 2,240  of  2,642.    Elapsed: 0:17:00.\n",
+      "  Batch 2,280  of  2,642.    Elapsed: 0:17:18.\n",
+      "  Batch 2,320  of  2,642.    Elapsed: 0:17:36.\n",
+      "  Batch 2,360  of  2,642.    Elapsed: 0:17:54.\n",
+      "  Batch 2,400  of  2,642.    Elapsed: 0:18:12.\n",
+      "  Batch 2,440  of  2,642.    Elapsed: 0:18:31.\n",
+      "  Batch 2,480  of  2,642.    Elapsed: 0:18:49.\n",
+      "  Batch 2,520  of  2,642.    Elapsed: 0:19:07.\n",
+      "  Batch 2,560  of  2,642.    Elapsed: 0:19:25.\n",
+      "  Batch 2,600  of  2,642.    Elapsed: 0:19:44.\n",
+      "  Batch 2,640  of  2,642.    Elapsed: 0:20:02.\n",
+      "\n",
+      "  Average training loss: 1.03\n",
+      "  Training epoch took: 0:20:02\n",
+      "\n",
+      "Running Validation...\n",
+      "  Accuracy: 0.79\n",
+      "  Validation took: 0:03:09\n",
+      "\n",
+      "======== Epoch 3 / 4 ========\n",
+      "Training...\n",
+      "  Batch    40  of  2,642.    Elapsed: 0:00:18.\n",
+      "  Batch    80  of  2,642.    Elapsed: 0:00:36.\n",
+      "  Batch   120  of  2,642.    Elapsed: 0:00:55.\n",
+      "  Batch   160  of  2,642.    Elapsed: 0:01:13.\n",
+      "  Batch   200  of  2,642.    Elapsed: 0:01:31.\n",
+      "  Batch   240  of  2,642.    Elapsed: 0:01:49.\n",
+      "  Batch   280  of  2,642.    Elapsed: 0:02:07.\n",
+      "  Batch   320  of  2,642.    Elapsed: 0:02:26.\n",
+      "  Batch   360  of  2,642.    Elapsed: 0:02:44.\n",
+      "  Batch   400  of  2,642.    Elapsed: 0:03:02.\n",
+      "  Batch   440  of  2,642.    Elapsed: 0:03:20.\n",
+      "  Batch   480  of  2,642.    Elapsed: 0:03:38.\n",
+      "  Batch   520  of  2,642.    Elapsed: 0:03:57.\n",
+      "  Batch   560  of  2,642.    Elapsed: 0:04:15.\n",
+      "  Batch   600  of  2,642.    Elapsed: 0:04:33.\n",
+      "  Batch   640  of  2,642.    Elapsed: 0:04:51.\n",
+      "  Batch   680  of  2,642.    Elapsed: 0:05:09.\n",
+      "  Batch   720  of  2,642.    Elapsed: 0:05:28.\n",
+      "  Batch   760  of  2,642.    Elapsed: 0:05:46.\n",
+      "  Batch   800  of  2,642.    Elapsed: 0:06:04.\n",
+      "  Batch   840  of  2,642.    Elapsed: 0:06:22.\n",
+      "  Batch   880  of  2,642.    Elapsed: 0:06:41.\n",
+      "  Batch   920  of  2,642.    Elapsed: 0:06:59.\n",
+      "  Batch   960  of  2,642.    Elapsed: 0:07:17.\n",
+      "  Batch 1,000  of  2,642.    Elapsed: 0:07:35.\n",
+      "  Batch 1,040  of  2,642.    Elapsed: 0:07:53.\n",
+      "  Batch 1,080  of  2,642.    Elapsed: 0:08:12.\n",
+      "  Batch 1,120  of  2,642.    Elapsed: 0:08:30.\n",
+      "  Batch 1,160  of  2,642.    Elapsed: 0:08:48.\n",
+      "  Batch 1,200  of  2,642.    Elapsed: 0:09:06.\n",
+      "  Batch 1,240  of  2,642.    Elapsed: 0:09:24.\n",
+      "  Batch 1,280  of  2,642.    Elapsed: 0:09:43.\n",
+      "  Batch 1,320  of  2,642.    Elapsed: 0:10:01.\n",
+      "  Batch 1,360  of  2,642.    Elapsed: 0:10:19.\n",
+      "  Batch 1,400  of  2,642.    Elapsed: 0:10:37.\n",
+      "  Batch 1,440  of  2,642.    Elapsed: 0:10:55.\n",
+      "  Batch 1,480  of  2,642.    Elapsed: 0:11:14.\n",
+      "  Batch 1,520  of  2,642.    Elapsed: 0:11:32.\n",
+      "  Batch 1,560  of  2,642.    Elapsed: 0:11:50.\n",
+      "  Batch 1,600  of  2,642.    Elapsed: 0:12:08.\n",
+      "  Batch 1,640  of  2,642.    Elapsed: 0:12:26.\n",
+      "  Batch 1,680  of  2,642.    Elapsed: 0:12:45.\n",
+      "  Batch 1,720  of  2,642.    Elapsed: 0:13:03.\n",
+      "  Batch 1,760  of  2,642.    Elapsed: 0:13:21.\n",
+      "  Batch 1,800  of  2,642.    Elapsed: 0:13:39.\n",
+      "  Batch 1,840  of  2,642.    Elapsed: 0:13:57.\n",
+      "  Batch 1,880  of  2,642.    Elapsed: 0:14:16.\n",
+      "  Batch 1,920  of  2,642.    Elapsed: 0:14:34.\n",
+      "  Batch 1,960  of  2,642.    Elapsed: 0:14:52.\n",
+      "  Batch 2,000  of  2,642.    Elapsed: 0:15:10.\n",
+      "  Batch 2,040  of  2,642.    Elapsed: 0:15:28.\n",
+      "  Batch 2,080  of  2,642.    Elapsed: 0:15:47.\n",
+      "  Batch 2,120  of  2,642.    Elapsed: 0:16:05.\n",
+      "  Batch 2,160  of  2,642.    Elapsed: 0:16:23.\n",
+      "  Batch 2,200  of  2,642.    Elapsed: 0:16:41.\n",
+      "  Batch 2,240  of  2,642.    Elapsed: 0:17:00.\n",
+      "  Batch 2,280  of  2,642.    Elapsed: 0:17:18.\n",
+      "  Batch 2,320  of  2,642.    Elapsed: 0:17:36.\n",
+      "  Batch 2,360  of  2,642.    Elapsed: 0:17:54.\n",
+      "  Batch 2,400  of  2,642.    Elapsed: 0:18:12.\n",
+      "  Batch 2,440  of  2,642.    Elapsed: 0:18:31.\n",
+      "  Batch 2,480  of  2,642.    Elapsed: 0:18:49.\n",
+      "  Batch 2,520  of  2,642.    Elapsed: 0:19:07.\n",
+      "  Batch 2,560  of  2,642.    Elapsed: 0:19:25.\n",
+      "  Batch 2,600  of  2,642.    Elapsed: 0:19:43.\n",
+      "  Batch 2,640  of  2,642.    Elapsed: 0:20:02.\n",
+      "\n",
+      "  Average training loss: 0.75\n",
+      "  Training epoch took: 0:20:02\n",
+      "\n",
+      "Running Validation...\n",
+      "  Accuracy: 0.79\n",
+      "  Validation took: 0:03:09\n",
+      "\n",
+      "======== Epoch 4 / 4 ========\n",
+      "Training...\n",
+      "  Batch    40  of  2,642.    Elapsed: 0:00:18.\n",
+      "  Batch    80  of  2,642.    Elapsed: 0:00:36.\n",
+      "  Batch   120  of  2,642.    Elapsed: 0:00:55.\n",
+      "  Batch   160  of  2,642.    Elapsed: 0:01:13.\n",
+      "  Batch   200  of  2,642.    Elapsed: 0:01:31.\n",
+      "  Batch   240  of  2,642.    Elapsed: 0:01:49.\n",
+      "  Batch   280  of  2,642.    Elapsed: 0:02:07.\n",
+      "  Batch   320  of  2,642.    Elapsed: 0:02:26.\n",
+      "  Batch   360  of  2,642.    Elapsed: 0:02:44.\n",
+      "  Batch   400  of  2,642.    Elapsed: 0:03:02.\n",
+      "  Batch   440  of  2,642.    Elapsed: 0:03:20.\n",
+      "  Batch   480  of  2,642.    Elapsed: 0:03:39.\n",
+      "  Batch   520  of  2,642.    Elapsed: 0:03:57.\n",
+      "  Batch   560  of  2,642.    Elapsed: 0:04:15.\n",
+      "  Batch   600  of  2,642.    Elapsed: 0:04:33.\n",
+      "  Batch   640  of  2,642.    Elapsed: 0:04:51.\n",
+      "  Batch   680  of  2,642.    Elapsed: 0:05:10.\n",
+      "  Batch   720  of  2,642.    Elapsed: 0:05:28.\n",
+      "  Batch   760  of  2,642.    Elapsed: 0:05:46.\n",
+      "  Batch   800  of  2,642.    Elapsed: 0:06:04.\n",
+      "  Batch   840  of  2,642.    Elapsed: 0:06:22.\n",
+      "  Batch   880  of  2,642.    Elapsed: 0:06:41.\n",
+      "  Batch   920  of  2,642.    Elapsed: 0:06:59.\n",
+      "  Batch   960  of  2,642.    Elapsed: 0:07:17.\n",
+      "  Batch 1,000  of  2,642.    Elapsed: 0:07:35.\n",
+      "  Batch 1,040  of  2,642.    Elapsed: 0:07:53.\n",
+      "  Batch 1,080  of  2,642.    Elapsed: 0:08:12.\n",
+      "  Batch 1,120  of  2,642.    Elapsed: 0:08:30.\n",
+      "  Batch 1,160  of  2,642.    Elapsed: 0:08:48.\n",
+      "  Batch 1,200  of  2,642.    Elapsed: 0:09:06.\n",
+      "  Batch 1,240  of  2,642.    Elapsed: 0:09:24.\n",
+      "  Batch 1,280  of  2,642.    Elapsed: 0:09:43.\n",
+      "  Batch 1,320  of  2,642.    Elapsed: 0:10:01.\n",
+      "  Batch 1,360  of  2,642.    Elapsed: 0:10:19.\n",
+      "  Batch 1,400  of  2,642.    Elapsed: 0:10:37.\n",
+      "  Batch 1,440  of  2,642.    Elapsed: 0:10:55.\n",
+      "  Batch 1,480  of  2,642.    Elapsed: 0:11:14.\n",
+      "  Batch 1,520  of  2,642.    Elapsed: 0:11:32.\n",
+      "  Batch 1,560  of  2,642.    Elapsed: 0:11:50.\n",
+      "  Batch 1,600  of  2,642.    Elapsed: 0:12:08.\n",
+      "  Batch 1,640  of  2,642.    Elapsed: 0:12:26.\n",
+      "  Batch 1,680  of  2,642.    Elapsed: 0:12:45.\n",
+      "  Batch 1,720  of  2,642.    Elapsed: 0:13:03.\n",
+      "  Batch 1,760  of  2,642.    Elapsed: 0:13:21.\n",
+      "  Batch 1,800  of  2,642.    Elapsed: 0:13:39.\n",
+      "  Batch 1,840  of  2,642.    Elapsed: 0:13:57.\n",
+      "  Batch 1,880  of  2,642.    Elapsed: 0:14:16.\n",
+      "  Batch 1,920  of  2,642.    Elapsed: 0:14:34.\n",
+      "  Batch 1,960  of  2,642.    Elapsed: 0:14:52.\n",
+      "  Batch 2,000  of  2,642.    Elapsed: 0:15:10.\n",
+      "  Batch 2,040  of  2,642.    Elapsed: 0:15:28.\n",
+      "  Batch 2,080  of  2,642.    Elapsed: 0:15:46.\n",
+      "  Batch 2,120  of  2,642.    Elapsed: 0:16:05.\n",
+      "  Batch 2,160  of  2,642.    Elapsed: 0:16:23.\n",
+      "  Batch 2,200  of  2,642.    Elapsed: 0:16:41.\n",
+      "  Batch 2,240  of  2,642.    Elapsed: 0:16:59.\n",
+      "  Batch 2,280  of  2,642.    Elapsed: 0:17:17.\n",
+      "  Batch 2,320  of  2,642.    Elapsed: 0:17:36.\n",
+      "  Batch 2,360  of  2,642.    Elapsed: 0:17:54.\n",
+      "  Batch 2,400  of  2,642.    Elapsed: 0:18:12.\n",
+      "  Batch 2,440  of  2,642.    Elapsed: 0:18:30.\n",
+      "  Batch 2,480  of  2,642.    Elapsed: 0:18:48.\n",
+      "  Batch 2,520  of  2,642.    Elapsed: 0:19:07.\n",
+      "  Batch 2,560  of  2,642.    Elapsed: 0:19:25.\n",
+      "  Batch 2,600  of  2,642.    Elapsed: 0:19:43.\n",
+      "  Batch 2,640  of  2,642.    Elapsed: 0:20:01.\n",
+      "\n",
+      "  Average training loss: 0.60\n",
+      "  Training epoch took: 0:20:02\n",
+      "\n",
+      "Running Validation...\n",
+      "  Accuracy: 0.80\n",
+      "  Validation took: 0:03:09\n",
+      "\n",
+      "Training complete!\n"
+     ]
+    }
+   ],
+   "source": [
+    "# This training code is based on the `run_glue.py` script here:\n",
+    "# https://github.com/huggingface/transformers/blob/5bfcd0485ece086ebcbed2d008813037968a9e58/examples/run_glue.py#L128\n",
+    "\n",
+    "# Set the seed value all over the place to make this reproducible.\n",
+    "seed_val = 42\n",
+    "\n",
+    "random.seed(seed_val)\n",
+    "np.random.seed(seed_val)\n",
+    "torch.manual_seed(seed_val)\n",
+    "torch.cuda.manual_seed_all(seed_val)\n",
+    "\n",
+    "# Store the average loss after each epoch so I can plot them.\n",
+    "loss_values = []\n",
+    "\n",
+    "# For each epoch...\n",
+    "for epoch_i in range(0, epochs):\n",
+    "    \n",
+    "    # ========================================\n",
+    "    #               Training\n",
+    "    # ========================================\n",
+    "    \n",
+    "    # Perform one full pass over the training set.\n",
+    "\n",
+    "    print(\"\")\n",
+    "    print('======== Epoch {:} / {:} ========'.format(epoch_i + 1, epochs))\n",
+    "    print('Training...')\n",
+    "\n",
+    "    # Measure how long the training epoch takes.\n",
+    "    t0 = time.time()\n",
+    "\n",
+    "    # Reset the total loss for this epoch.\n",
+    "    total_loss = 0\n",
+    "\n",
+    "    # Put the model into training mode.\n",
+    "    model.train()\n",
+    "\n",
+    "    # For each batch of training data...\n",
+    "    for step, batch in enumerate(train_dataloader):\n",
+    "\n",
+    "        # Progress update every 40 batches.\n",
+    "        if step % 40 == 0 and not step == 0:\n",
+    "            # Calculate elapsed time in minutes.\n",
+    "            elapsed = format_time(time.time() - t0)\n",
+    "            \n",
+    "            # Report progress.\n",
+    "            print('  Batch {:>5,}  of  {:>5,}.    Elapsed: {:}.'.format(step, len(train_dataloader), elapsed))\n",
+    "\n",
+    "        # Unpack this training batch from the dataloader. \n",
+    "        #\n",
+    "        # As I unpack the batch, I'll also copy each tensor to the GPU using the \n",
+    "        # `to` method.\n",
+    "        #\n",
+    "        # `batch` contains three pytorch tensors:\n",
+    "        #   [0]: input ids \n",
+    "        #   [1]: attention masks\n",
+    "        #   [2]: labels \n",
+    "        b_input_ids = batch[0].to(device)\n",
+    "        b_input_mask = batch[1].to(device)\n",
+    "        b_labels = batch[2].to(device)\n",
+    "\n",
+    "        # Always clear any previously calculated gradients before performing a\n",
+    "        # backward pass. PyTorch doesn't do this automatically because \n",
+    "        # accumulating the gradients is \"convenient while training RNNs\". \n",
+    "        # (source: https://stackoverflow.com/questions/48001598/why-do-we-need-to-call-zero-grad-in-pytorch)\n",
+    "        model.zero_grad()        \n",
+    "\n",
+    "        # Perform a forward pass (evaluate the model on this training batch).\n",
+    "        # This will return the loss (rather than the model output) because I\n",
+    "        # have provided the `labels`.\n",
+    "        # The documentation for this `model` function is here: \n",
+    "        # https://huggingface.co/transformers/v2.2.0/model_doc/bert.html#transformers.BertForSequenceClassification\n",
+    "        outputs = model(b_input_ids, \n",
+    "                    token_type_ids=None, \n",
+    "                    attention_mask=b_input_mask, \n",
+    "                    labels=b_labels)\n",
+    "        \n",
+    "        # The call to `model` always returns a tuple, so I need to pull the \n",
+    "        # loss value out of the tuple.\n",
+    "        loss = outputs[0]\n",
+    "\n",
+    "        # Accumulate the training loss over all of the batches so that I can\n",
+    "        # calculate the average loss at the end. `loss` is a Tensor containing a\n",
+    "        # single value; the `.item()` function just returns the Python value \n",
+    "        # from the tensor.\n",
+    "        total_loss += loss.item()\n",
+    "\n",
+    "        # Perform a backward pass to calculate the gradients.\n",
+    "        loss.backward()\n",
+    "\n",
+    "        # Clip the norm of the gradients to 1.0.\n",
+    "        # This is to help prevent the \"exploding gradients\" problem.\n",
+    "        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)\n",
+    "\n",
+    "        # Update parameters and take a step using the computed gradient.\n",
+    "        # The optimizer dictates the \"update rule\"--how the parameters are\n",
+    "        # modified based on their gradients, the learning rate, etc.\n",
+    "        optimizer.step()\n",
+    "\n",
+    "        # Update the learning rate.\n",
+    "        scheduler.step()\n",
+    "\n",
+    "    # Calculate the average loss over the training data.\n",
+    "    avg_train_loss = total_loss / len(train_dataloader)            \n",
+    "    \n",
+    "    # Store the loss value for plotting the learning curve.\n",
+    "    loss_values.append(avg_train_loss)\n",
+    "\n",
+    "    print(\"\")\n",
+    "    print(\"  Average training loss: {0:.2f}\".format(avg_train_loss))\n",
+    "    print(\"  Training epoch took: {:}\".format(format_time(time.time() - t0)))\n",
+    "        \n",
+    "    # ========================================\n",
+    "    #               Validation\n",
+    "    # ========================================\n",
+    "    # After the completion of each training epoch, measure the performance on\n",
+    "    # the validation set.\n",
+    "\n",
+    "    print(\"\")\n",
+    "    print(\"Running Validation...\")\n",
+    "\n",
+    "    t0 = time.time()\n",
+    "\n",
+    "    # Put the model in evaluation mode--the dropout layers behave differently\n",
+    "    # during evaluation.\n",
+    "    model.eval()\n",
+    "\n",
+    "    # Tracking variables \n",
+    "    eval_loss, eval_accuracy = 0, 0\n",
+    "    nb_eval_steps, nb_eval_examples = 0, 0\n",
+    "\n",
+    "    # Evaluate data for one epoch\n",
+    "    for batch in validation_dataloader:\n",
+    "        \n",
+    "        # Add batch to GPU\n",
+    "        batch = tuple(t.to(device) for t in batch)\n",
+    "        \n",
+    "        # Unpack the inputs from dataloader\n",
+    "        b_input_ids, b_input_mask, b_labels = batch\n",
+    "        \n",
+    "        # Telling the model not to compute or store gradients, saving memory and\n",
+    "        # speeding up validation\n",
+    "        with torch.no_grad():        \n",
+    "\n",
+    "            # Forward pass, calculate logit predictions.\n",
+    "            # This will return the logits rather than the loss because we have\n",
+    "            # not provided labels.\n",
+    "            # token_type_ids is the same as the \"segment ids\", which \n",
+    "            # differentiates sentence 1 and 2 in 2-sentence tasks.\n",
+    "            # The documentation for this `model` function is here: \n",
+    "            # https://huggingface.co/transformers/v2.2.0/model_doc/bert.html#transformers.BertForSequenceClassification\n",
+    "            outputs = model(b_input_ids, \n",
+    "                            token_type_ids=None, \n",
+    "                            attention_mask=b_input_mask)\n",
+    "        \n",
+    "        # Get the \"logits\" output by the model. The \"logits\" are the output\n",
+    "        # values prior to applying an activation function like the softmax.\n",
+    "        logits = outputs[0]\n",
+    "\n",
+    "        # Move logits and labels to CPU\n",
+    "        logits = logits.detach().cpu().numpy()\n",
+    "        label_ids = b_labels.to('cpu').numpy()\n",
+    "        \n",
+    "        # Calculate the accuracy for this batch of test sentences.\n",
+    "        tmp_eval_accuracy = flat_accuracy(logits, label_ids)\n",
+    "        \n",
+    "        # Accumulate the total accuracy.\n",
+    "        eval_accuracy += tmp_eval_accuracy\n",
+    "\n",
+    "        # Track the number of batches\n",
+    "        nb_eval_steps += 1\n",
+    "\n",
+    "    # Report the final accuracy for this validation run.\n",
+    "    print(\"  Accuracy: {0:.2f}\".format(eval_accuracy/nb_eval_steps))\n",
+    "    print(\"  Validation took: {:}\".format(format_time(time.time() - t0)))\n",
+    "\n",
+    "print(\"\")\n",
+    "print(\"Training complete!\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "uEe7lPtVKpIY"
+   },
+   "source": [
+    "## Saving model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "background_save": true
+    },
+    "id": "AYCSVm_wKnuM"
+   },
+   "outputs": [],
+   "source": [
+    "model_path = \"drive/MyDrive/Classification-EDdA/model_\"+model_bert+\"_s\"+str(maxOfInstancePerClass)+\".pt\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "background_save": true
+    },
+    "id": "qmsxrOqjCsGo"
+   },
+   "outputs": [],
+   "source": [
+    "torch.save(model, model_path)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "pM9bSsckCndR"
+   },
+   "source": [
+    "## Loading model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "cEycmiS8Cnjw"
+   },
+   "outputs": [],
+   "source": [
+    "#model = torch.load(model_path)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "VJwyfmakkQyj"
+   },
+   "source": [
+    "## Evaluation"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "K9qdtYexIIvk"
+   },
+   "outputs": [],
+   "source": [
+    "def evaluate_bert(data, labels, model, batch_size):\n",
+    "  # Tokenize all of the sentences and map the tokens to thier word IDs.\n",
+    "  input_ids = []\n",
+    "  # For every sentence...\n",
+    "  for sent in data:\n",
+    "      # `encode` will:\n",
+    "      #   (1) Tokenize the sentence.\n",
+    "      #   (2) Prepend the `[CLS]` token to the start.\n",
+    "      #   (3) Append the `[SEP]` token to the end.\n",
+    "      #   (4) Map tokens to their IDs.\n",
+    "      encoded_sent = tokenizer.encode(\n",
+    "                          str(sent),                      # Sentence to encode.\n",
+    "                          add_special_tokens = True, # Add '[CLS]' and '[SEP]'\n",
+    "                  )\n",
+    "      \n",
+    "      input_ids.append(encoded_sent)\n",
+    "\n",
+    "  # Pad our input tokens\n",
+    "  padded = []\n",
+    "  for i in input_ids:\n",
+    "\n",
+    "    if len(i) > max_len:\n",
+    "      padded.extend([i[:max_len]])\n",
+    "    else:\n",
+    "      padded.extend([i + [0] * (max_len - len(i))])\n",
+    "  input_ids = np.array(padded)\n",
+    "\n",
+    "  # Create attention masks\n",
+    "  attention_masks = []\n",
+    "\n",
+    "  # Create a mask of 1s for each token followed by 0s for padding\n",
+    "  for seq in input_ids:\n",
+    "      seq_mask = [float(i>0) for i in seq]\n",
+    "      attention_masks.append(seq_mask) \n",
+    "\n",
+    "  # Convert to tensors.\n",
+    "  prediction_inputs = torch.tensor(input_ids)\n",
+    "  prediction_masks = torch.tensor(attention_masks)\n",
+    "  prediction_labels = torch.tensor(labels)\n",
+    "\n",
+    "  # Create the DataLoader.\n",
+    "  prediction_data = TensorDataset(prediction_inputs, prediction_masks, prediction_labels)\n",
+    "  prediction_sampler = SequentialSampler(prediction_data)\n",
+    "  prediction_dataloader = DataLoader(prediction_data, sampler=prediction_sampler, batch_size=batch_size)\n",
+    "\n",
+    "  print('Predicting labels for {:,} test sentences...'.format(len(prediction_inputs)))\n",
+    "\n",
+    "  # Put model in evaluation mode\n",
+    "  model.eval()\n",
+    "\n",
+    "  # Tracking variables \n",
+    "  predictions , true_labels = [], []\n",
+    "\n",
+    "  # Predict \n",
+    "  for batch in prediction_dataloader:\n",
+    "  # Add batch to GPU\n",
+    "      batch = tuple(t.to(device) for t in batch)\n",
+    "      \n",
+    "      # Unpack the inputs from the dataloader\n",
+    "      b_input_ids, b_input_mask, b_labels = batch\n",
+    "      \n",
+    "      # Telling the model not to compute or store gradients, saving memory and \n",
+    "      # speeding up prediction\n",
+    "      with torch.no_grad():\n",
+    "          # Forward pass, calculate logit predictions\n",
+    "          outputs = model(b_input_ids, token_type_ids=None, \n",
+    "                          attention_mask=b_input_mask)\n",
+    "\n",
+    "      logits = outputs[0]\n",
+    "      #print(logits)\n",
+    "\n",
+    "      # Move logits and labels to CPU\n",
+    "      logits = logits.detach().cpu().numpy()\n",
+    "      label_ids = b_labels.to('cpu').numpy()\n",
+    "      #print(logits)\n",
+    "      \n",
+    "      # Store predictions and true labels\n",
+    "      predictions.append(logits)\n",
+    "      true_labels.append(label_ids)\n",
+    "\n",
+    "  print('    DONE.')\n",
+    "\n",
+    "\n",
+    "  pred_labels = []\n",
+    "\n",
+    "  # Evaluate each test batch using many matrics\n",
+    "  print('Calculating the matrics for each batch...')\n",
+    "\n",
+    "  for i in range(len(true_labels)):\n",
+    "    \n",
+    "    # The predictions for this batch are a 2-column ndarray (one column for \"0\" \n",
+    "    # and one column for \"1\"). Pick the label with the highest value and turn this\n",
+    "    # in to a list of 0s and 1s.\n",
+    "    pred_labels_i = np.argmax(predictions[i], axis=1).flatten()\n",
+    "    pred_labels.append(pred_labels_i)\n",
+    "\n",
+    "\n",
+    "  pred_labels_ = [item for sublist in pred_labels for item in sublist]\n",
+    "  true_labels_ = [item for sublist in true_labels for item in sublist]\n",
+    "\n",
+    "  return pred_labels_, true_labels_"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "AJ0suC8iMs8a"
+   },
+   "outputs": [],
+   "source": [
+    "dataset_name = [\"validation\", \"test\"]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "dPjV_5g8DDQy"
+   },
+   "outputs": [],
+   "source": [
+    "for dataset in dataset_name:\n",
+    "  df_eval = pd.read_csv(dataset+\"_set.tsv\", sep=\"\\t\")\n",
+    "  data_eval = df_eval[columnText].values\n",
+    "\n",
+    "  y = df_eval[columnClass]\n",
+    "  encoder = preprocessing.LabelEncoder()\n",
+    "  y = encoder.fit_transform(y)\n",
+    "  labels = y.tolist()\n",
+    "\n",
+    "  pred_labels_, true_labels_ = evaluate_bert(data_eval, labels, model, batch_size)\n",
+    "\n",
+    "\n",
+    "  report = classification_report( pred_labels_, true_labels_, output_dict = True)\n",
+    "      \n",
+    "  classes = [str(e) for e in encoder.transform(encoder.classes_)]\n",
+    "  classesName = encoder.classes_\n",
+    "\n",
+    "  precision = []\n",
+    "  recall = []\n",
+    "  f1 = []\n",
+    "  support = []\n",
+    "  dff = pd.DataFrame(columns= ['className', 'precision', 'recall', 'f1-score', 'support', 'FP', 'FN', 'TP', 'TN'])\n",
+    "  for c in classes:\n",
+    "    precision.append(report[c]['precision'])\n",
+    "    recall.append(report[c]['recall'])\n",
+    "    f1.append(report[c]['f1-score'])\n",
+    "    support.append(report[c]['support'])\n",
+    "\n",
+    "  accuracy = report['accuracy']\n",
+    "  weighted_avg = report['weighted avg']\n",
+    "  cnf_matrix = confusion_matrix(true_labels_, pred_labels_)\n",
+    "  FP = cnf_matrix.sum(axis=0) - np.diag(cnf_matrix)\n",
+    "  FN = cnf_matrix.sum(axis=1) - np.diag(cnf_matrix)\n",
+    "  TP = np.diag(cnf_matrix)\n",
+    "  TN = cnf_matrix.sum() - (FP + FN + TP)\n",
+    "\n",
+    "  dff['className'] = classesName\n",
+    "  dff['precision'] = precision\n",
+    "  dff['recall'] = recall\n",
+    "  dff['f1-score'] = f1\n",
+    "  dff['support'] = support\n",
+    "  dff['FP'] = FP\n",
+    "  dff['FN'] = FN\n",
+    "  dff['TP'] = TP\n",
+    "  dff['TN'] = TN\n",
+    "\n",
+    "  print(dataset+\"_\"+model_bert+\"_s\"+str(maxOfInstancePerClass))\n",
+    "\n",
+    "  print(weighted_avg)\n",
+    "  print(accuracy)\n",
+    "  print(dff)\n",
+    "\n",
+    "  dff.to_csv(\"drive/MyDrive/Classification-EDdA/report_\"+dataset+\"_\"+model_bert+\"_s\"+str(maxOfInstancePerClass)+\".csv\", index=False)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "cVdM4eT6I8g2"
+   },
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "HzxyFO3knanV"
+   },
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "KDRPPw4Wnap7"
+   },
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "DX81R2dcnasF"
+   },
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "wgfqJFVeJMK1"
+   },
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "GqEf5_41JMNZ"
+   },
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "x_n57EvhJMQh"
+   },
+   "outputs": [],
+   "source": [
+    "model_path = \"drive/MyDrive/Classification-EDdA/model_bert-base-multilingual-cased_s10000.pt\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "R3_9tA9MI8ju"
+   },
+   "outputs": [],
+   "source": [
+    "model = torch.load(model_path)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "_fzgS5USJeAF",
+    "outputId": "be4a5506-76ed-4eef-bb3c-fe2bb77c6e4d"
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "--2021-09-30 19:38:22--  https://projet.liris.cnrs.fr/geode/files/datasets/EDdA/Classification/LGE_withContent.tsv\n",
+      "Resolving projet.liris.cnrs.fr (projet.liris.cnrs.fr)... 134.214.142.28\n",
+      "Connecting to projet.liris.cnrs.fr (projet.liris.cnrs.fr)|134.214.142.28|:443... connected.\n",
+      "HTTP request sent, awaiting response... 200 OK\n",
+      "Length: 356197 (348K) [text/tab-separated-values]\n",
+      "Saving to: ‘LGE_withContent.tsv’\n",
+      "\n",
+      "LGE_withContent.tsv 100%[===================>] 347.85K   567KB/s    in 0.6s    \n",
+      "\n",
+      "2021-09-30 19:38:24 (567 KB/s) - ‘LGE_withContent.tsv’ saved [356197/356197]\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "!wget https://projet.liris.cnrs.fr/geode/files/datasets/EDdA/Classification/LGE_withContent.tsv"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "8WEJjQC7I8mP"
+   },
+   "outputs": [],
+   "source": [
+    "df_LGE = pd.read_csv(\"LGE_withContent.tsv\", sep=\"\\t\")\n",
+    "data_LGE = df_LGE[\"content\"].values\n",
+    "\n",
+    "\n",
+    "#pred_labels_, true_labels_ = evaluate_bert(data_eval, labels, model, batch_size)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 206
+    },
+    "id": "9qJDTU-6vzkk",
+    "outputId": "1b279f0e-7715-4d23-f524-08e8ba327f6c"
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>id</th>\n",
+       "      <th>tome</th>\n",
+       "      <th>rank</th>\n",
+       "      <th>domain</th>\n",
+       "      <th>remark</th>\n",
+       "      <th>content</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>abrabeses-0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>623</td>\n",
+       "      <td>geography</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>ABRABESES. Village d’Espagne de la prov. de Za...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>accius-0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1076</td>\n",
+       "      <td>biography</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>ACCIUS, L. ou L. ATTIUS (170-94 av. J.-C.), po...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>achenbach-2</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1357</td>\n",
+       "      <td>biography</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>ACHENBACH(Henri), administrateur prussien, né ...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>acireale-0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1513</td>\n",
+       "      <td>geography</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>ACIREALE. Yille de Sicile, de la province et d...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>actée-0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1731</td>\n",
+       "      <td>botany</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>ACTÉE(Actœa L.). Genre de plantes de la famill...</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "            id  tome  ...  remark                                            content\n",
+       "0  abrabeses-0     1  ...     NaN  ABRABESES. Village d’Espagne de la prov. de Za...\n",
+       "1     accius-0     1  ...     NaN  ACCIUS, L. ou L. ATTIUS (170-94 av. J.-C.), po...\n",
+       "2  achenbach-2     1  ...     NaN  ACHENBACH(Henri), administrateur prussien, né ...\n",
+       "3   acireale-0     1  ...     NaN  ACIREALE. Yille de Sicile, de la province et d...\n",
+       "4      actée-0     1  ...     NaN  ACTÉE(Actœa L.). Genre de plantes de la famill...\n",
+       "\n",
+       "[5 rows x 6 columns]"
+      ]
+     },
+     "execution_count": 10,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df_LGE.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "71-fP61-OOwQ",
+    "outputId": "ef08b49e-0a9f-4653-e303-3163250af35b"
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(310, 6)"
+      ]
+     },
+     "execution_count": 15,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df_LGE.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "lFFed2EAI8oq"
+   },
+   "outputs": [],
+   "source": [
+    "def generate_prediction_dataloader(chosen_model, sentences_to_predict, batch_size = 8, max_len = 512):\n",
+    "\n",
+    "    if chosen_model == 'bert-base-multilingual-cased' :\n",
+    "        print('Loading Bert Tokenizer...')\n",
+    "        tokenizer = BertTokenizer.from_pretrained(chosen_model)\n",
+    "    elif chosen_model == 'camembert-base':\n",
+    "        print('Loading Camembert Tokenizer...')\n",
+    "        tokenizer = CamembertTokenizer.from_pretrained(chosen_model)\n",
+    "\n",
+    "    # Tokenize all of the sentences and map the tokens to thier word IDs.\n",
+    "    input_ids_test = []\n",
+    "    # For every sentence...\n",
+    "    for sent in sentences_to_predict:\n",
+    "        # `encode` will:\n",
+    "        #   (1) Tokenize the sentence.\n",
+    "        #   (2) Prepend the `[CLS]` token to the start.\n",
+    "        #   (3) Append the `[SEP]` token to the end.\n",
+    "        #   (4) Map tokens to their IDs.\n",
+    "        encoded_sent = tokenizer.encode(\n",
+    "                            sent,                      # Sentence to encode.\n",
+    "                            add_special_tokens = True, # Add '[CLS]' and '[SEP]'\n",
+    "                    )\n",
+    "\n",
+    "        input_ids_test.append(encoded_sent)\n",
+    "\n",
+    "    # Pad our input tokens\n",
+    "    padded_test = []\n",
+    "    for i in input_ids_test:\n",
+    "\n",
+    "        if len(i) > max_len:\n",
+    "            padded_test.extend([i[:max_len]])\n",
+    "        else:\n",
+    "\n",
+    "            padded_test.extend([i + [0] * (max_len - len(i))])\n",
+    "    input_ids_test = np.array(padded_test)\n",
+    "\n",
+    "    # Create attention masks\n",
+    "    attention_masks = []\n",
+    "\n",
+    "    # Create a mask of 1s for each token followed by 0s for padding\n",
+    "    for seq in input_ids_test:\n",
+    "        seq_mask = [float(i>0) for i in seq]\n",
+    "        attention_masks.append(seq_mask)\n",
+    "\n",
+    "    # Convert to tensors.\n",
+    "    prediction_inputs = torch.tensor(input_ids_test)\n",
+    "    prediction_masks = torch.tensor(attention_masks)\n",
+    "    #set batch size\n",
+    "\n",
+    "\n",
+    "    # Create the DataLoader.\n",
+    "    prediction_data = TensorDataset(prediction_inputs, prediction_masks)\n",
+    "    prediction_sampler = SequentialSampler(prediction_data)\n",
+    "    prediction_dataloader = DataLoader(prediction_data, sampler=prediction_sampler, batch_size=batch_size)\n",
+    "\n",
+    "    return prediction_dataloader\n",
+    "\n",
+    "\n",
+    "\n",
+    "def predict_class_bertFineTuning(model, sentences_to_predict_dataloader):\n",
+    "\n",
+    "\n",
+    "    # If there's a GPU available...\n",
+    "    if torch.cuda.is_available():\n",
+    "\n",
+    "        # Tell PyTorch to use the GPU.\n",
+    "        device = torch.device(\"cuda\")\n",
+    "\n",
+    "        print('There are %d GPU(s) available.' % torch.cuda.device_count())\n",
+    "\n",
+    "        print('We will use the GPU:', torch.cuda.get_device_name(0))\n",
+    "\n",
+    "        # If not...\n",
+    "    else:\n",
+    "        print('No GPU available, using the CPU instead.')\n",
+    "        device = torch.device(\"cpu\")\n",
+    "\n",
+    "    # Put model in evaluation mode\n",
+    "    model.eval()\n",
+    "\n",
+    "    # Tracking variables\n",
+    "    predictions_test , true_labels = [], []\n",
+    "    pred_labels_ = []\n",
+    "    # Predict\n",
+    "    for batch in sentences_to_predict_dataloader:\n",
+    "    # Add batch to GPU\n",
+    "        batch = tuple(t.to(device) for t in batch)\n",
+    "\n",
+    "        # Unpack the inputs from the dataloader\n",
+    "        b_input_ids, b_input_mask = batch\n",
+    "\n",
+    "        # Telling the model not to compute or store gradients, saving memory and\n",
+    "        # speeding up prediction\n",
+    "        with torch.no_grad():\n",
+    "            # Forward pass, calculate logit predictions\n",
+    "            outputs = model(b_input_ids, token_type_ids=None,\n",
+    "                            attention_mask=b_input_mask)\n",
+    "\n",
+    "        logits = outputs[0]\n",
+    "        #print(logits)\n",
+    "\n",
+    "        # Move logits and labels to CPU\n",
+    "        logits = logits.detach().cpu().numpy()\n",
+    "        #print(logits)\n",
+    "\n",
+    "        # Store predictions and true labels\n",
+    "        predictions_test.append(logits)\n",
+    "\n",
+    "        #print('    DONE.')\n",
+    "\n",
+    "        pred_labels = []\n",
+    "        \n",
+    "        for i in range(len(predictions_test)):\n",
+    "\n",
+    "            # The predictions for this batch are a 2-column ndarray (one column for \"0\"\n",
+    "            # and one column for \"1\"). Pick the label with the highest value and turn this\n",
+    "            # in to a list of 0s and 1s.\n",
+    "            pred_labels_i = np.argmax(predictions_test[i], axis=1).flatten()\n",
+    "            pred_labels.append(pred_labels_i)\n",
+    "\n",
+    "    pred_labels_ += [item for sublist in pred_labels for item in sublist]\n",
+    "    return pred_labels_\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "O9eer_kgI8rC",
+    "outputId": "94ea7418-14a8-4918-e210-caf0018f5989"
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Loading Bert Tokenizer...\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Token indices sequence length is longer than the specified maximum sequence length for this model (1204 > 512). Running this sequence through the model will result in indexing errors\n"
+     ]
+    }
+   ],
+   "source": [
+    "data_loader = generate_prediction_dataloader('bert-base-multilingual-cased', data_LGE)\n",
+    "#data_loader = generate_prediction_dataloader('camembert-base', data_LGE)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "sFpAwbrBwF2h",
+    "outputId": "8d210732-619d-41f0-b6e2-ad9d06a85069"
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "There are 1 GPU(s) available.\n",
+      "We will use the GPU: Tesla P100-PCIE-16GB\n"
+     ]
+    }
+   ],
+   "source": [
+    "p = predict_class_bertFineTuning( model, data_loader )"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "51HF6-8UPSTc",
+    "outputId": "26bff792-eb8d-4e1a-efa4-a7a6c9d32bf9"
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "310"
+      ]
+     },
+     "execution_count": 30,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "len(p)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "rFFGhaCvQHfh"
+   },
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "qgJ-O4rcQHiI",
+    "outputId": "bfe93dd6-4d89-4d5c-be0d-45e1c98c6b14"
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "LabelEncoder()"
+      ]
+     },
+     "execution_count": 41,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Il faudrait enregistrer l'encoder, \n",
+    "# sinon on est obligé de le refaire à partir du jeu d'entrainement pour récupérer le noms des classes.\n",
+    "encoder"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "QuST9wJoQHnS"
+   },
+   "outputs": [],
+   "source": [
+    "p2 = list(encoder.inverse_transform(p))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "6ek7suq9QHqE",
+    "outputId": "6636983a-7eba-48c8-d884-f8fb437294dc"
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "['Géographie',\n",
+       " 'Géographie',\n",
+       " 'Géographie',\n",
+       " 'Géographie',\n",
+       " 'Histoire naturelle',\n",
+       " 'Chimie',\n",
+       " 'Histoire naturelle',\n",
+       " 'Géographie',\n",
+       " 'Mathématiques',\n",
+       " 'Histoire',\n",
+       " 'Géographie',\n",
+       " 'Musique',\n",
+       " 'Commerce',\n",
+       " 'Commerce',\n",
+       " 'Géographie',\n",
+       " 'Géographie',\n",
+       " 'Histoire',\n",
+       " 'Géographie',\n",
+       " 'Histoire naturelle',\n",
+       " 'Géographie',\n",
+       " 'Physique - [Sciences physico-mathématiques]',\n",
+       " 'Histoire naturelle',\n",
+       " 'Chimie',\n",
+       " 'Histoire',\n",
+       " 'Physique - [Sciences physico-mathématiques]',\n",
+       " 'Commerce',\n",
+       " 'Géographie',\n",
+       " 'Géographie',\n",
+       " 'Géographie',\n",
+       " 'Géographie',\n",
+       " 'Géographie',\n",
+       " 'Histoire',\n",
+       " 'Histoire naturelle',\n",
+       " 'Médecine - Chirurgie',\n",
+       " 'Géographie',\n",
+       " 'Architecture',\n",
+       " 'Histoire naturelle',\n",
+       " 'Histoire naturelle',\n",
+       " 'Géographie',\n",
+       " 'Arts et métiers',\n",
+       " 'Géographie',\n",
+       " 'Histoire naturelle',\n",
+       " 'Marine',\n",
+       " 'Histoire',\n",
+       " 'Géographie',\n",
+       " 'Architecture',\n",
+       " 'Histoire naturelle',\n",
+       " 'Beaux-arts',\n",
+       " 'Commerce',\n",
+       " 'Géographie',\n",
+       " 'Géographie',\n",
+       " 'Géographie',\n",
+       " 'Géographie',\n",
+       " 'Géographie',\n",
+       " 'Géographie',\n",
+       " 'Beaux-arts',\n",
+       " 'Géographie',\n",
+       " 'Géographie',\n",
+       " 'Médecine - Chirurgie',\n",
+       " 'Géographie',\n",
+       " 'Histoire naturelle',\n",
+       " 'Chimie',\n",
+       " 'Géographie',\n",
+       " 'Commerce',\n",
+       " 'Géographie',\n",
+       " 'Religion',\n",
+       " 'Histoire naturelle',\n",
+       " 'Géographie',\n",
+       " 'Commerce',\n",
+       " 'Agriculture - Economie rustique',\n",
+       " 'Géographie',\n",
+       " 'Géographie',\n",
+       " 'Jeu',\n",
+       " 'Géographie',\n",
+       " 'Géographie',\n",
+       " 'Géographie',\n",
+       " 'Géographie',\n",
+       " 'Géographie',\n",
+       " 'Géographie',\n",
+       " 'Beaux-arts',\n",
+       " 'Géographie',\n",
+       " 'Géographie',\n",
+       " 'Beaux-arts',\n",
+       " 'Histoire naturelle',\n",
+       " 'Géographie',\n",
+       " 'Histoire naturelle',\n",
+       " 'Géographie',\n",
+       " 'Commerce',\n",
+       " 'Géographie',\n",
+       " 'Géographie',\n",
+       " 'Histoire naturelle',\n",
+       " 'Histoire',\n",
+       " 'Histoire naturelle',\n",
+       " 'Commerce',\n",
+       " 'Histoire',\n",
+       " 'Militaire (Art) - Guerre - Arme',\n",
+       " 'Histoire',\n",
+       " 'Géographie',\n",
+       " 'Commerce',\n",
+       " 'Géographie',\n",
+       " 'Histoire',\n",
+       " 'Géographie',\n",
+       " 'Religion',\n",
+       " 'Géographie',\n",
+       " 'Commerce',\n",
+       " 'Agriculture - Economie rustique',\n",
+       " 'Histoire',\n",
+       " 'Géographie',\n",
+       " 'Géographie',\n",
+       " 'Métiers',\n",
+       " 'Belles-lettres - Poésie',\n",
+       " 'Beaux-arts',\n",
+       " 'Religion',\n",
+       " 'Architecture',\n",
+       " 'Architecture',\n",
+       " 'Architecture',\n",
+       " 'Géographie',\n",
+       " 'Chimie',\n",
+       " 'Géographie',\n",
+       " 'Géographie',\n",
+       " 'Beaux-arts',\n",
+       " 'Histoire naturelle',\n",
+       " 'Militaire (Art) - Guerre - Arme',\n",
+       " 'Géographie',\n",
+       " 'Histoire naturelle',\n",
+       " 'Médecine - Chirurgie',\n",
+       " 'Géographie',\n",
+       " 'Géographie',\n",
+       " 'Géographie',\n",
+       " 'Géographie',\n",
+       " 'Minéralogie',\n",
+       " 'Belles-lettres - Poésie',\n",
+       " 'Histoire naturelle',\n",
+       " 'Géographie',\n",
+       " 'Commerce',\n",
+       " 'Géographie',\n",
+       " 'Médecine - Chirurgie',\n",
+       " 'Géographie',\n",
+       " 'Géographie',\n",
+       " 'Grammaire',\n",
+       " 'Géographie',\n",
+       " 'Géographie',\n",
+       " 'Géographie',\n",
+       " 'Géographie',\n",
+       " 'Mathématiques',\n",
+       " 'Géographie',\n",
+       " 'Médecine - Chirurgie',\n",
+       " 'Blason',\n",
+       " 'Géographie',\n",
+       " 'Commerce',\n",
+       " 'Histoire naturelle',\n",
+       " 'Militaire (Art) - Guerre - Arme',\n",
+       " 'Géographie',\n",
+       " 'Antiquité',\n",
+       " 'Agriculture - Economie rustique',\n",
+       " 'Chimie',\n",
+       " 'Géographie',\n",
+       " 'Géographie',\n",
+       " 'Géographie',\n",
+       " 'Géographie',\n",
+       " 'Géographie',\n",
+       " 'Commerce',\n",
+       " 'Géographie',\n",
+       " 'Géographie',\n",
+       " 'Histoire naturelle',\n",
+       " 'Belles-lettres - Poésie',\n",
+       " 'Histoire',\n",
+       " 'Géographie',\n",
+       " 'Métiers',\n",
+       " 'Géographie',\n",
+       " 'Commerce',\n",
+       " 'Arts et métiers',\n",
+       " 'Géographie',\n",
+       " 'Géographie',\n",
+       " 'Géographie',\n",
+       " 'Commerce',\n",
+       " 'Géographie',\n",
+       " 'Géographie',\n",
+       " 'Géographie',\n",
+       " 'Géographie',\n",
+       " 'Géographie',\n",
+       " 'Beaux-arts',\n",
+       " 'Géographie',\n",
+       " 'Beaux-arts',\n",
+       " 'Géographie',\n",
+       " 'Commerce',\n",
+       " 'Musique',\n",
+       " 'Médecine - Chirurgie',\n",
+       " 'Religion',\n",
+       " 'Géographie',\n",
+       " 'Géographie',\n",
+       " 'Géographie',\n",
+       " 'Géographie',\n",
+       " 'Géographie',\n",
+       " 'Histoire',\n",
+       " 'Droit - Jurisprudence',\n",
+       " 'Histoire',\n",
+       " 'Médecine - Chirurgie',\n",
+       " 'Histoire',\n",
+       " 'Commerce',\n",
+       " 'Géographie',\n",
+       " 'Géographie',\n",
+       " 'Géographie',\n",
+       " 'Chimie',\n",
+       " 'Antiquité',\n",
+       " 'Géographie',\n",
+       " 'Commerce',\n",
+       " 'Géographie',\n",
+       " 'Histoire',\n",
+       " 'Géographie',\n",
+       " 'Commerce',\n",
+       " 'Géographie',\n",
+       " 'Commerce',\n",
+       " 'Beaux-arts',\n",
+       " 'Histoire',\n",
+       " 'Géographie',\n",
+       " 'Histoire naturelle',\n",
+       " 'Antiquité',\n",
+       " 'Grammaire',\n",
+       " 'Géographie',\n",
+       " 'Géographie',\n",
+       " 'Géographie',\n",
+       " 'Commerce',\n",
+       " 'Géographie',\n",
+       " 'Commerce',\n",
+       " 'Géographie',\n",
+       " 'Géographie',\n",
+       " 'Beaux-arts',\n",
+       " 'Beaux-arts',\n",
+       " 'Géographie',\n",
+       " 'Commerce',\n",
+       " 'Commerce',\n",
+       " 'Géographie',\n",
+       " 'Géographie',\n",
+       " 'Géographie',\n",
+       " 'Commerce',\n",
+       " 'Géographie',\n",
+       " 'Géographie',\n",
+       " 'Géographie',\n",
+       " 'Géographie',\n",
+       " 'Géographie',\n",
+       " 'Géographie',\n",
+       " 'Histoire',\n",
+       " 'Architecture',\n",
+       " 'Commerce',\n",
+       " 'Antiquité',\n",
+       " 'Géographie',\n",
+       " 'Géographie',\n",
+       " 'Médecine - Chirurgie',\n",
+       " 'Histoire naturelle',\n",
+       " 'Histoire',\n",
+       " 'Commerce',\n",
+       " 'Géographie',\n",
+       " 'Géographie',\n",
+       " 'Commerce',\n",
+       " 'Anatomie',\n",
+       " 'Commerce',\n",
+       " 'Beaux-arts',\n",
+       " 'Géographie',\n",
+       " 'Géographie',\n",
+       " 'Commerce',\n",
+       " 'Histoire naturelle',\n",
+       " 'Géographie',\n",
+       " 'Beaux-arts',\n",
+       " 'Commerce',\n",
+       " 'Architecture',\n",
+       " 'Commerce',\n",
+       " 'Antiquité',\n",
+       " 'Géographie',\n",
+       " 'Commerce',\n",
+       " 'Géographie',\n",
+       " 'Géographie',\n",
+       " 'Médecine - Chirurgie',\n",
+       " 'Géographie',\n",
+       " 'Géographie',\n",
+       " 'Commerce',\n",
+       " 'Géographie',\n",
+       " 'Géographie',\n",
+       " 'Géographie',\n",
+       " 'Antiquité',\n",
+       " 'Géographie',\n",
+       " 'Géographie',\n",
+       " 'Commerce',\n",
+       " 'Géographie',\n",
+       " 'Géographie',\n",
+       " 'Géographie',\n",
+       " 'Géographie',\n",
+       " 'Histoire',\n",
+       " 'Commerce',\n",
+       " 'Géographie',\n",
+       " 'Géographie',\n",
+       " 'Commerce',\n",
+       " 'Géographie',\n",
+       " 'Géographie',\n",
+       " 'Antiquité',\n",
+       " 'Géographie',\n",
+       " 'Religion',\n",
+       " 'Géographie',\n",
+       " 'Géographie',\n",
+       " 'Philosophie',\n",
+       " 'Géographie',\n",
+       " 'Chimie',\n",
+       " 'Géographie',\n",
+       " 'Géographie',\n",
+       " 'Géographie',\n",
+       " 'Beaux-arts',\n",
+       " 'Commerce',\n",
+       " 'Commerce',\n",
+       " 'Géographie',\n",
+       " 'Géographie']"
+      ]
+     },
+     "execution_count": 44,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "p2"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "XvdDj5PBQHtk"
+   },
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "t39Xs0j7QHXJ"
+   },
+   "outputs": [],
+   "source": [
+    "df_LGE['class_bert'] = p2"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 206
+    },
+    "id": "-VZ7geRmQHaD",
+    "outputId": "350a4122-5b1f-43e2-e372-2f628f665c4a"
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>id</th>\n",
+       "      <th>tome</th>\n",
+       "      <th>rank</th>\n",
+       "      <th>domain</th>\n",
+       "      <th>remark</th>\n",
+       "      <th>content</th>\n",
+       "      <th>class_bert</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>abrabeses-0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>623</td>\n",
+       "      <td>geography</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>ABRABESES. Village d’Espagne de la prov. de Za...</td>\n",
+       "      <td>Géographie</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>accius-0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1076</td>\n",
+       "      <td>biography</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>ACCIUS, L. ou L. ATTIUS (170-94 av. J.-C.), po...</td>\n",
+       "      <td>Géographie</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>achenbach-2</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1357</td>\n",
+       "      <td>biography</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>ACHENBACH(Henri), administrateur prussien, né ...</td>\n",
+       "      <td>Géographie</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>acireale-0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1513</td>\n",
+       "      <td>geography</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>ACIREALE. Yille de Sicile, de la province et d...</td>\n",
+       "      <td>Géographie</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>actée-0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1731</td>\n",
+       "      <td>botany</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>ACTÉE(Actœa L.). Genre de plantes de la famill...</td>\n",
+       "      <td>Histoire naturelle</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "            id  ...          class_bert\n",
+       "0  abrabeses-0  ...          Géographie\n",
+       "1     accius-0  ...          Géographie\n",
+       "2  achenbach-2  ...          Géographie\n",
+       "3   acireale-0  ...          Géographie\n",
+       "4      actée-0  ...  Histoire naturelle\n",
+       "\n",
+       "[5 rows x 7 columns]"
+      ]
+     },
+     "execution_count": 46,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df_LGE.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "3xkzdkrKQHwA"
+   },
+   "outputs": [],
+   "source": [
+    "df_LGE.to_csv(\"drive/MyDrive/Classification-EDdA/classification_LGE.tsv\", sep=\"\\t\")"
+   ]
+  }
+ ],
+ "metadata": {
+  "accelerator": "GPU",
+  "colab": {
+   "collapsed_sections": [],
+   "machine_shape": "hm",
+   "name": "EDdA-Classification_BertFineTuning.ipynb",
+   "provenance": []
+  },
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.10"
+  },
+  "widgets": {
+   "application/vnd.jupyter.widget-state+json": {
+    "00bd66a81aad4cd7a10df4a67b52b14e": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "DescriptionStyleModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "DescriptionStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "description_width": ""
+     }
+    },
+    "0143df420df444e9aac5c8b39c342021": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "HTMLModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "HTMLModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "HTMLView",
+      "description": "",
+      "description_tooltip": null,
+      "layout": "IPY_MODEL_044fc1f96f8347ddb4a79d31edf32174",
+      "placeholder": "​",
+      "style": "IPY_MODEL_cf0d3320e06546789b5d5a2021dbc3ad",
+      "value": " 811k/811k [00:00&lt;00:00, 932kB/s]"
+     }
+    },
+    "0214f74b229a4232a9edf3cab751b90d": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "DescriptionStyleModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "DescriptionStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "description_width": ""
+     }
+    },
+    "044fc1f96f8347ddb4a79d31edf32174": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "06c6e7721b68449a9f3619ffdf18dfeb": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "HBoxModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "HBoxModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "HBoxView",
+      "box_style": "",
+      "children": [
+       "IPY_MODEL_5ec6a851b16c4339b51acb6129935f13",
+       "IPY_MODEL_fd39a852133144e2b4aed474b204451f",
+       "IPY_MODEL_0143df420df444e9aac5c8b39c342021"
+      ],
+      "layout": "IPY_MODEL_c61b6474b55948cb91a598e6b9aa10d2"
+     }
+    },
+    "12aa3280d3284c07ac12e2fe842b40b0": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "HTMLModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "HTMLModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "HTMLView",
+      "description": "",
+      "description_tooltip": null,
+      "layout": "IPY_MODEL_f470af786c1c4d049de4f0a7f373379f",
+      "placeholder": "​",
+      "style": "IPY_MODEL_00bd66a81aad4cd7a10df4a67b52b14e",
+      "value": "Downloading: 100%"
+     }
+    },
+    "152afcb9245c416fae0fde257fa25e2e": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "17bf94188b844f649642d9c6e6a20373": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "185ae5ef7be646b797467086ad7d3a82": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "FloatProgressModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "FloatProgressModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "ProgressView",
+      "bar_style": "success",
+      "description": "",
+      "description_tooltip": null,
+      "layout": "IPY_MODEL_152afcb9245c416fae0fde257fa25e2e",
+      "max": 1395301,
+      "min": 0,
+      "orientation": "horizontal",
+      "style": "IPY_MODEL_fb3a174c597b47c7a527517004ba5f54",
+      "value": 1395301
+     }
+    },
+    "1bcdb04d16dd4f9e9d86938e1d2def02": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "FloatProgressModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "FloatProgressModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "ProgressView",
+      "bar_style": "success",
+      "description": "",
+      "description_tooltip": null,
+      "layout": "IPY_MODEL_a5efb634a95c42a7abfaaf61e1c2c928",
+      "max": 445032417,
+      "min": 0,
+      "orientation": "horizontal",
+      "style": "IPY_MODEL_600e627de1f0403595f701381dc3b164",
+      "value": 445032417
+     }
+    },
+    "1d97e83c703f4071b9176ba7bf57cddf": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "ProgressStyleModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "ProgressStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "bar_color": null,
+      "description_width": ""
+     }
+    },
+    "27a20a17123744948e0c1dbf49b51b27": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "27e18e1fa3884c0fb0339764e0397990": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "HTMLModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "HTMLModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "HTMLView",
+      "description": "",
+      "description_tooltip": null,
+      "layout": "IPY_MODEL_17bf94188b844f649642d9c6e6a20373",
+      "placeholder": "​",
+      "style": "IPY_MODEL_d3aaecd7a6e34cc8918a689ac6299746",
+      "value": " 508/508 [00:00&lt;00:00, 15.9kB/s]"
+     }
+    },
+    "2af1124092684f8bafab311cbe9bf22c": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "3203783f58e54b0e856ab84503bf0d3c": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "340241453dab4db88043d372aaa88c2e": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "FloatProgressModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "FloatProgressModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "ProgressView",
+      "bar_style": "success",
+      "description": "",
+      "description_tooltip": null,
+      "layout": "IPY_MODEL_4422e64029184ba4ba30eecfdf2b4306",
+      "max": 508,
+      "min": 0,
+      "orientation": "horizontal",
+      "style": "IPY_MODEL_1d97e83c703f4071b9176ba7bf57cddf",
+      "value": 508
+     }
+    },
+    "3ceaa994a3814d3c85e2051e37397342": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "HTMLModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "HTMLModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "HTMLView",
+      "description": "",
+      "description_tooltip": null,
+      "layout": "IPY_MODEL_75073a0f673345728871dfb0346e7c1b",
+      "placeholder": "​",
+      "style": "IPY_MODEL_db8c94b4ed724f859d1ae8c153b01110",
+      "value": " 1.40M/1.40M [00:00&lt;00:00, 2.81MB/s]"
+     }
+    },
+    "41558bfcc0464711916c2d96337bef66": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "4422e64029184ba4ba30eecfdf2b4306": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "4873cc6c9e1d493c9a67d6536e4367a6": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "HBoxModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "HBoxModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "HBoxView",
+      "box_style": "",
+      "children": [
+       "IPY_MODEL_12aa3280d3284c07ac12e2fe842b40b0",
+       "IPY_MODEL_1bcdb04d16dd4f9e9d86938e1d2def02",
+       "IPY_MODEL_b5f86071b23c40bf9c96f74c613c2729"
+      ],
+      "layout": "IPY_MODEL_27a20a17123744948e0c1dbf49b51b27"
+     }
+    },
+    "5879fadf430646f6af41b1a9b14864ff": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "HTMLModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "HTMLModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "HTMLView",
+      "description": "",
+      "description_tooltip": null,
+      "layout": "IPY_MODEL_95a3332ba4634d1c930a7021eacce230",
+      "placeholder": "​",
+      "style": "IPY_MODEL_d53488432f8544de863210d9e8ee4e48",
+      "value": "Downloading: 100%"
+     }
+    },
+    "5ec6a851b16c4339b51acb6129935f13": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "HTMLModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "HTMLModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "HTMLView",
+      "description": "",
+      "description_tooltip": null,
+      "layout": "IPY_MODEL_a0d9ceaa8d3a4876ae65d877687bcf50",
+      "placeholder": "​",
+      "style": "IPY_MODEL_aa6ea92757df47eda1e41603cb109e79",
+      "value": "Downloading: 100%"
+     }
+    },
+    "600e627de1f0403595f701381dc3b164": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "ProgressStyleModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "ProgressStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "bar_color": null,
+      "description_width": ""
+     }
+    },
+    "6a29c1c28ceb415f91ec55512da981c5": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "HBoxModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "HBoxModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "HBoxView",
+      "box_style": "",
+      "children": [
+       "IPY_MODEL_5879fadf430646f6af41b1a9b14864ff",
+       "IPY_MODEL_340241453dab4db88043d372aaa88c2e",
+       "IPY_MODEL_27e18e1fa3884c0fb0339764e0397990"
+      ],
+      "layout": "IPY_MODEL_2af1124092684f8bafab311cbe9bf22c"
+     }
+    },
+    "75073a0f673345728871dfb0346e7c1b": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "885f91c34b9c422889df8b556aad8ec0": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "DescriptionStyleModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "DescriptionStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "description_width": ""
+     }
+    },
+    "95a3332ba4634d1c930a7021eacce230": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "a0d9ceaa8d3a4876ae65d877687bcf50": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "a5efb634a95c42a7abfaaf61e1c2c928": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "aa6ea92757df47eda1e41603cb109e79": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "DescriptionStyleModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "DescriptionStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "description_width": ""
+     }
+    },
+    "b5f86071b23c40bf9c96f74c613c2729": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "HTMLModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "HTMLModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "HTMLView",
+      "description": "",
+      "description_tooltip": null,
+      "layout": "IPY_MODEL_f3b7527bd4d04c81936d8392decee3ac",
+      "placeholder": "​",
+      "style": "IPY_MODEL_885f91c34b9c422889df8b556aad8ec0",
+      "value": " 445M/445M [00:12&lt;00:00, 41.9MB/s]"
+     }
+    },
+    "c61b6474b55948cb91a598e6b9aa10d2": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "cf0d3320e06546789b5d5a2021dbc3ad": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "DescriptionStyleModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "DescriptionStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "description_width": ""
+     }
+    },
+    "d3aaecd7a6e34cc8918a689ac6299746": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "DescriptionStyleModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "DescriptionStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "description_width": ""
+     }
+    },
+    "d53488432f8544de863210d9e8ee4e48": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "DescriptionStyleModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "DescriptionStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "description_width": ""
+     }
+    },
+    "db8c94b4ed724f859d1ae8c153b01110": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "DescriptionStyleModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "DescriptionStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "description_width": ""
+     }
+    },
+    "e674e279b13b41fda3df3a6c89f5fcb1": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "f3b7527bd4d04c81936d8392decee3ac": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "f470af786c1c4d049de4f0a7f373379f": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "f7224a1b831d459594852eece9f05543": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "HTMLModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "HTMLModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "HTMLView",
+      "description": "",
+      "description_tooltip": null,
+      "layout": "IPY_MODEL_3203783f58e54b0e856ab84503bf0d3c",
+      "placeholder": "​",
+      "style": "IPY_MODEL_0214f74b229a4232a9edf3cab751b90d",
+      "value": "Downloading: 100%"
+     }
+    },
+    "fb3a174c597b47c7a527517004ba5f54": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "ProgressStyleModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "ProgressStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "bar_color": null,
+      "description_width": ""
+     }
+    },
+    "fba1d1d5c83b40659295a3457d74cb4e": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "HBoxModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "HBoxModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "HBoxView",
+      "box_style": "",
+      "children": [
+       "IPY_MODEL_f7224a1b831d459594852eece9f05543",
+       "IPY_MODEL_185ae5ef7be646b797467086ad7d3a82",
+       "IPY_MODEL_3ceaa994a3814d3c85e2051e37397342"
+      ],
+      "layout": "IPY_MODEL_e674e279b13b41fda3df3a6c89f5fcb1"
+     }
+    },
+    "fd39a852133144e2b4aed474b204451f": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "FloatProgressModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "FloatProgressModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "ProgressView",
+      "bar_style": "success",
+      "description": "",
+      "description_tooltip": null,
+      "layout": "IPY_MODEL_41558bfcc0464711916c2d96337bef66",
+      "max": 810912,
+      "min": 0,
+      "orientation": "horizontal",
+      "style": "IPY_MODEL_fdf05cea504c42f793f9c06e58ef995b",
+      "value": 810912
+     }
+    },
+    "fdf05cea504c42f793f9c06e58ef995b": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "ProgressStyleModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "ProgressStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "bar_color": null,
+      "description_width": ""
+     }
+    }
+   }
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/notebooks/EDdA_Classification_ClassicModels.ipynb b/notebooks/EDdA_Classification_ClassicModels.ipynb
new file mode 100644
index 0000000..fcb2ba0
--- /dev/null
+++ b/notebooks/EDdA_Classification_ClassicModels.ipynb
@@ -0,0 +1,861 @@
+{
+  "nbformat": 4,
+  "nbformat_minor": 0,
+  "metadata": {
+    "colab": {
+      "name": "EDdA-Classification_ClassicModels.ipynb",
+      "provenance": [],
+      "collapsed_sections": [],
+      "machine_shape": "hm"
+    },
+    "kernelspec": {
+      "display_name": "Python 3",
+      "name": "python3"
+    },
+    "language_info": {
+      "name": "python"
+    }
+  },
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "aXLlx8vXQlJw"
+      },
+      "source": [
+        "# Train supervised models for EDdA classification"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "3kYI_pq3Q1BT"
+      },
+      "source": [
+        "## Configuration"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "D_uwiuJq3pAM"
+      },
+      "source": [
+        "train_path = 'training_set.tsv'\n",
+        "validation_path = 'validation_set.tsv'\n",
+        "test_path =  'test_set.tsv'\n",
+        "\n",
+        "columnText = 'contentWithoutClass'\n",
+        "columnClass = 'ensemble_domaine_enccre'\n",
+        "\n",
+        "minOfInstancePerClass = 0\n",
+        "maxOfInstancePerClass = 10000\n",
+        "\n",
+        "\n",
+        "classifier_list = [\"bayes\"]\n",
+        "vectorizer_list = [\"bagofwords\", \"tf_idf\"]\n",
+        "\n",
+        "#classifier_list = [\"lr\", \"rfc\", \"sgd\", \"svm\"]\n",
+        "#vectorizer_list = [\"bagofwords\", \"tf_idf\", \"doc2vec\"]\n",
+        "\n",
+        "vectorization_max_df= 1.0\n",
+        "vectorization_min_df= 4\n",
+        "vectorization_numberOfFeatures= None\n",
+        "\n",
+        "doc2vec_vec_size = 700\n",
+        "max_epochs = 10\n",
+        "doc2vec_min_count = 12\n",
+        "doc2vec_dm = 0\n",
+        "doc2vec_workers = 8"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "P_L0rDhZQ6Fn"
+      },
+      "source": [
+        "## Setup colab environment"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "FsAR4CsB3aUc",
+        "outputId": "a5e4efde-a5c9-45f9-ef1c-9223b4d52ac6"
+      },
+      "source": [
+        "from psutil import virtual_memory\n",
+        "ram_gb = virtual_memory().total / 1e9\n",
+        "print('Your runtime has {:.1f} gigabytes of available RAM\\n'.format(ram_gb))\n",
+        "\n",
+        "if ram_gb < 20:\n",
+        "  print('Not using a high-RAM runtime')\n",
+        "else:\n",
+        "  print('You are using a high-RAM runtime!')"
+      ],
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Your runtime has 27.3 gigabytes of available RAM\n",
+            "\n",
+            "You are using a high-RAM runtime!\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "h5MwRwL53aYY",
+        "outputId": "bc4c4c16-fb20-404a-e044-550fc4ca907d"
+      },
+      "source": [
+        "from google.colab import drive\n",
+        "drive.mount('/content/drive')"
+      ],
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Mounted at /content/drive\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "4z78CLYi75kV"
+      },
+      "source": [
+        "## Import libraries"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "bcptSr6o3ac7",
+        "outputId": "19713482-dfeb-4be3-e63c-35b4253cb9e5"
+      },
+      "source": [
+        "import pandas as pd\n",
+        "import numpy as np\n",
+        "\n",
+        "from sklearn.naive_bayes import MultinomialNB\n",
+        "from sklearn.svm import SVC\n",
+        "from sklearn.ensemble import RandomForestClassifier\n",
+        "from sklearn.linear_model import LogisticRegression\n",
+        "from sklearn.linear_model import SGDClassifier\n",
+        "from sklearn.metrics import classification_report\n",
+        "from sklearn.metrics import confusion_matrix\n",
+        "from sklearn.model_selection import GridSearchCV\n",
+        "import pickle\n",
+        "\n",
+        "from sklearn.feature_extraction.text import CountVectorizer\n",
+        "from sklearn.feature_extraction.text import TfidfVectorizer\n",
+        "from nltk.stem.snowball import SnowballStemmer\n",
+        "from nltk.corpus import stopwords\n",
+        "from nltk.tokenize import word_tokenize\n",
+        "from gensim.models.doc2vec import Doc2Vec, TaggedDocument\n",
+        "from nltk.tokenize import word_tokenize\n",
+        "import spacy\n",
+        "import os\n",
+        "import nltk\n",
+        "import string\n",
+        "nltk.download('stopwords')\n",
+        "nltk.download('punkt')"
+      ],
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "[nltk_data] Downloading package stopwords to /root/nltk_data...\n",
+            "[nltk_data]   Package stopwords is already up-to-date!\n",
+            "[nltk_data] Downloading package punkt to /root/nltk_data...\n",
+            "[nltk_data]   Package punkt is already up-to-date!\n"
+          ]
+        },
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "True"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 3
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "dwSVXDtWZB5H",
+        "outputId": "44e2aa14-726f-43af-aa6a-1b7899e1025b"
+      },
+      "source": [
+        "!python -m spacy download fr_core_news_sm"
+      ],
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Collecting fr_core_news_sm==2.2.5\n",
+            "  Downloading https://github.com/explosion/spacy-models/releases/download/fr_core_news_sm-2.2.5/fr_core_news_sm-2.2.5.tar.gz (14.7 MB)\n",
+            "\u001b[K     |████████████████████████████████| 14.7 MB 5.5 MB/s \n",
+            "\u001b[?25hRequirement already satisfied: spacy>=2.2.2 in /usr/local/lib/python3.7/dist-packages (from fr_core_news_sm==2.2.5) (2.2.4)\n",
+            "Requirement already satisfied: wasabi<1.1.0,>=0.4.0 in /usr/local/lib/python3.7/dist-packages (from spacy>=2.2.2->fr_core_news_sm==2.2.5) (0.8.2)\n",
+            "Requirement already satisfied: setuptools in /usr/local/lib/python3.7/dist-packages (from spacy>=2.2.2->fr_core_news_sm==2.2.5) (57.4.0)\n",
+            "Requirement already satisfied: plac<1.2.0,>=0.9.6 in /usr/local/lib/python3.7/dist-packages (from spacy>=2.2.2->fr_core_news_sm==2.2.5) (1.1.3)\n",
+            "Requirement already satisfied: preshed<3.1.0,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from spacy>=2.2.2->fr_core_news_sm==2.2.5) (3.0.6)\n",
+            "Requirement already satisfied: srsly<1.1.0,>=1.0.2 in /usr/local/lib/python3.7/dist-packages (from spacy>=2.2.2->fr_core_news_sm==2.2.5) (1.0.5)\n",
+            "Requirement already satisfied: requests<3.0.0,>=2.13.0 in /usr/local/lib/python3.7/dist-packages (from spacy>=2.2.2->fr_core_news_sm==2.2.5) (2.23.0)\n",
+            "Requirement already satisfied: catalogue<1.1.0,>=0.0.7 in /usr/local/lib/python3.7/dist-packages (from spacy>=2.2.2->fr_core_news_sm==2.2.5) (1.0.0)\n",
+            "Requirement already satisfied: blis<0.5.0,>=0.4.0 in /usr/local/lib/python3.7/dist-packages (from spacy>=2.2.2->fr_core_news_sm==2.2.5) (0.4.1)\n",
+            "Requirement already satisfied: cymem<2.1.0,>=2.0.2 in /usr/local/lib/python3.7/dist-packages (from spacy>=2.2.2->fr_core_news_sm==2.2.5) (2.0.6)\n",
+            "Requirement already satisfied: thinc==7.4.0 in /usr/local/lib/python3.7/dist-packages (from spacy>=2.2.2->fr_core_news_sm==2.2.5) (7.4.0)\n",
+            "Requirement already satisfied: tqdm<5.0.0,>=4.38.0 in /usr/local/lib/python3.7/dist-packages (from spacy>=2.2.2->fr_core_news_sm==2.2.5) (4.62.3)\n",
+            "Requirement already satisfied: numpy>=1.15.0 in /usr/local/lib/python3.7/dist-packages (from spacy>=2.2.2->fr_core_news_sm==2.2.5) (1.19.5)\n",
+            "Requirement already satisfied: murmurhash<1.1.0,>=0.28.0 in /usr/local/lib/python3.7/dist-packages (from spacy>=2.2.2->fr_core_news_sm==2.2.5) (1.0.6)\n",
+            "Requirement already satisfied: importlib-metadata>=0.20 in /usr/local/lib/python3.7/dist-packages (from catalogue<1.1.0,>=0.0.7->spacy>=2.2.2->fr_core_news_sm==2.2.5) (4.8.2)\n",
+            "Requirement already satisfied: typing-extensions>=3.6.4 in /usr/local/lib/python3.7/dist-packages (from importlib-metadata>=0.20->catalogue<1.1.0,>=0.0.7->spacy>=2.2.2->fr_core_news_sm==2.2.5) (3.10.0.2)\n",
+            "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.7/dist-packages (from importlib-metadata>=0.20->catalogue<1.1.0,>=0.0.7->spacy>=2.2.2->fr_core_news_sm==2.2.5) (3.6.0)\n",
+            "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests<3.0.0,>=2.13.0->spacy>=2.2.2->fr_core_news_sm==2.2.5) (2.10)\n",
+            "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests<3.0.0,>=2.13.0->spacy>=2.2.2->fr_core_news_sm==2.2.5) (2021.10.8)\n",
+            "Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests<3.0.0,>=2.13.0->spacy>=2.2.2->fr_core_news_sm==2.2.5) (3.0.4)\n",
+            "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests<3.0.0,>=2.13.0->spacy>=2.2.2->fr_core_news_sm==2.2.5) (1.24.3)\n",
+            "Building wheels for collected packages: fr-core-news-sm\n",
+            "  Building wheel for fr-core-news-sm (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
+            "  Created wheel for fr-core-news-sm: filename=fr_core_news_sm-2.2.5-py3-none-any.whl size=14727026 sha256=994d176b35663506dd047e65863238d29b9b60313ba0dee5997c107f116477aa\n",
+            "  Stored in directory: /tmp/pip-ephem-wheel-cache-c8y7i3ag/wheels/c9/a6/ea/0778337c34660027ee67ef3a91fb9d3600b76777a912ea1c24\n",
+            "Successfully built fr-core-news-sm\n",
+            "Installing collected packages: fr-core-news-sm\n",
+            "Successfully installed fr-core-news-sm-2.2.5\n",
+            "\u001b[38;5;2m✔ Download and installation successful\u001b[0m\n",
+            "You can now load the model via spacy.load('fr_core_news_sm')\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "SuDZl6v48CBi"
+      },
+      "source": [
+        "## Utils functions"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "Tunf_CYi3afO"
+      },
+      "source": [
+        "def create_dict(df, classColumnName):\n",
+        "    return dict(df[classColumnName].value_counts())\n",
+        "\n",
+        "def remove_weak_classes(df, classColumnName, threshold):\n",
+        "    dictOfClassInstances = create_dict(df,classColumnName)\n",
+        "    dictionary = {k: v for k, v in dictOfClassInstances.items() if v >= threshold }\n",
+        "    keys = [*dictionary]\n",
+        "    df_tmp = df[~ df[classColumnName].isin(keys)]\n",
+        "    df =  pd.concat([df,df_tmp]).drop_duplicates(keep=False)\n",
+        "    return df\n",
+        "\n",
+        "\n",
+        "def resample_classes(df, classColumnName, numberOfInstances):\n",
+        "    #random numberOfInstances elements\n",
+        "    replace = False  # with replacement\n",
+        "    fn = lambda obj: obj.loc[np.random.choice(obj.index, numberOfInstances if len(obj) > numberOfInstances else len(obj), replace),:]\n",
+        "    return df.groupby(classColumnName, as_index=False).apply(fn)\n",
+        "\n",
+        "\n",
+        "def count_vect(data, max_df= 1.0 , min_df= 1, numberOfFeatures= None ):\n",
+        "  stop_words = set(stopwords.words('french'))\n",
+        "  stemmer_fr = SnowballStemmer(\"french\")\n",
+        "  analyzer = CountVectorizer().build_analyzer()\n",
+        "  def stemmed_words_fr(doc):\n",
+        "    return (stemmer_fr.stem(w) for w in analyzer(doc) if not w in stop_words)\n",
+        "  return CountVectorizer(stop_words = 'french', analyzer = stemmed_words_fr, max_df= max_df, min_df = min_df, max_features = numberOfFeatures)\n",
+        "\n",
+        "\n",
+        "def tf_idf(data, max_df= 1.0 , min_df= 1, numberOfFeatures = None):\n",
+        "  stop_words = set(stopwords.words('french'))\n",
+        "  stemmer_fr = SnowballStemmer(\"french\")\n",
+        "  analyzer = TfidfVectorizer().build_analyzer()\n",
+        "  def stemmed_words_fr(doc):\n",
+        "    return (stemmer_fr.stem(w) for w in analyzer(doc) if not w in stop_words)\n",
+        "  return TfidfVectorizer(stop_words= 'french', analyzer=stemmed_words_fr, max_df= max_df, min_df = min_df, max_features= numberOfFeatures)\n",
+        "\n",
+        "\n",
+        "def tokenize_fr_text(sentence):\n",
+        "  result = string.punctuation\n",
+        "  doc = nlp(sentence)\n",
+        "  return [X.text.lower() for X in doc if not X.text in stopWords and not X.text in result and not len(X.text) < 2]\n",
+        "\n",
+        "\n",
+        "def doc2vec(tagged_tr, max_epochs, doc2vec_vec_size, doc2vec_min_count ,  doc2vec_dm, doc2vec_workers):\n",
+        " \n",
+        "  stopWords = set(stopwords.words('french'))\n",
+        "  #tagged_tr = [TaggedDocument(words = tokenize_fr_text(_d),tags = [str(i)]) for i, _d in enumerate(data)]\n",
+        "  model = Doc2Vec(vector_size=doc2vec_vec_size, min_count = doc2vec_min_count, dm = doc2vec_dm, workers = doc2vec_workers)\n",
+        "  model.build_vocab(tagged_tr)\n",
+        "  model.train(tagged_tr, total_examples=model.corpus_count, epochs = max_epochs)\n",
+        "  return model\n",
+        "  #return np.array([model.docvecs[str(i)] for i in range(len(tagged_tr))])\n"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "Lc1DRh4b7mto"
+      },
+      "source": [
+        "## Load datasets"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "ybiJYL0h3ahh"
+      },
+      "source": [
+        "!wget https://projet.liris.cnrs.fr/geode/EDdA-Classification/datasets/training_set.tsv\n",
+        "!wget https://projet.liris.cnrs.fr/geode/EDdA-Classification/datasets/validation_set.tsv\n",
+        "!wget https://projet.liris.cnrs.fr/geode/EDdA-Classification/datasets/test_set.tsv"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "LRKJzWmf3pCg"
+      },
+      "source": [
+        "df_train = pd.read_csv(train_path, sep=\"\\t\")\n",
+        "df_train = resample_classes(df_train, columnClass, maxOfInstancePerClass)\n"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "nkRUCjiR84Qr"
+      },
+      "source": [
+        "## Vectorization\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "6QQXybaQ3pE9"
+      },
+      "source": [
+        "data_train = df_train[columnText].tolist()\n",
+        "vectorizer_dic = {}\n",
+        "\n",
+        "\n",
+        "nlp = spacy.load(\"fr_core_news_sm\")\n",
+        "stop_words = set(stopwords.words('french'))\n",
+        "\n",
+        "stemmer = SnowballStemmer('french').stem\n",
+        "def stem_tokenize(text):\n",
+        "  return [stemmer(i) for i in word_tokenize(text) if not i in stop_words]\n",
+        "\n",
+        "for vectorizer_name in vectorizer_list:\n",
+        "\n",
+        "  vec_file_name = vectorizer_name + '_s' + str(maxOfInstancePerClass) +\".pkl\"\n",
+        "  if os.path.isfile(\"drive/MyDrive/Classification-EDdA/\"+vec_file_name):\n",
+        "    \n",
+        "    # load existing vectorizers \n",
+        "    with open(\"drive/MyDrive/Classification-EDdA/\"+vec_file_name, 'rb') as file:\n",
+        "      vectorizer = pickle.load(file)\n",
+        "  \n",
+        "  else :\n",
+        "\n",
+        "    if vectorizer_name == \"bagofwords\" :\n",
+        "      #vectorizer = count_vect(data_train, vectorization_max_df, vectorization_min_df, vectorization_numberOfFeatures)\n",
+        "      vectorizer = CountVectorizer(analyzer = \"word\", lowercase=True, token_pattern='[a-zA-Z0-9]+', strip_accents='unicode',tokenizer=stem_tokenize)\n",
+        "      vectorizer.fit(data_train)\n",
+        "\n",
+        "    if vectorizer_name == \"tf_idf\" :\n",
+        "      #vectorizer = tf_idf(data_train, vectorization_max_df, vectorization_min_df, vectorization_numberOfFeatures)   \n",
+        "      vectorizer = TfidfVectorizer(analyzer='word', lowercase=True, token_pattern='[a-zA-Z0-9]+', strip_accents='unicode',tokenizer=stem_tokenize)\n",
+        "      vectorizer.fit(data_train)\n",
+        "\n",
+        "    if vectorizer_name == \"doc2vec\" :\n",
+        "      stopWords = set(stopwords.words('french'))\n",
+        "      tagged_tr = [TaggedDocument(words = tokenize_fr_text(_d),tags = [str(i)]) for i, _d in enumerate(data_train)]\n",
+        "      vectorizer = doc2vec(tagged_tr, max_epochs, doc2vec_vec_size, doc2vec_min_count, doc2vec_dm, doc2vec_workers)\n",
+        "      \n",
+        "    # saving vectorizer\n",
+        "    with open(\"drive/MyDrive/Classification-EDdA/\"+vec_file_name, 'wb') as file:\n",
+        "      pickle.dump(vectorizer, file)\n",
+        "    \n",
+        "  vectorizer_dic[vectorizer_name] = vectorizer    "
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "wntk5s8c88w5"
+      },
+      "source": [
+        "## Training classifier"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "rx_0eV-M3pHc"
+      },
+      "source": [
+        "classifier_dic = {}\n",
+        "grid_param = {}\n",
+        "\n",
+        "for classifier_name in classifier_list:\n",
+        "  if classifier_name == \"bayes\":\n",
+        "    classifier_dic[classifier_name] = MultinomialNB()\n",
+        "  elif classifier_name == \"lr\":\n",
+        "    classifier_dic[classifier_name] = LogisticRegression()\n",
+        "    grid_param[classifier_name] = {\"C\":np.logspace(-3,3,7)}\n",
+        "  elif classifier_name == \"sgd\":\n",
+        "    classifier_dic[classifier_name] = SGDClassifier()\n",
+        "    grid_param[classifier_name] = { \"loss\" : [\"log\", \"modified_huber\"]}\n",
+        "  elif classifier_name == \"svm\":\n",
+        "    classifier_dic[classifier_name] = SVC()\n",
+        "    grid_param[classifier_name] = {'kernel':['linear','rbf']}\n",
+        "  elif classifier_name == \"rfc\":\n",
+        "    classifier_dic[classifier_name] = RandomForestClassifier()\n",
+        "    grid_param[classifier_name] = { 'max_features': ['sqrt', 'log2'], 'max_depth' : [4,5,6,7,8]}\n"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "pO7oyeAF7KPK"
+      },
+      "source": [
+        "for clf_name, clf in classifier_dic.items():\n",
+        "  if clf_name != 'bayes' :\n",
+        "    clf = GridSearchCV(clf, grid_param[clf_name], refit = True, verbose = 3, n_jobs=-1)\n",
+        "\n",
+        "  for vec_name, vectorizer in vectorizer_dic.items():\n",
+        "\n",
+        "    if vec_name != 'doc2vec' :\n",
+        "      vec_data = vectorizer.transform(data_train)\n",
+        "    else : \n",
+        "      vec_data = np.array([vectorizer.docvecs[str(i)] for i in range(len(tagged_tr))])\n",
+        "\n",
+        "    clf.fit(vec_data, df_train[columnClass])\n",
+        "\n",
+        "    clf_file_name = clf_name + '_' + vec_name + '_s' + str(maxOfInstancePerClass) +\".pkl\"\n",
+        "\n",
+        "    # saving classifier\n",
+        "    with open(\"drive/MyDrive/Classification-EDdA/\"+clf_file_name, 'wb') as file:\n",
+        "      pickle.dump(clf, file)\n"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "_evrNjmZ9E0e"
+      },
+      "source": [
+        "## Evaluation\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "TfKAjtVFblYe"
+      },
+      "source": [
+        "dataset_name = [\"validation\", \"test\"]"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "h8vZar8c7KRq",
+        "outputId": "83511c89-9219-43d1-9e5a-820e75012166"
+      },
+      "source": [
+        "for dataset in dataset_name:\n",
+        "  df_eval = pd.read_csv(dataset+\"_set.tsv\", sep=\"\\t\")\n",
+        "  data_eval = df_eval[columnText].tolist()\n",
+        "\n",
+        "  for classifier_name in classifier_list:\n",
+        "\n",
+        "    for vectorizer_name in vectorizer_list:\n",
+        "\n",
+        "      clf_file_name = classifier_name + '_' + vectorizer_name + '_s' + str(maxOfInstancePerClass) +\".pkl\"\n",
+        "      with open(\"drive/MyDrive/Classification-EDdA/\"+clf_file_name, 'rb') as file:\n",
+        "        clf = pickle.load(file)\n",
+        "\n",
+        "      vec_file_name = vectorizer_name + '_s' + str(maxOfInstancePerClass) +\".pkl\"\n",
+        "      with open(\"drive/MyDrive/Classification-EDdA/\"+vec_file_name, 'rb') as file:\n",
+        "        vectorizer = pickle.load(file)\n",
+        "\n",
+        "      if vectorizer_name != 'doc2vec' :\n",
+        "        vec_data = vectorizer.transform(data_eval)\n",
+        "      else : \n",
+        "        tagged_test = [TaggedDocument(words=tokenize_fr_text(_d), tags = [str(i)]) for i, _d in enumerate(data_eval)]\n",
+        "        vec_data = np.array([vectorizer.infer_vector(tagged_test[i][0]) for i in range(len(tagged_test))])\n",
+        "\n",
+        "\n",
+        "      y_pred = clf.predict(vec_data)\n",
+        "\n",
+        "\n",
+        "      report = classification_report(y_pred, df_eval[columnClass], output_dict = True)\n",
+        "      precision = []\n",
+        "      recall = []\n",
+        "      f1 = []\n",
+        "      support = []\n",
+        "      dff = pd.DataFrame(columns= ['class', 'precision', 'recall', 'f1-score', 'support', 'FP', 'FN', 'TP', 'TN'])\n",
+        "      for c in df_eval[columnClass].unique() :\n",
+        "        precision.append(report[c]['precision'])\n",
+        "        recall.append(report[c]['recall'])\n",
+        "        f1.append(report[c]['f1-score'])\n",
+        "        support.append(report[c]['support'])\n",
+        "\n",
+        "      accuracy = report['accuracy']\n",
+        "      weighted_avg = report['weighted avg']\n",
+        "      cnf_matrix = confusion_matrix(df_eval[columnClass], y_pred)\n",
+        "      FP = cnf_matrix.sum(axis=0) - np.diag(cnf_matrix)\n",
+        "      FN = cnf_matrix.sum(axis=1) - np.diag(cnf_matrix)\n",
+        "      TP = np.diag(cnf_matrix)\n",
+        "      TN = cnf_matrix.sum() - (FP + FN + TP)\n",
+        "\n",
+        "      dff['class'] = df_eval[columnClass].unique()\n",
+        "      dff['precision'] = precision\n",
+        "      dff['recall'] = recall\n",
+        "      dff['f1-score'] = f1\n",
+        "      dff['support'] = support\n",
+        "      dff['FP'] = FP\n",
+        "      dff['FN'] = FN\n",
+        "      dff['TP'] = TP\n",
+        "      dff['TN'] = TN\n",
+        "\n",
+        "\n",
+        "      print(dataset+\"_\"+classifier_name+'_' + vectorizer_name+\"_s\"+str(maxOfInstancePerClass))\n",
+        "\n",
+        "      print(weighted_avg)\n",
+        "      print(accuracy)\n",
+        "      print(dff)\n",
+        "\n",
+        "      dff.to_csv(\"drive/MyDrive/Classification-EDdA/report_\"+dataset+\"_\"+classifier_name+'_' + vectorizer_name+\"_s\"+str(maxOfInstancePerClass)+\".csv\", index=False)\n",
+        "\n"
+      ],
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "/usr/local/lib/python3.7/dist-packages/sklearn/metrics/_classification.py:1308: UndefinedMetricWarning: Recall and F-score are ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.\n",
+            "  _warn_prf(average, modifier, msg_start, len(result))\n",
+            "/usr/local/lib/python3.7/dist-packages/sklearn/metrics/_classification.py:1308: UndefinedMetricWarning: Recall and F-score are ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.\n",
+            "  _warn_prf(average, modifier, msg_start, len(result))\n",
+            "/usr/local/lib/python3.7/dist-packages/sklearn/metrics/_classification.py:1308: UndefinedMetricWarning: Recall and F-score are ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.\n",
+            "  _warn_prf(average, modifier, msg_start, len(result))\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "validation_bayes_bagofwords_s10000\n",
+            "{'precision': 0.8377945389222964, 'recall': 0.619530464967571, 'f1-score': 0.6842670335331308, 'support': 10947}\n",
+            "0.619530464967571\n",
+            "                                          class  precision  ...    TP     TN\n",
+            "0                         Droit - Jurisprudence   0.963590  ...     5  10735\n",
+            "1                                     Grammaire   0.321888  ...    46  10760\n",
+            "2                            Histoire naturelle   0.938776  ...    55  10665\n",
+            "3                                      Commerce   0.310249  ...    42  10679\n",
+            "4                                    Géographie   0.958193  ...     0  10839\n",
+            "5                                  Architecture   0.158491  ...     0  10863\n",
+            "6                                       Monnaie   0.000000  ...     4  10751\n",
+            "7                          Médecine - Chirurgie   0.735981  ...     3  10860\n",
+            "8                                       Métiers   0.917495  ...     0  10925\n",
+            "9               Militaire (Art) - Guerre - Arme   0.182186  ...     1  10845\n",
+            "10                                     Anatomie   0.245989  ...     1  10853\n",
+            "11                                          Jeu   0.000000  ...   112  10553\n",
+            "12                                    Pharmacie   0.000000  ...  1138   9191\n",
+            "13                                    Antiquité   0.209125  ...     0  10921\n",
+            "14                      Belles-lettres - Poésie   0.020513  ...   150  10358\n",
+            "15              Agriculture - Economie rustique   0.023585  ...  2269   8114\n",
+            "16                                Mathématiques   0.142857  ...   357   9728\n",
+            "17                                   Beaux-arts   0.000000  ...   874   9278\n",
+            "18  Physique - [Sciences physico-mathématiques]   0.364372  ...     0  10893\n",
+            "19                                       Marine   0.410468  ...   149  10579\n",
+            "20                                       Chasse   0.009804  ...     5  10850\n",
+            "21                              Arts et métiers   0.000000  ...    18  10819\n",
+            "22                                     Religion   0.526646  ...     0  10912\n",
+            "23                                       Blason   0.034483  ...    45  10699\n",
+            "24                                        Pêche   0.025641  ...     0  10926\n",
+            "25                                     Histoire   0.603041  ...     0  10886\n",
+            "26                           Maréchage - Manège   0.051546  ...    11  10814\n",
+            "27                                       Mesure   0.000000  ...     0  10924\n",
+            "28                          Economie domestique   0.000000  ...   315  10264\n",
+            "29                                  Philosophie   0.000000  ...   923   8722\n",
+            "30                                 Superstition   0.000000  ...     0  10888\n",
+            "31                                       Chimie   0.010638  ...     0  10854\n",
+            "32                                    Médailles   0.000000  ...    90  10659\n",
+            "33                                      Musique   0.082707  ...     0  10925\n",
+            "34                                   Caractères   0.000000  ...     1  10908\n",
+            "35                                    Spectacle   0.000000  ...   168  10570\n",
+            "36                                  Minéralogie   0.000000  ...     0  10938\n",
+            "37                                    Politique   0.000000  ...     0  10926\n",
+            "\n",
+            "[38 rows x 9 columns]\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "/usr/local/lib/python3.7/dist-packages/sklearn/metrics/_classification.py:1308: UndefinedMetricWarning: Recall and F-score are ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.\n",
+            "  _warn_prf(average, modifier, msg_start, len(result))\n",
+            "/usr/local/lib/python3.7/dist-packages/sklearn/metrics/_classification.py:1308: UndefinedMetricWarning: Recall and F-score are ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.\n",
+            "  _warn_prf(average, modifier, msg_start, len(result))\n",
+            "/usr/local/lib/python3.7/dist-packages/sklearn/metrics/_classification.py:1308: UndefinedMetricWarning: Recall and F-score are ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.\n",
+            "  _warn_prf(average, modifier, msg_start, len(result))\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "validation_bayes_tf_idf_s10000\n",
+            "{'precision': 0.9361172330822201, 'recall': 0.48853567187357266, 'f1-score': 0.6289575972884817, 'support': 10947}\n",
+            "0.48853567187357266\n",
+            "                                          class  precision  ...    TP     TN\n",
+            "0                         Droit - Jurisprudence   0.922100  ...     0  10735\n",
+            "1                                     Grammaire   0.000000  ...     7  10760\n",
+            "2                            Histoire naturelle   0.888292  ...     0  10684\n",
+            "3                                      Commerce   0.036011  ...     1  10682\n",
+            "4                                    Géographie   0.995777  ...     0  10839\n",
+            "5                                  Architecture   0.003774  ...     0  10863\n",
+            "6                                       Monnaie   0.000000  ...     0  10752\n",
+            "7                          Médecine - Chirurgie   0.221963  ...     0  10860\n",
+            "8                                       Métiers   0.903579  ...     0  10925\n",
+            "9               Militaire (Art) - Guerre - Arme   0.004049  ...     0  10845\n",
+            "10                                     Anatomie   0.037433  ...     0  10853\n",
+            "11                                          Jeu   0.000000  ...    13  10585\n",
+            "12                                    Pharmacie   0.000000  ...  1089   9047\n",
+            "13                                    Antiquité   0.000000  ...     0  10921\n",
+            "14                      Belles-lettres - Poésie   0.000000  ...     0  10481\n",
+            "15              Agriculture - Economie rustique   0.000000  ...  2358   5636\n",
+            "16                                Mathématiques   0.000000  ...    14  10349\n",
+            "17                                   Beaux-arts   0.000000  ...   827   9314\n",
+            "18  Physique - [Sciences physico-mathématiques]   0.004049  ...     0  10893\n",
+            "19                                       Marine   0.088154  ...    32  10583\n",
+            "20                                       Chasse   0.000000  ...     0  10850\n",
+            "21                              Arts et métiers   0.000000  ...     0  10821\n",
+            "22                                     Religion   0.003135  ...     0  10912\n",
+            "23                                       Blason   0.000000  ...     1  10700\n",
+            "24                                        Pêche   0.000000  ...     0  10926\n",
+            "25                                     Histoire   0.023649  ...     0  10886\n",
+            "26                           Maréchage - Manège   0.000000  ...     0  10814\n",
+            "27                                       Mesure   0.000000  ...     0  10924\n",
+            "28                          Economie domestique   0.000000  ...    95  10502\n",
+            "29                                  Philosophie   0.000000  ...   909   8731\n",
+            "30                                 Superstition   0.000000  ...     0  10888\n",
+            "31                                       Chimie   0.000000  ...     0  10854\n",
+            "32                                    Médailles   0.000000  ...     1  10700\n",
+            "33                                      Musique   0.000000  ...     0  10925\n",
+            "34                                   Caractères   0.000000  ...     0  10908\n",
+            "35                                    Spectacle   0.000000  ...     1  10628\n",
+            "36                                  Minéralogie   0.000000  ...     0  10938\n",
+            "37                                    Politique   0.000000  ...     0  10926\n",
+            "\n",
+            "[38 rows x 9 columns]\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "/usr/local/lib/python3.7/dist-packages/sklearn/metrics/_classification.py:1308: UndefinedMetricWarning: Recall and F-score are ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.\n",
+            "  _warn_prf(average, modifier, msg_start, len(result))\n",
+            "/usr/local/lib/python3.7/dist-packages/sklearn/metrics/_classification.py:1308: UndefinedMetricWarning: Recall and F-score are ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.\n",
+            "  _warn_prf(average, modifier, msg_start, len(result))\n",
+            "/usr/local/lib/python3.7/dist-packages/sklearn/metrics/_classification.py:1308: UndefinedMetricWarning: Recall and F-score are ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.\n",
+            "  _warn_prf(average, modifier, msg_start, len(result))\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "test_bayes_bagofwords_s10000\n",
+            "{'precision': 0.8343333806034451, 'recall': 0.6158940397350994, 'f1-score': 0.6801987597575112, 'support': 13137}\n",
+            "0.6158940397350994\n",
+            "                                          class  precision  ...    TP     TN\n",
+            "0                                      Histoire   0.579466  ...     3  12882\n",
+            "1                         Droit - Jurisprudence   0.953423  ...    44  12913\n",
+            "2                                    Géographie   0.953906  ...    58  12804\n",
+            "3                                       Métiers   0.922949  ...    48  12815\n",
+            "4                                  Architecture   0.150943  ...     0  13008\n",
+            "5                          Médecine - Chirurgie   0.744639  ...     0  13037\n",
+            "6                                 Mathématiques   0.225166  ...     2  12900\n",
+            "7                                     Grammaire   0.305357  ...     4  13032\n",
+            "8                                       Monnaie   0.000000  ...     0  13110\n",
+            "9                                      Commerce   0.327945  ...     1  13015\n",
+            "10                                     Anatomie   0.196429  ...     2  13025\n",
+            "11  Physique - [Sciences physico-mathématiques]   0.331081  ...   142  12652\n",
+            "12                                  Philosophie   0.000000  ...  1351  11028\n",
+            "13                      Belles-lettres - Poésie   0.008511  ...     0  13106\n",
+            "14              Militaire (Art) - Guerre - Arme   0.199324  ...   171  12399\n",
+            "15                                    Antiquité   0.183544  ...  2711   9779\n",
+            "16                           Maréchage - Manège   0.008621  ...   412  11633\n",
+            "17                                       Chasse   0.008197  ...  1054  11199\n",
+            "18              Agriculture - Economie rustique   0.011811  ...     0  13072\n",
+            "19                           Histoire naturelle   0.942755  ...   185  12697\n",
+            "20                                     Religion   0.535248  ...     1  13021\n",
+            "21                                       Mesure   0.000000  ...    34  12983\n",
+            "22                                      Musique   0.062500  ...     0  13095\n",
+            "23                              Arts et métiers   0.000000  ...    59  12838\n",
+            "24                                       Marine   0.425287  ...     0  13111\n",
+            "25                                       Blason   0.038095  ...     0  13064\n",
+            "26                                       Chimie   0.017857  ...    10  12976\n",
+            "27                          Economie domestique   0.000000  ...     0  13109\n",
+            "28                                   Beaux-arts   0.000000  ...   382  12312\n",
+            "29                                          Jeu   0.000000  ...  1114  10375\n",
+            "30                                        Pêche   0.000000  ...     0  13066\n",
+            "31                                    Politique   0.000000  ...     0  13025\n",
+            "32                                  Minéralogie   0.000000  ...    98  12817\n",
+            "33                                    Pharmacie   0.000000  ...     0  13111\n",
+            "34                                 Superstition   0.000000  ...     0  13090\n",
+            "35                                   Caractères   0.000000  ...   205  12686\n",
+            "36                                    Médailles   0.000000  ...     0  13126\n",
+            "37                                    Spectacle   0.000000  ...     0  13112\n",
+            "\n",
+            "[38 rows x 9 columns]\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "/usr/local/lib/python3.7/dist-packages/sklearn/metrics/_classification.py:1308: UndefinedMetricWarning: Recall and F-score are ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.\n",
+            "  _warn_prf(average, modifier, msg_start, len(result))\n",
+            "/usr/local/lib/python3.7/dist-packages/sklearn/metrics/_classification.py:1308: UndefinedMetricWarning: Recall and F-score are ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.\n",
+            "  _warn_prf(average, modifier, msg_start, len(result))\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "test_bayes_tf_idf_s10000\n",
+            "{'precision': 0.9374431375624079, 'recall': 0.4883915658065007, 'f1-score': 0.6291194809131295, 'support': 13137}\n",
+            "0.4883915658065007\n",
+            "                                          class  precision  ...    TP     TN\n",
+            "0                                      Histoire   0.018284  ...     0  12883\n",
+            "1                         Droit - Jurisprudence   0.928017  ...     3  12913\n",
+            "2                                    Géographie   0.997185  ...     0  12821\n",
+            "3                                       Métiers   0.906379  ...     0  12819\n",
+            "4                                  Architecture   0.000000  ...     0  13008\n",
+            "5                          Médecine - Chirurgie   0.230019  ...     0  13037\n",
+            "6                                 Mathématiques   0.000000  ...     0  12902\n",
+            "7                                     Grammaire   0.000000  ...     0  13032\n",
+            "8                                       Monnaie   0.000000  ...     0  13110\n",
+            "9                                      Commerce   0.036952  ...     0  13015\n",
+            "10                                     Anatomie   0.013393  ...     0  13025\n",
+            "11  Physique - [Sciences physico-mathématiques]   0.003378  ...    16  12701\n",
+            "12                                  Philosophie   0.000000  ...  1315  10852\n",
+            "13                      Belles-lettres - Poésie   0.000000  ...     0  13106\n",
+            "14              Militaire (Art) - Guerre - Arme   0.003378  ...     0  12577\n",
+            "15                                    Antiquité   0.000000  ...  2834   6749\n",
+            "16                           Maréchage - Manège   0.000000  ...    13  12422\n",
+            "17                                       Chasse   0.000000  ...   978  11227\n",
+            "18              Agriculture - Economie rustique   0.000000  ...     0  13072\n",
+            "19                           Histoire naturelle   0.874776  ...    42  12702\n",
+            "20                                     Religion   0.002611  ...     0  13021\n",
+            "21                                       Mesure   0.000000  ...     0  12986\n",
+            "22                                      Musique   0.000000  ...     0  13095\n",
+            "23                              Arts et métiers   0.000000  ...     1  12841\n",
+            "24                                       Marine   0.096552  ...     0  13111\n",
+            "25                                       Blason   0.000000  ...     0  13064\n",
+            "26                                       Chimie   0.000000  ...     0  12977\n",
+            "27                          Economie domestique   0.000000  ...     0  13109\n",
+            "28                                   Beaux-arts   0.000000  ...   118  12608\n",
+            "29                                          Jeu   0.000000  ...  1094  10439\n",
+            "30                                        Pêche   0.000000  ...     0  13066\n",
+            "31                                    Politique   0.000000  ...     0  13025\n",
+            "32                                  Minéralogie   0.000000  ...     1  12840\n",
+            "33                                    Pharmacie   0.000000  ...     0  13111\n",
+            "34                                 Superstition   0.000000  ...     0  13090\n",
+            "35                                   Caractères   0.000000  ...     1  12754\n",
+            "36                                    Médailles   0.000000  ...     0  13126\n",
+            "37                                    Spectacle   0.000000  ...     0  13112\n",
+            "\n",
+            "[38 rows x 9 columns]\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "/usr/local/lib/python3.7/dist-packages/sklearn/metrics/_classification.py:1308: UndefinedMetricWarning: Recall and F-score are ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.\n",
+            "  _warn_prf(average, modifier, msg_start, len(result))\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "mMiQo_sR7KWn"
+      },
+      "source": [
+        ""
+      ],
+      "execution_count": null,
+      "outputs": []
+    }
+  ]
+}
\ No newline at end of file
diff --git a/notebooks/EDdA_Classification_DeepLearning.ipynb b/notebooks/EDdA_Classification_DeepLearning.ipynb
new file mode 100644
index 0000000..d8e9ea6
--- /dev/null
+++ b/notebooks/EDdA_Classification_DeepLearning.ipynb
@@ -0,0 +1,1351 @@
+{
+  "nbformat": 4,
+  "nbformat_minor": 0,
+  "metadata": {
+    "colab": {
+      "name": "EDdA-Classification_DeepLearning.ipynb",
+      "provenance": [],
+      "collapsed_sections": []
+    },
+    "kernelspec": {
+      "display_name": "Python 3",
+      "name": "python3"
+    },
+    "language_info": {
+      "name": "python"
+    }
+  },
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "0yFsoHXX8Iyy"
+      },
+      "source": [
+        "# Deep learning for EDdA classification"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "EyksTV6277Jv"
+      },
+      "source": [
+        "## Configuration"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "G5LT5n9O7SLt"
+      },
+      "source": [
+        "train_path = 'training_set.tsv'\n",
+        "validation_path = 'validation_set.tsv'\n",
+        "test_path =  'test_set.tsv'\n",
+        "\n",
+        "columnText = 'contentWithoutClass'\n",
+        "columnClass = 'ensemble_domaine_enccre'\n",
+        "\n",
+        "minOfInstancePerClass = 0\n",
+        "maxOfInstancePerClass = 1500\n",
+        "\n",
+        "\n",
+        "batch_size = 64\n",
+        "max_len = 512 # \n",
+        "epochs = 20\n",
+        "embedding_dim = 300 "
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "tFlUCDL2778i"
+      },
+      "source": [
+        "## Setup colab environment"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "Sp8d_Uus7SHJ",
+        "outputId": "82929364-d0a1-4962-fcb4-47224a48e6cf"
+      },
+      "source": [
+        "from google.colab import drive\n",
+        "drive.mount('/content/drive')"
+      ],
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Mounted at /content/drive\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "jQBu-p6hBU-j"
+      },
+      "source": [
+        "## Install packages"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "bTIXsF6kBUdh"
+      },
+      "source": [
+        "#!pip install zeugma\n",
+        "#!pip install plot_model"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "56-04SNF8BMx"
+      },
+      "source": [
+        "## Import librairies"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "HwWkSznz7SEv",
+        "outputId": "02ecbbf8-556f-4567-b57d-6e13a4ca28ff"
+      },
+      "source": [
+        "from nltk.tokenize import word_tokenize\n",
+        "import nltk\n",
+        "from nltk.corpus import stopwords\n",
+        "nltk.download('stopwords')\n",
+        "nltk.download('punkt')\n",
+        "\n",
+        "import keras\n",
+        "from keras import optimizers\n",
+        "from keras import backend as K\n",
+        "from keras import regularizers\n",
+        "from keras.models import Sequential\n",
+        "from keras.layers import Dense, Activation, Dropout, Flatten\n",
+        "from keras.layers import Embedding, Conv1D, MaxPooling1D, GlobalMaxPooling1D\n",
+        "from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout, Bidirectional\n",
+        "#from keras.utils import plot_model\n",
+        "from keras.preprocessing import sequence\n",
+        "from keras.preprocessing.text import Tokenizer\n",
+        "from keras.callbacks import EarlyStopping\n",
+        "\n",
+        "import string\n",
+        "import tensorflow as tf\n",
+        "#from zeugma import TextsToSequences\n",
+        "import pandas as pd\n",
+        "import numpy as np\n",
+        "from sklearn import preprocessing\n",
+        "from sklearn.metrics import classification_report\n",
+        "\n",
+        "\n",
+        "\n",
+        "from tqdm import tqdm\n",
+        "import requests, zipfile, io\n",
+        "import os, re, csv, math, codecs"
+      ],
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "[nltk_data] Downloading package stopwords to /root/nltk_data...\n",
+            "[nltk_data]   Unzipping corpora/stopwords.zip.\n",
+            "[nltk_data] Downloading package punkt to /root/nltk_data...\n",
+            "[nltk_data]   Unzipping tokenizers/punkt.zip.\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "xrekV6W978l4"
+      },
+      "source": [
+        "## Utils functions"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "4LJ5blQR7PUe"
+      },
+      "source": [
+        "\n",
+        "def resample_classes(df, classColumnName, numberOfInstances):\n",
+        "  #random numberOfInstances elements\n",
+        "  replace = False  # with replacement\n",
+        "  fn = lambda obj: obj.loc[np.random.choice(obj.index, numberOfInstances if len(obj) > numberOfInstances else len(obj), replace),:]\n",
+        "  return df.groupby(classColumnName, as_index=False).apply(fn)\n",
+        "    \n"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "-Rh3JMDh7zYd"
+      },
+      "source": [
+        ""
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "MtLr35eM753e"
+      },
+      "source": [
+        "## Load Data"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "FnbNT4NF7zal"
+      },
+      "source": [
+        "!wget https://projet.liris.cnrs.fr/geode/EDdA-Classification/datasets/training_set.tsv\n",
+        "!wget https://projet.liris.cnrs.fr/geode/EDdA-Classification/datasets/validation_set.tsv\n",
+        "!wget https://projet.liris.cnrs.fr/geode/EDdA-Classification/datasets/test_set.tsv"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "WNqDms64lfaS"
+      },
+      "source": [
+        "# download FastText\n",
+        "zip_file_url = \"https://dl.fbaipublicfiles.com/fasttext/vectors-english/crawl-300d-2M.vec.zip\"\n",
+        "r = requests.get(zip_file_url)\n",
+        "z = zipfile.ZipFile(io.BytesIO(r.content))\n",
+        "z.extractall()"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "PGMIi0CAmqSd",
+        "outputId": "09c034fd-f689-43a9-fd75-5923906d89bf"
+      },
+      "source": [
+        "print('loading word embeddings...')\n",
+        "\n",
+        "embeddings_index = {}\n",
+        "f = codecs.open('crawl-300d-2M.vec', encoding='utf-8')\n",
+        "\n",
+        "for line in tqdm(f):\n",
+        "    values = line.rstrip().rsplit(' ')\n",
+        "    word = values[0]\n",
+        "    coefs = np.asarray(values[1:], dtype='float32')\n",
+        "    embeddings_index[word] = coefs\n",
+        "f.close()\n",
+        "\n",
+        "print('found %s word vectors' % len(embeddings_index))"
+      ],
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "loading word embeddings...\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "1999996it [03:40, 9087.22it/s]"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "found 1999996 word vectors\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "nRLaQUO97zcq"
+      },
+      "source": [
+        "df_train = pd.read_csv(train_path, sep=\"\\t\")\n",
+        "df_train = resample_classes(df_train, columnClass, maxOfInstancePerClass)\n",
+        "\n",
+        "df_validation = pd.read_csv(validation_path, sep=\"\\t\")\n",
+        "df_validation = resample_classes(df_validation, columnClass, maxOfInstancePerClass)\n"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "vGWAgBH87ze8"
+      },
+      "source": [
+        "y_train  = df_train[columnClass]\n",
+        "y_validation = df_validation[columnClass]\n",
+        "numberOfClasses = y_train.nunique()\n",
+        "\n",
+        "encoder = preprocessing.LabelEncoder()\n",
+        "\n",
+        "y_train = encoder.fit_transform(y_train)\n",
+        "y_validation = encoder.fit_transform(y_validation)"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 452
+        },
+        "id": "7OYjo_uhoqcX",
+        "outputId": "79c4ff25-0476-4e12-d6ff-a8e073ee3f6c"
+      },
+      "source": [
+        "df_validation.head()"
+      ],
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/html": [
+              "<div>\n",
+              "<style scoped>\n",
+              "    .dataframe tbody tr th:only-of-type {\n",
+              "        vertical-align: middle;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe tbody tr th {\n",
+              "        vertical-align: top;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe thead th {\n",
+              "        text-align: right;\n",
+              "    }\n",
+              "</style>\n",
+              "<table border=\"1\" class=\"dataframe\">\n",
+              "  <thead>\n",
+              "    <tr style=\"text-align: right;\">\n",
+              "      <th></th>\n",
+              "      <th></th>\n",
+              "      <th>volume</th>\n",
+              "      <th>numero</th>\n",
+              "      <th>head</th>\n",
+              "      <th>normClass</th>\n",
+              "      <th>classEDdA</th>\n",
+              "      <th>author</th>\n",
+              "      <th>id_enccre</th>\n",
+              "      <th>domaine_enccre</th>\n",
+              "      <th>ensemble_domaine_enccre</th>\n",
+              "      <th>content</th>\n",
+              "      <th>contentWithoutClass</th>\n",
+              "      <th>firstParagraph</th>\n",
+              "      <th>nb_word</th>\n",
+              "    </tr>\n",
+              "  </thead>\n",
+              "  <tbody>\n",
+              "    <tr>\n",
+              "      <th rowspan=\"5\" valign=\"top\">0</th>\n",
+              "      <th>10449</th>\n",
+              "      <td>14</td>\n",
+              "      <td>2879</td>\n",
+              "      <td>Sabler une allée</td>\n",
+              "      <td>Jardinage</td>\n",
+              "      <td>terme de Jardinier.</td>\n",
+              "      <td>Jaucourt</td>\n",
+              "      <td>v14-1651-1</td>\n",
+              "      <td>jardinage</td>\n",
+              "      <td>Agriculture - Economie rustique</td>\n",
+              "      <td>\\nSabler une allée, (terme de Jardinier.) c'es...</td>\n",
+              "      <td>\\nSabler une allée, () c'est couvrir \\navec ar...</td>\n",
+              "      <td>\\nSabler une allée, () c'est couvrir \\navec ar...</td>\n",
+              "      <td>70</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>8134</th>\n",
+              "      <td>17</td>\n",
+              "      <td>1598</td>\n",
+              "      <td>Volée</td>\n",
+              "      <td>Jardinage</td>\n",
+              "      <td>Jardin.</td>\n",
+              "      <td>Jaucourt</td>\n",
+              "      <td>v17-842-3</td>\n",
+              "      <td>jardinage</td>\n",
+              "      <td>Agriculture - Economie rustique</td>\n",
+              "      <td>\\nVolée, (Jardin.) c'est le nom qu'on donne au...</td>\n",
+              "      <td>\\nVolée, () c'est le nom qu'on donne au travai...</td>\n",
+              "      <td>\\nVolée, () c'est le nom qu'on donne au travai...</td>\n",
+              "      <td>48</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>5308</th>\n",
+              "      <td>13</td>\n",
+              "      <td>2051</td>\n",
+              "      <td>PRUNELLIER</td>\n",
+              "      <td>Jardinage</td>\n",
+              "      <td>Jardinage.</td>\n",
+              "      <td>unsigned</td>\n",
+              "      <td>v13-1146-0</td>\n",
+              "      <td>jardinage</td>\n",
+              "      <td>Agriculture - Economie rustique</td>\n",
+              "      <td>\\nPRUNELLIER, s. m. (Jardinage.) arbrisseau ép...</td>\n",
+              "      <td>\\nPRUNELLIER, s. m. () arbrisseau épineux qui ...</td>\n",
+              "      <td>\\nPRUNELLIER, s. m. () arbrisseau épineux qui ...</td>\n",
+              "      <td>275</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>10064</th>\n",
+              "      <td>9</td>\n",
+              "      <td>3775</td>\n",
+              "      <td>MACQUE</td>\n",
+              "      <td>Economie rustique</td>\n",
+              "      <td>Econ. rustiq.</td>\n",
+              "      <td>unsigned</td>\n",
+              "      <td>v9-2286-0</td>\n",
+              "      <td>economierustique</td>\n",
+              "      <td>Agriculture - Economie rustique</td>\n",
+              "      <td>\\nMACQUE, s. f. (Econ. rustiq.) instrument de\\...</td>\n",
+              "      <td>\\nMACQUE, s. f. () instrument de\\nbois dont on...</td>\n",
+              "      <td>\\nMACQUE, s. f. () instrument de\\nbois dont on...</td>\n",
+              "      <td>23</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>5981</th>\n",
+              "      <td>9</td>\n",
+              "      <td>3262</td>\n",
+              "      <td>LOQUE</td>\n",
+              "      <td>Jardinage</td>\n",
+              "      <td>Jardinage.</td>\n",
+              "      <td>unsigned</td>\n",
+              "      <td>v9-1905-0</td>\n",
+              "      <td>jardinage</td>\n",
+              "      <td>Agriculture - Economie rustique</td>\n",
+              "      <td>\\nLOQUE, s. f. (Jardinage.) terme de jardinage...</td>\n",
+              "      <td>\\nLOQUE, s. f. () terme de jardinage,\\nqui n'e...</td>\n",
+              "      <td>\\nLOQUE, s. f. () terme de jardinage,\\nqui n'e...</td>\n",
+              "      <td>61</td>\n",
+              "    </tr>\n",
+              "  </tbody>\n",
+              "</table>\n",
+              "</div>"
+            ],
+            "text/plain": [
+              "         volume  ...  nb_word\n",
+              "0 10449      14  ...       70\n",
+              "  8134       17  ...       48\n",
+              "  5308       13  ...      275\n",
+              "  10064       9  ...       23\n",
+              "  5981        9  ...       61\n",
+              "\n",
+              "[5 rows x 13 columns]"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 10
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "HuUVfklf-dSR"
+      },
+      "source": [
+        "## Training models"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "NTNh6kMTp_eU",
+        "outputId": "3c1eb88c-7f1d-48f1-92bc-bc671f5e1bc1"
+      },
+      "source": [
+        "#https://github.com/emmanuellaanggi/disaster_tweet_sentiment/blob/master/(Medium)_Text_Classification_Disaster_Tweet_.ipynb\n",
+        "\n",
+        "raw_docs_train = df_train[columnText].tolist()\n",
+        "raw_docs_validation = df_validation[columnText].tolist() \n",
+        "\n",
+        "\n",
+        "print(\"pre-processing train data...\")\n",
+        "\n",
+        "stop_words = set(stopwords.words('french'))\n",
+        "\n",
+        "processed_docs_train = []\n",
+        "for doc in tqdm(raw_docs_train):\n",
+        "    tokens = word_tokenize(doc, language='french')\n",
+        "    filtered = [word for word in tokens if word not in stop_words]\n",
+        "    processed_docs_train.append(\" \".join(filtered))\n",
+        "#end for\n",
+        "\n",
+        "processed_docs_validation = []\n",
+        "for doc in tqdm(raw_docs_validation):\n",
+        "    tokens = word_tokenize(doc)\n",
+        "    filtered = [word for word in tokens if word not in stop_words]\n",
+        "    processed_docs_validation.append(\" \".join(filtered))\n",
+        "#end for\n",
+        "\n",
+        "print(\"tokenizing input data...\")\n",
+        "tokenizer = Tokenizer(num_words=max_len, lower=True, char_level=False)\n",
+        "tokenizer.fit_on_texts(processed_docs_train + processed_docs_validation)  #leaky\n",
+        "word_seq_train = tokenizer.texts_to_sequences(processed_docs_train)\n",
+        "word_seq_validation = tokenizer.texts_to_sequences(processed_docs_validation)\n",
+        "word_index = tokenizer.word_index\n",
+        "print(\"dictionary size: \", len(word_index))\n",
+        "\n",
+        "#pad sequences\n",
+        "word_seq_train = sequence.pad_sequences(word_seq_train, maxlen=max_len)\n",
+        "word_seq_validation = sequence.pad_sequences(word_seq_validation, maxlen=max_len)"
+      ],
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "pre-processing train data...\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "100%|██████████| 21129/21129 [00:15<00:00, 1359.31it/s]\n",
+            "100%|██████████| 10079/10079 [00:07<00:00, 1378.11it/s]\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "tokenizing input data...\n",
+            "dictionary size:  95254\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "Wj8RkOhT_e2c",
+        "outputId": "56152da7-47b7-4b07-84e7-8c499671d53e"
+      },
+      "source": [
+        "word_seq_validation"
+      ],
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "array([[  0,   0,   0, ..., 293,   8,   7],\n",
+              "       [  0,   0,   0, ..., 112,   8,   7],\n",
+              "       [  0,   0,   0, ..., 498, 212,   4],\n",
+              "       ...,\n",
+              "       [  0,   0,   0, ...,   1,  28,  45],\n",
+              "       [  0,   0,   0, ...,  67,  12, 460],\n",
+              "       [  0,   0,   0, ..., 188, 213,  37]], dtype=int32)"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 12
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "wGjQI0YgpQAS",
+        "outputId": "43a3d902-5a8d-4159-a21e-419b5ee35d7d"
+      },
+      "source": [
+        "#embedding matrix\n",
+        "\n",
+        "print('preparing embedding matrix...')\n",
+        "\n",
+        "words_not_found = []\n",
+        "nb_words = min(max_len, len(word_index)+1)\n",
+        "embedding_matrix = np.zeros((nb_words, embedding_dim))\n",
+        "\n",
+        "for word, i in word_index.items():\n",
+        "    if i >= nb_words:\n",
+        "        continue\n",
+        "    embedding_vector = embeddings_index.get(word)\n",
+        "    if (embedding_vector is not None) and len(embedding_vector) > 0:\n",
+        "        # words not found in embedding index will be all-zeros.\n",
+        "        embedding_matrix[i] = embedding_vector\n",
+        "    else:\n",
+        "        words_not_found.append(word)\n",
+        "print('number of null word embeddings: %d' % np.sum(np.sum(embedding_matrix, axis=1) == 0))"
+      ],
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "preparing embedding matrix...\n",
+            "number of null word embeddings: 70\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "hjaeYIZCtGca",
+        "outputId": "5ab4dd1a-a500-479f-e289-892242c83de8"
+      },
+      "source": [
+        "print(\"sample words not found: \", np.random.choice(words_not_found, 10))"
+      ],
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "sample words not found:  ['especes' \"d'argent\" \"d'où\" \"d'argent\" \"qu'elle\" \"qu'elle\" \"c'étoit\"\n",
+            " 'différens' 'faisoit' 'faisoit']\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "4O0gnsX8pNVU",
+        "outputId": "46feba64-b608-4b53-de15-b586dc24b880"
+      },
+      "source": [
+        "from keras.layers import BatchNormalization\n",
+        "import tensorflow as tf\n",
+        "\n",
+        "model = tf.keras.Sequential()\n",
+        "\n",
+        "model.add(Embedding(nb_words,embedding_dim,input_length=max_len, weights=[embedding_matrix],trainable=False))\n",
+        "model.add(Bidirectional(LSTM(100)))\n",
+        "model.add(Dense(64,activation='relu'))\n",
+        "model.add(Dropout(0.2))\n",
+        "#model.add(Dense(numberOfClasses,activation='sigmoid'))\n",
+        "model.add(Dense(numberOfClasses,activation='softmax'))\n",
+        "model.summary()"
+      ],
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Model: \"sequential\"\n",
+            "_________________________________________________________________\n",
+            " Layer (type)                Output Shape              Param #   \n",
+            "=================================================================\n",
+            " embedding (Embedding)       (None, 512, 300)          153600    \n",
+            "                                                                 \n",
+            " bidirectional (Bidirectiona  (None, 200)              320800    \n",
+            " l)                                                              \n",
+            "                                                                 \n",
+            " dense (Dense)               (None, 64)                12864     \n",
+            "                                                                 \n",
+            " dropout (Dropout)           (None, 64)                0         \n",
+            "                                                                 \n",
+            " dense_1 (Dense)             (None, 38)                2470      \n",
+            "                                                                 \n",
+            "=================================================================\n",
+            "Total params: 489,734\n",
+            "Trainable params: 336,134\n",
+            "Non-trainable params: 153,600\n",
+            "_________________________________________________________________\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "GcfMJl8f-cBA"
+      },
+      "source": [
+        "\n",
+        "#model = NN_withEmbeddings(longueur_dict, embedding_dim, max_len, numberOfClasses)\n",
+        "\n",
+        "model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])\n",
+        "#model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=[tf.keras.metrics.AUC(multi_label=True)])"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "OTQTH5VDuA3I",
+        "outputId": "b8286232-4938-4591-b483-6b6d1bdc015e"
+      },
+      "source": [
+        "#model.fit(padded, np.array(y_train), epochs=epochs, batch_size = batch_size) \n",
+        "model.fit(word_seq_train, y_train, batch_size=256, epochs=epochs, validation_data=(word_seq_validation, y_validation), shuffle=True)"
+      ],
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Epoch 1/20\n",
+            "83/83 [==============================] - 530s 6s/step - loss: 3.0575 - accuracy: 0.1886 - val_loss: 2.2493 - val_accuracy: 0.4315\n",
+            "Epoch 2/20\n",
+            "83/83 [==============================] - 525s 6s/step - loss: 2.4420 - accuracy: 0.3559 - val_loss: 1.9674 - val_accuracy: 0.4978\n",
+            "Epoch 3/20\n",
+            "83/83 [==============================] - 538s 6s/step - loss: 2.1828 - accuracy: 0.4177 - val_loss: 1.8540 - val_accuracy: 0.5212\n",
+            "Epoch 4/20\n",
+            "83/83 [==============================] - 515s 6s/step - loss: 2.0359 - accuracy: 0.4555 - val_loss: 1.7155 - val_accuracy: 0.5439\n",
+            "Epoch 5/20\n",
+            "83/83 [==============================] - 533s 6s/step - loss: 1.9296 - accuracy: 0.4800 - val_loss: 1.6698 - val_accuracy: 0.5502\n",
+            "Epoch 6/20\n",
+            "83/83 [==============================] - 521s 6s/step - loss: 1.8527 - accuracy: 0.4990 - val_loss: 1.6268 - val_accuracy: 0.5634\n",
+            "Epoch 7/20\n",
+            "83/83 [==============================] - 517s 6s/step - loss: 1.7960 - accuracy: 0.5127 - val_loss: 1.6098 - val_accuracy: 0.5664\n",
+            "Epoch 8/20\n",
+            "83/83 [==============================] - 506s 6s/step - loss: 1.7429 - accuracy: 0.5213 - val_loss: 1.5687 - val_accuracy: 0.5741\n",
+            "Epoch 9/20\n",
+            "83/83 [==============================] - 524s 6s/step - loss: 1.6994 - accuracy: 0.5328 - val_loss: 1.5799 - val_accuracy: 0.5761\n",
+            "Epoch 10/20\n",
+            "83/83 [==============================] - 531s 6s/step - loss: 1.6568 - accuracy: 0.5426 - val_loss: 1.5366 - val_accuracy: 0.5874\n",
+            "Epoch 11/20\n",
+            "83/83 [==============================] - 515s 6s/step - loss: 1.6147 - accuracy: 0.5525 - val_loss: 1.5965 - val_accuracy: 0.5639\n",
+            "Epoch 12/20\n",
+            "83/83 [==============================] - 506s 6s/step - loss: 1.5833 - accuracy: 0.5601 - val_loss: 1.5263 - val_accuracy: 0.5880\n",
+            "Epoch 13/20\n",
+            "83/83 [==============================] - 505s 6s/step - loss: 1.5477 - accuracy: 0.5694 - val_loss: 1.5200 - val_accuracy: 0.5889\n",
+            "Epoch 14/20\n",
+            "83/83 [==============================] - 498s 6s/step - loss: 1.5119 - accuracy: 0.5776 - val_loss: 1.5272 - val_accuracy: 0.5887\n",
+            "Epoch 15/20\n",
+            "83/83 [==============================] - 500s 6s/step - loss: 1.4732 - accuracy: 0.5852 - val_loss: 1.5367 - val_accuracy: 0.5897\n",
+            "Epoch 16/20\n",
+            "83/83 [==============================] - 501s 6s/step - loss: 1.4471 - accuracy: 0.5914 - val_loss: 1.5411 - val_accuracy: 0.5832\n",
+            "Epoch 17/20\n",
+            "83/83 [==============================] - 501s 6s/step - loss: 1.4036 - accuracy: 0.6039 - val_loss: 1.5438 - val_accuracy: 0.5893\n",
+            "Epoch 18/20\n",
+            "83/83 [==============================] - 501s 6s/step - loss: 1.3778 - accuracy: 0.6075 - val_loss: 1.5547 - val_accuracy: 0.5825\n",
+            "Epoch 19/20\n",
+            "83/83 [==============================] - 502s 6s/step - loss: 1.3452 - accuracy: 0.6159 - val_loss: 1.5920 - val_accuracy: 0.5753\n",
+            "Epoch 20/20\n",
+            "83/83 [==============================] - 501s 6s/step - loss: 1.3247 - accuracy: 0.6223 - val_loss: 1.5850 - val_accuracy: 0.5773\n"
+          ]
+        },
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "<keras.callbacks.History at 0x7f4269526a90>"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 17
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "Uw6YR76p_AF0"
+      },
+      "source": [
+        "## Saving models"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "ykTp9lyRaAma"
+      },
+      "source": [
+        "model.save(\"drive/MyDrive/Classification-EDdA/lstm_fasttext_s\"+str(maxOfInstancePerClass)+\".h5\")\n"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "5J4xDoqRUSfS"
+      },
+      "source": [
+        "# save embeddings\n",
+        "\n",
+        "# saving embeddings index \n"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "HHlEtipG_Cp0"
+      },
+      "source": [
+        "## Loading models"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "fKt8ft1t_Cxx"
+      },
+      "source": [
+        "model = keras.models.load_model(\"drive/MyDrive/Classification-EDdA/lstm_fasttext_s\"+str(maxOfInstancePerClass)+\".h5\")\n"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "zbS4poso-3k7"
+      },
+      "source": [
+        "## Evaluation"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "G9pjdMdNW_KS"
+      },
+      "source": [
+        "predictions = model.predict(word_seq_validation)\n",
+        "predictions = np.argmax(predictions,axis=1)"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "IHpVJ79IW_M0",
+        "outputId": "78e2a1aa-d35c-428c-e6c3-0ad332abcdfd"
+      },
+      "source": [
+        "report = classification_report(predictions, y_validation, output_dict = True)\n",
+        "\n",
+        "accuracy = report['accuracy']\n",
+        "weighted_avg = report['weighted avg']\n",
+        "\n",
+        "print(accuracy, weighted_avg)"
+      ],
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "0.5773390217283461 {'precision': 0.5977985581006744, 'recall': 0.5773390217283461, 'f1-score': 0.5808733866443131, 'support': 10079}\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "/usr/local/lib/python3.7/dist-packages/sklearn/metrics/_classification.py:1308: UndefinedMetricWarning: Recall and F-score are ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.\n",
+            "  _warn_prf(average, modifier, msg_start, len(result))\n",
+            "/usr/local/lib/python3.7/dist-packages/sklearn/metrics/_classification.py:1308: UndefinedMetricWarning: Recall and F-score are ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.\n",
+            "  _warn_prf(average, modifier, msg_start, len(result))\n",
+            "/usr/local/lib/python3.7/dist-packages/sklearn/metrics/_classification.py:1308: UndefinedMetricWarning: Recall and F-score are ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.\n",
+            "  _warn_prf(average, modifier, msg_start, len(result))\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "9SKjWffUW_PC"
+      },
+      "source": [
+        ""
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "LpgkGq-fW_RN"
+      },
+      "source": [
+        ""
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "4gGNaPY1iuXD"
+      },
+      "source": [
+        "df_test = pd.read_csv(test_path, sep=\"\\t\")\n",
+        "\n",
+        "encoder = preprocessing.LabelEncoder()\n",
+        "y_test = encoder.fit_transform(df_test[columnClass])\n"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "P67p7BUZiuZV",
+        "outputId": "f958a063-ee95-4157-fcd9-796991615f03"
+      },
+      "source": [
+        "raw_docs_test = df_test[columnText].tolist()\n",
+        "\n",
+        "print(\"pre-processing test data...\")\n",
+        "\n",
+        "stop_words = set(stopwords.words('french'))\n",
+        "\n",
+        "processed_docs_test = []\n",
+        "for doc in tqdm(raw_docs_test):\n",
+        "    tokens = word_tokenize(doc, language='french')\n",
+        "    filtered = [word for word in tokens if word not in stop_words]\n",
+        "    processed_docs_test.append(\" \".join(filtered))\n",
+        "#end for\n",
+        "\n",
+        "print(\"tokenizing input data...\")\n",
+        "#tokenizer = Tokenizer(num_words=max_len, lower=True, char_level=False)\n",
+        "#tokenizer.fit_on_texts(processed_docs_train + processed_docs_validation)  #leaky\n",
+        "word_seq_test = tokenizer.texts_to_sequences(processed_docs_test)\n",
+        "\n",
+        "#pad sequences\n",
+        "word_seq_test = sequence.pad_sequences(word_seq_test, maxlen=max_len)"
+      ],
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "pre-processing test data...\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "100%|██████████| 13137/13137 [00:09<00:00, 1317.07it/s]\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "tokenizing input data...\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "czeIqlD5iudH"
+      },
+      "source": [
+        "predictions = model.predict(word_seq_test)\n",
+        "predictions = np.argmax(predictions,axis=1)"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "Q9eYqi5SW_Ta",
+        "outputId": "3682a42a-7c07-446e-d913-3d20640fb2bf"
+      },
+      "source": [
+        "report = classification_report(predictions, y_test, output_dict = True)\n",
+        "\n",
+        "accuracy = report['accuracy']\n",
+        "weighted_avg = report['weighted avg']\n",
+        "\n",
+        "print(accuracy, weighted_avg)"
+      ],
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "0.5957220065463956 {'precision': 0.6075119377257042, 'recall': 0.5957220065463956, 'f1-score': 0.59493432234528, 'support': 13137}\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "/usr/local/lib/python3.7/dist-packages/sklearn/metrics/_classification.py:1308: UndefinedMetricWarning: Recall and F-score are ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.\n",
+            "  _warn_prf(average, modifier, msg_start, len(result))\n",
+            "/usr/local/lib/python3.7/dist-packages/sklearn/metrics/_classification.py:1308: UndefinedMetricWarning: Recall and F-score are ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.\n",
+            "  _warn_prf(average, modifier, msg_start, len(result))\n",
+            "/usr/local/lib/python3.7/dist-packages/sklearn/metrics/_classification.py:1308: UndefinedMetricWarning: Recall and F-score are ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.\n",
+            "  _warn_prf(average, modifier, msg_start, len(result))\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "ra4FOHVniwUI",
+        "outputId": "cbe576f6-ce14-49ef-9aba-2d26f76cab92"
+      },
+      "source": [
+        "from sklearn.metrics import confusion_matrix\n",
+        "\n",
+        "classesName = encoder.classes_\n",
+        "classes = [str(e) for e in encoder.transform(encoder.classes_)]\n",
+        "\n",
+        "precision = []\n",
+        "recall = []\n",
+        "f1 = []\n",
+        "support = []\n",
+        "dff = pd.DataFrame(columns= ['className', 'precision', 'recall', 'f1-score', 'support', 'FP', 'FN', 'TP', 'TN'])\n",
+        "for c in classes:\n",
+        "  precision.append(report[c]['precision'])\n",
+        "  recall.append(report[c]['recall'])\n",
+        "  f1.append(report[c]['f1-score'])\n",
+        "  support.append(report[c]['support'])\n",
+        "\n",
+        "accuracy = report['accuracy']\n",
+        "weighted_avg = report['weighted avg']\n",
+        "\n",
+        "\n",
+        "cnf_matrix = confusion_matrix(y_test, predictions)\n",
+        "FP = cnf_matrix.sum(axis=0) - np.diag(cnf_matrix)\n",
+        "FN = cnf_matrix.sum(axis=1) - np.diag(cnf_matrix)\n",
+        "TP = np.diag(cnf_matrix)\n",
+        "TN = cnf_matrix.sum() - (FP + FN + TP)\n",
+        "\n",
+        "dff['className'] = classesName\n",
+        "dff['precision'] = precision\n",
+        "dff['recall'] = recall\n",
+        "dff['f1-score'] = f1\n",
+        "dff['support'] = support\n",
+        "dff['FP'] = FP\n",
+        "dff['FN'] = FN\n",
+        "dff['TP'] = TP\n",
+        "dff['TN'] = TN\n",
+        "\n",
+        "print(\"test_lstm_s\"+str(maxOfInstancePerClass))\n",
+        "\n",
+        "print(weighted_avg)\n",
+        "print(accuracy)\n",
+        "print(dff)\n",
+        "\n",
+        "dff.to_csv(\"drive/MyDrive/Classification-EDdA/report_test_lstm_s\"+str(maxOfInstancePerClass)+\".csv\", index=False)"
+      ],
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "test_lstm_s1500\n",
+            "{'precision': 0.6075119377257042, 'recall': 0.5957220065463956, 'f1-score': 0.59493432234528, 'support': 13137}\n",
+            "0.5957220065463956\n",
+            "                                      className  precision  ...    TP     TN\n",
+            "0               Agriculture - Economie rustique   0.259843  ...    66  12780\n",
+            "1                                      Anatomie   0.446429  ...   100  12818\n",
+            "2                                     Antiquité   0.525316  ...   166  12425\n",
+            "3                                  Architecture   0.518868  ...   165  12597\n",
+            "4                               Arts et métiers   0.007752  ...     1  13002\n",
+            "5                                    Beaux-arts   0.020000  ...     2  13016\n",
+            "6                       Belles-lettres - Poésie   0.200000  ...    47  12667\n",
+            "7                                        Blason   0.466667  ...    49  12908\n",
+            "8                                    Caractères   0.074074  ...     2  13110\n",
+            "9                                        Chasse   0.262295  ...    32  12929\n",
+            "10                                       Chimie   0.348214  ...    39  12952\n",
+            "11                                     Commerce   0.524249  ...   227  12442\n",
+            "12                        Droit - Jurisprudence   0.750176  ...  1063  11473\n",
+            "13                          Economie domestique   0.000000  ...     0  13106\n",
+            "14                                    Grammaire   0.587500  ...   329  12094\n",
+            "15                                   Géographie   0.830753  ...  2361  10167\n",
+            "16                                     Histoire   0.459916  ...   327  11749\n",
+            "17                           Histoire naturelle   0.687835  ...   769  11871\n",
+            "18                                          Jeu   0.415385  ...    27  13034\n",
+            "19                                       Marine   0.708046  ...   308  12497\n",
+            "20                           Maréchage - Manège   0.784483  ...    91  12991\n",
+            "21                                Mathématiques   0.450331  ...    68  12922\n",
+            "22                                       Mesure   0.333333  ...    14  13078\n",
+            "23              Militaire (Art) - Guerre - Arme   0.510135  ...   151  12719\n",
+            "24                                  Minéralogie   0.000000  ...     0  13111\n",
+            "25                                      Monnaie   0.041096  ...     3  13057\n",
+            "26                                      Musique   0.525000  ...    84  12922\n",
+            "27                                    Médailles   0.000000  ...     0  13109\n",
+            "28                         Médecine - Chirurgie   0.584795  ...   300  12279\n",
+            "29                                      Métiers   0.592378  ...   715  11248\n",
+            "30                                    Pharmacie   0.014085  ...     1  13065\n",
+            "31                                  Philosophie   0.160714  ...    18  12934\n",
+            "32  Physique - [Sciences physico-mathématiques]   0.533784  ...   158  12690\n",
+            "33                                    Politique   0.000000  ...     0  13111\n",
+            "34                                        Pêche   0.127660  ...     6  13067\n",
+            "35                                     Religion   0.357702  ...   137  12580\n",
+            "36                                    Spectacle   0.000000  ...     0  13126\n",
+            "37                                 Superstition   0.000000  ...     0  13112\n",
+            "\n",
+            "[38 rows x 9 columns]\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "x03FC0D-iwWP"
+      },
+      "source": [
+        ""
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "gSVqcywgiwYH"
+      },
+      "source": [
+        ""
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "-T5LfFtwiwaV"
+      },
+      "source": [
+        ""
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "Yjd5c70_iwcY"
+      },
+      "source": [
+        ""
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "2UNjiHYliwes"
+      },
+      "source": [
+        ""
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "vLGTnit_W_V8"
+      },
+      "source": [
+        ""
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "R-3lBXjDD9wE"
+      },
+      "source": [
+        "def predict(data, max_len):\n",
+        "  \n",
+        "  pad_sequ_test, _ = prepare_sequence(data, max_len)\n",
+        "  pred_labels_ = model.predict(pad_sequ_test)\n",
+        "\n",
+        "  return np.argmax(pred_labels_,axis=1)\n",
+        "\n",
+        "\n",
+        "def eval(data, labels, max_len):\n",
+        "  \n",
+        "  pred_labels_ = predict(data, max_len)\n",
+        "  report = classification_report(pred_labels_, labels, output_dict = True)\n",
+        "\n",
+        "  accuracy = report['accuracy']\n",
+        "  weighted_avg = report['weighted avg']\n",
+        "  \n",
+        "  print(accuracy, weighted_avg)"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "6T3kAvKvExgc",
+        "outputId": "c6d4560e-fc64-4579-9adb-79c2e36d2386"
+      },
+      "source": [
+        "# evaluation sur le jeu de validation\n",
+        "eval(df_validation[columnText], y_validation, max_len)"
+      ],
+      "execution_count": null,
+      "outputs": [
+        {
+          "name": "stderr",
+          "output_type": "stream",
+          "text": [
+            "/usr/local/lib/python3.7/dist-packages/zeugma/keras_transformers.py:33: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray\n",
+            "  return np.array(self.texts_to_sequences(texts))\n"
+          ]
+        },
+        {
+          "name": "stdout",
+          "output_type": "stream",
+          "text": [
+            "0.06925290207361841 {'precision': 0.09108131158125257, 'recall': 0.06925290207361841, 'f1-score': 0.06099084715237025, 'support': 10079}\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "pTDJA03_-8yu",
+        "outputId": "d8bcdf73-c4c3-4c88-b063-90bd1cad5122"
+      },
+      "source": [
+        "# evaluation sur le jeu de test\n",
+        "df_test = pd.read_csv(test_path, sep=\"\\t\")\n",
+        "#df_test = resample_classes(df_test, columnClass, maxOfInstancePerClass)\n",
+        "\n",
+        "y_test = df_test[columnClass]\n",
+        "encoder = preprocessing.LabelEncoder()\n",
+        "y_test = encoder.fit_transform(y_test)\n",
+        "\n",
+        "eval(df_test[columnText], y_test, max_len)\n"
+      ],
+      "execution_count": null,
+      "outputs": [
+        {
+          "name": "stderr",
+          "output_type": "stream",
+          "text": [
+            "/usr/local/lib/python3.7/dist-packages/zeugma/keras_transformers.py:33: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray\n",
+            "  return np.array(self.texts_to_sequences(texts))\n"
+          ]
+        },
+        {
+          "name": "stdout",
+          "output_type": "stream",
+          "text": [
+            "0.07231483595950369 {'precision': 0.081194635559303, 'recall': 0.07231483595950369, 'f1-score': 0.06322383877903374, 'support': 13137}\n"
+          ]
+        }
+      ]
+    }
+  ]
+}
\ No newline at end of file
diff --git a/notebooks/EDdA_Classification_DeepLearning_2.ipynb b/notebooks/EDdA_Classification_DeepLearning_2.ipynb
new file mode 100644
index 0000000..444fc9a
--- /dev/null
+++ b/notebooks/EDdA_Classification_DeepLearning_2.ipynb
@@ -0,0 +1,1349 @@
+{
+  "nbformat": 4,
+  "nbformat_minor": 0,
+  "metadata": {
+    "colab": {
+      "name": "EDdA-Classification_DeepLearning_2.ipynb",
+      "provenance": [],
+      "collapsed_sections": []
+    },
+    "kernelspec": {
+      "display_name": "Python 3",
+      "name": "python3"
+    },
+    "language_info": {
+      "name": "python"
+    }
+  },
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "0yFsoHXX8Iyy"
+      },
+      "source": [
+        "# Deep learning for EDdA classification"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "EyksTV6277Jv"
+      },
+      "source": [
+        "## Configuration"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "G5LT5n9O7SLt"
+      },
+      "source": [
+        "train_path = 'training_set.tsv'\n",
+        "validation_path = 'validation_set.tsv'\n",
+        "test_path =  'test_set.tsv'\n",
+        "\n",
+        "columnText = 'contentWithoutClass'\n",
+        "columnClass = 'ensemble_domaine_enccre'\n",
+        "\n",
+        "minOfInstancePerClass = 0\n",
+        "maxOfInstancePerClass = 10000\n",
+        "\n",
+        "\n",
+        "batch_size = 64\n",
+        "max_len = 512 # \n",
+        "epochs = 20\n",
+        "embedding_dim = 300 "
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "tFlUCDL2778i"
+      },
+      "source": [
+        "## Setup colab environment"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "Sp8d_Uus7SHJ",
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "outputId": "20e599da-b04f-4ed9-95b0-ce22c094eff0"
+      },
+      "source": [
+        "from google.colab import drive\n",
+        "drive.mount('/content/drive')"
+      ],
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Mounted at /content/drive\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "jQBu-p6hBU-j"
+      },
+      "source": [
+        "## Install packages"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "bTIXsF6kBUdh"
+      },
+      "source": [
+        "#!pip install zeugma\n",
+        "#!pip install plot_model"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "56-04SNF8BMx"
+      },
+      "source": [
+        "## Import librairies"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "HwWkSznz7SEv",
+        "outputId": "046fd487-180e-4c50-ae33-d5ccc122ef46"
+      },
+      "source": [
+        "from nltk.tokenize import word_tokenize\n",
+        "import nltk\n",
+        "from nltk.corpus import stopwords\n",
+        "nltk.download('stopwords')\n",
+        "nltk.download('punkt')\n",
+        "\n",
+        "import keras\n",
+        "from keras import optimizers\n",
+        "from keras import backend as K\n",
+        "from keras import regularizers\n",
+        "from keras.models import Sequential\n",
+        "from keras.layers import Dense, Activation, Dropout, Flatten\n",
+        "from keras.layers import Embedding, Conv1D, MaxPooling1D, GlobalMaxPooling1D\n",
+        "from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout, Bidirectional\n",
+        "#from keras.utils import plot_model\n",
+        "from keras.preprocessing import sequence\n",
+        "from keras.preprocessing.text import Tokenizer\n",
+        "from keras.callbacks import EarlyStopping\n",
+        "\n",
+        "import string\n",
+        "import tensorflow as tf\n",
+        "#from zeugma import TextsToSequences\n",
+        "import pandas as pd\n",
+        "import numpy as np\n",
+        "from sklearn import preprocessing\n",
+        "from sklearn.metrics import classification_report\n",
+        "\n",
+        "\n",
+        "\n",
+        "from tqdm import tqdm\n",
+        "import requests, zipfile, io\n",
+        "import os, re, csv, math, codecs"
+      ],
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "[nltk_data] Downloading package stopwords to /root/nltk_data...\n",
+            "[nltk_data]   Unzipping corpora/stopwords.zip.\n",
+            "[nltk_data] Downloading package punkt to /root/nltk_data...\n",
+            "[nltk_data]   Unzipping tokenizers/punkt.zip.\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "xrekV6W978l4"
+      },
+      "source": [
+        "## Utils functions"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "4LJ5blQR7PUe"
+      },
+      "source": [
+        "\n",
+        "def resample_classes(df, classColumnName, numberOfInstances):\n",
+        "  #random numberOfInstances elements\n",
+        "  replace = False  # with replacement\n",
+        "  fn = lambda obj: obj.loc[np.random.choice(obj.index, numberOfInstances if len(obj) > numberOfInstances else len(obj), replace),:]\n",
+        "  return df.groupby(classColumnName, as_index=False).apply(fn)\n",
+        "    \n"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "-Rh3JMDh7zYd"
+      },
+      "source": [
+        ""
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "MtLr35eM753e"
+      },
+      "source": [
+        "## Load Data"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "FnbNT4NF7zal"
+      },
+      "source": [
+        "!wget https://projet.liris.cnrs.fr/geode/EDdA-Classification/datasets/training_set.tsv\n",
+        "!wget https://projet.liris.cnrs.fr/geode/EDdA-Classification/datasets/validation_set.tsv\n",
+        "!wget https://projet.liris.cnrs.fr/geode/EDdA-Classification/datasets/test_set.tsv"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "WNqDms64lfaS"
+      },
+      "source": [
+        "# download FastText\n",
+        "zip_file_url = \"https://dl.fbaipublicfiles.com/fasttext/vectors-english/crawl-300d-2M.vec.zip\"\n",
+        "r = requests.get(zip_file_url)\n",
+        "z = zipfile.ZipFile(io.BytesIO(r.content))\n",
+        "z.extractall()"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "PGMIi0CAmqSd",
+        "outputId": "f7f16180-fc1d-4163-c10b-0e7cae00b701"
+      },
+      "source": [
+        "print('loading word embeddings...')\n",
+        "\n",
+        "embeddings_index = {}\n",
+        "f = codecs.open('crawl-300d-2M.vec', encoding='utf-8')\n",
+        "\n",
+        "for line in tqdm(f):\n",
+        "    values = line.rstrip().rsplit(' ')\n",
+        "    word = values[0]\n",
+        "    coefs = np.asarray(values[1:], dtype='float32')\n",
+        "    embeddings_index[word] = coefs\n",
+        "f.close()\n",
+        "\n",
+        "print('found %s word vectors' % len(embeddings_index))"
+      ],
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "loading word embeddings...\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "1999996it [03:42, 9002.96it/s]"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "found 1999996 word vectors\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "nRLaQUO97zcq"
+      },
+      "source": [
+        "df_train = pd.read_csv(train_path, sep=\"\\t\")\n",
+        "df_train = resample_classes(df_train, columnClass, maxOfInstancePerClass)\n",
+        "\n",
+        "df_validation = pd.read_csv(validation_path, sep=\"\\t\")\n",
+        "df_validation = resample_classes(df_validation, columnClass, maxOfInstancePerClass)\n"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "vGWAgBH87ze8"
+      },
+      "source": [
+        "y_train  = df_train[columnClass]\n",
+        "y_validation = df_validation[columnClass]\n",
+        "numberOfClasses = y_train.nunique()\n",
+        "\n",
+        "encoder = preprocessing.LabelEncoder()\n",
+        "\n",
+        "y_train = encoder.fit_transform(y_train)\n",
+        "y_validation = encoder.fit_transform(y_validation)"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 206
+        },
+        "id": "7OYjo_uhoqcX",
+        "outputId": "17cccba3-2878-4cf0-e86c-33a20510f0a4"
+      },
+      "source": [
+        "df_validation.head()"
+      ],
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/html": [
+              "<div>\n",
+              "<style scoped>\n",
+              "    .dataframe tbody tr th:only-of-type {\n",
+              "        vertical-align: middle;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe tbody tr th {\n",
+              "        vertical-align: top;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe thead th {\n",
+              "        text-align: right;\n",
+              "    }\n",
+              "</style>\n",
+              "<table border=\"1\" class=\"dataframe\">\n",
+              "  <thead>\n",
+              "    <tr style=\"text-align: right;\">\n",
+              "      <th></th>\n",
+              "      <th>volume</th>\n",
+              "      <th>numero</th>\n",
+              "      <th>head</th>\n",
+              "      <th>normClass</th>\n",
+              "      <th>classEDdA</th>\n",
+              "      <th>author</th>\n",
+              "      <th>id_enccre</th>\n",
+              "      <th>domaine_enccre</th>\n",
+              "      <th>ensemble_domaine_enccre</th>\n",
+              "      <th>content</th>\n",
+              "      <th>contentWithoutClass</th>\n",
+              "      <th>firstParagraph</th>\n",
+              "      <th>nb_word</th>\n",
+              "    </tr>\n",
+              "  </thead>\n",
+              "  <tbody>\n",
+              "    <tr>\n",
+              "      <th>0</th>\n",
+              "      <td>3</td>\n",
+              "      <td>3723</td>\n",
+              "      <td>Condition de Droit ou légale</td>\n",
+              "      <td>unclassified</td>\n",
+              "      <td>unclassified</td>\n",
+              "      <td>unsigned</td>\n",
+              "      <td>v3-1814-8</td>\n",
+              "      <td>jurisprudence</td>\n",
+              "      <td>Droit - Jurisprudence</td>\n",
+              "      <td>\\nCondition de Droit ou légale, est celle que\\...</td>\n",
+              "      <td>\\nCondition de Droit ou légale, est celle que\\...</td>\n",
+              "      <td>\\nCondition de Droit ou légale, est celle que\\...</td>\n",
+              "      <td>72</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>1</th>\n",
+              "      <td>10</td>\n",
+              "      <td>177</td>\n",
+              "      <td>MANIER</td>\n",
+              "      <td>Grammaire</td>\n",
+              "      <td>Gramm.</td>\n",
+              "      <td>unsigned</td>\n",
+              "      <td>v10-112-0</td>\n",
+              "      <td>grammaire</td>\n",
+              "      <td>Grammaire</td>\n",
+              "      <td>\\nMANIER, v. act. (Gramm.) c'est ou toucher de...</td>\n",
+              "      <td>\\nMANIER, v. act. () c'est ou toucher de\\nla m...</td>\n",
+              "      <td>\\nMANIER, v. act. () c'est ou toucher de\\nla m...</td>\n",
+              "      <td>109</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>2</th>\n",
+              "      <td>7</td>\n",
+              "      <td>1357</td>\n",
+              "      <td>GALAIQUE, galaïcos</td>\n",
+              "      <td>Histoire naturelle</td>\n",
+              "      <td>Hist. nat.</td>\n",
+              "      <td>d'Holbach5</td>\n",
+              "      <td>v7-606-0</td>\n",
+              "      <td>histoirenaturelle</td>\n",
+              "      <td>Histoire naturelle</td>\n",
+              "      <td>\\nGALAIQUE, galaïcos, s. f. (Hist. nat.) nom d...</td>\n",
+              "      <td>\\nGALAIQUE, galaïcos, s. f. () nom donné \\npar...</td>\n",
+              "      <td>\\nGALAIQUE, galaïcos, s. f. () nom donné \\npar...</td>\n",
+              "      <td>33</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>3</th>\n",
+              "      <td>3</td>\n",
+              "      <td>3198</td>\n",
+              "      <td>Commis ambulant</td>\n",
+              "      <td>unclassified</td>\n",
+              "      <td>unclassified</td>\n",
+              "      <td>unsigned</td>\n",
+              "      <td>v3-1623-2</td>\n",
+              "      <td>commerce</td>\n",
+              "      <td>Commerce</td>\n",
+              "      <td>\\nCommis ambulant, est un commis dont l'emploi...</td>\n",
+              "      <td>\\nCommis ambulant, est un commis dont l'emploi...</td>\n",
+              "      <td>\\nCommis ambulant, est un commis dont l'emploi...</td>\n",
+              "      <td>43</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>4</th>\n",
+              "      <td>17</td>\n",
+              "      <td>3047</td>\n",
+              "      <td>ZURMENTUM</td>\n",
+              "      <td>Géographie ancienne</td>\n",
+              "      <td>Géog. anc.</td>\n",
+              "      <td>Jaucourt</td>\n",
+              "      <td>v17-2047-0</td>\n",
+              "      <td>géographie</td>\n",
+              "      <td>Géographie</td>\n",
+              "      <td>\\nZURMENTUM, (Géog. anc.) ville de l'Afrique\\n...</td>\n",
+              "      <td>\\nZURMENTUM, () ville de l'Afrique\\npropre. Pt...</td>\n",
+              "      <td>\\nZURMENTUM, () ville de l'Afrique\\npropre. Pt...</td>\n",
+              "      <td>27</td>\n",
+              "    </tr>\n",
+              "  </tbody>\n",
+              "</table>\n",
+              "</div>"
+            ],
+            "text/plain": [
+              "   volume  numero  ...                                     firstParagraph nb_word\n",
+              "0       3    3723  ...  \\nCondition de Droit ou légale, est celle que\\...      72\n",
+              "1      10     177  ...  \\nMANIER, v. act. () c'est ou toucher de\\nla m...     109\n",
+              "2       7    1357  ...  \\nGALAIQUE, galaïcos, s. f. () nom donné \\npar...      33\n",
+              "3       3    3198  ...  \\nCommis ambulant, est un commis dont l'emploi...      43\n",
+              "4      17    3047  ...  \\nZURMENTUM, () ville de l'Afrique\\npropre. Pt...      27\n",
+              "\n",
+              "[5 rows x 13 columns]"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 60
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "HuUVfklf-dSR"
+      },
+      "source": [
+        "## Training models"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "NTNh6kMTp_eU",
+        "outputId": "1ff499d7-a98e-47f9-815e-cbb13b5f307f"
+      },
+      "source": [
+        "#https://github.com/emmanuellaanggi/disaster_tweet_sentiment/blob/master/(Medium)_Text_Classification_Disaster_Tweet_.ipynb\n",
+        "\n",
+        "raw_docs_train = df_train[columnText].tolist()\n",
+        "raw_docs_validation = df_validation[columnText].tolist() \n",
+        "\n",
+        "\n",
+        "print(\"pre-processing train data...\")\n",
+        "\n",
+        "stop_words = set(stopwords.words('french'))\n",
+        "\n",
+        "processed_docs_train = []\n",
+        "for doc in tqdm(raw_docs_train):\n",
+        "    tokens = word_tokenize(doc, language='french')\n",
+        "    filtered = [word for word in tokens if word not in stop_words]\n",
+        "    processed_docs_train.append(\" \".join(filtered))\n",
+        "#end for\n",
+        "\n",
+        "processed_docs_validation = []\n",
+        "for doc in tqdm(raw_docs_validation):\n",
+        "    tokens = word_tokenize(doc)\n",
+        "    filtered = [word for word in tokens if word not in stop_words]\n",
+        "    processed_docs_validation.append(\" \".join(filtered))\n",
+        "#end for\n",
+        "\n",
+        "print(\"tokenizing input data...\")\n",
+        "tokenizer = Tokenizer(num_words=max_len, lower=True, char_level=False)\n",
+        "tokenizer.fit_on_texts(processed_docs_train + processed_docs_validation)  #leaky\n",
+        "word_seq_train = tokenizer.texts_to_sequences(processed_docs_train)\n",
+        "word_seq_validation = tokenizer.texts_to_sequences(processed_docs_validation)\n",
+        "word_index = tokenizer.word_index\n",
+        "print(\"dictionary size: \", len(word_index))\n",
+        "\n",
+        "#pad sequences\n",
+        "word_seq_train = sequence.pad_sequences(word_seq_train, maxlen=max_len)\n",
+        "word_seq_validation = sequence.pad_sequences(word_seq_validation, maxlen=max_len)"
+      ],
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "pre-processing train data...\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "100%|██████████| 30650/30650 [00:23<00:00, 1324.19it/s]\n",
+            "100%|██████████| 10947/10947 [00:08<00:00, 1355.66it/s]\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "tokenizing input data...\n",
+            "dictionary size:  115205\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "Wj8RkOhT_e2c",
+        "outputId": "7f486466-bf76-4b82-ed32-56c31ae6dc2f"
+      },
+      "source": [
+        "word_seq_validation"
+      ],
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "array([[  0,   0,   0, ...,   9,  64, 116],\n",
+              "       [  0,   0,   0, ..., 301,  57, 313],\n",
+              "       [  0,   0,   0, ...,   9, 285,   6],\n",
+              "       ...,\n",
+              "       [  0,   0,   0, ...,  26, 142,   6],\n",
+              "       [  0,   0,   0, ..., 333, 198,   2],\n",
+              "       [  0,   0,   0, ...,  24, 335,   1]], dtype=int32)"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 62
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "wGjQI0YgpQAS",
+        "outputId": "b2856dc6-793f-491e-8a90-bd5553f71933"
+      },
+      "source": [
+        "#embedding matrix\n",
+        "\n",
+        "print('preparing embedding matrix...')\n",
+        "\n",
+        "words_not_found = []\n",
+        "nb_words = min(max_len, len(word_index)+1)\n",
+        "embedding_matrix = np.zeros((nb_words, embedding_dim))\n",
+        "\n",
+        "for word, i in word_index.items():\n",
+        "    if i >= nb_words:\n",
+        "        continue\n",
+        "    embedding_vector = embeddings_index.get(word)\n",
+        "    if (embedding_vector is not None) and len(embedding_vector) > 0:\n",
+        "        # words not found in embedding index will be all-zeros.\n",
+        "        embedding_matrix[i] = embedding_vector\n",
+        "    else:\n",
+        "        words_not_found.append(word)\n",
+        "print('number of null word embeddings: %d' % np.sum(np.sum(embedding_matrix, axis=1) == 0))"
+      ],
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "preparing embedding matrix...\n",
+            "number of null word embeddings: 73\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "hjaeYIZCtGca",
+        "outputId": "3ce480ec-21fa-4a94-f21d-586fd44c51bf"
+      },
+      "source": [
+        "print(\"sample words not found: \", np.random.choice(words_not_found, 10))"
+      ],
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "sample words not found:  ['ptolomée' \"l'amérique\" \"l'une\" \"qu'on\" \"lorsqu'il\" \"aujourd'hui\"\n",
+            " \"c'étoit\" \"qu'elle\" \"l'une\" 'lieues']\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "4O0gnsX8pNVU",
+        "outputId": "28807df5-3c6f-4b62-fe32-a8ae250ddb7b"
+      },
+      "source": [
+        "from keras.layers import BatchNormalization\n",
+        "import tensorflow as tf\n",
+        "\n",
+        "model = tf.keras.Sequential()\n",
+        "\n",
+        "model.add(Embedding(nb_words,embedding_dim,input_length=max_len, weights=[embedding_matrix],trainable=False))\n",
+        "#model.add(Bidirectional(LSTM(100)))\n",
+        "model.add(Conv1D(64,5,activation='relu'))\n",
+        "model.add(MaxPooling1D(pool_size=(max_len - 5 + 1)))\n",
+        "model.add(Flatten())\n",
+        "model.add(Dense(numberOfClasses,activation='softmax'))\n",
+        "model.summary()"
+      ],
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Model: \"sequential_2\"\n",
+            "_________________________________________________________________\n",
+            " Layer (type)                Output Shape              Param #   \n",
+            "=================================================================\n",
+            " embedding_2 (Embedding)     (None, 512, 300)          153600    \n",
+            "                                                                 \n",
+            " conv1d_2 (Conv1D)           (None, 508, 64)           96064     \n",
+            "                                                                 \n",
+            " max_pooling1d_2 (MaxPooling  (None, 1, 64)            0         \n",
+            " 1D)                                                             \n",
+            "                                                                 \n",
+            " flatten_2 (Flatten)         (None, 64)                0         \n",
+            "                                                                 \n",
+            " dense_2 (Dense)             (None, 38)                2470      \n",
+            "                                                                 \n",
+            "=================================================================\n",
+            "Total params: 252,134\n",
+            "Trainable params: 98,534\n",
+            "Non-trainable params: 153,600\n",
+            "_________________________________________________________________\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "GcfMJl8f-cBA"
+      },
+      "source": [
+        "\n",
+        "#model = NN_withEmbeddings(longueur_dict, embedding_dim, max_len, numberOfClasses)\n",
+        "\n",
+        "model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])\n",
+        "#model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=[tf.keras.metrics.AUC(multi_label=True)])"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "OTQTH5VDuA3I",
+        "outputId": "f01b4a29-6599-49b0-b1ed-52d241a68b19"
+      },
+      "source": [
+        "#model.fit(padded, np.array(y_train), epochs=epochs, batch_size = batch_size) \n",
+        "model.fit(word_seq_train, y_train, batch_size=256, epochs=epochs, validation_data=(word_seq_validation, y_validation), shuffle=True)"
+      ],
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Epoch 1/20\n",
+            "120/120 [==============================] - 184s 2s/step - loss: 2.4656 - accuracy: 0.3793 - val_loss: 2.1042 - val_accuracy: 0.4652\n",
+            "Epoch 2/20\n",
+            "120/120 [==============================] - 183s 2s/step - loss: 1.9110 - accuracy: 0.5068 - val_loss: 1.8333 - val_accuracy: 0.5262\n",
+            "Epoch 3/20\n",
+            "120/120 [==============================] - 184s 2s/step - loss: 1.6637 - accuracy: 0.5682 - val_loss: 1.6986 - val_accuracy: 0.5556\n",
+            "Epoch 4/20\n",
+            "120/120 [==============================] - 184s 2s/step - loss: 1.5183 - accuracy: 0.6033 - val_loss: 1.6377 - val_accuracy: 0.5657\n",
+            "Epoch 5/20\n",
+            "120/120 [==============================] - 184s 2s/step - loss: 1.4169 - accuracy: 0.6247 - val_loss: 1.5928 - val_accuracy: 0.5782\n",
+            "Epoch 6/20\n",
+            "120/120 [==============================] - 184s 2s/step - loss: 1.3342 - accuracy: 0.6436 - val_loss: 1.5676 - val_accuracy: 0.5847\n",
+            "Epoch 7/20\n",
+            "120/120 [==============================] - 184s 2s/step - loss: 1.2657 - accuracy: 0.6595 - val_loss: 1.5651 - val_accuracy: 0.5860\n",
+            "Epoch 8/20\n",
+            "120/120 [==============================] - 183s 2s/step - loss: 1.2061 - accuracy: 0.6747 - val_loss: 1.5505 - val_accuracy: 0.5917\n",
+            "Epoch 9/20\n",
+            "120/120 [==============================] - 183s 2s/step - loss: 1.1518 - accuracy: 0.6897 - val_loss: 1.5586 - val_accuracy: 0.5873\n",
+            "Epoch 10/20\n",
+            "120/120 [==============================] - 183s 2s/step - loss: 1.1022 - accuracy: 0.7027 - val_loss: 1.5791 - val_accuracy: 0.5850\n",
+            "Epoch 11/20\n",
+            "120/120 [==============================] - 183s 2s/step - loss: 1.0543 - accuracy: 0.7150 - val_loss: 1.5675 - val_accuracy: 0.5873\n",
+            "Epoch 12/20\n",
+            "120/120 [==============================] - 184s 2s/step - loss: 1.0111 - accuracy: 0.7260 - val_loss: 1.5801 - val_accuracy: 0.5852\n",
+            "Epoch 13/20\n",
+            "120/120 [==============================] - 184s 2s/step - loss: 0.9718 - accuracy: 0.7358 - val_loss: 1.5925 - val_accuracy: 0.5855\n",
+            "Epoch 14/20\n",
+            "120/120 [==============================] - 183s 2s/step - loss: 0.9371 - accuracy: 0.7463 - val_loss: 1.5984 - val_accuracy: 0.5864\n",
+            "Epoch 15/20\n",
+            "120/120 [==============================] - 183s 2s/step - loss: 0.9032 - accuracy: 0.7556 - val_loss: 1.6136 - val_accuracy: 0.5816\n",
+            "Epoch 16/20\n",
+            "120/120 [==============================] - 183s 2s/step - loss: 0.8684 - accuracy: 0.7655 - val_loss: 1.6376 - val_accuracy: 0.5775\n",
+            "Epoch 17/20\n",
+            "120/120 [==============================] - 184s 2s/step - loss: 0.8394 - accuracy: 0.7744 - val_loss: 1.6575 - val_accuracy: 0.5781\n",
+            "Epoch 18/20\n",
+            "120/120 [==============================] - 184s 2s/step - loss: 0.8105 - accuracy: 0.7831 - val_loss: 1.6596 - val_accuracy: 0.5779\n",
+            "Epoch 19/20\n",
+            "120/120 [==============================] - 183s 2s/step - loss: 0.7826 - accuracy: 0.7910 - val_loss: 1.6774 - val_accuracy: 0.5741\n",
+            "Epoch 20/20\n",
+            "120/120 [==============================] - 184s 2s/step - loss: 0.7560 - accuracy: 0.7996 - val_loss: 1.6946 - val_accuracy: 0.5727\n"
+          ]
+        },
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "<keras.callbacks.History at 0x7f6ca8a6d890>"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 67
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "Uw6YR76p_AF0"
+      },
+      "source": [
+        "## Saving models"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "ykTp9lyRaAma"
+      },
+      "source": [
+        "model.save(\"drive/MyDrive/Classification-EDdA/cnn_fasttext_s\"+str(maxOfInstancePerClass)+\".h5\")\n"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "5J4xDoqRUSfS"
+      },
+      "source": [
+        "# save embeddings\n",
+        "\n",
+        "# saving embeddings index \n"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "HHlEtipG_Cp0"
+      },
+      "source": [
+        "## Loading models"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "fKt8ft1t_Cxx"
+      },
+      "source": [
+        "model = keras.models.load_model(\"drive/MyDrive/Classification-EDdA/cnn_fasttext_s\"+str(maxOfInstancePerClass)+\".h5\")\n"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "zbS4poso-3k7"
+      },
+      "source": [
+        "## Evaluation"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "G9pjdMdNW_KS"
+      },
+      "source": [
+        "predictions = model.predict(word_seq_validation)\n",
+        "predictions = np.argmax(predictions,axis=1)"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "IHpVJ79IW_M0",
+        "outputId": "2e1657b3-04d1-42f1-ea8b-9bbcd4744108"
+      },
+      "source": [
+        "report = classification_report(predictions, y_validation, output_dict = True)\n",
+        "\n",
+        "accuracy = report['accuracy']\n",
+        "weighted_avg = report['weighted avg']\n",
+        "\n",
+        "print(accuracy, weighted_avg)"
+      ],
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "0.5726683109527725 {'precision': 0.6118028288513718, 'recall': 0.5726683109527725, 'f1-score': 0.5870482221489528, 'support': 10947}\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "/usr/local/lib/python3.7/dist-packages/sklearn/metrics/_classification.py:1308: UndefinedMetricWarning: Recall and F-score are ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.\n",
+            "  _warn_prf(average, modifier, msg_start, len(result))\n",
+            "/usr/local/lib/python3.7/dist-packages/sklearn/metrics/_classification.py:1308: UndefinedMetricWarning: Recall and F-score are ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.\n",
+            "  _warn_prf(average, modifier, msg_start, len(result))\n",
+            "/usr/local/lib/python3.7/dist-packages/sklearn/metrics/_classification.py:1308: UndefinedMetricWarning: Recall and F-score are ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.\n",
+            "  _warn_prf(average, modifier, msg_start, len(result))\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "9SKjWffUW_PC"
+      },
+      "source": [
+        ""
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "LpgkGq-fW_RN"
+      },
+      "source": [
+        "df_test = pd.read_csv(test_path, sep=\"\\t\")\n",
+        "\n",
+        "encoder = preprocessing.LabelEncoder()\n",
+        "y_test = encoder.fit_transform(df_test[columnClass])\n"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "Q9eYqi5SW_Ta",
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "outputId": "31e45f20-583a-4ca6-eac8-21863f6fef5b"
+      },
+      "source": [
+        "raw_docs_test = df_test[columnText].tolist()\n",
+        "\n",
+        "print(\"pre-processing test data...\")\n",
+        "\n",
+        "stop_words = set(stopwords.words('french'))\n",
+        "\n",
+        "processed_docs_test = []\n",
+        "for doc in tqdm(raw_docs_test):\n",
+        "    tokens = word_tokenize(doc, language='french')\n",
+        "    filtered = [word for word in tokens if word not in stop_words]\n",
+        "    processed_docs_test.append(\" \".join(filtered))\n",
+        "#end for\n",
+        "\n",
+        "print(\"tokenizing input data...\")\n",
+        "#tokenizer = Tokenizer(num_words=max_len, lower=True, char_level=False)\n",
+        "#tokenizer.fit_on_texts(processed_docs_train + processed_docs_validation)  #leaky\n",
+        "word_seq_test = tokenizer.texts_to_sequences(processed_docs_test)\n",
+        "\n",
+        "#pad sequences\n",
+        "word_seq_test = sequence.pad_sequences(word_seq_test, maxlen=max_len)"
+      ],
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "pre-processing test data...\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "100%|██████████| 13137/13137 [00:09<00:00, 1331.48it/s]\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "tokenizing input data...\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "_WjpJN-Bqjeb"
+      },
+      "source": [
+        "predictions = model.predict(word_seq_test)\n",
+        "predictions = np.argmax(predictions,axis=1)"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "zUwjL_dQqjgx",
+        "outputId": "912642ad-95eb-413a-d074-8d4881a57359"
+      },
+      "source": [
+        "report = classification_report(predictions, y_test, output_dict = True)\n",
+        "\n",
+        "accuracy = report['accuracy']\n",
+        "weighted_avg = report['weighted avg']\n",
+        "\n",
+        "print(accuracy, weighted_avg)"
+      ],
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "0.5698409073608891 {'precision': 0.6081680700148677, 'recall': 0.5698409073608891, 'f1-score': 0.5847417616022411, 'support': 13137}\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "/usr/local/lib/python3.7/dist-packages/sklearn/metrics/_classification.py:1308: UndefinedMetricWarning: Recall and F-score are ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.\n",
+            "  _warn_prf(average, modifier, msg_start, len(result))\n",
+            "/usr/local/lib/python3.7/dist-packages/sklearn/metrics/_classification.py:1308: UndefinedMetricWarning: Recall and F-score are ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.\n",
+            "  _warn_prf(average, modifier, msg_start, len(result))\n",
+            "/usr/local/lib/python3.7/dist-packages/sklearn/metrics/_classification.py:1308: UndefinedMetricWarning: Recall and F-score are ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.\n",
+            "  _warn_prf(average, modifier, msg_start, len(result))\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "ka6DcPe7qqvg",
+        "outputId": "0c8cfbe6-178d-4208-98ba-4ba688e32939"
+      },
+      "source": [
+        "from sklearn.metrics import confusion_matrix\n",
+        "\n",
+        "classesName = encoder.classes_\n",
+        "classes = [str(e) for e in encoder.transform(encoder.classes_)]\n",
+        "\n",
+        "precision = []\n",
+        "recall = []\n",
+        "f1 = []\n",
+        "support = []\n",
+        "dff = pd.DataFrame(columns= ['className', 'precision', 'recall', 'f1-score', 'support', 'FP', 'FN', 'TP', 'TN'])\n",
+        "for c in classes:\n",
+        "  precision.append(report[c]['precision'])\n",
+        "  recall.append(report[c]['recall'])\n",
+        "  f1.append(report[c]['f1-score'])\n",
+        "  support.append(report[c]['support'])\n",
+        "\n",
+        "accuracy = report['accuracy']\n",
+        "weighted_avg = report['weighted avg']\n",
+        "\n",
+        "\n",
+        "cnf_matrix = confusion_matrix(y_test, predictions)\n",
+        "FP = cnf_matrix.sum(axis=0) - np.diag(cnf_matrix)\n",
+        "FN = cnf_matrix.sum(axis=1) - np.diag(cnf_matrix)\n",
+        "TP = np.diag(cnf_matrix)\n",
+        "TN = cnf_matrix.sum() - (FP + FN + TP)\n",
+        "\n",
+        "dff['className'] = classesName\n",
+        "dff['precision'] = precision\n",
+        "dff['recall'] = recall\n",
+        "dff['f1-score'] = f1\n",
+        "dff['support'] = support\n",
+        "dff['FP'] = FP\n",
+        "dff['FN'] = FN\n",
+        "dff['TP'] = TP\n",
+        "dff['TN'] = TN\n",
+        "\n",
+        "print(\"test_cnn_s\"+str(maxOfInstancePerClass))\n",
+        "\n",
+        "print(weighted_avg)\n",
+        "print(accuracy)\n",
+        "print(dff)\n",
+        "\n",
+        "dff.to_csv(\"drive/MyDrive/Classification-EDdA/report_test_cnn_s\"+str(maxOfInstancePerClass)+\".csv\", index=False)"
+      ],
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "test_cnn_s10000\n",
+            "{'precision': 0.6081680700148677, 'recall': 0.5698409073608891, 'f1-score': 0.5847417616022411, 'support': 13137}\n",
+            "0.5698409073608891\n",
+            "                                      className  precision  ...    TP     TN\n",
+            "0               Agriculture - Economie rustique   0.216535  ...    55  12636\n",
+            "1                                      Anatomie   0.459821  ...   103  12768\n",
+            "2                                     Antiquité   0.287975  ...    91  12710\n",
+            "3                                  Architecture   0.339623  ...   108  12722\n",
+            "4                               Arts et métiers   0.015504  ...     2  12995\n",
+            "5                                    Beaux-arts   0.060000  ...     6  13018\n",
+            "6                       Belles-lettres - Poésie   0.127660  ...    30  12761\n",
+            "7                                        Blason   0.228571  ...    24  12993\n",
+            "8                                    Caractères   0.037037  ...     1  13110\n",
+            "9                                        Chasse   0.221311  ...    27  12962\n",
+            "10                                       Chimie   0.160714  ...    18  12991\n",
+            "11                                     Commerce   0.443418  ...   192  12490\n",
+            "12                        Droit - Jurisprudence   0.762879  ...  1081  11263\n",
+            "13                          Economie domestique   0.000000  ...     0  13102\n",
+            "14                                    Grammaire   0.408929  ...   229  12254\n",
+            "15                                   Géographie   0.917312  ...  2607   9910\n",
+            "16                                     Histoire   0.405063  ...   288  11777\n",
+            "17                           Histoire naturelle   0.743292  ...   831  11661\n",
+            "18                                          Jeu   0.061538  ...     4  13067\n",
+            "19                                       Marine   0.590805  ...   257  12549\n",
+            "20                           Maréchage - Manège   0.620690  ...    72  13001\n",
+            "21                                Mathématiques   0.549669  ...    83  12903\n",
+            "22                                       Mesure   0.095238  ...     4  13087\n",
+            "23              Militaire (Art) - Guerre - Arme   0.476351  ...   141  12704\n",
+            "24                                  Minéralogie   0.000000  ...     0  13111\n",
+            "25                                      Monnaie   0.054795  ...     4  13051\n",
+            "26                                      Musique   0.287500  ...    46  12904\n",
+            "27                                    Médailles   0.000000  ...     0  13107\n",
+            "28                         Médecine - Chirurgie   0.376218  ...   193  12149\n",
+            "29                                      Métiers   0.605634  ...   731  11047\n",
+            "30                                    Pharmacie   0.070423  ...     5  13045\n",
+            "31                                  Philosophie   0.071429  ...     8  12996\n",
+            "32  Physique - [Sciences physico-mathématiques]   0.378378  ...   112  12674\n",
+            "33                                    Politique   0.000000  ...     0  13110\n",
+            "34                                        Pêche   0.170213  ...     8  13069\n",
+            "35                                     Religion   0.326371  ...   125  12488\n",
+            "36                                    Spectacle   0.000000  ...     0  13121\n",
+            "37                                 Superstition   0.000000  ...     0  13112\n",
+            "\n",
+            "[38 rows x 9 columns]\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "BqJ1_hUUqqx5"
+      },
+      "source": [
+        ""
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "bhfuGNwIqrOQ"
+      },
+      "source": [
+        ""
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "NkL3MopyqrQk"
+      },
+      "source": [
+        ""
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "XLHl-pvzqjjI"
+      },
+      "source": [
+        ""
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "lLR_Xvi9qjlo"
+      },
+      "source": [
+        ""
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "8cGcLOFTqjoP"
+      },
+      "source": [
+        ""
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "vLGTnit_W_V8"
+      },
+      "source": [
+        ""
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "R-3lBXjDD9wE"
+      },
+      "source": [
+        "def predict(data, max_len):\n",
+        "  \n",
+        "  pad_sequ_test, _ = prepare_sequence(data, max_len)\n",
+        "  pred_labels_ = model.predict(pad_sequ_test)\n",
+        "\n",
+        "  return np.argmax(pred_labels_,axis=1)\n",
+        "\n",
+        "\n",
+        "def eval(data, labels, max_len):\n",
+        "  \n",
+        "  pred_labels_ = predict(data, max_len)\n",
+        "  report = classification_report(pred_labels_, labels, output_dict = True)\n",
+        "\n",
+        "  accuracy = report['accuracy']\n",
+        "  weighted_avg = report['weighted avg']\n",
+        "  \n",
+        "  print(accuracy, weighted_avg)"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "6T3kAvKvExgc",
+        "outputId": "c6d4560e-fc64-4579-9adb-79c2e36d2386"
+      },
+      "source": [
+        "# evaluation sur le jeu de validation\n",
+        "eval(df_validation[columnText], y_validation, max_len)"
+      ],
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "/usr/local/lib/python3.7/dist-packages/zeugma/keras_transformers.py:33: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray\n",
+            "  return np.array(self.texts_to_sequences(texts))\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "0.06925290207361841 {'precision': 0.09108131158125257, 'recall': 0.06925290207361841, 'f1-score': 0.06099084715237025, 'support': 10079}\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "pTDJA03_-8yu",
+        "outputId": "d8bcdf73-c4c3-4c88-b063-90bd1cad5122"
+      },
+      "source": [
+        "# evaluation sur le jeu de test\n",
+        "df_test = pd.read_csv(test_path, sep=\"\\t\")\n",
+        "#df_test = resample_classes(df_test, columnClass, maxOfInstancePerClass)\n",
+        "\n",
+        "y_test = df_test[columnClass]\n",
+        "encoder = preprocessing.LabelEncoder()\n",
+        "y_test = encoder.fit_transform(y_test)\n",
+        "\n",
+        "eval(df_test[columnText], y_test, max_len)\n"
+      ],
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "/usr/local/lib/python3.7/dist-packages/zeugma/keras_transformers.py:33: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray\n",
+            "  return np.array(self.texts_to_sequences(texts))\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "0.07231483595950369 {'precision': 0.081194635559303, 'recall': 0.07231483595950369, 'f1-score': 0.06322383877903374, 'support': 13137}\n"
+          ]
+        }
+      ]
+    }
+  ]
+}
\ No newline at end of file
diff --git a/notebooks/EDdA_Classification_Generate_ConfusionMatrix.ipynb b/notebooks/EDdA_Classification_Generate_ConfusionMatrix.ipynb
new file mode 100644
index 0000000..14a33f7
--- /dev/null
+++ b/notebooks/EDdA_Classification_Generate_ConfusionMatrix.ipynb
@@ -0,0 +1,1181 @@
+{
+  "nbformat": 4,
+  "nbformat_minor": 0,
+  "metadata": {
+    "colab": {
+      "name": "EDdA-Classification_Generate_ConfusionMatrix.ipynb",
+      "provenance": [],
+      "collapsed_sections": []
+    },
+    "kernelspec": {
+      "name": "python3",
+      "display_name": "Python 3"
+    },
+    "language_info": {
+      "name": "python"
+    }
+  },
+  "cells": [
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "F-x2Ei_TdhSs"
+      },
+      "source": [
+        "train_path = 'training_set.tsv'\n",
+        "validation_path = 'validation_set.tsv'\n",
+        "test_path =  'test_set.tsv'\n",
+        "\n",
+        "columnText = 'contentWithoutClass'\n",
+        "columnClass = 'ensemble_domaine_enccre'\n",
+        "\n",
+        "minOfInstancePerClass = 0\n",
+        "maxOfInstancePerClass = 10000"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "En632UWohZBW"
+      },
+      "source": [
+        "## Setup colab environment"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "WoNGyMbFdsh1",
+        "outputId": "c5542219-0412-4e16-9779-122d5f99a1e2"
+      },
+      "source": [
+        "from google.colab import drive\n",
+        "drive.mount('/content/drive')"
+      ],
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Mounted at /content/drive\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "1LXBuRs9kOOc",
+        "outputId": "1f5fe407-4a46-4b96-8124-1a0c334616df"
+      },
+      "source": [
+        "import pandas as pd\n",
+        "import numpy as np\n",
+        "import pickle\n",
+        "import matplotlib.pyplot as plt\n",
+        "from sklearn.metrics import plot_confusion_matrix\n",
+        "\n",
+        "from nltk.stem.snowball import SnowballStemmer\n",
+        "from nltk.corpus import stopwords\n",
+        "from nltk.tokenize import word_tokenize\n",
+        "import nltk\n",
+        "nltk.download('stopwords')\n",
+        "nltk.download('punkt')"
+      ],
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "[nltk_data] Downloading package stopwords to /root/nltk_data...\n",
+            "[nltk_data]   Unzipping corpora/stopwords.zip.\n",
+            "[nltk_data] Downloading package punkt to /root/nltk_data...\n",
+            "[nltk_data]   Unzipping tokenizers/punkt.zip.\n"
+          ]
+        },
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "True"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 4
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "FNPXtQ19kbco"
+      },
+      "source": [
+        "def resample_classes(df, classColumnName, numberOfInstances):\n",
+        "  #random numberOfInstances elements\n",
+        "  replace = False  # with replacement\n",
+        "  fn = lambda obj: obj.loc[np.random.choice(obj.index, numberOfInstances if len(obj) > numberOfInstances else len(obj), replace),:]\n",
+        "  return df.groupby(classColumnName, as_index=False).apply(fn)\n",
+        "    "
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "jHyc3VeFhrxs"
+      },
+      "source": [
+        "## Load data"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "8-1HRF3Vhr3y",
+        "outputId": "bd5f5881-363f-41a9-ade7-33bbd1158adb"
+      },
+      "source": [
+        "!wget https://projet.liris.cnrs.fr/geode/EDdA-Classification/datasets/training_set.tsv\n",
+        "!wget https://projet.liris.cnrs.fr/geode/EDdA-Classification/datasets/validation_set.tsv\n",
+        "!wget https://projet.liris.cnrs.fr/geode/EDdA-Classification/datasets/test_set.tsv"
+      ],
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "--2021-11-26 08:17:56--  https://projet.liris.cnrs.fr/geode/EDdA-Classification/datasets/training_set.tsv\n",
+            "Resolving projet.liris.cnrs.fr (projet.liris.cnrs.fr)... 134.214.142.28\n",
+            "Connecting to projet.liris.cnrs.fr (projet.liris.cnrs.fr)|134.214.142.28|:443... connected.\n",
+            "HTTP request sent, awaiting response... 200 OK\n",
+            "Length: 189925180 (181M) [text/tab-separated-values]\n",
+            "Saving to: ‘training_set.tsv’\n",
+            "\n",
+            "training_set.tsv    100%[===================>] 181.13M  31.9MB/s    in 6.3s    \n",
+            "\n",
+            "2021-11-26 08:18:02 (28.9 MB/s) - ‘training_set.tsv’ saved [189925180/189925180]\n",
+            "\n",
+            "--2021-11-26 08:18:03--  https://projet.liris.cnrs.fr/geode/EDdA-Classification/datasets/validation_set.tsv\n",
+            "Resolving projet.liris.cnrs.fr (projet.liris.cnrs.fr)... 134.214.142.28\n",
+            "Connecting to projet.liris.cnrs.fr (projet.liris.cnrs.fr)|134.214.142.28|:443... connected.\n",
+            "HTTP request sent, awaiting response... 200 OK\n",
+            "Length: 67474385 (64M) [text/tab-separated-values]\n",
+            "Saving to: ‘validation_set.tsv’\n",
+            "\n",
+            "validation_set.tsv  100%[===================>]  64.35M  24.4MB/s    in 2.6s    \n",
+            "\n",
+            "2021-11-26 08:18:06 (24.4 MB/s) - ‘validation_set.tsv’ saved [67474385/67474385]\n",
+            "\n",
+            "--2021-11-26 08:18:06--  https://projet.liris.cnrs.fr/geode/EDdA-Classification/datasets/test_set.tsv\n",
+            "Resolving projet.liris.cnrs.fr (projet.liris.cnrs.fr)... 134.214.142.28\n",
+            "Connecting to projet.liris.cnrs.fr (projet.liris.cnrs.fr)|134.214.142.28|:443... connected.\n",
+            "HTTP request sent, awaiting response... 200 OK\n",
+            "Length: 79961640 (76M) [text/tab-separated-values]\n",
+            "Saving to: ‘test_set.tsv’\n",
+            "\n",
+            "test_set.tsv        100%[===================>]  76.26M  25.5MB/s    in 3.0s    \n",
+            "\n",
+            "2021-11-26 08:18:09 (25.5 MB/s) - ‘test_set.tsv’ saved [79961640/79961640]\n",
+            "\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "P_Psa_NhhyAA"
+      },
+      "source": [
+        "\n",
+        "df_test = pd.read_csv(test_path, sep=\"\\t\")\n",
+        "df_test = resample_classes(df_test, columnClass, maxOfInstancePerClass)\n",
+        "#df_test.dropna(subset = ['content', 'contentWithoutClass', 'firstParagraph', 'ensemble_domaine_enccre', 'domaine_enccre', 'normClass'], inplace=True)\n",
+        "\n",
+        "\n",
+        "data_eval = df_test[columnText].tolist()\n"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "AfsjFx1L_ddl"
+      },
+      "source": [
+        ""
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "iPQmgaSw_dnw"
+      },
+      "source": [
+        "## Test sur l'ensemble du corpus"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "I-BT_jRs74tI"
+      },
+      "source": [
+        "!wget https://projet.liris.cnrs.fr/geode/EDdA-Classification/datasets/EDdA_dataframe_withContent.tsv"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "0NrbzDu66-k3"
+      },
+      "source": [
+        "\n",
+        "## test sortie pour Katie avec la classification de tous les articles\n",
+        "df = pd.read_csv(\"EDdA_dataframe_withContent.tsv\", sep=\"\\t\")\n",
+        "df.dropna(subset = ['content', 'contentWithoutClass', 'firstParagraph', 'ensemble_domaine_enccre', 'domaine_enccre', 'normClass'], inplace=True)\n",
+        "\n",
+        "\n",
+        "data_eval = df[columnText].tolist()"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "k07oOrFyhPJ-"
+      },
+      "source": [
+        "## Load model"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "SHCqMPk8iPZS"
+      },
+      "source": [
+        "classifier_name = \"sgd\"       # sgd | lr | rfc | svm | bayes | bert-base-multilingual | camembert-base\n",
+        "vectorizer_name = \"tf_idf\"    # bagofwords | tf_idf | doc2vec"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "oJ2xKgoVSQFC"
+      },
+      "source": [
+        "# récupérer les modèles depuis le serveur\n",
+        "\n",
+        "\n",
+        "# récupéréer les modèles depuis Google Drive\n"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "xI_4exathQdd"
+      },
+      "source": [
+        "if classifier_name in [\"sgd\", \"lr\", \"rfc\", \"svm\", \"bayes\"]:\n",
+        "\n",
+        "  stop_words = set(stopwords.words('french'))\n",
+        "  stemmer = SnowballStemmer('french').stem\n",
+        "  def stem_tokenize(text):\n",
+        "    return [stemmer(i) for i in word_tokenize(text) if not i in stop_words]\n",
+        "\n",
+        "  vec_file_name = vectorizer_name + '_s' + str(maxOfInstancePerClass) +\".pkl\"\n",
+        "  with open(\"drive/MyDrive/Classification-EDdA/\"+vec_file_name, 'rb') as file:\n",
+        "    vectorizer = pickle.load(file)\n",
+        "\n",
+        "  clf_file_name = classifier_name + '_' + vectorizer_name + '_s' + str(maxOfInstancePerClass) +\".pkl\"\n",
+        "  with open(\"drive/MyDrive/Classification-EDdA/\"+clf_file_name, 'rb') as file:\n",
+        "    clf = pickle.load(file)\n",
+        "\n",
+        "  if vectorizer_name != 'doc2vec' :\n",
+        "    vec_data = vectorizer.transform(data_eval)\n",
+        "  else : \n",
+        "    tagged_test = [TaggedDocument(words=tokenize_fr_text(_d), tags = [str(i)]) for i, _d in enumerate(data_eval)]\n",
+        "    vec_data = np.array([vectorizer.infer_vector(tagged_test[i][0]) for i in range(len(tagged_test))])\n",
+        "\n",
+        "elif classifier_name in [\"bert-base-multilingual\", \"camembert-base\"]:\n",
+        "\n",
+        "  clf_file_name = \"drive/MyDrive/Classification-EDdA/model_\"+classifier_name + '_s' + str(maxOfInstancePerClass) +\".pt\"\n",
+        "\n",
+        "  model = torch.load(clf_file_name)\n",
+        "\n"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "jJjCGPTFjC78",
+        "outputId": "099e267e-8f5e-4c85-ef8e-b6bb60104c8d"
+      },
+      "source": [
+        "df_test[columnClass]"
+      ],
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "0               Commerce\n",
+              "1                    NaN\n",
+              "2                 Marine\n",
+              "3             Géographie\n",
+              "4               Histoire\n",
+              "              ...       \n",
+              "15849         Géographie\n",
+              "15850                NaN\n",
+              "15851    Arts et métiers\n",
+              "15852           Anatomie\n",
+              "15853                NaN\n",
+              "Name: ensemble_domaine_enccre, Length: 15854, dtype: object"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 13
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 853
+        },
+        "id": "o2J8mU_djDsm",
+        "outputId": "aa2784b6-623d-4605-cdfb-93e2b6adb3c1"
+      },
+      "source": [
+        "plot_confusion_matrix(clf, vec_data, df_test[columnClass], normalize=\"true\", include_values=False, xticks_rotation=\"vertical\", cmap=plt.cm.Blues)\n",
+        "name = classifier_name + '_' +vectorizer_name + '_s' + str(maxOfInstancePerClass) +\".png\"\n",
+        "\n",
+        "print(name)\n",
+        "pathSave = \"drive/MyDrive/Classification-EDdA/\" + name\n",
+        "plt.rcParams[\"figure.figsize\"] = (10,10)\n",
+        "plt.rcParams[\"font.size\"] = 10\n",
+        "\n",
+        "plt.savefig(pathSave, bbox_inches='tight')"
+      ],
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "/usr/local/lib/python3.7/dist-packages/sklearn/utils/deprecation.py:87: FutureWarning: Function plot_confusion_matrix is deprecated; Function `plot_confusion_matrix` is deprecated in 1.0 and will be removed in 1.2. Use one of the class methods: ConfusionMatrixDisplay.from_predictions or ConfusionMatrixDisplay.from_estimator.\n",
+            "  warnings.warn(msg, category=FutureWarning)\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "sgd_tf_idf_s10000.png\n"
+          ]
+        },
+        {
+          "output_type": "display_data",
+          "data": {
+            "image/png": "iVBORw0KGgoAAAANSUhEUgAAAxYAAAL5CAYAAAAt27JEAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAgAElEQVR4nOzde7yd453//9c7O5GkEtQEdUgEpQ5BJDtOQRM07dRM0EYjVa3Shk5RbSkz/AzaDkbbIVQ1/ExoDYpqU1VxjEOKnI+KToXWUE1CS4pI9v58/7ivxW1l7cO9s9daOzvv5+OxHvu+r/tzX/d1r0OyrnWdFBGYmZmZmZmtix71LoCZmZmZma3/XLEwMzMzM7N15oqFmZmZmZmtM1cszMzMzMxsnbliYWZmZmZm68wVCzMzMzMzW2c9610AMzMzM7PurmGT7SPWvFWTa8Vby6ZFxCdqcrEcVyzMzMzMzKos1rxF7498pibXenv+DwfU5EJl3BXKzMzMzMzWmVsszMzMzMyqTqDu/Zt+9747MzMzMzOrCbdYmJmZmZlVmwCp3qWoKrdYmJmZmZnZOnOLhZmZmZlZLXiMhZmZmZmZWevcYmFmZmZmVgseY2FmZmZmZtY6t1iYmZmZmVWd17EwMzMzMzNrk1sszMzMzMxqwWMszMzMzMzMWucWCzMzMzOzahMeY2FmZmZmZtYWVyzMzMzMzGyduSuUmZmZmVnVyYO3zczMzMzM2uIWCzMzMzOzWvDgbTMzMzMzs9a5xcLMzMzMrBY8xsLMzMzMzKx1brEwMzMzM6s6eYyFmZmZmZlZW9xiYWZmZmZWbcJjLMzMzMzMzNriFgszMzMzs1rwGAszMzMzM7PWucXCzMzMzKzqPCuUmZmZmZlZm9xiYWZmZmZWCz08K5SZmZmZmVmrXLEwMzMzM7N15q5QZmZmZmbVJjx428zMzMzMrC1usTAzMzMzqwV58LaZmZmZmVmr3GJhZmZmZlZ1XiDPzMzMzMysTW6xMDMzMzOrBY+xMDMzMzMza51bLMzMzMzMasFjLMzMzMzMzFrnFgszMzMzs2qTPMbCzMzMzMysLW6xMDMzMzOrBY+xMDMzMzMza51bLMwMgIa+m0TPTbZqd/yQ7TatYmmsHqLK+RftWVykPNXutby6qdiz06uhe/ejtrY1RbH3TEMV+94X/WxvKO/eF154nuXLl9f2drv5GAtXLMwMgJ6bbMWHjv1Bu+NnfP+fq1gaq4co+EWoYDg9ehT7D3X1muZ2x/bqWd0G+GWvryoUv8UmvatUEltfvP7W6kLxm/TtVaWSQHNzsQ9r0c/q+mrkfo31LkK3465QdSbpKEkhadcOnn+RpMM7eO7zkgZI2kzSv3Qkj3WVyrBI0vz0mFSPclQiaRtJt9fwekMlfTK3P1bSObW6vpmZmdm6cItF/U0AHkt//73IiZIaIuL8TijDZsC/AFd34PpNnXD90RGxvBPy6VQR8RIwriPnShKgiGj/T64wFGgE7k7XnwpM7cj1zczMrKuRB29b9UjqBxwEnAQcm0vvIelqSU9Luk/S3ZLGpWPPS7pU0lzgGElTcsdGSPqtpAWSZkrqL+kESVfl8r5L0qiyolwC7JRaDC6TNErSXblzrpJ0QgvXHyPpcUlzJd2W7qkznpsPS7o/3ctcSTspc5mkxamVY3yKHSVpuqTb03N2U/pij6TDJM1L8ddL6p27j4vTPc+WNEzSNEl/kHRKihksaXHabkjXniVpoaSTK5R5sKRnJN0ILAYGSlqZOz5O0pS0fUy6jwWSHpG0EXARMD6VaXz+tZO0Q3qeF0n6TinfNl6r4ZIeljQn3dvWnfHamJmZmVXiikV9HQncExHPAiskDU/pnwIGA7sDxwMHlJ23IiKGRcQtpYT0xfRW4GsRsTdwOPBWO8txDvCHiBgaEWe1I35FRAwD7gfOAw5P+7OBb7TzmnkP5bpCfT2l3QT8MN3LgcDLZM/LUKB0f5flvizvA5xB9pztCIyU1AeYAoyPiD3JWui+krvuHyNiKPBoihsH7A9cWKGMJwF/i4gRwAjgy5J2qBC3M3B1ROwRES+0cs/nAx9P9zc2It5Jabem1+HWsvgrgB+l+3i5lXwBkNQLuBIYFxHDgeuB77Z1npmZmVVRaZG8aj/qxF2h6msC2RdGgFvS/hyyVozbUjeaP0t6qOy88i+dAB8BXo6IWQAR8TqAqvPmKl1/f7Iv8jPSdTYCHu9Afu/rCiWpP7BtRNwJEBFvp/SDgJtT96tXJD1M9iX/dWBmRLyY4uaTVczeAJamihvADcBXgcvTfqmb0SKgX0S8AbwhaZWkzcrKOAbYq9Q6BGxKVolYWhb3QkQ80Y57ngFMkfQz4OftiB8JfDpt/wS4tI34jwBDgPvSa9NAhQqJpInARICG/lu0oxhmZmZmlbliUSeSNgcOBfaUFGRf/EJSe1oM/l7gUmt4f8tUn044p3R9AfdFxISWMpI0EPhV2r0mIq5px/U7Ij9lSxPte2+XzmkuO7+5wvkCTouIaW3kWf7a5KfiePd5jIhTJO0HHAHMybVWtabStB4tvVYClkREeWvX+zOMmAxMBui91c7Vnm3UzMxswyU8xsKqZhzwk4jYPiIGR8RAsl+/Dyb7NfvTysZabAWMakd+zwBbSxoB2a/+knoCzwNDU14DgX0rnPsG0D+3/wKwu6Te6Zf7w1q45hNkXY4+nK65saRd8gER8afUtWdoeysVqeXgRUlHpXx7S/oAWZel8Wm8wxbAIcDMVrJ6BhhcKh9Zt7KH21OGCqYBX0ldjJC0i6SN23HeK5J2k9QDOLqUKGmniHgyDb5fBgxk7dchbwbvjcM5Lpfe0mv1DLCFpAPS9XpJ2qNdd2pmZmbWAW6xqJ8JrN2d5Y6U/lWyL4hPAX8C5gJ/ay2ziHgnDWa+UlJfsvEVh5N9IV2a8vpdyqv83BWSZqSByr+JiLNSF53F6dx5LVxzWRoofHNpUDTZmItnK8W34iFJpdmlFkbE58kqAT+WdBGwGjgGuJNsvMkCsl/vvxURf1YLU/VGxNuSvgjclipZs4COtphcR9a9am4aGL4MOKod550D3JXiZwOlwe2XSdqZ7PeLB9I9/RE4J3Xlurgsn68B/yPpbOCXpcSI+FOl1yq9H8YBkyRtSvZZvxxYUvC+zczMrFN0/1mhVHRBJKsNSf0iYqWkfyD7VX5kRPy53uWyrkHSyojolBm4SnpvtXMUWSDvGS+Q1+14gbyWeYE8K8oL5HV9I/drZM6c2TW72R6bbR+9Dz67Jtd6+66vzomImq8A6BaLruuu1LVlI+DbrlRYtQ3ZbtNCq2l/8Ijvtzv21buKTRZWpUkHNkjV/PGo2l8+GgrkX+0vTv/Qb6NC8UUUfY2Kfj6qnf+G4u3VxZZt6te763zF8kvahXTzF6PrvOvtfSJiVL3LYF1XZ7dWmJmZma2r7t3Ry6xKJB0lKVoa39HOPE6QtE0nlWcbSbd3Rl5mZmZWJepRm0eduGJh1jET
gMfS3446AeiUikVEvBQR49qONDMzM6sOVyzMCpLUj2wRw5NIU8BKGiVpuqTbJT0t6aY0exSSzpc0S9JiSZOVGQc0AjelFcf7SjpM0jxJiyRdX5ppS9Lzki5OcbMlDZM0TdIfJJ2SYganWb1I0/Felq65UNLJdXiazMzMrFw3X3nbFQuz4o4E7kkreq/ILW63D3AG2WrkO5Ktlg1wVUSMiIghQF/gnyLidrLpZ4+LiKFk0+dOAcZHxJ5k45++krvmH1PcoyluHNnK5xdWKN9JwN8iYgTZyuRflrRDp9y5mZmZWQtcsTArbgJwS9q+hfe6Q82MiBcjohmYT7buBcBoSU9KWkS22nqlheo+AixNlRWAG8gWACyZmv4uAp6MiDciYhmwKs0eljcG+HxaD+NJ4B+AnSvdiKSJqRVk9rLly9q8cTMzM+sgqduPsfCsUGYFSNqcrHKwp6QAGshaG34N5Ce6bwJ6SuoDXA00psXsLgD6dODSpbyby67TzNqfYwGnRcS0tjKNiMnAZIDhwxu9qI2ZmZl1mFsszIoZB/wkIraPiMERMZBsxeuDW4gvVSKWp7EZ+QHWbwD90/YzwGBJH077xwMPd7CM04CvSOoFIGkXSRt3MC8zMzPrLB5jYWY5E4A7y9LuoIXZoSLir8C1wGKyL/yzcoenANekLksCvgjclrpMNQPXdLCM1wFPAXPTgO4f49ZJMzMzqzJ/2TArICJGV0ibBEwqSzs1t30ecF6F8+4gq5SUPEA2ALw8bnBuewpZhaT82HJgSEprBv4tPczMzMxqwhULMzMzM7MaUB27KdWCKxZm1iGv/fqb7Y7d8vgbC+X9yo3HF4rv7v9Qr4v1+bnp0aP9ZX9z1ZpCeTc1F5uroH/fXoXii6j2a7Q+vwe6kj69GupdBLMuz2MszLopST0lnVpaaM/MzMzqR2QV/Vo86sUVC7N1IOkoSSFp13bEniHpA7n9uyusQdHe6zZKmpS2R0k6sOy4gMuBhRGxqlIeZmZmZp3JFQuzdTMBeIwWZoUqcwbwbsUiIj6ZZo0qLCJmR8TpaXcUcGDZ8YiIUyPikY7kb2ZmZp1MNXzUiSsWZh2U1qU4CDgJODaljZI0XdLtkp6WdJMypwPbAA9JeijFPi9pQNo+V9Kzkh6TdLOkM1P6dEmNaXuApOdz17lL0mDgFODrkuZLOljSFpLukDQrPUbW8nkxMzOzDZMHb5t13JHAPRHxrKQVkoan9H2APYCXgBnAyIiYJOkbwOiIWJ7PJJ13LDCU7DM5F5jTngJExPOSrgFWRsT3Un7/A/xXRDwmaRDZ+hm7revNmpmZ2bqo7/iHWnDFwqzjJgBXpO1b0v5dwMyIeBEgLX43mKy7VEsOBu6MiDfTOVPXsVyHA7vn/vHaRFK/iFhZHihpIjARYOCgQet4WTMzM9uQuWJh1gGSNgcOBfaUFEADEMCvgfxg6SbW7XO2hve6LPZp5zk9gP0j4u22AiNiMjAZYPjwxmLzb5qZmVkh3b3FwmMszDpmHPCTiNg+IgZHxEBgKVnrQ0veAPpXSH8EOEpSX0n9gX/OHXseKHWxGtfOfO8FTivtSBra2o2YmZmZdQZXLMw6ZgJwZ1naHbQ+O9Rk4J7S4O2SiJgL3AosAH4DzMod/h7wFUnzgAEt5Psr4OjS4G3gdKBR0kJJT5EN7jYzM7M66+7rWLgrlFkHRMToCmmTgEllaafmtq8ErsztD85tfxf4LoCkC3LpTwN75bI8L6VPB6an7WfLYgDGF7kfMzMzs3XlioWZAdkAkabm9g+zaOjR/l9EXrnx+EJl2fJzNxaKX3bTFwrFW+eIKDYsp+ivaEXy792roVDeBd6+673mAp9rgB4Fn5wir1N371+e9/pbqwvFb9K3V5VKAgU/qmxAL1PNdffPgCsWZl1MRFxQ7zKYmZmZFeWKhZmZmZlZtdV5Vexa8OBt2+BJOkpSSNq14Hnvrpxdlj5W0jm5vHfvYLk2k/QvHTnXzMzMrNZcsTDLZnJ6jAozOkkq3KoXEVMj4pK0exTQoYoFsBlQuGIhqVhnczMzM7NO4IqFbdAk9QMOAk4Cjk1poyQ9mlbAfkpSg6TvSVqcpnA9LZfFaZLmSlpUavGQdIKkqyQdCIwFLktTwe6UHvdImpOuUTpnK0l3SlqQHgcClwA7pXMvS+W6K1f2qySdkLafl3SppLnAMZLGSHo8le22dJ9mZmZWJ6I2U816ulmz+jkSuCcinpW0QlJpMbphwJCIWCrpK8BgYGhErEmrbpcsj4hhqcvSmcCXSgci4repcnJXRNwOIOkB4JSI+L2k/YCryVbwngQ8HBFHpxaHfsA5qQxD07mj2riXFaksA4CfA4dHxN8lnQ18A7ioo0+SmZmZWVtcsbAN3QTgirR9S9q/C5gZEUtT+uHANRGxBiAiXs2d//P0dw7wqdYulFoNDgRuy/2a0Dv9PRT4fMq/CfibpA8WvJdb09/9ybpfzUjX2Qh4vIUyTQQmAgwcNKjg5czMzKwITzdr1k2llodDgT0lBdBAtpzDr4G/tzObVelvE21/nnoAfy21QHTAGt7ffbFP2fFSmQXcFxGtrQIOQERMJlsRnGHDGwvOdG5mZmb2Ho+xsA3ZOOAnEbF9RAyOiIHAUuDgsrj7gJNLA7nLukK15Q2gP0BEvA4slXRMykeS9k5xDwBfSekNkjbNn5u8AOwuqbekzYDDWrjmE8BISR9O+W0saZcCZTYzM7Mq6O5jLFyxsA3ZBODOsrQ7WHt2qOuAPwILJS0APlvgGrcAZ0maJ2kn4DjgpJTPErIxHgBfA0ZLWkTWrWr3iFhB1p1psaTLIuJPwM+AxenvvEoXjIhlwAnAzZIWknWDKjSVrpmZmVlR7gplG6yIGF0hbRLZQOp82hqywc/fKEsfnNueDYxK21OAKWl7BmtPN/uJCtd9hfcqGfn0z5btfwv4VoW4wWX7DwIjyuPMzMysfjzGwsw2CAIaerT/H7yI9g/JKPoP6bKbvlAo/oGnXykUf+hHtiwU393/Iyhpai42zKbIewCgZ0Ox57FIcYqWRT2KNdivaWouFN+zoet0COhR4HO9viv8Pijw2S6a95lTnyoUP3n83m0HdVC1B9BV83m39YsrFmZmZmZm1ab06Ma6zk8qZhs4SSMlHVLvcpiZmZl1hCsW1q1IOkpSlFa0biFms7SgXWdc7wRJ23RCPvsAXyS33oSkfyuL+e26XsfMzMzqx7NCma1fJgCPsfbMTgCkKWM3AzqlYkE2+9I6VywiYl5EfCkiVueS/60s5sD25pemsvXn28zMzGrGXzys20grWx8EnAQcm0sfJelRSVOBp4BLgJ0kzZd0maStJT2S9hdLKl/HAknDJT0saY6kaemccUAjcFM6t2/ZOdMl/Zek2ZJ+J2mEpJ9L+r2k7+TiPidpZsrjx2kdi0uAvintphS3MnfOWZJmSVoo6cKUNljSM5JuJJuSdqCkKemeFkn6eqc92WZmZlaIqE1rRT1bLDx427qTI4F7IuJZSSskDY+IOenYMGBIRCyVNDhtDwWQ9E1gWkR8V1I
D8IF8ppJ6AVcCR0bEMknjge9GxImSTgXOTNPNVvJORDRK+hrwS2A48CrwB0n/BWwJjAdGRsRqSVcDx0XEOZJOrbRKt6QxwM7AvmTDwKYqG5vxx5T+hYh4QtJwYNuIGJLO26zwM2pmZmbWTq5YWHcyAbgibd+S9ksVi5kRsbSF82YB16cKxC8iYn7Z8Y8AQ4D70q8ADcDL7SzT1PR3EbAkIl4GkPQcMJCshWU4MCvl3Rf4Sxt5jkmP0gJ5/cgqFH8EXoiIJ1L6c8COkq4Efg3cW56RpInARICBgwa185bMzMysI+rZmlBO0ifIvjc1ANdFxCVlxwcBN5B1IW8AzomIu1vL0xUL6xYkbQ4cCuwpKcg+ACHprBTy95bOjYhH0i/+RwBTJP0gIm7MZ09WKTigA0Vblf4257ZL+z1T3jdExL8WyFPAxRHx4/clZi0x795nRLwmaW/g48ApwGeAE/PnRMRkYDLA8OGN1Z7q3MzMzLqA1EPjh8DHgBfJfuCcGhH5BVjOA34WET+StDtwNzC4tXw9xsK6i3HATyJi+4gYHBEDgaXAWuMlgDeA/qUdSdsDr0TEtcB1ZN2m8p4BtpB0QIrvJWmPSnl1wAPAOElbprw3T+UBWJ1aUcpNA05MY0qQtG3p/DxJA4AeEXEH2T8O5fdlZmZmG6Z9gf+NiOci4h2ynh5HlsUEsEna3hR4qa1M3WJh3cUE4NKytDtS+q35xIhYIWmGpMXAb8gGOp8laTWwEvh8Wfw7aaD2JEmbkn1uLgeWAFOAayS9BRwQEW8VKXREPCXpPOBeZbM4rQa+CrxA1pKwUNLciDgud869knYDHk9NqiuBzwFNZdlvC/y33psdqkiriJmZmXW2rtMTalvgT7n9F4H9ymIuIPt+chqwMXB4W5m6YmHdQkSMrpA2Kbc7vezYZ8vCb2gj//nAWovXpdaAO1o4Z1Rue3q+DGXHbqWs8pPSzwbOzu33y21fwXvjSfKG5GIW4FYKMzOzDdEASfmJZSan7s9FTACmRMT3U6+Nn0gaEhHNLZ3gioWZdUgUGJERRYKB5oLxh+26VaH4gRPXqse16k+TxxeKX1819Cj2U1pzi/+1dI4ipelKAyI3NF3pua9mWQr+s8QlR+xWnYIkzc3tL1DRz3ZRXek90KWpps/V8ohobOX4/5FNIlOyXUrLOwn4BEBEPC6pDzCAViaZ8RgLMzMzM7MNyyxgZ0k7SNqIbP2vqWUxfwQOA0hdsPsAy1rL1BULsxZIakoL1C2QNFdSu1e+rhdJZ0j6QNuRZmZmVmtdZYG8iFgDnEo2IczvyGZ/WiLpIkljU9g3gS9LWgDcDJwQbXRBcFcos5a9lVtE7+PAxcBH61uklqWp484Afgq8WefimJmZWReW1qS4uyzt/Nz2U8DIInm6xcKsfTYBXivtSDpL0ixJCyVdmEv/haQ5kpakxedK6Stz2+MkTUnbv5T0+bR9sqSbKl28tXwlfT/9mnAusA3wkKSHJDVImiJpsaRFkr7eac+GmZmZFdZVWiyqxS0WZi3rK2k+WZ/CrckW4EPSGLKVrvclG186VdIhEfEIcGJEvCqpL9liM3dExIpWrjERmCFpKVmT4/4txLWU78bAkxHxzVS2E4HREbFc0nBg24gYko5ttk7PhpmZmVkrXLEwa1m+K9QBwI2ShgBj0mNeiutHVtF4BDhd0tEpfWBKb7FiERGvSDofeAg4OiJebSG0pXybaGG6W+A5YEdJVwK/Bu4tD0itHxMBBg4a1FIxzczMbB2J+rYm1IIrFmbtkKZZGwBsQdZKcXFE/DgfI2kU2eIxB0TEm5Kmk7V2QLZ6ZUkf3m9PskrCNimfgcCv0rFrgKdbyfftiChfGK9U5tck7Q18HDgF+AxwYlnMZLKF+Bg+vLHgZIpmZmZm7/EYC7N2kLQr0EBWAZgGnCipXzq2raQtyZa7fy19+d+V93drekXSbspWwT46l+++wD8C+wBnStohIv4UEUPT45o28i33BtA/5T0A6JEW8TsPL5ZnZmZWX6rRo07cYmHWstIYC8g+pl9IrQP3KpvP+fHUpLkS+BxwD3CKpN8BzwBP5PI6B7iLbP7n2UA/Sb2Ba4EvRsRLkr4JXC/p0LLp3FrLt9xk4B5JL5HNEPXfqTID8K8dexrMzMzM2uaKhVkLIqKhlWNXAFdUOPSPLcTfDtxe4dDeuZiprL04DRGxqpV8+5XtXwlcmUtyK4WZmVlXUNuVt+vCXaHMzMzMzGyducXCzDqkR4/2/+rypxXF1uvbbvO+RYtTyJ8mjy8UP2bSY+2Ovff0g4oWp5A2Fj1dy/r861hzgXtd3VT0eSlWlp4F3u8Azc3tL0/RWRMaCpal2oq8J7va+7Ho56mID27cq2p5Q7HPB83F8i7y73tHFPl8VLsstdbVPgOdzS0WZusBSdtIOr7e5TAzMzNriSsWts4kNUmaL2mBpLmSDmzHOSvT38GSFndCGUZJuquNmKGSPll2Tptl7UySTpC0LD1fT0n6cjvO2Qz4AfBAG3FjJZ3TWWU1MzMzK8Jdoawz5BeS+zhwMfDR+hapoqFAI3B32h9FNqPTb8sDJfWMiDVVKsetEXFqmqJ2iaSpEfFKS8ER8Vfg2LYybWnwt5mZmXUN7gplVswmwGulHUlnSZolaaGkC1s7UVKDpMty8Sen9K0lPZJ+5V8s6eA28tlY0vWSZkqaJ+lISRsBFwHjUz5nky0a9/W0f7CkKZKukfQk8J+SdpJ0j6Q5kh5Na0gg6ZhUjgWSHunoExURfwH+AGwv6bBU1kWp7L3TtYZLejiVYZqkrVP66anFY6GkW1LaCZKuSttbSLojPZezJI3saDnNzMzM2sMtFtYZSus99AG2Bg4FkDQG2BnYl2wdiKmSDomIlr6MnwT8LSJGpC/WMyTdC3wKmBYR35XUAHygjfKcCzwYESembkQzgfuB84HGiDg1la8vsDIivpf2TwK2Aw6MiCZJDwCnRMTvJe0HXJ3u7Xzg4xHxfyn/DpG0I7Aj8CLwJHBYRDwr6UbgK5J+SDZ17JERsUzSeOC7ZKtnnwPsEBGrWijDFcB/RcRjkgaRLeq3W0fLamZmZp2gezdYuGJhnSLfFeoA4EZJQ4Ax6TEvxfUjq2i0VLEYA+wlaVza3zTFzyJbOK4X8IuImN/C+fl8xko6M+33AQa1815uS5WKfsCBwG25Zsve6e8MYIqknwE/b2e+eeMlHQSsAk4GtgCWRsSz6fgNwFfJKkNDgPtSGRqAl1PMQuAmSb8AflHhGocDu+fKvomkfhGxMh8kaSIwEWDgoPY+RWZmZmZrc8XCOlVEPC5pANmXZQEXR8SP23m6gNMiYtpaB6RDgCPIvtD/AHgD+Pd0+EsV8vl0RDxTlsd+7SjD39PfHsBfSxWmvIg4JeV1BDBH0vCIWJG7znfTMSqdTxpjkYvfu0JM6T6WRMQBFY4dARwC/DNwrqQ9y473APaPiLdbyLt0L5PJVutm+PDG6s27aGZmZh5jYVZEGofQAKwg635zYvr1H0nbpgHLLZlG1gWoV4rfJY2X2B54JSKuBa
4DhkXEnRExND1mV8jnNKVPr6R9UvobQP9cXPn+uyLidWCppGNSHipVACTtFBFPRsT5wDJgYNm555bK1sq95j0DDJb04bR/PPBwSt8itQIhqZekPST1AAZGxEPA2WQtO/3K8rwXOK20I6m9ZTEzMzPrELdYWGcojbGA7Ff2L0REE3CvpN2Ax9N3/JXA54C/tJDPdcBgYG6qFCwDjiKbveksSatTHp9vozzfBi4HFqYv4UuBfwIeAs5JZb0Y+BVwu6QjyX0JzzkO+JGk84BewC3AAuAySTune30gpXVYRLwt6Ytk3a56knX9uiYi3kndwiZJ2pTs83o58Czw05QmYFJE/LXsV5DTgR9KWpjOe4RssLqZmZnVgaRu32Khaq46aWbrj+HDG2PGk+WNP52j2itvV/sfaq+8XVmR1XOh+Aq6a5rav1xwV1t5u0eBC3jl7fopUvaiX5eK3mrR56bI56PI+xE2nJW3R1deqhkAACAASURBVO7XyJw5s2v2puy91c7xofE/qMm1/njl2DkR0ViTi+W4xcLMqm7bD3atikJRRSoLHzzyykJ5v/bLSo1lLetqz001NRX48tF3o4ZCea9e0/4vZdC1vphVu0JX1Pr8nixS9qK3WfQ91qtnsQv0bFh/e7NX+z3Zla3Pn5f2WH/flWZmZmZm1mW4xcLMzMzMrAbcYmFmnUJSk7JVvhdImivpwJQ+WNLiepfPzMzMbF24xcKsdvILCX6cbGaqj9a3SGZmZlYz3bvBwi0WZnWyCfBaeWJqvXg0tWjkWzW2lvRIavFYLOnglD5B0qKUdmkun5WSvptaR56QtFXN7szMzMw2SK5YmNVO31QxeJpszY5vV4j5C/CxiBgGjAcmpfTPAtNSi8fewHxJ2wCXAocCQ4ERko5K8RsDT0TE3mRrWHy5WjdlZmZm7VNay6Laj3pxxcKsdt5KK3LvCnwCuFFrf/p7AddKWgTcBuye0mcBX5R0AbBnRLwBjACmR8SyiFgD3AQckuLfAe5K23PIFh5ci6SJkmZLmr1s+bJOuUkzMzPbMLliYVYHEfE4MADYouzQ14FXyFolGoGNUvwjZJWG/wOmSGpr9fHV8d7KT020MJ4qIiZHRGNENG4xoLwoZmZmZu3nioVZHUjaFWgAVpQd2hR4OSKageNTDJK2B16JiGvJulENA2YCH5U0QFIDMAF4uEa3YGZmZkWo+3eF8qxQZrXTV9L8tC3gCxHRVPYPwNXAHalF4h7g7yl9FHCWpNXASuDzEfGypHOAh1J+v46IX9bgPszMzMzW4oqFWY1EREML6c8DQ9L274G9cofPTuk3ADdUOPdm4OYK6f1y27cDt69D0c3MzGwdCejm6+O5YmFmmeaAt1c3tTu+T6+K9aSKevTo5v+S5rz2y9MKxZ/682JrI171qSGF4qup2q9rtB3yriLv3Y5oaipSGujTo/2fj6KKPu9Fn5sin+2i+RfNe3328l/fLhQ/aMAHqlSS6r8HivJ7pvtyxcLMzMzMrOrqO/6hFjx426yLkvSZNGjbzMzMrMtzxcKsBZI+JOkWSX+QNEfS3ZJ2qdK1hkr6ZFnyg8AlkjwPrJmZWTcg1eZRL65YmFWQFq67k2wBup0iYjjwr8BW7TlXUtHP1lDgfRWLiFgeERMiouLKdZLcldHMzMy6DFcszCobTbbI3DWlhIhYAMyT9ICkuZIWSToSQNJgSc9IuhFYDAyU9KO0qvUSSReW8pE0QtJvJS2QNFPSpsBFwHhJ8yWNl7SxpOslzUppR6VzT5A0VdKDwAO5uJmS5uXKs0dKmy9poaSda/bMmZmZWUVex8JswzQEmFMh/W3g6Ih4XdIA4AlJU9OxncnWpngCQNK5EfFqWrzuAUl7AU8DtwLjI2KWpE2AN4HzgcaIODWd+x/AQxFxoqQPAjMl3ZeuMwzYK+X9H8CDKW6zFHc/cApwRUTcJGkj0kJ7ZmZmZtXiioVZMQL+Q9IhQDOwLe91j3qhVKlIPiNpItnnbGtgd7IZNF+OiFkAEfE6UOnXhTFkq2qflPabgIFp+76IeDUXN1bSmWm/DzAIeBw4V9J2wM/T+hhr30xWvokA2w0c1O4nwczMzAqq8/iHWnDFwqyyJcC4CunHAVsAwyNitaTnyb7Mw3urZCNpB+BMYEREvCZpSi6uPQScFBFPvy9R2j9/nRT36Yh4puz830l6EjgCuFvSyRHxYPlFImIyMBlgn2GNxSbqNzMzM8vxGAuzyh4Eeqdf9AFIXZm2B/6SKhWj034lm5BVAP4maSvgH1P6M8DWkkakPPunQdhvAP1z508DTk2DyJE0vIXrTANOy8Xtk/7uCDwXEZOAX/L+1bzNzMysxkS2wGUtHvXiioVZBRERwNHA4Wm62SXAxcDdQKOkRcDnycZMVDp/ATAvHf8fYEZKfwcYD1wpaQFwH1lLxkPA7qXB28C3gV7AwnTtC9e+ClSI+3ZK/wywWNJ8svEiN3b4yTAzMzNrB3eFMmtBRLxE9gW93AEtnDKk7PwTWsh3FrB/hUMjyvZPrnDuFGBKbv+tFuIuAS5poZxmZmZWBx5jYWYbBAk2anAjZq1d9akhbQflfHDspELxr009vVB8EavXNBeK79Wz2PurT6/qTWbW3FxsSFFzdJ0hSGuaij3v1Xwea5H/+mqrTXvXuwjv6mqvUVcrj3Uef4swMzMzM7N15hYLsyqT9CHgcrKuTn8FXgF+AYyNiH+qZ9nMzMysduq5eF0tuMXCrIrSbE13AtMjYqeIGA78K++tfWFmZmbWLbhiYVZdo4HVEXFNKSHNGPUo0E/S7ZKelnRTbsrY8yXNkrRY0uRc+umSnpK0UNItKe2jaSap+ZLmSeqf0s9KeSyU1NKMUmZmZlYraYG8WjzqxRULs+oaAsxp4dg+wBlkK3LvCIxM6VdFxIiIGAL0BUrdpc4B9omIvYBTUtqZwFcjYihwMPCWpDHAzsC+wFBgeFop3MzMzKxqXLEwq5+ZEfFiRDQD84HBKX20pCfTWhmHAnuk9IXATZI+B6xJaTOAH0g6HdgsItYAY9JjHjAX2JWsorEWSRMlzZY0e/nyZZ1/h2ZmZgZkC+RJqsmjXlyxMKuuJUBLq2avym03AT0l9QGuBsZFxJ7AtWQL6AEcAfwQGAbMktQzrVfxJbKWjRmSdiX7t+viiBiaHh+OiP+/UgEiYnJENEZE44ABW6zjrZqZmdmGzBULs+p6EOgtaWIpQdJeZN2WKilVIpZL6geMS+f0AAZGxEPA2cCmZGM0doqIRRFxKTCLrHViGnBiOh9J20rasgr3ZmZmZu1Wm9aKerZYeLpZsyqKiJB0NHC5pLOBt4HnyaabrRT/V0nXAouBP5NVFgAagJ9K2pSsRWJSiv22pNFAM1nryG8iYpWk3YDH0z8uK4HPAX+p1n2amZmZuWJhVmUR8RLwmQqHrs3FnJrbPg84r0L8QRXyPq2Fa14BXFG4sGZmZlY13XwZC3eFMjMzMzOzdecWCzMDsv5VPXp0859SkubmKBTflZ6X16aeXih+/+880O7YJ847rFDeDVV+Xoq8TkVfo
6Lxb77dVCi+X0P7f7cr+n7sWSDvWqjm69SVPqsvvfZWofhNP9CrUHzvQtHFn5siutK/ed2NV942MzMzMzNrgysWZjUi6UOSbpH0B0lzJN2d1pG4q4X46yTtXiD/RkmTOq/EZmZm1mk2gJW33RXKrAaUtX3eCdwQEcemtL2BsS2dExFfKnKNiJgNzF6XcpqZmZl1lFsszGpjNLA6Iq4pJUTEAuBRsvUobpf0tKSbUiUESdMlNabtlZIuk7RE0v2S9k3Hn5M0NsWMKrV+SNpY0vWSZkqaJ+nImt+xmZmZvcsrb5tZZxkCzGnh2D7AGcDuwI7AyAoxGwMPRsQewBvAd4CPAUcDF1WIPzfF70tWqblM0sbrdAdmZmZmrXDFwqz+ZkbEixHRDMwHBleIeQe4J20vAh6OiNVpu1L8GOAcSfOB6WQreg8qD0pjPGZLmr1s+bJ1vQ8zMzPbgHmMhVltLAHGtXBsVW67icqfy9URUZpbsLl0TkQ0S6oUL+DTEfFMa4WKiMnAZIDhwxurN3ehmZmZeYE8M+sUDwK9JU0sJUjaCzi4StebBpyWG6+xT5WuY2ZmZga4YmFWE6m14Wjg8DTd7BLgYuDPVbrkt4FewMJ0rW9X6TpmZmbWTt198La7QpnVSES8BHymwqFrczGn5rZH5bb75bYvKMu3X/o7nWw8BRHxFnByZ5TbzMzMrD1csTCzDU6PHt28k2vOE+cd1u7YLY+/sVDeL/735wrFb1TweS8y6Oe9IUjV0Xejhqrlvb73ud5QPk/bfLBvofjX31pdpZJkNpTnvbtZ3z/vbXFXKDMzMzMzW2dusTAzMzMzqzZR1/EPteAWC7NWSPqQpFvSgOs5ku6WtEu9y2VmZmbW1bjFwqwFaarWO4EbIuLYlLY3sBXwbD3LViKpISKa6l0OMzMza53wGAuzDdlosoXpriklRMQC4DFJl0laLGmRpPEAkkZJeljSLyU9J+kSScdJmpnidkpxUyT9SNITKW6UpOsl/U7SlNK1JI2R9LikuZJuk9QvpT8v6VJJc4FjJH0ixSyQ9ECK2TjlOVPSPElH1u5pMzMzsw2RWyzMWjYEmFMh/VPAUGBvYAAwS9Ij6djewG7Aq8BzwHURsa+krwGnAWekuA8CBwBjganASOBLKa+hwIvAecDhEfF3SWcD3wAuSueviIhhkrYA5gKHRMRSSZun4+cCD0bEiZI2A2ZKuj8i/t4Jz4uZmZkVVt81JmrBFQuz4g4Cbk5dkF6R9DAwAngdmBURLwNI+gNwbzpnEVkLSMmvIiIkLQJeiYhF6ZwlwGBgO2B3YEb6R2gj4PHc+bemv/sDj0TEUoCIeDWljwHGSjoz7fcBBgG/y99IWgl8IsDAQYM69GSYmZmZgSsWZq1ZAowreM6q3HZzbr+Z93/eVlWIycc1AfdFxIQWrtNWy4OAT0fEM60FRcRkYDLA8OGN1V0IwMzMbAPXzRssPMbCrBUPAr3Tr/oASNoL+CswXlJD6op0CDCzk6/9BDBS0ofTdTduYTaqJ4BDJO2Q4kpdoaYBp6UB6Ejap5PLZ2ZmZvY+brEwa0HqqnQ0cHka4/A28DzZOIl+wAKyxYG/FRF/lrRrJ157maQTgJsl9U7J51E2G1WKmwj8XFIP4C/Ax4BvA5cDC1P6UuCfOqt8ZmZmVpzHWJhtwCLiJeAzFQ6dlR752OnA9Nz+qErHIuKEXPrzZIPEqXDsQbKxG+VlGly2/xvgN2VpbwEnV7onMzMzs2pwxcLMzAB45cbjC8VvftC3CsW/NuOyQvENPar3y15EsSFF1SxLd/8Fc130qOLzXm39+/grlm14/K43MzMzM6s2efC2mZmZmZlZm1yxsKqQ1CRpvqQlaUXob6ZBxEXyGCvpnLR9lKTdO1CO6ZIaO3rdWpG0spbXMzMzs9oSWdfHWjzqxV2hrFreioihAJK2BP4H2AT493yQpJ4RsaZSBhExlWxVaoCjgLuAp6pW4vfKk79uR/MRoIho7pySmZmZmXVtbrGwqouIv5Ct7nyqMidImirpQeABSZtL+oWkhZKeSGtFkOKuknQgMBa4LLWC7NSRcuRbBSSNkzQlbU+RdI2kJ4H/LF03HTtG0uLU6vJIrly/TK0hv5f07yl9sKRnJN0ILAYGtnLNHSQ9LmmRpO+UlfMsSbPS83FhLu/fSbo2tQLdK6lvOvZhSfenMs4tPT+V8jEzM7P66e4tFq5YWE1ExHNAA7BlShoGjIuIjwIXAvMiYi/g34Aby879LVkLwlkRMTQi/lCFIm4HHBgR3yhLPx/4eETsTVa5KdkX+DSwF3BMrrvVzsDVEbFHRLzQyvWuAH4UEXsCL5cSJY1JeewLDAWGSzokl/cPI2IPskX6Pp3Sb0rpewMHAi+3kc+7JE2UNFvS7GXLl7VSXDMzM7PWuWJh9XJfRLyatg8CfgLvrt3wD5I2qXF5bouIpgrpM4Apkr5MVjEquS8iVqT1In5Odg8AL0TEE+243kjg5rT9k1z6mPSYB8wFdiWrIAAsjYj5aXsOMFhSf2DbiLgTICLejog328jnXRExOSIaI6JxiwFbtKPYZmZm1lFSbR714jEWVhOSdgSayFaGBvh7J+XbQPYlG2BqRJzfSnh+4vo+ZccqliciTpG0H3AEMEfS8Ap55ffL82ntmpUm0hdwcUT8+H2J0mBgVS6pCehbqcyt5WNmZmZWLW6xsKqTtAVwDXBVVF6V6lHguBQ7ClgeEa+XxbwB9C8/MSKaUveooW1UKgBekbSbstmpjm5n2XeKiCdT3suAgenQx9LYkL5kA8tnFLzmDODYtH1cLn0acKKkfun626bB7xVFxBvAi5KOSvG9JX2gaD5mZmZWfR5jYdYxfZWmmwXuB+4lG0tRyQVkYwAWApcAX6gQcwtwlqR5KjZ4uyfv/dJ/DtnMUr8lN66hDZelAdaL03kLUvpM4A5gIXBHRMxu4fyWrvk14KuSFgHblhIj4l6yGbQeT8dup0KFqszxwOnp+fst8KEO5mNmZmbWYar8A7LZ+k9Sb+B/gSER8bdOzPcEoDEiTu2sPLuC4cMbY8aTLdWPbENQ9P+DzQ/6VqH412ZcVii+moreaz1/AbT1k99jXd/I/RqZM2d2zZ74/oN2jcZvXl+Ta00/Y+SciCi0jldn8BgL65bSLE0/IZuhqdMqFbZhWrW60rj+ynr3amg7qIsq+sWmaEVh7I/bM6/Be276/PC2g5L+fXsVyrvovRZ5D0Cx90E187bOU/R1emH5m4Xid9najcq2/nPFwrql1DVptyrlPQWYUo28zczMrHsS9R3/UAseY2EdIqkpjaEoPc6pd5lKJDVKmrQO50+RNK4zy9TO656RBl6X9u+WtFmty2FmZmbWEW6xsI56KyKG1rsQlaTWivVxsMAZwE+BNwEi4pP1LY6ZmZl1pm7eYOEWC+tckkZI+q2kBZJmSuovqY+k/06zK82TNDrFniDp55LukfR7Sf+Zy2dCaTYmSZfm0ldKukzSEkn3S9pX0nRJz0kam2JGSborbW8s6fpUlnmSjqxQZkm6StIzku7nvdXB
kXRYOm9Ryqd3Sn9e0sWptWa2pGGSpkn6g6RTcuefJWmWpIWSLsyV6dfpOVosabyk04FtgIckPZS7xoC0fa6kZyU9JulmSWem9OlpPAmSBkh6Pm03pOepdO2TO+P1NTMzM2uJKxbWUaXpZEuP8ZI2Am4FvhYRewOHA28BXwUiIvYEJgA3SCotFjcUGA/sCYyXNFDSNsClwKHp+AildRqAjYEHI2IPsrUtvgN8jGyNiIsqlPPcFL8vMJps+tiNy2KOBj4C7A58HjgQIJVxCjA+lb0n8JXceX9MrTaPprhxwP6kaXUljSFb7XrfdB/DJR0CfAJ4KSL2joghwD0RMQl4CRgdEaPzhVO2KN+xKY9PAiMq3Ge5k4C/RcSIFP9lSTu04zwzMzOzDnFXKOuotbpCSdoTeDkiZgGUFrmTdBBwZUp7WtILwC7ptAdKszZJegrYHvgHYHpELEvpNwGHAL8A3gHuSecuAlZFxOq0VsPgCuUcA4wt/cJPtvr1IOB3uZhDgJsjogl4SdKDKf0jwNKIeDbt30BWSbo87U/NlaNfWqzuDUmr0tiIMekxL8X1I6toPAp8P7XE3BURj1Yod97BwJ0R8WZ6Pqa2EV+6771yY0U2Tddemg+SNBGYCDBw0KB2ZGtmZmYd1aOb94VyxcLqbVVuu4m235Orc6t3N5fOj4hmSZXOFfDpiHhmnUu6tlLZm3n/fTST3YeAiyPix2sVShpG1vrwHUkPRESl1pb2WMN7LY99cukCTouIaa2dHBGTgcmQrWPRwTKYmZmZuSuUdapngK0ljQBI4yt6kv1Cf1xK24WsxaC1L/ozgY+mMQMNZN2nHu5gmaYBp0nZTwSS9qkQ8whZN6wGSVuTdZkq3c9gSR9O+8cXLMc04ERJ/dK1t5W0Zerq9WZE/BS4DBiW4t+g8urYjwBHSeorqT/wz7ljzwOlyf7zM1lNA74iqVe69i4VuoCZmZlZDUm1edSLWyyso/pKmp/bvycizpE0HrhSUl+y8RWHA1cDP0rdldYAJ0TEKrXwzo+Il5VNX/sQ2S/vv46IX3awnN8m67q0UFIPsq5A/1QWcyfZeI6ngD8Cj6dyvC3pi8BtqYI0C7imvReOiHsl7QY8nu51JfA54MNkYz2agdW8N25jMnCPpJfy4ywiYq6kW4EFwF9SOUq+B/wsdWn6dS79OrKuYXNTpWoZcBRmZmZmVaKiS86bWX1JugBYGRHf68x8hw9vjBlPro+z9FbfhrLydrV1pZW3i/LK2+aVt7ufkfs1MmfO7Jr9vr/p9rvF/mdPqcm17v3q/nMiorG1GEmfAK4AGoDrIuKSCjGfAS4AAlgQEZ9tLU+3WJiZmZmZbUBSV/Mfks2s+SIwS9LUiHgqF7Mz8K/AyIh4TdKWlXN7jysWZuuZiLig3mXY0PRs2DCGozU3F2vBfvXv7xSKn3ry/oXi9/n/Wp174H3mXDimUN6r1jQXiu/VUL0fNavdAlG0Z0JL3VTXB9W816L/Duy4ZXWHtRW51672mq5pav/nr7v9+9uj67wU+wL/GxHPAUi6BTiSrFt4yZeBH0bEawAR8Ze2Mu1er5aZmZmZmbVlW+BPuf0XU1reLsAukmZIeiJ1nWqVWyzMykjaCvgvssXuXiNbO+M/I+LOuhasjKSLgEci4v56l8XMzMzaVsPWowGS8gMnJ6cp5ovoSbYG1ihgO+ARSXtGxF9bO8HMkjSD0i+AG0oDlCRtD4wti+sZEWvqUMR3RcT5ldIlNaTF/szMzGzDtLyNwdv/BwzM7W+X0vJeBJ6MiNXAUknPklU0ZtECd4Uye79DgXci4t1pZSPihYi4UtIJkqamlbkfkNRP0gOS5kpaJOlIAEmDJT0taYqkZyXdJOnw1JT4e0n7prgLJN0g6VFJL0j6lKT/THndk1uD4nxJsyQtljQ5tybHlNLK2pKel3SppLnAMZLGSHo8le220loaZmZmVj9daB2LWcDOknaQtBFwLDC1LOYXZK0VSBpA1jXqudYydcXC7P32AOa2cnwYMC4iPgq8DRwdEcPIFtX7fulLP9laFd8Hdk2PzwIHAWcC/5bLbyeyysxY4KfAQxGxJ9kaIEekmKsiYkREDAH6svY6HCUrUlnuB84DDk/7s4FvtPP+zczMrJtLvS5OJVtQ93fAzyJiiaSLJJV6aUwDVkh6imxtsbMiYkVr+borlFkrJP2QrELwDtm0bPdFxKulw8B/SDoEaCYb9LRVOrY0IhalPJYAD0REpEUCB+cu8ZuIWJ3SG4B7Uno+brSkbwEfADYHlgC/qlDcW9Pf/YHdgRmpnrMRadG/Cvc3Efh/7N15nFxVnf//17uTkISEffuCQwgiewgh3UEhikEhLjigsoQA40TUgOOg4wx+xYHBADqo4E9BZYlOJijKJsJEyJeAQBQhhOwbi44sgqAECEhiCCH9+f1xT5Gborq7bndV9ZL3M4969L3nnnvu51ZXderU2SYD7D5sWEdPh5mZmXWSANFzpoWKiJnAzLK083PbQfbFZNVfTrpiYbapFcDxpZ2I+Fxq/isNgFqTy3sqsBPQnCoHTwKD0rF1uXytuf1WNn3frUvXaZW0PjbOH9gK9Jc0iGzl8paIeDotjjeIykqxiawCNLGjm00DuaZCtkBeR/nNzMzM2uKuUGabugcYJOmzubQt28i7DfB8qlQcCexRh3hKlYgX0jiJE6o450FgrKR3AEgaImmfOsRmZmZmBTSpMY/u4hYLs5zUXemjwHdS96OVZC0BXyYb35D3U+CXqRvTfODROsTzsqQfAsuBP9POTAy5c1ZKmgRcJ2lgSj4P+F2t4zMzMzMrccXCrExEPEc2O0Il03P5XgAOayPfiFy+SbntJ0vHylfQjoihue0pue3zyCoG5XHmyx1eduweYEwbsZmZmZnVnCsWZmYd6Ned7coN1FTwPnfcamDHmbpg0UUfqDrvdmP+uVDZq+Z9v2g4vVYDF+TqdvW81+J/B+r7vPfm32v/fptpT3ypV//eqrGZ/mbNzMzMzKyWXLEwK0DSKZI8L6uZmZkV1oMWyKsLVyzMEkm7SPqZpMclLUgrV38sd/xTwM4R8cdujHGSpIp9OCTNlLRto2MyMzMzA4+xMAMgrZh9K3BNRJyS0vYgWxEbgIj4rxpfs19EbKhVeRHx4VqVZWZmZrUloMljLMw2C+8DXo+Iq0oJEfFURHxPUj9Jl0iaJ2mppDMgq4yk9OWSlkmakNKbJF0h6VFJd6WWhBPSsSclfVPSQuBESZ9J5S6RdLOkLVO+6ZKukjRf0u8kfSQX626S7pD0e0nfKiWmsndM26dJekjSYklXS+pX92fQzMzMNmtusTDLHAgsbOPYp4BXImJMWhfifkl3AqOBUcDBwI7APEm/AcYCw4EDgJ2BR4BpufJejIjRAJJ2iIgfpu2vpWt9L+UbDhwK7AXcW1rwLl3zELJVux+T9L2IeLpUuKT9gQnA2LR43xVkq4T/uDNPjJmZmdVGH2+wcMXCrBJJPwDeDbwOPAWMLLU6kK24vXc6fl3qzvQXSb8mWzvi3cBNEdEK/FnSvWX
F35DbHpEqFNsCQ4FZuWM3pjJ+L+lxYL+UfndEvJLifJhsxe+nc+e9H2gmq+hAtrDf823c52RgMsDuwzwm3czMzDrPFQuzzArg+NJORHwudSuaD/wROCsi8h/6kfShTl5rTW57OvDRiFiSVsselzsWZeeV9tfl0jbw1vexyMaKfKWjQCJiKjAVoLm5pfx6ZmZmVkNex8Js83APMEjSZ3NpW6afs4DPShoAIGkfSUOA+4AJaQzGTsARwEPA/cDxaazFLmxaWSi3FfBcKvvUsmMnpjL2At4OPFblvdwNnCBp5xTv9mkgupmZmVnduMXCDIiIkPRR4DuS/i+wkqxl4cvATWTjHRam2aNWAh8FbgEOA5aQtSb834j4s6SbybojPUzWRWkh8Eobl/4PYG4qcy5ZRaPkj2QVla2BMyPitWq+6YiIhyWdB9wpqQlYD3yOrEuXmZmZdYPuXmOiEVyxMEsi4jng5DYO/3t6lPtSeuTLaZV0dkSslrQDWeVgWTo2vCzvlcCVbVzzVxFxZln+6WTdp0r7H8ltD89t38CmYznMzMzM6soVC7P6uC0tVrcFcFFE/Lm7A+pO69YXW65ji/7Femn29T6rjdLaWmyYzV/Xri+Uf9shWxTKv6FAPM/df1mhsg8+945C+RdcOL5Q/v79qn8NRxR73jen13tPem56UixF9bTY17/RWnXeAQX/P+jp+vo6Fq5YmNVBRIzr4vmTahOJmZmZWWO4YmFmZmZm1gB9u73CPngBlAAAIABJREFUs0KZ1Y2k1WX7kyR9P22fKekT7Zw7TtLhVVyj3XLMzMzMGsUtFmbdICKu6iDLOGA18EBnypHUPyLe6Fx0ZmZmVg89aexNPbjFwqwbSJoi6ey0/XlJD0taKul6ScOBM4EvSlos6T2Shku6J+W5W9KwCuXMlvRdSfOBL0hqlvRrSQskzZK0azfdrpmZmW0G3GJhVj+DJS3O7W8PzKiQ7xxgz4hYJ2nbiHhZ0lXA6oi4FEDSL8lW075G0unA5WRraZTbIiJa0oJ7vwaOi4iVkiYAXwdOz2eWNBmYDLD7sGFdu1szMzPbrLliYVY/ayNiVGlH0iSgpUK+pcBPJd0K3NpGWYcBH0/bPwG+1Ua+0toV+wIjgLtSs2s/4LnyzBExFZgK0NzcUmw+QjMzM6uagKa+3RPKFQuzHuAY4Ajg74FzJR3UhbLWpJ8CVkTEYV0NzszMzKwaHmNh1o0kNQG7R8S9wJeBbYChwKvAVrmsD7BxVfBTgfs6KPoxYCdJh6XrDJB0YC1jNzMzswIk1KBHd3HFwqx79QOulbQMWARcHhEvA78EPlYavA2cBXxS0lLgH4AvtFdoRLwOnAB8U9ISYDHQ4fS1ZmZmZp3lrlBmdRIRQ8v2pwPT0/aU3KF3Vzj3d8DIsuT3Vcg3Jbc9ruzYYrIuVmZmZtYD9PHZZl2xMLNMABHVj98u0tQ6cEC/TkRktVDkd9pUcFThtkO2KBpOIf0KxNM0oFgD/JKvf7BQ/u0O/7dC+Vc98O2q8/b1ee27oic9Nz0plqJ6WuwD+rvDTF/VZsVC0vfIPmtUFBGfr0tEZmZmZmZ9UE+r5NVaey0W8xsWhVkFklbnuxOVpmuNiH+WdCbwt4j4cRvnjgNej4h2V67uqJxak7QtcEpEXFHDMmcDZ0fEfElPkj1HL9SqfDMzM7NqtFmxiIhr8vuStoyIv9U/JLOORcRVHWQZB6wmm02pcDmS+kfEG52Lrl3bAv8EFKpYSOoXERvqEI+ZmZk1wOawjkWHndwkHSbpYeDRtH+wpJp922rWGZKmSDo7bX9e0sOSlkq6XtJw4Ezgi6VZlSQNl3RPynO3pGEVypkt6buS5gNfkNQs6deSFkiaJWnXCnFMl3S5pAckPS7phJQ+NF1noaRlko5Lp3wD2CvFdYmkcZJuy5X3/dQyg6QnJX1T0kLgREnjJc1JZd4kaWh5PGWxnSbpoXStqyV5oIOZmZnVTTWDt78LfACYARARSyR5phlrhMGSFuf2tye9DsucA+wZEeskbRsRL0u6ClgdEZcCSPolcE1EXCPpdOBy4KMVytoiIlokDQB+DRwXESslTQC+Dpxe4ZxdyWZ22i/F93PgNeBjEfFXSTsCD0qakWIdUVqRO3XZas+LETE6lfEL4KiIWCPpy8C/AhdWOknS/sAEYGxErE9fBpwKNKTLl5mZmb3V5jzG4k0R8XTZE+EuGdYIa0sfwGHjGIsK+ZYCP5V0K3BrG2UdBnw8bf8E+FYb+W5IP/cFRgB3pdd+P+C5Ns65NSJagYcl7VIKF/jPVAlvBd4G7NLG+e0pxfMu4ADg/hTPFsCcds57P9AMzEv5BwPPl2eSNBmYDLD7sGGdCM/MzMwsU03F4mlJhwORvsX9AvBIfcMyK+QYsvUa/h44V9JBXShrTfopYEVEHFbFOety26Ua+KnATkBzajF4EhhU4dw32LRLYnmefDx3RcTEKuIp5b8mIr7SXqaImApMBRjd3FL9vKRmZmZWWN9ur6hu5e0zgc+RfeP6LDAq7Zt1O0lNwO4RcS/wZWAbYCjwKrBVLusDwMlp+1Tgvg6KfgzYSdJh6ToDJB1YILRtgOdTpeJIYI+UXh7XU8ABkgamGaPe30Z5DwJjJb0jxTNE0j7tXP9u4ARJO6f820vao538ZmZmZl3SYYtFmrby1AbEYtYZ/YBrJW1D9kXA5WmMxS+Bn6dB02elx39L+hKwEvhke4VGxOtpIPblqez+ZOONVlQZ10+BX0paRjZ186Op3Bcl3S9pOfD/IuJLkm4ElgNPAIvaiGdl6gp2naSBKfk84Hdt5H9Y0nnAnanytZ7sC4GnqozfzMzMakiCpj4+xkIdrcoq6e3AZWR9vIOsX/cXI+Lx+odnZo0yurkl7n9wXtX5+/oAtL6iXqup9zRF7hOK32s9V942s+4x9p0tLFgwv2F/+Hba68A47j9v6DhjDfzXyQctiIhK41LrqpquUD8DbiSb+WY34CbgunoGZWZmZmZmvUs1g7e3jIif5PavTd1JzKwPEb37G+t62tBav3Ht/eq8WlKR32k97xOK32uReOpZNhRvgdjt9J9VnffZaacUKruoovda79fk5uKVv60vlH+bLQfUKRLrSfr6f7NtViwkbZ82/5+kc4DrybpCTQBmNiA2MzMzMzPrJdprsVhAVpEo1a3OyB0LoN1pLM2sfiStjoh2V942MzOznqWv9wxos2IREXs2MhAzMzMzM+u9qlp5W9IIslV/31y8KyJ+XK+gzKw6abzTScBA4JaI+Kqk4cBtETEi5TkbGBoRU7orTjMzM9uMx1iUSPoqMI6sYjET+BDwW8AVC7NuJGk8sDdwKFmXxRmSjgD+2K2BmZmZ2WapmhaLE4CDgUUR8UlJuwDX1jcsM6vC+PQoLao3lKyiUXXFQtJkYDLA7sOG1To+MzMzS4T6/AJ51VQs1kZEq6Q3JG0NPA/sXue4zKxjAi6OiKs3SZT+jk3XqBlEGyJiKjAVoLm5pb5zjZqZmVmfVs0CefMlbQv8kGymqIVkq2+bWfeaBZwuaSiApL
[... base64-encoded PNG data elided: matplotlib figure output ...]",
+            "text/plain": [
+              "<Figure size 720x720 with 2 Axes>"
+            ]
+          },
+          "metadata": {
+            "needs_background": "light"
+          }
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "5KnaRf855lsv"
+      },
+      "source": [
+        "# ajouter le code pour faire la prediction avec les modèles BERT\n",
+        "\n",
+        "\n"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
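+    {
+      "cell_type": "code",
+      "metadata": {},
+      "source": [
+        "# Minimal sketch, not part of the original notebook: predict domains with a\n",
+        "# fine-tuned BERT model. The model path, batch size and max_length are assumptions;\n",
+        "# `data_eval` is assumed to hold the article texts prepared earlier.\n",
+        "import torch\n",
+        "from transformers import BertTokenizer, BertForSequenceClassification\n",
+        "\n",
+        "bert_model_path = \"drive/MyDrive/models/bert_edda\"  # hypothetical location of the fine-tuned model\n",
+        "tokenizer = BertTokenizer.from_pretrained(bert_model_path)\n",
+        "model = BertForSequenceClassification.from_pretrained(bert_model_path)\n",
+        "model.eval()\n",
+        "\n",
+        "bert_pred_ids = []\n",
+        "with torch.no_grad():\n",
+        "    for i in range(0, len(data_eval), 32):\n",
+        "        batch = tokenizer(list(data_eval[i:i+32]), truncation=True, padding=True,\n",
+        "                          max_length=256, return_tensors=\"pt\")\n",
+        "        logits = model(**batch).logits\n",
+        "        bert_pred_ids.extend(logits.argmax(dim=-1).tolist())\n",
+        "\n",
+        "# the predicted ids still have to be mapped back to domain names with the label\n",
+        "# encoding used during fine-tuning (also an assumption of this sketch)"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },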
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "llGjT-xsUvR4"
+      },
+      "source": [
+        ""
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "3dGPXQSLUvUn"
+      },
+      "source": [
+        ""
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "NQyuDQw_JOwB"
+      },
+      "source": [
+        "y_pred = clf.predict(vec_data)"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "zgNKwbp_eYos"
+      },
+      "source": [
+        ""
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "dZGxg_OreYrO"
+      },
+      "source": [
+        "df_test = df.copy()"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "g8FfxZ7bKwCe"
+      },
+      "source": [
+        "df_test['classification'] = y_pred"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "hRcYKfdIK0Tm",
+        "outputId": "db988435-9716-4cf5-a754-04bc5356369f"
+      },
+      "source": [
+        "df_test.shape"
+      ],
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "(61738, 14)"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 29
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "nlV3yXcCMb8v"
+      },
+      "source": [
+        "df_test.head()"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "GuotNONXMXgt",
+        "outputId": "5fb34593-c97d-4401-a617-b25aa8f7e49c"
+      },
+      "source": [
+        "df_test.loc[(df_test['ensemble_domaine_enccre'] != df_test['classification'])].shape"
+      ],
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "(8597, 14)"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 30
+        }
+      ]
+    },
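+    {
+      "cell_type": "code",
+      "metadata": {},
+      "source": [
+        "# Sketch, not original code: the two shapes above imply an overall agreement of\n",
+        "# roughly 1 - 8597/61738 ≈ 0.86 between the classifier and the ENCCRE domains.\n",
+        "(df_test['ensemble_domaine_enccre'] == df_test['classification']).mean()"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },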
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "raw7PJrtMsDx"
+      },
+      "source": [
+        "\n",
+        "\n",
+        "df_test['class_is_true'] = df_test['ensemble_domaine_enccre'] == df_test['classification']"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 310
+        },
+        "id": "qDD13-3dOSgK",
+        "outputId": "a309b603-8179-48ff-ad55-f3599f0dc699"
+      },
+      "source": [
+        "df_test.head()"
+      ],
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/html": [
+              "<div>\n",
+              "<style scoped>\n",
+              "    .dataframe tbody tr th:only-of-type {\n",
+              "        vertical-align: middle;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe tbody tr th {\n",
+              "        vertical-align: top;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe thead th {\n",
+              "        text-align: right;\n",
+              "    }\n",
+              "</style>\n",
+              "<table border=\"1\" class=\"dataframe\">\n",
+              "  <thead>\n",
+              "    <tr style=\"text-align: right;\">\n",
+              "      <th></th>\n",
+              "      <th>volume</th>\n",
+              "      <th>numero</th>\n",
+              "      <th>head</th>\n",
+              "      <th>normClass</th>\n",
+              "      <th>classEDdA</th>\n",
+              "      <th>author</th>\n",
+              "      <th>id_enccre</th>\n",
+              "      <th>domaine_enccre</th>\n",
+              "      <th>ensemble_domaine_enccre</th>\n",
+              "      <th>content</th>\n",
+              "      <th>contentWithoutClass</th>\n",
+              "      <th>firstParagraph</th>\n",
+              "      <th>nb_word</th>\n",
+              "      <th>classification</th>\n",
+              "      <th>class_is_true</th>\n",
+              "    </tr>\n",
+              "  </thead>\n",
+              "  <tbody>\n",
+              "    <tr>\n",
+              "      <th>3</th>\n",
+              "      <td>1</td>\n",
+              "      <td>5</td>\n",
+              "      <td>A, a &amp; a</td>\n",
+              "      <td>Grammaire</td>\n",
+              "      <td>ordre Encyclopéd. Entend. Science de l'homme, ...</td>\n",
+              "      <td>Dumarsais5</td>\n",
+              "      <td>v1-1-0</td>\n",
+              "      <td>grammaire</td>\n",
+              "      <td>Grammaire</td>\n",
+              "      <td>A, a &amp; a s.m. (ordre Encyclopéd.\\nEntend. Scie...</td>\n",
+              "      <td>A, a &amp; a s.m. (ordre Encyclopéd.\\nEntend. Scie...</td>\n",
+              "      <td>A, a &amp; a s.m. (ordre Encyclopéd.\\nEntend. Scie...</td>\n",
+              "      <td>711</td>\n",
+              "      <td>Grammaire</td>\n",
+              "      <td>True</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>4</th>\n",
+              "      <td>1</td>\n",
+              "      <td>6</td>\n",
+              "      <td>A</td>\n",
+              "      <td>unclassified</td>\n",
+              "      <td>unclassified</td>\n",
+              "      <td>Dumarsais5</td>\n",
+              "      <td>v1-1-1</td>\n",
+              "      <td>grammaire</td>\n",
+              "      <td>Grammaire</td>\n",
+              "      <td>A, mot, est 1. la troisieme personne du présen...</td>\n",
+              "      <td>A, mot, est 1. la troisieme personne du présen...</td>\n",
+              "      <td>A, mot, est 1. la troisieme personne du présen...</td>\n",
+              "      <td>238</td>\n",
+              "      <td>Grammaire</td>\n",
+              "      <td>True</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>5</th>\n",
+              "      <td>1</td>\n",
+              "      <td>7</td>\n",
+              "      <td>A</td>\n",
+              "      <td>unclassified</td>\n",
+              "      <td>unclassified</td>\n",
+              "      <td>Dumarsais</td>\n",
+              "      <td>v1-1-2</td>\n",
+              "      <td>grammaire</td>\n",
+              "      <td>Grammaire</td>\n",
+              "      <td>A, préposition vient du latin à, à dextris, à ...</td>\n",
+              "      <td>A, préposition vient du latin à, à dextris, à ...</td>\n",
+              "      <td>A, préposition vient du latin à, à dextris, à ...</td>\n",
+              "      <td>1980</td>\n",
+              "      <td>Grammaire</td>\n",
+              "      <td>True</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>8</th>\n",
+              "      <td>1</td>\n",
+              "      <td>10</td>\n",
+              "      <td>A, numismatique ou monétaire</td>\n",
+              "      <td>unclassified</td>\n",
+              "      <td>unclassified</td>\n",
+              "      <td>Mallet</td>\n",
+              "      <td>v1-1-5</td>\n",
+              "      <td>numismatique</td>\n",
+              "      <td>Médailles</td>\n",
+              "      <td>A, numismatique ou monétaire, sur le revers de...</td>\n",
+              "      <td>A, numismatique ou monétaire, sur le revers de...</td>\n",
+              "      <td>A, numismatique ou monétaire, sur le revers de...</td>\n",
+              "      <td>112</td>\n",
+              "      <td>Médailles</td>\n",
+              "      <td>True</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>9</th>\n",
+              "      <td>1</td>\n",
+              "      <td>11</td>\n",
+              "      <td>A, lapidaire</td>\n",
+              "      <td>unclassified</td>\n",
+              "      <td>unclassified</td>\n",
+              "      <td>Mallet</td>\n",
+              "      <td>v1-1-6</td>\n",
+              "      <td>inscriptions</td>\n",
+              "      <td>Histoire</td>\n",
+              "      <td>A, lapidaire, dans les anciennes inscriptions ...</td>\n",
+              "      <td>A, lapidaire, dans les anciennes inscriptions ...</td>\n",
+              "      <td>A, lapidaire, dans les anciennes inscriptions ...</td>\n",
+              "      <td>80</td>\n",
+              "      <td>Histoire</td>\n",
+              "      <td>True</td>\n",
+              "    </tr>\n",
+              "  </tbody>\n",
+              "</table>\n",
+              "</div>"
+            ],
+            "text/plain": [
+              "   volume  numero  ... classification class_is_true\n",
+              "3       1       5  ...      Grammaire          True\n",
+              "4       1       6  ...      Grammaire          True\n",
+              "5       1       7  ...      Grammaire          True\n",
+              "8       1      10  ...      Médailles          True\n",
+              "9       1      11  ...       Histoire          True\n",
+              "\n",
+              "[5 rows x 15 columns]"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 32
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "qsAd_w_iO9LZ"
+      },
+      "source": [
+        "df_test.to_csv('result_classification_sgdtfidf_21.11.24.csv', index=False)"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "H4XfLD3EaaTe",
+        "outputId": "50c60efb-6670-4bd2-8c2d-f7c309fb0932"
+      },
+      "source": [
+        "df_test.loc[(df_test['ensemble_domaine_enccre'] == 'Géographie') & (df_test['class_is_true'] == False )].shape"
+      ],
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "(95, 15)"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 32
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "J3Nbs6zMCnWh"
+      },
+      "source": [
+        ""
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "s6xTROC7CnZA"
+      },
+      "source": [
+        "## test de sortie des scores (proba) pour chaque classe"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "1TyETcoyCnbU"
+      },
+      "source": [
+        "y_pred_proba = clf.predict_proba(vec_data)"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "2W4i8nrLC61s",
+        "outputId": "86373732-4a06-487f-db1b-0a2e867974fa"
+      },
+      "source": [
+        "clf.classes_"
+      ],
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "array(['Agriculture - Economie rustique', 'Anatomie', 'Antiquité',\n",
+              "       'Architecture', 'Arts et métiers', 'Beaux-arts',\n",
+              "       'Belles-lettres - Poésie', 'Blason', 'Caractères', 'Chasse',\n",
+              "       'Chimie', 'Commerce', 'Droit - Jurisprudence',\n",
+              "       'Economie domestique', 'Grammaire', 'Géographie', 'Histoire',\n",
+              "       'Histoire naturelle', 'Jeu', 'Marine', 'Maréchage - Manège',\n",
+              "       'Mathématiques', 'Mesure', 'Militaire (Art) - Guerre - Arme',\n",
+              "       'Minéralogie', 'Monnaie', 'Musique', 'Médailles',\n",
+              "       'Médecine - Chirurgie', 'Métiers', 'Pharmacie', 'Philosophie',\n",
+              "       'Physique - [Sciences physico-mathématiques]', 'Politique',\n",
+              "       'Pêche', 'Religion', 'Spectacle', 'Superstition'], dtype='<U43')"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 47
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 73
+        },
+        "id": "tiecHJyTC66o",
+        "outputId": "bf846387-9964-418d-d122-9bc032c60266"
+      },
+      "source": [
+        "data_eval[0]"
+      ],
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "application/vnd.google.colaboratory.intrinsic+json": {
+              "type": "string"
+            },
+            "text/plain": [
+              "\"\\nLes pins ont encore le mérite de l'agrément ; ils\\nconservent pendant toute l'année leurs feuilles, qui\\ndans la plûpart des especes sont d'une très-belle verdure.\\nCes arbres sont d'une belle stature, & d'un accroissement \\nrégulier ; ils ne sont sujets ni aux insectes,\\n\\n\\nni à aucune maladie ; enfin plusieurs de ces pins sont\\nde la plus belle apparence au printems, par la couleur \\nvive des chatons dont ils sont chargés. Voyez sur\\nla culture du pin, le dictionnaire des Jardiniers de\\nM. Miller, & pour tous égards, le traité des arbres\\nde M. Duhamel, qui est entré dans des détails intéressans \\nsur cet arbre.\\n\""
+            ]
+          },
+          "metadata": {},
+          "execution_count": 44
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "cFkSivM2Cndt",
+        "outputId": "8fda16d7-04cc-4609-8fa6-7995a4ffd01c"
+      },
+      "source": [
+        ""
+      ],
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "array([0.38404935, 0.        , 0.        , 0.        , 0.        ,\n",
+              "       0.01376867, 0.10553505, 0.        , 0.        , 0.        ,\n",
+              "       0.        , 0.        , 0.        , 0.        , 0.        ,\n",
+              "       0.        , 0.00485592, 0.47335577, 0.        , 0.        ,\n",
+              "       0.        , 0.        , 0.        , 0.        , 0.        ,\n",
+              "       0.        , 0.        , 0.        , 0.01843524, 0.        ,\n",
+              "       0.        , 0.        , 0.        , 0.        , 0.        ,\n",
+              "       0.        , 0.        , 0.        ])"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 42
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 36
+        },
+        "id": "3dG5qbPoCngN",
+        "outputId": "0ad887fe-dd94-4d4d-856a-b45b8091d650"
+      },
+      "source": [
+        "clf.classes_[np.argmax(y_pred_proba[0], axis=0)]"
+      ],
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "application/vnd.google.colaboratory.intrinsic+json": {
+              "type": "string"
+            },
+            "text/plain": [
+              "'Histoire naturelle'"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 49
+        }
+      ]
+    },
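+    {
+      "cell_type": "code",
+      "metadata": {},
+      "source": [
+        "# Sketch, not original code: pair each class label with its probability score so\n",
+        "# the top predictions for this article can be read directly.\n",
+        "top_scores = sorted(zip(clf.classes_, y_pred_proba[0]), key=lambda t: t[1], reverse=True)\n",
+        "top_scores[:3]"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },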
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "qsrY1g6mCniF"
+      },
+      "source": [
+        ""
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "gFywr71BCnkt"
+      },
+      "source": [
+        ""
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 645
+        },
+        "id": "_Gews6OdbN3d",
+        "outputId": "03b7bb01-51be-4d35-f090-84f02b697366"
+      },
+      "source": [
+        "df_test.loc[(df_test['ensemble_domaine_enccre'] == 'Géographie') & (df_test['class_is_true'] == False )].groupby(by=[\"classification\"]).size().reset_index(name='counts').sort_values(by='counts', ascending=False)"
+      ],
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/html": [
+              "<div>\n",
+              "<style scoped>\n",
+              "    .dataframe tbody tr th:only-of-type {\n",
+              "        vertical-align: middle;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe tbody tr th {\n",
+              "        vertical-align: top;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe thead th {\n",
+              "        text-align: right;\n",
+              "    }\n",
+              "</style>\n",
+              "<table border=\"1\" class=\"dataframe\">\n",
+              "  <thead>\n",
+              "    <tr style=\"text-align: right;\">\n",
+              "      <th></th>\n",
+              "      <th>classification</th>\n",
+              "      <th>counts</th>\n",
+              "    </tr>\n",
+              "  </thead>\n",
+              "  <tbody>\n",
+              "    <tr>\n",
+              "      <th>8</th>\n",
+              "      <td>Histoire</td>\n",
+              "      <td>19</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>9</th>\n",
+              "      <td>Histoire naturelle</td>\n",
+              "      <td>11</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>0</th>\n",
+              "      <td>Antiquité</td>\n",
+              "      <td>10</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>3</th>\n",
+              "      <td>Belles-lettres - Poésie</td>\n",
+              "      <td>9</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>18</th>\n",
+              "      <td>Religion</td>\n",
+              "      <td>9</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>17</th>\n",
+              "      <td>Physique - [Sciences physico-mathématiques]</td>\n",
+              "      <td>8</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>6</th>\n",
+              "      <td>Droit - Jurisprudence</td>\n",
+              "      <td>5</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>5</th>\n",
+              "      <td>Commerce</td>\n",
+              "      <td>4</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>7</th>\n",
+              "      <td>Grammaire</td>\n",
+              "      <td>4</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>16</th>\n",
+              "      <td>Philosophie</td>\n",
+              "      <td>3</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>10</th>\n",
+              "      <td>Marine</td>\n",
+              "      <td>2</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>11</th>\n",
+              "      <td>Mathématiques</td>\n",
+              "      <td>2</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>14</th>\n",
+              "      <td>Médailles</td>\n",
+              "      <td>2</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>15</th>\n",
+              "      <td>Médecine - Chirurgie</td>\n",
+              "      <td>2</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>4</th>\n",
+              "      <td>Chimie</td>\n",
+              "      <td>1</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>2</th>\n",
+              "      <td>Beaux-arts</td>\n",
+              "      <td>1</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>1</th>\n",
+              "      <td>Architecture</td>\n",
+              "      <td>1</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>12</th>\n",
+              "      <td>Militaire (Art) - Guerre - Arme</td>\n",
+              "      <td>1</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>13</th>\n",
+              "      <td>Musique</td>\n",
+              "      <td>1</td>\n",
+              "    </tr>\n",
+              "  </tbody>\n",
+              "</table>\n",
+              "</div>"
+            ],
+            "text/plain": [
+              "                                 classification  counts\n",
+              "8                                      Histoire      19\n",
+              "9                            Histoire naturelle      11\n",
+              "0                                     Antiquité      10\n",
+              "3                       Belles-lettres - Poésie       9\n",
+              "18                                     Religion       9\n",
+              "17  Physique - [Sciences physico-mathématiques]       8\n",
+              "6                         Droit - Jurisprudence       5\n",
+              "5                                      Commerce       4\n",
+              "7                                     Grammaire       4\n",
+              "16                                  Philosophie       3\n",
+              "10                                       Marine       2\n",
+              "11                                Mathématiques       2\n",
+              "14                                    Médailles       2\n",
+              "15                         Médecine - Chirurgie       2\n",
+              "4                                        Chimie       1\n",
+              "2                                    Beaux-arts       1\n",
+              "1                                  Architecture       1\n",
+              "12              Militaire (Art) - Guerre - Arme       1\n",
+              "13                                      Musique       1"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 39
+        }
+      ]
+    },
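+    {
+      "cell_type": "code",
+      "metadata": {},
+      "source": [
+        "# Sketch, not in the original notebook: generalise the per-domain breakdown above\n",
+        "# into a full table of true vs. predicted domains (assumes pandas is imported as pd).\n",
+        "confusion = pd.crosstab(df_test['ensemble_domaine_enccre'], df_test['classification'])\n",
+        "confusion"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },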
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "IF_N5qRqdsmj"
+      },
+      "source": [
+        ""
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "C_OcQ-uudso3"
+      },
+      "source": [
+        ""
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "dgFIEa0Pdsre"
+      },
+      "source": [
+        ""
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "tHX62GU4dsue"
+      },
+      "source": [
+        ""
+      ],
+      "execution_count": null,
+      "outputs": []
+    }
+  ]
+}
\ No newline at end of file
-- 
GitLab