From c33bee9cde56ea9fc00a75acae1640802974a08a Mon Sep 17 00:00:00 2001 From: lmoncla <moncla.ludovic@gmail.com> Date: Thu, 6 Jan 2022 10:38:26 +0100 Subject: [PATCH] add notebooks --- .gitignore | 1 + notebooks/CorpusTEI_EDdA_to_dataframe.ipynb | 5646 +++++++++++++++++ .../EDdA_Classification_BertFineTuning.ipynb | 4421 +++++++++++++ .../EDdA_Classification_ClassicModels.ipynb | 861 +++ .../EDdA_Classification_DeepLearning.ipynb | 1351 ++++ .../EDdA_Classification_DeepLearning_2.ipynb | 1349 ++++ ...ssification_Generate_ConfusionMatrix.ipynb | 1181 ++++ 7 files changed, 14810 insertions(+) create mode 100644 notebooks/CorpusTEI_EDdA_to_dataframe.ipynb create mode 100644 notebooks/EDdA_Classification_BertFineTuning.ipynb create mode 100644 notebooks/EDdA_Classification_ClassicModels.ipynb create mode 100644 notebooks/EDdA_Classification_DeepLearning.ipynb create mode 100644 notebooks/EDdA_Classification_DeepLearning_2.ipynb create mode 100644 notebooks/EDdA_Classification_Generate_ConfusionMatrix.ipynb diff --git a/.gitignore b/.gitignore index 71a9a39..03156e2 100644 --- a/.gitignore +++ b/.gitignore @@ -12,3 +12,4 @@ dataframe_with_domaine_enccre.csv dataframe_with_normClass_artfl.csv *.pkl .DS_Store +.DS_Store diff --git a/notebooks/CorpusTEI_EDdA_to_dataframe.ipynb b/notebooks/CorpusTEI_EDdA_to_dataframe.ipynb new file mode 100644 index 0000000..a1321a2 --- /dev/null +++ b/notebooks/CorpusTEI_EDdA_to_dataframe.ipynb @@ -0,0 +1,5646 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "metallic-shelf", + "metadata": {}, + "source": [ + "# Préparation du corpus EDdA pour la classification en domaine" + ] + }, + { + "cell_type": "markdown", + "id": "designing-advice", + "metadata": {}, + "source": [ + "## Preparing data" + ] + }, + { + "cell_type": "markdown", + "id": "floppy-fleet", + "metadata": {}, + "source": [ + "### Import des librairies" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "appreciated-victim", + "metadata": {}, + 
"outputs": [], + "source": [ + "import os\n", + "from bs4 import BeautifulSoup\n", + "import pandas as pd\n", + "import numpy as np\n", + "\n", + "import urllib, json\n", + "from urllib.request import urlopen" + ] + }, + { + "cell_type": "markdown", + "id": "framed-fossil", + "metadata": {}, + "source": [ + "### Parsing des articles TEI" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "suburban-honduras", + "metadata": {}, + "outputs": [], + "source": [ + "input_path = \"/Users/lmoncla/Documents/Data/Corpus/EDDA/Alice/EDdA/\"" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "scenic-vermont", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Volume : 11\n", + "Volume : 16\n", + "Volume : 17\n", + "Volume : 10\n", + "Volume : 5\n", + "Volume : 2\n", + "Volume : 3\n", + "Volume : 4\n", + "Volume : 15\n", + "Volume : 12\n", + "Volume : 13\n", + "Volume : 14\n", + "Volume : 1\n", + "Volume : 6\n", + "Volume : 8\n", + "Volume : 9\n", + "Volume : 7\n" + ] + } + ], + "source": [ + "# récupération dans une liste des métadonnées (volume, numéro, nom de l'article, classe et auteur) à partir des fichiers TEI\n", + "data = []\n", + "\n", + "for tome in os.listdir(input_path):\n", + " volume = tome[1:]\n", + " print(\"Volume : \", volume)\n", + " \n", + " for article in os.listdir(input_path + tome +\"/\"):\n", + " #print(\"Article : \", article[7:-4])\n", + " numero = article[7:-4]\n", + " extension = article[-4:]\n", + " if extension == '.tei':\n", + "\n", + " try:\n", + " soup = BeautifulSoup(open(input_path+tome+\"/\"+article))\n", + "\n", + " head = soup.find(type=\"head\")\n", + " author = soup.find(type=\"author\")\n", + " normclass = soup.find(type=\"normclass\")\n", + " classEDdA = soup.find(type=\"class\")\n", + " \n", + " #print(volume, numero, head.get('value'), normclass.get('value'), author.get('value'))\n", + " data.append([int(volume), int(numero), head.get('value').strip(), 
normclass.get('value').strip(), classEDdA.get('value').strip(), author.get('value').strip()])\n", + " \n", + " except AttributeError as e:\n", + " #print('Volume : ', volume, ' Numéro : ', numero)\n", + " #print('Error : ' + str(e))\n", + " pass" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "excess-waterproof", + "metadata": {}, + "outputs": [], + "source": [ + "# transformation de la liste en dataframe\n", + "df = pd.DataFrame(data, columns=['volume', 'numero', 'head', 'normClass', 'classEDdA', 'author'])\n", + "df = df.sort_values(['volume', 'numero']).reset_index(drop = True)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "blocked-reading", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>volume</th>\n", + " <th>numero</th>\n", + " <th>head</th>\n", + " <th>normClass</th>\n", + " <th>classEDdA</th>\n", + " <th>author</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>45529</th>\n", + " <td>11</td>\n", + " <td>2501</td>\n", + " <td>OPICIENS, les</td>\n", + " <td>Géographie ancienne</td>\n", + " <td>Géog. anc.</td>\n", + " <td>unsigned</td>\n", + " </tr>\n", + " <tr>\n", + " <th>63464</th>\n", + " <td>15</td>\n", + " <td>1971</td>\n", + " <td>SOUSA, Province de, ou Souse</td>\n", + " <td>Géographie moderne</td>\n", + " <td>Géog. mod.</td>\n", + " <td>unsigned</td>\n", + " </tr>\n", + " <tr>\n", + " <th>38896</th>\n", + " <td>9</td>\n", + " <td>4159</td>\n", + " <td>Maison</td>\n", + " <td>Histoire moderne</td>\n", + " <td>Hist. 
mod.</td>\n", + " <td>unsigned</td>\n", + " </tr>\n", + " <tr>\n", + " <th>52378</th>\n", + " <td>13</td>\n", + " <td>522</td>\n", + " <td>PORTO-FERRAIO</td>\n", + " <td>Géographie moderne</td>\n", + " <td>Geog. mod.</td>\n", + " <td>unsigned</td>\n", + " </tr>\n", + " <tr>\n", + " <th>62792</th>\n", + " <td>15</td>\n", + " <td>1299</td>\n", + " <td>SNOWDON-HILLS</td>\n", + " <td>Géographie moderne</td>\n", + " <td>Géog. mod.</td>\n", + " <td>Jaucourt</td>\n", + " </tr>\n", + " <tr>\n", + " <th>54108</th>\n", + " <td>13</td>\n", + " <td>2252</td>\n", + " <td>PULO-WAY</td>\n", + " <td>Géographie moderne</td>\n", + " <td>Geog. mod.</td>\n", + " <td>Jaucourt</td>\n", + " </tr>\n", + " <tr>\n", + " <th>62965</th>\n", + " <td>15</td>\n", + " <td>1472</td>\n", + " <td>Solide</td>\n", + " <td>Anatomie</td>\n", + " <td>en Anatomie</td>\n", + " <td>unsigned</td>\n", + " </tr>\n", + " <tr>\n", + " <th>61463</th>\n", + " <td>14</td>\n", + " <td>5167</td>\n", + " <td>SÉMI-PÉLAGIANISME</td>\n", + " <td>Histoire ecclésiastique</td>\n", + " <td>Hist. 
eccles.</td>\n", + " <td>Jaucourt</td>\n", + " </tr>\n", + " <tr>\n", + " <th>29174</th>\n", + " <td>7</td>\n", + " <td>1711</td>\n", + " <td>GAS</td>\n", + " <td>Chimie</td>\n", + " <td>Chim.</td>\n", + " <td>Venel</td>\n", + " </tr>\n", + " <tr>\n", + " <th>21200</th>\n", + " <td>4</td>\n", + " <td>5290</td>\n", + " <td>Divin, emplâtre divin, emplastrum divinum</td>\n", + " <td>Pharmacie</td>\n", + " <td>Pharmac.</td>\n", + " <td>unsigned</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " volume numero head \\\n", + "45529 11 2501 OPICIENS, les \n", + "63464 15 1971 SOUSA, Province de, ou Souse \n", + "38896 9 4159 Maison \n", + "52378 13 522 PORTO-FERRAIO \n", + "62792 15 1299 SNOWDON-HILLS \n", + "54108 13 2252 PULO-WAY \n", + "62965 15 1472 Solide \n", + "61463 14 5167 SÉMI-PÉLAGIANISME \n", + "29174 7 1711 GAS \n", + "21200 4 5290 Divin, emplâtre divin, emplastrum divinum \n", + "\n", + " normClass classEDdA author \n", + "45529 Géographie ancienne Géog. anc. unsigned \n", + "63464 Géographie moderne Géog. mod. unsigned \n", + "38896 Histoire moderne Hist. mod. unsigned \n", + "52378 Géographie moderne Geog. mod. unsigned \n", + "62792 Géographie moderne Géog. mod. Jaucourt \n", + "54108 Géographie moderne Geog. mod. Jaucourt \n", + "62965 Anatomie en Anatomie unsigned \n", + "61463 Histoire ecclésiastique Hist. eccles. Jaucourt \n", + "29174 Chimie Chim. Venel \n", + "21200 Pharmacie Pharmac. 
unsigned " + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# affichage aléatoire de 50 lignes du dataframe\n", + "df.sample(10)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "expired-click", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "74190" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# nombre d'articles dans le dataframe\n", + "len(df)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "considered-adjustment", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>volume</th>\n", + " <th>numero</th>\n", + " <th>head</th>\n", + " <th>classEDdA</th>\n", + " <th>author</th>\n", + " </tr>\n", + " <tr>\n", + " <th>normClass</th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th></th>\n", + " <td>44</td>\n", + " <td>44</td>\n", + " <td>44</td>\n", + " <td>44</td>\n", + " <td>44</td>\n", + " </tr>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>17</td>\n", + " <td>17</td>\n", + " <td>17</td>\n", + " <td>17</td>\n", + " <td>17</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Abus des langues</th>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Accord de sons</th>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " 
</tr>\n", + " <tr>\n", + " <th>Acoustique</th>\n", + " <td>6</td>\n", + " <td>6</td>\n", + " <td>6</td>\n", + " <td>6</td>\n", + " <td>6</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " volume numero head classEDdA author\n", + "normClass \n", + " 44 44 44 44 44\n", + "0 17 17 17 17 17\n", + "Abus des langues 1 1 1 1 1\n", + "Accord de sons 1 1 1 1 1\n", + "Acoustique 6 6 6 6 6" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# On regroupe les lignes du dataframe en fonction du normclass\n", + "classes = df.groupby(['normClass']).count()\n", + "classes.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "instructional-variation", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "2908" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Nombre de classes \n", + "len(classes)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "handmade-contest", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "12685" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# nombre d'articles 'unclassified'\n", + "len(df.loc[df['normClass']==\"unclassified\",:])" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "crude-olympus", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "1614" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# nombre de classes avec un seul article\n", + "len(classes.loc[classes['volume']==1])" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "sized-barrier", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "2656" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ 
+ "# nombre de classes avec moins de 20 articles\n", + "len(classes.loc[classes['volume']<20])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "indian-selection", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "id": "weighted-hanging", + "metadata": {}, + "source": [ + "### Enregistrement" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "stainless-stewart", + "metadata": {}, + "outputs": [], + "source": [ + "# enregistrement du résultat du groupby\n", + "classes['volume'].to_csv('classesEDdA.tsv',sep='\\t',header=False) " + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "hearing-olive", + "metadata": {}, + "outputs": [], + "source": [ + "# enregistrement du dataframe (permet de ne pas reparser tous les fichiers TEI pour recharger ce dataframe)\n", + "df.to_csv('EDdA_dataframe_orginal.tsv',sep='\\t',index=False) " + ] + }, + { + "cell_type": "markdown", + "id": "stuck-courage", + "metadata": {}, + "source": [ + "### Lecture" + ] + }, + { + "cell_type": "code", + "execution_count": 143, + "id": "thick-destiny", + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.read_csv('EDdA_dataframe_orginal.tsv', sep='\\t') " + ] + }, + { + "cell_type": "code", + "execution_count": 144, + "id": "typical-munich", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>volume</th>\n", + " <th>numero</th>\n", + " <th>head</th>\n", + " <th>normClass</th>\n", + " <th>classEDdA</th>\n", + " <th>author</th>\n", + " </tr>\n", + " </thead>\n", + " 
<tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>Title Page</td>\n", + " <td>unclassified</td>\n", + " <td>unclassified</td>\n", + " <td>unsigned</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>1</td>\n", + " <td>2</td>\n", + " <td>A MONSEIGNEUR LE COMTE D'ARGENSON</td>\n", + " <td>unclassified</td>\n", + " <td>unclassified</td>\n", + " <td>Diderot & d'Alembert</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>1</td>\n", + " <td>3</td>\n", + " <td>DISCOURS PRÉLIMINAIRE DES EDITEURS</td>\n", + " <td>unclassified</td>\n", + " <td>unclassified</td>\n", + " <td>d'Alembert</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>1</td>\n", + " <td>5</td>\n", + " <td>A, a & a</td>\n", + " <td>Grammaire</td>\n", + " <td>ordre Encyclopéd. Entend. Science de l'homme, ...</td>\n", + " <td>Dumarsais5</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>1</td>\n", + " <td>6</td>\n", + " <td>A</td>\n", + " <td>unclassified</td>\n", + " <td>unclassified</td>\n", + " <td>Dumarsais5</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " volume numero head normClass \\\n", + "0 1 1 Title Page unclassified \n", + "1 1 2 A MONSEIGNEUR LE COMTE D'ARGENSON unclassified \n", + "2 1 3 DISCOURS PRÉLIMINAIRE DES EDITEURS unclassified \n", + "3 1 5 A, a & a Grammaire \n", + "4 1 6 A unclassified \n", + "\n", + " classEDdA author \n", + "0 unclassified unsigned \n", + "1 unclassified Diderot & d'Alembert \n", + "2 unclassified d'Alembert \n", + "3 ordre Encyclopéd. Entend. Science de l'homme, ... 
Dumarsais5 \n", + "4 unclassified Dumarsais5 " + ] + }, + "execution_count": 144, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "baking-command", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "individual-protection", + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "domaines_regroupes = {}\n", + "domaines_regroupes['Agriculture - Economie rustique'] = ['Agriculture', 'Economie rustique', 'Fontainier', 'Graines', 'Jardinage', 'Moulin', 'Sucre', 'Tabac', 'Vigne', 'Vin']\n", + "domaines_regroupes['Anatomie'] = ['Anatomie', 'Economie animale']\n", + "domaines_regroupes['Antiquité'] = ['Antiquité', 'Iconologie', 'Mythologie']\n", + "domaines_regroupes['Architecture'] = ['Architecture', 'Carreleur', 'Carrier', 'Coupe des pierres', 'Couvreur', 'Décoration', 'Maçonnerie']\n", + "domaines_regroupes['Arts et métiers'] = ['Arts et métiers', 'Arts mécaniques', 'Manufacture']\n", + "domaines_regroupes['Beaux-arts'] = ['Beaux-arts', 'Dessin', 'Gravue', 'Peinture', 'Sculpture']\n", + "domaines_regroupes['Belles-lettres - Poésie'] = ['Belles-lettres', 'Eloquence', 'Littérature', 'Poésie', 'Rhétorique']\n", + "domaines_regroupes['Blason'] = ['Blason']\n", + "domaines_regroupes['Caractères'] = ['Caractères', 'Ecriture']\n", + "domaines_regroupes['Chasse'] = ['Chasse', 'Fauconnerie', 'Oisellerie', 'Vénerie']\n", + "domaines_regroupes['Chimie'] = ['Alchimie', 'Chimie', 'Docimasie']\n", + "domaines_regroupes['Commerce'] = ['Commerce', 'Marchand', 'Voiturier']\n", + "domaines_regroupes['Droit - Jurisprudence'] = ['Chancellerie', 'Corporation', 'Douane', 'Droit', 'Eaux et Forêts', 'Finance', 'Jurisprudence', 'Palais']\n", + "domaines_regroupes['Economie domestique'] = ['Cuisine','Economie domestique']\n", + "#domaines_regroupes['Géographie'] = ['Géographie', 'Géographie Histoire naturelle', 
'Géographie ancienne', 'Géographie des Arabes', 'Géographie du moyen âge',\n", + "# 'Géographie ecclésiastique', 'Géographie historique', 'Géographie maritime ancienne', 'Géographie des Romains', 'Géographie morderne',\n", + "# 'Géographie naturelle', 'Géographie physique', 'Géographie sacrée', 'Géographie sainte', 'Géographie transcendante', 'Géographie transcendantee']\n", + "domaines_regroupes['Géographie'] = ['Géographie', 'Topographie']\n", + "domaines_regroupes['Grammaire'] = ['Grammaire', 'Langues', 'Synonymes']\n", + "domaines_regroupes['Histoire'] = ['Calendrier','Chevalerie','Chronologie','Coutumes','Généalogie','Histoire','Inscriptions','Inventions', 'Voyage']\n", + "domaines_regroupes['Histoire naturelle'] = ['Botanique','Conchyliologie','Fossiles','Histoire naturelle', 'Ichtyologie','Insectologie','Ophiologie','Ornithologie','Zoologie']\n", + "domaines_regroupes['Jeu'] = ['Jeu']\n", + "domaines_regroupes['Maréchage - Manège'] = ['Maréchage', 'Manège']\n", + "domaines_regroupes['Marine'] = ['Galère','Marine', 'Navigation', 'Rivière']\n", + "domaines_regroupes['Mathématiques'] = ['Algèbre','Analyse des hasards', 'Arithmétique', 'Arpentage','Géométrie', 'Mathématiques', 'Trigonométrie']\n", + "domaines_regroupes['Médailles'] = ['Médailles','Numismatique']\n", + "domaines_regroupes['Médecine - Chirurgie'] = ['Chirurgie', 'Diète', 'Gymnastique', 'Maladie', 'Matière médicale', 'Médecine', 'Pathologie', 'Physiologie', 'Séméiotique', 'Thérapeutique']\n", + "domaines_regroupes['Mesure'] = ['Balancier', 'Jaugeage', 'Mesure', 'Poids']\n", + "domaines_regroupes[\"Métiers\"] = ['Boucherie', 'Boulangerie', 'Brasserie', 'Charcuterie', 'Confiserie', 'Distillation', 'Epicerie', 'Pâtisserie', 'Rôtisserie', 'Vinaigrier']\n", + "domaines_regroupes[\"Métiers\"] += ['Bois', 'Boissellerie', 'Charpenterie', 'Charronnage', 'Coffretier', 'Ebénisterie', 'Formier', 'Layeterie', 'Menuiserie', 'Tonnelier', 'Vannerie']\n", + "domaines_regroupes[\"Métiers\"] += ['Bourrelier', 
'Boyaudier', 'Cardier', 'Chamoiseur', 'Corroierie', 'Cuir', 'Gainier', 'Hongroyeur', 'Maroquinier', 'Mégisserie', 'Parcheminerie', 'Peausserie', 'Pelleterie', 'Sellier', 'Tannerie']\n", + "domaines_regroupes[\"Métiers\"] += ['Aiguilletier-Epinglier', 'Ardoiserie', 'Argent', \"Batteur d'or\", 'Bijouterie', 'Bimblotier', 'Chaînetier', 'Chaudronnerie', 'Ciselure', 'Cloche', 'Cloutier', 'Coutellerie', 'Cuivre', 'Diamantaire', 'Dorure', 'Eperonnier', 'Fer']\n", + "domaines_regroupes[\"Métiers\"] += ['Ferblanterie', 'Fonderie', 'Forge', 'Fourbisseur', 'Glaces', 'Joaillier', 'Lapidaire', 'Lunetier', 'Marbrier', 'Maréchal-grossier', 'Métal', 'Metteur en oeuvre', 'Miroiterie', 'Or', 'Orfèvrerie']\n", + "domaines_regroupes[\"Métiers\"] += ['Pierres', 'Plomberie', \"Potier d'étain\", 'Serrurerie', 'Taillanderie', \"Tireur d'or\", 'Verrerie', 'Vitrerie']\n", + "domaines_regroupes[\"Métiers\"] += ['Cartier', 'Cartonnier', 'Imprimerie', 'Librairie', 'Marbreur de papier', 'Papeterie', 'Reliure']\n", + "domaines_regroupes[\"Métiers\"] += ['Bas au métier', 'Blanchissage des toiles', 'Blondier', 'Bonneterie', 'Bottier', 'Bourserie', 'Boutonnier', 'Broderie', 'Cardeur', 'Ceinturier', 'Chapellerie', 'Cordonnerie','Coton', 'Couture', 'Découpeur', 'Dentelle', 'Draperie']\n", + "domaines_regroupes[\"Métiers\"] += ['Etoffe', 'Fil', 'Friseur', 'Ganterie', 'Gazier', 'Laine', 'Lingerie', 'Mode', 'Ourdissage', 'Passementerie', 'Perruquier', 'Plumasserie', 'Rubanerie', 'Soierie', 'Tailleur', 'Tapisserie', 'Teinturerie', 'Tisserand', 'Toilerie', 'Tonderie de drap']\n", + "domaines_regroupes[\"Métiers\"] += ['Amidonnier', 'Blanchisserie de cire', 'Chandelier', 'Cirerie', 'Corderie', 'Emailleur', 'Eventailliste', 'Faïencier', 'Filassier', 'Fleuriste', 'Horlogerie', 'Marqueterie', 'Métiers peu attestés', 'Parfumeur', 'Paumier', 'Poterie']\n", + "domaines_regroupes[\"Métiers\"] += ['Salpêtrerie', 'Savonnerie', 'Sel', 'Tabatière', 'Tabletier-Cornetier', 'Tourneur', 'Vergetier', 'Vernisseur']\n", + 
"domaines_regroupes['Militaire (Art) - Guerre - Arme'] = ['Armurerie', 'Artificier', 'Artillerie', 'Canon','Escrime','Fortification','Guerre','Milice','Militaire']\n", + "domaines_regroupes['Minéralogie'] = ['Lithologie','Métallurgie','Minéralogie']\n", + "domaines_regroupes['Monnaie'] = ['Monnaie']\n", + "domaines_regroupes['Musique'] = ['Danse', 'Lutherie','Musique','Orgue', 'Voix']\n", + "domaines_regroupes['Pêche'] = ['Pêche']\n", + "domaines_regroupes['Pharmacie'] = ['Drogues', 'Pharmacie']\n", + "domaines_regroupes['Philosophie'] = ['Education', 'Logique', 'Métaphysique', 'Morale', 'Philologie','Philosophie', 'Sciences']\n", + "domaines_regroupes['Physique - [Sciences physico-mathématiques]'] = ['Acoustique', 'Astrologie', 'Astronomie', 'Cosmographie-Cosmologie', 'Gnomonique', 'Hydraulique', 'Mécanique', 'Optique', 'Perspective', 'Physique', 'Science microscopique']\n", + "domaines_regroupes['Politique'] = ['Economie', 'Gouvernement', 'Police', 'Politique']\n", + "domaines_regroupes['Religion'] = ['Critique sacrée', 'Culte', 'Eglise', 'Histoire ecclésiastique', 'Idolâtrie', 'Religion', 'Théologie']\n", + "domaines_regroupes['Spectacle'] = ['Opéra','Spectacle', 'Théâtre']\n", + "domaines_regroupes['Superstition'] = ['Divination', 'Magie', 'Superstition']" + ] + }, + { + "cell_type": "markdown", + "id": "variable-instrument", + "metadata": {}, + "source": [ + "### Récupération correspondance EDdA / ENCCRE" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "south-equation", + "metadata": {}, + "outputs": [], + "source": [ + "df_correspondances = pd.read_csv(\"/Users/lmoncla/Nextcloud-LIRIS/GEODE/GEODE - Partage consortium/Classification domaines EDdA/correspondances_ARTFL-ENCCRE.csv\") \n" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "protecting-incentive", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: 
middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>path</th>\n", + " <th>entreeid</th>\n", + " <th>tome</th>\n", + " <th>article</th>\n", + " <th>adresse</th>\n", + " <th>entree</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>T1/article5</td>\n", + " <td>v1-1-0</td>\n", + " <td>1</td>\n", + " <td>5</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>T1/article6</td>\n", + " <td>v1-1-1</td>\n", + " <td>1</td>\n", + " <td>6</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>T1/article7</td>\n", + " <td>v1-1-2</td>\n", + " <td>1</td>\n", + " <td>7</td>\n", + " <td>1</td>\n", + " <td>2</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>T1/article8</td>\n", + " <td>v1-1-3</td>\n", + " <td>1</td>\n", + " <td>8</td>\n", + " <td>1</td>\n", + " <td>3</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>T1/article9</td>\n", + " <td>v1-1-4</td>\n", + " <td>1</td>\n", + " <td>9</td>\n", + " <td>1</td>\n", + " <td>4</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " path entreeid tome article adresse entree\n", + "0 T1/article5 v1-1-0 1 5 1 0\n", + "1 T1/article6 v1-1-1 1 6 1 1\n", + "2 T1/article7 v1-1-2 1 7 1 2\n", + "3 T1/article8 v1-1-3 1 8 1 3\n", + "4 T1/article9 v1-1-4 1 9 1 4" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_correspondances.head()" + ] + }, + { + "cell_type": "markdown", + "id": "continuous-feedback", + "metadata": {}, + "source": [ + "### Test récupération données ENCCRE" + ] + }, + { + "cell_type": "code", + 
"execution_count": 15, + "id": "spread-feature", + "metadata": {}, + "outputs": [], + "source": [ + "import urllib, json\n", + "from urllib.request import urlopen\n", + "\n", + "json_url = urlopen(\"http://enccre.academie-sciences.fr/icefront/api/article/v1-544-0\")\n", + "data = json.loads(json_url.read())" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "facial-syndicate", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'géographie'" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data['annotations']['constit'][0]['domgen'][0]" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "removed-nickel", + "metadata": {}, + "outputs": [], + "source": [ + "def get_key(val):\n", + " for key, value in domaines_regroupes.items():\n", + " for v in value:\n", + " v = v.replace(\" \", \"\")\n", + " if val == v.lower():\n", + " return key\n", + " \n", + " return None\n" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "nuclear-murder", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Histoire naturelle\n" + ] + } + ], + "source": [ + "print(get_key(\"histoirenaturelle\"))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "placed-homework", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "id": "extraordinary-settlement", + "metadata": {}, + "source": [ + "### Ajout des colonnes domaines, texte, etc." 
def getDomaineEnccre(volume, numero, classEDDA):
    """Fetch ENCCRE domain metadata and article text for one EDdA article.

    Looks up the ENCCRE entry id for (volume, numero) in the module-level
    ``df_correspondances`` dataframe, queries the ENCCRE web API for the
    article's first domain, maps it to a grouped domain via ``get_key``, and
    loads the plain-text content of the article from disk.

    Parameters
    ----------
    volume : int
        EDdA volume number (1-17).
    numero : int
        Article number within the volume.
    classEDDA : str
        The EDdA class string (the "désignant"), removed from the text to
        build ``contentWithoutClass``.

    Returns
    -------
    pd.Series
        [entreeid, domaine, ensemble_domaine, txtContent,
         txtContentWithoutClass, firstParagraph] — empty strings (or None for
        ensemble_domaine) when a lookup fails.
    """
    domaine = ""
    ensemble_domaine = ""
    entreeid = ""

    try:
        # Map (tome, article) to the ENCCRE entry id.
        d = df_correspondances.loc[
            (df_correspondances['tome'] == volume)
            & (df_correspondances['article'] == numero)
        ].reset_index(drop=True)
        entreeid = d['entreeid'][0]

        json_url = urlopen(
            "http://enccre.academie-sciences.fr/icefront/api/article/" + entreeid
        )
        data = json.loads(json_url.read())

        try:
            # Only the first domain of the first "constit" is kept; multi-domain
            # handling was prototyped but is intentionally disabled.
            domaine = data['annotations']['constit'][0]['domgen'][0]
            ensemble_domaine = get_key(domaine)
        except KeyError:
            pass

    except KeyError:
        pass

    try:
        # Text files are named volumeNN-numero.txt with a zero-padded volume.
        if volume < 10:
            txt_file = ("/Users/lmoncla/Documents/Data/Corpus/EDDA/articles_all/"
                        "all_txt/volume0" + str(volume) + "-" + str(numero) + ".txt")
        else:
            txt_file = ("/Users/lmoncla/Documents/Data/Corpus/EDDA/articles_all/"
                        "all_txt/volume" + str(volume) + "-" + str(numero) + ".txt")

        # Context manager so the file handle is always released.
        with open(txt_file, "r") as fh:
            txtContent = fh.read()

        classEDDA = str(classEDDA)

        # Remove the "désignant" from the text: first the parenthesized form
        # "(classEDDA)", then any remaining bare occurrence.
        # BUG FIX: the second replace previously ran on the original
        # txtContent, discarding the parenthesized removal entirely.
        txtContentWithoutClass = txtContent.replace('(' + classEDDA + ')', "")
        txtContentWithoutClass = txtContentWithoutClass.replace(classEDDA, "")

        firstParagraph = txtContentWithoutClass.split('\n \n')[0]

    except FileNotFoundError:
        txtContent = ""
        txtContentWithoutClass = ""
        firstParagraph = ""

    return pd.Series([entreeid, domaine, ensemble_domaine, txtContent,
                      txtContentWithoutClass, firstParagraph])
<td>unclassified</td>\n", + " <td>unclassified</td>\n", + " <td>unsigned</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>2</td>\n", + " <td>3</td>\n", + " <td>4</td>\n", + " <td>5</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>1</td>\n", + " <td>2</td>\n", + " <td>A MONSEIGNEUR LE COMTE D'ARGENSON</td>\n", + " <td>unclassified</td>\n", + " <td>unclassified</td>\n", + " <td>Diderot & d'Alembert</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>2</td>\n", + " <td>3</td>\n", + " <td>4</td>\n", + " <td>5</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>1</td>\n", + " <td>3</td>\n", + " <td>DISCOURS PRÉLIMINAIRE DES EDITEURS</td>\n", + " <td>unclassified</td>\n", + " <td>unclassified</td>\n", + " <td>d'Alembert</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>2</td>\n", + " <td>3</td>\n", + " <td>4</td>\n", + " <td>5</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>1</td>\n", + " <td>5</td>\n", + " <td>A, a & a</td>\n", + " <td>Grammaire</td>\n", + " <td>ordre Encyclopéd. Entend. 
Science de l'homme, ...</td>\n", + " <td>Dumarsais5</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>2</td>\n", + " <td>3</td>\n", + " <td>4</td>\n", + " <td>5</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>1</td>\n", + " <td>6</td>\n", + " <td>A</td>\n", + " <td>unclassified</td>\n", + " <td>unclassified</td>\n", + " <td>Dumarsais5</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>2</td>\n", + " <td>3</td>\n", + " <td>4</td>\n", + " <td>5</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " volume numero head normClass \\\n", + "0 1 1 Title Page unclassified \n", + "1 1 2 A MONSEIGNEUR LE COMTE D'ARGENSON unclassified \n", + "2 1 3 DISCOURS PRÉLIMINAIRE DES EDITEURS unclassified \n", + "3 1 5 A, a & a Grammaire \n", + "4 1 6 A unclassified \n", + "\n", + " classEDdA author \\\n", + "0 unclassified unsigned \n", + "1 unclassified Diderot & d'Alembert \n", + "2 unclassified d'Alembert \n", + "3 ordre Encyclopéd. Entend. Science de l'homme, ... 
Dumarsais5 \n", + "4 unclassified Dumarsais5 \n", + "\n", + " id_enccre domaine_enccre ensemble_domaine_enccre content \\\n", + "0 0 1 2 3 \n", + "1 0 1 2 3 \n", + "2 0 1 2 3 \n", + "3 0 1 2 3 \n", + "4 0 1 2 3 \n", + "\n", + " contentWithoutClass firstParagraph \n", + "0 4 5 \n", + "1 4 5 \n", + "2 4 5 \n", + "3 4 5 \n", + "4 4 5 " + ] + }, + "execution_count": 133, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "christian-advice", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>volume</th>\n", + " <th>numero</th>\n", + " <th>head</th>\n", + " <th>normClass</th>\n", + " <th>classEDdA</th>\n", + " <th>author</th>\n", + " <th>id_enccre</th>\n", + " <th>domaine_enccre</th>\n", + " <th>ensemble_domaine_enccre</th>\n", + " <th>content</th>\n", + " <th>contentWithoutClass</th>\n", + " <th>firstParagraph</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>Title Page</td>\n", + " <td>unclassified</td>\n", + " <td>unclassified</td>\n", + " <td>unsigned</td>\n", + " <td></td>\n", + " <td></td>\n", + " <td></td>\n", + " <td>ENCYCLOPÉDIE,\\nDICTIONNAIRE RAISONNÉ\\nDES SCIE...</td>\n", + " <td>ENCYCLOPÉDIE,\\nDICTIONNAIRE RAISONNÉ\\nDES SCIE...</td>\n", + " <td>ENCYCLOPÉDIE,\\nDICTIONNAIRE RAISONNÉ\\nDES SCIE...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>1</td>\n", + " <td>2</td>\n", + " <td>A MONSEIGNEUR LE COMTE D'ARGENSON</td>\n", + " 
<td>unclassified</td>\n", + " <td>unclassified</td>\n", + " <td>Diderot & d'Alembert</td>\n", + " <td></td>\n", + " <td></td>\n", + " <td></td>\n", + " <td>A MONSEIGNEUR\\nLE COMTE D'ARGENSON,\\nMINISTRE\\...</td>\n", + " <td>A MONSEIGNEUR\\nLE COMTE D'ARGENSON,\\nMINISTRE\\...</td>\n", + " <td>A MONSEIGNEUR\\nLE COMTE D'ARGENSON,\\nMINISTRE\\...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>1</td>\n", + " <td>3</td>\n", + " <td>DISCOURS PRÉLIMINAIRE DES EDITEURS</td>\n", + " <td>unclassified</td>\n", + " <td>unclassified</td>\n", + " <td>d'Alembert</td>\n", + " <td></td>\n", + " <td></td>\n", + " <td></td>\n", + " <td>DISCOURS PRÉLIMINAIRE\\nDES EDITEURS.\\nL'Encycl...</td>\n", + " <td>DISCOURS PRÉLIMINAIRE\\nDES EDITEURS.\\nL'Encycl...</td>\n", + " <td>DISCOURS PRÉLIMINAIRE\\nDES EDITEURS.\\nL'Encycl...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>1</td>\n", + " <td>5</td>\n", + " <td>A, a & a</td>\n", + " <td>Grammaire</td>\n", + " <td>ordre Encyclopéd. Entend. Science de l'homme, ...</td>\n", + " <td>Dumarsais5</td>\n", + " <td>v1-1-0</td>\n", + " <td>grammaire</td>\n", + " <td>Grammaire</td>\n", + " <td>A, a & a s.m. (ordre Encyclopéd.\\nEntend. Scie...</td>\n", + " <td>A, a & a s.m. (ordre Encyclopéd.\\nEntend. Scie...</td>\n", + " <td>A, a & a s.m. (ordre Encyclopéd.\\nEntend. Scie...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>1</td>\n", + " <td>6</td>\n", + " <td>A</td>\n", + " <td>unclassified</td>\n", + " <td>unclassified</td>\n", + " <td>Dumarsais5</td>\n", + " <td>v1-1-1</td>\n", + " <td>grammaire</td>\n", + " <td>Grammaire</td>\n", + " <td>A, mot, est 1. la troisieme personne du présen...</td>\n", + " <td>A, mot, est 1. la troisieme personne du présen...</td>\n", + " <td>A, mot, est 1. 
la troisieme personne du présen...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5</th>\n", + " <td>1</td>\n", + " <td>7</td>\n", + " <td>A</td>\n", + " <td>unclassified</td>\n", + " <td>unclassified</td>\n", + " <td>Dumarsais</td>\n", + " <td>v1-1-2</td>\n", + " <td>grammaire</td>\n", + " <td>Grammaire</td>\n", + " <td>A, préposition vient du latin à , à dextris, à ...</td>\n", + " <td>A, préposition vient du latin à , à dextris, à ...</td>\n", + " <td>A, préposition vient du latin à , à dextris, à ...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>6</th>\n", + " <td>1</td>\n", + " <td>8</td>\n", + " <td>A</td>\n", + " <td>unclassified</td>\n", + " <td>unclassified</td>\n", + " <td>Mallet</td>\n", + " <td>v1-1-3</td>\n", + " <td></td>\n", + " <td></td>\n", + " <td>A, étoit une lettre numérale parmi les Anciens...</td>\n", + " <td>A, étoit une lettre numérale parmi les Anciens...</td>\n", + " <td>A, étoit une lettre numérale parmi les Anciens...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>7</th>\n", + " <td>1</td>\n", + " <td>9</td>\n", + " <td>A, lettre symbolique</td>\n", + " <td>unclassified</td>\n", + " <td>unclassified</td>\n", + " <td>Mallet</td>\n", + " <td>v1-1-4</td>\n", + " <td></td>\n", + " <td></td>\n", + " <td>A, lettre symbolique, étoit un hiéroglyphe che...</td>\n", + " <td>A, lettre symbolique, étoit un hiéroglyphe che...</td>\n", + " <td>A, lettre symbolique, étoit un hiéroglyphe che...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>8</th>\n", + " <td>1</td>\n", + " <td>10</td>\n", + " <td>A, numismatique ou monétaire</td>\n", + " <td>unclassified</td>\n", + " <td>unclassified</td>\n", + " <td>Mallet</td>\n", + " <td>v1-1-5</td>\n", + " <td>numismatique</td>\n", + " <td>Médailles</td>\n", + " <td>A, numismatique ou monétaire, sur le revers de...</td>\n", + " <td>A, numismatique ou monétaire, sur le revers de...</td>\n", + " <td>A, numismatique ou monétaire, sur le revers de...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>9</th>\n", + " <td>1</td>\n", + " 
<td>11</td>\n", + " <td>A, lapidaire</td>\n", + " <td>unclassified</td>\n", + " <td>unclassified</td>\n", + " <td>Mallet</td>\n", + " <td>v1-1-6</td>\n", + " <td>inscriptions</td>\n", + " <td>Histoire</td>\n", + " <td>A, lapidaire, dans les anciennes inscriptions ...</td>\n", + " <td>A, lapidaire, dans les anciennes inscriptions ...</td>\n", + " <td>A, lapidaire, dans les anciennes inscriptions ...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>10</th>\n", + " <td>1</td>\n", + " <td>12</td>\n", + " <td>A, lettre de suffrage</td>\n", + " <td>unclassified</td>\n", + " <td>unclassified</td>\n", + " <td>Mallet</td>\n", + " <td>v1-1-7</td>\n", + " <td></td>\n", + " <td></td>\n", + " <td>A, lettre de suffrage ; les Romains se servoie...</td>\n", + " <td>A, lettre de suffrage ; les Romains se servoie...</td>\n", + " <td>A, lettre de suffrage ; les Romains se servoie...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>11</th>\n", + " <td>1</td>\n", + " <td>13</td>\n", + " <td>A, signe d'absolution</td>\n", + " <td>unclassified</td>\n", + " <td>unclassified</td>\n", + " <td>Mallet5</td>\n", + " <td>v1-1-8</td>\n", + " <td></td>\n", + " <td></td>\n", + " <td>A, signe d'absolution, chez les Romains dans l...</td>\n", + " <td>A, signe d'absolution, chez les Romains dans l...</td>\n", + " <td>A, signe d'absolution, chez les Romains dans l...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>12</th>\n", + " <td>1</td>\n", + " <td>14</td>\n", + " <td>A cognitionibus</td>\n", + " <td>unclassified</td>\n", + " <td>unclassified</td>\n", + " <td>Diderot</td>\n", + " <td>v1-2-0</td>\n", + " <td></td>\n", + " <td></td>\n", + " <td>* A cognitionibus. Scorpus fameux Agitateur du...</td>\n", + " <td>* A cognitionibus. Scorpus fameux Agitateur du...</td>\n", + " <td>* A cognitionibus. 
Scorpus fameux Agitateur du...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>13</th>\n", + " <td>1</td>\n", + " <td>15</td>\n", + " <td>A curâ amicorum</td>\n", + " <td>unclassified</td>\n", + " <td>unclassified</td>\n", + " <td>Diderot</td>\n", + " <td>v1-3-0</td>\n", + " <td></td>\n", + " <td></td>\n", + " <td>* A curâ amicorum. On lit dans quelques inscri...</td>\n", + " <td>* A curâ amicorum. On lit dans quelques inscri...</td>\n", + " <td>* A curâ amicorum. On lit dans quelques inscri...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>14</th>\n", + " <td>1</td>\n", + " <td>16</td>\n", + " <td>A</td>\n", + " <td>Ecrivains modernes</td>\n", + " <td>dans les Ecrivains modernes</td>\n", + " <td>Mallet</td>\n", + " <td>v1-4-0</td>\n", + " <td>caractères</td>\n", + " <td>Caractères</td>\n", + " <td>A, dans les Ecrivains modernes, veut dire auss...</td>\n", + " <td>A, , veut dire aussi\\nl'an, comme A. D. anno D...</td>\n", + " <td>A, , veut dire aussi\\nl'an, comme A. D. anno D...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>15</th>\n", + " <td>1</td>\n", + " <td>17</td>\n", + " <td>A</td>\n", + " <td>Calendrier Julien</td>\n", + " <td>dans le calendrier Julien</td>\n", + " <td>Mallet</td>\n", + " <td>v1-4-1</td>\n", + " <td>calendrier</td>\n", + " <td>Histoire</td>\n", + " <td>A, dans le calendrier Julien, est aussi la pre...</td>\n", + " <td>A, , est aussi la premiere\\ndes sept lettres d...</td>\n", + " <td>A, , est aussi la premiere\\ndes sept lettres d...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>16</th>\n", + " <td>1</td>\n", + " <td>18</td>\n", + " <td>A. D.</td>\n", + " <td>pending</td>\n", + " <td>épistolaire</td>\n", + " <td>Mallet</td>\n", + " <td>v1-4-2</td>\n", + " <td></td>\n", + " <td></td>\n", + " <td>A. D. épistolaire ; ces deux caracteres dans l...</td>\n", + " <td>A. D. ; ces deux caracteres dans les\\nLettres...</td>\n", + " <td>A. D. 
; ces deux caracteres dans les\\nLettres...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>17</th>\n", + " <td>1</td>\n", + " <td>19</td>\n", + " <td>A</td>\n", + " <td>unclassified</td>\n", + " <td>unclassified</td>\n", + " <td>Diderot</td>\n", + " <td>v1-4-3</td>\n", + " <td>logique</td>\n", + " <td>Philosophie</td>\n", + " <td>* A désigne une proposition générale affirmati...</td>\n", + " <td>* A désigne une proposition générale affirmati...</td>\n", + " <td>* A désigne une proposition générale affirmati...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>18</th>\n", + " <td>1</td>\n", + " <td>20</td>\n", + " <td>A, signe des passions</td>\n", + " <td>unclassified</td>\n", + " <td>unclassified</td>\n", + " <td>Diderot</td>\n", + " <td>v1-4-4</td>\n", + " <td></td>\n", + " <td></td>\n", + " <td>* A, signe des passions ; selon certains Auteu...</td>\n", + " <td>* A, signe des passions ; selon certains Auteu...</td>\n", + " <td>* A, signe des passions ; selon certains Auteu...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>19</th>\n", + " <td>1</td>\n", + " <td>21</td>\n", + " <td>A</td>\n", + " <td>unclassified</td>\n", + " <td>unclassified</td>\n", + " <td>unsigned</td>\n", + " <td>v1-4-5</td>\n", + " <td></td>\n", + " <td></td>\n", + " <td>A, est aussi une abbréviation dont on se sert ...</td>\n", + " <td>A, est aussi une abbréviation dont on se sert ...</td>\n", + " <td>A, est aussi une abbréviation dont on se sert ...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>20</th>\n", + " <td>1</td>\n", + " <td>22</td>\n", + " <td>A A A</td>\n", + " <td>Chimie</td>\n", + " <td>Chimistes</td>\n", + " <td>Malouin5</td>\n", + " <td>v1-5-0</td>\n", + " <td>chimie</td>\n", + " <td>Chimie</td>\n", + " <td>A A A, chez les Chimistes, signifie une amalga...</td>\n", + " <td>A A A, chez les , signifie une amalgame,\\nou l...</td>\n", + " <td>A A A, chez les , signifie une amalgame,\\nou l...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>21</th>\n", + " <td>1</td>\n", + " <td>23</td>\n", + " 
<td>A, Ä, ou Ä Ä</td>\n", + " <td>Médecine</td>\n", + " <td>Medecine</td>\n", + " <td>Vandenesse</td>\n", + " <td>v1-6-0</td>\n", + " <td>médecine</td>\n", + " <td>Médecine - Chirurgie</td>\n", + " <td>A, Ä, ou Ä Ä; on se sert de cette abbréviation...</td>\n", + " <td>A, Ä, ou Ä Ä; on se sert de cette abbréviation...</td>\n", + " <td>A, Ä, ou Ä Ä; on se sert de cette abbréviation...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>22</th>\n", + " <td>1</td>\n", + " <td>24</td>\n", + " <td>A</td>\n", + " <td>unclassified</td>\n", + " <td>unclassified</td>\n", + " <td>Mallet</td>\n", + " <td>v1-7-0</td>\n", + " <td>commerce</td>\n", + " <td>Commerce</td>\n", + " <td>A. Les Marchands Négocians, Banquiers, & Teneu...</td>\n", + " <td>A. Les Marchands Négocians, Banquiers, & Teneu...</td>\n", + " <td>A. Les Marchands Négocians, Banquiers, & Teneu...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>23</th>\n", + " <td>1</td>\n", + " <td>25</td>\n", + " <td>A</td>\n", + " <td>pending</td>\n", + " <td>caractere alphabétique</td>\n", + " <td>Diderot</td>\n", + " <td>v1-8-0</td>\n", + " <td>ecriture</td>\n", + " <td>Caractères</td>\n", + " <td>* A, caractere alphabétique. Après avoir donné...</td>\n", + " <td>* A, . Après avoir donné les\\ndifférentes sign...</td>\n", + " <td>* A, . Après avoir donné les\\ndifférentes sign...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>24</th>\n", + " <td>1</td>\n", + " <td>26</td>\n", + " <td>A</td>\n", + " <td>unclassified</td>\n", + " <td>unclassified</td>\n", + " <td>Diderot</td>\n", + " <td>v1-9-0</td>\n", + " <td>géographie</td>\n", + " <td>Géographie</td>\n", + " <td>* A, s. petite riviere de France, qui a sa sou...</td>\n", + " <td>* A, s. petite riviere de France, qui a sa sou...</td>\n", + " <td>* A, s. 
petite riviere de France, qui a sa sou...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>25</th>\n", + " <td>1</td>\n", + " <td>27</td>\n", + " <td>AA</td>\n", + " <td>unclassified</td>\n", + " <td>unclassified</td>\n", + " <td>Diderot</td>\n", + " <td>v1-10-0</td>\n", + " <td>géographie</td>\n", + " <td>Géographie</td>\n", + " <td>* AA, s. f. riviere de France, qui prend sa so...</td>\n", + " <td>* AA, s. f. riviere de France, qui prend sa so...</td>\n", + " <td>* AA, s. f. riviere de France, qui prend sa so...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>26</th>\n", + " <td>1</td>\n", + " <td>28</td>\n", + " <td>AABAM</td>\n", + " <td>unclassified</td>\n", + " <td>unclassified</td>\n", + " <td>Malouin</td>\n", + " <td>v1-11-0</td>\n", + " <td>alchimie</td>\n", + " <td>Chimie</td>\n", + " <td>AABAM, s. m. Quelques Alchimistes se sont serv...</td>\n", + " <td>AABAM, s. m. Quelques Alchimistes se sont serv...</td>\n", + " <td>AABAM, s. m. Quelques Alchimistes se sont serv...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>27</th>\n", + " <td>1</td>\n", + " <td>29</td>\n", + " <td>AACH ou ACH</td>\n", + " <td>unclassified</td>\n", + " <td>unclassified</td>\n", + " <td>Diderot</td>\n", + " <td>v1-12-0</td>\n", + " <td>géographie</td>\n", + " <td>Géographie</td>\n", + " <td>* AACH ou ACH, s. f. petite ville d'Allemagne\\...</td>\n", + " <td>* AACH ou ACH, s. f. petite ville d'Allemagne\\...</td>\n", + " <td>* AACH ou ACH, s. f. petite ville d'Allemagne\\...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>28</th>\n", + " <td>1</td>\n", + " <td>30</td>\n", + " <td>AAHUS</td>\n", + " <td>unclassified</td>\n", + " <td>unclassified</td>\n", + " <td>Diderot</td>\n", + " <td>v1-13-0</td>\n", + " <td>géographie</td>\n", + " <td>Géographie</td>\n", + " <td>* AAHUS, s. petite ville d'Allemagne dans le c...</td>\n", + " <td>* AAHUS, s. petite ville d'Allemagne dans le c...</td>\n", + " <td>* AAHUS, s. 
petite ville d'Allemagne dans le c...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>29</th>\n", + " <td>1</td>\n", + " <td>31</td>\n", + " <td>AAM</td>\n", + " <td>unclassified</td>\n", + " <td>unclassified</td>\n", + " <td>Diderot</td>\n", + " <td>v1-14-0</td>\n", + " <td></td>\n", + " <td></td>\n", + " <td>* AAM, s. mesure des Liquides, en usage à Amst...</td>\n", + " <td>* AAM, s. mesure des Liquides, en usage à Amst...</td>\n", + " <td>* AAM, s. mesure des Liquides, en usage à Amst...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>30</th>\n", + " <td>1</td>\n", + " <td>32</td>\n", + " <td>AAR</td>\n", + " <td>unclassified</td>\n", + " <td>unclassified</td>\n", + " <td>Diderot</td>\n", + " <td>v1-15-0</td>\n", + " <td>géographie</td>\n", + " <td>Géographie</td>\n", + " <td>* AAR, s. grande riviere qui a sa source proch...</td>\n", + " <td>* AAR, s. grande riviere qui a sa source proch...</td>\n", + " <td>* AAR, s. grande riviere qui a sa source proch...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>31</th>\n", + " <td>1</td>\n", + " <td>33</td>\n", + " <td>Aar</td>\n", + " <td>unclassified</td>\n", + " <td>unclassified</td>\n", + " <td>Diderot</td>\n", + " <td>v1-15-1</td>\n", + " <td>géographie</td>\n", + " <td>Géographie</td>\n", + " <td>* Aar, s. riviere d'Allemagne qui a sa source ...</td>\n", + " <td>* Aar, s. riviere d'Allemagne qui a sa source ...</td>\n", + " <td>* Aar, s. riviere d'Allemagne qui a sa source ...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>32</th>\n", + " <td>1</td>\n", + " <td>34</td>\n", + " <td>AA ou AAS</td>\n", + " <td>unclassified</td>\n", + " <td>unclassified</td>\n", + " <td>Diderot</td>\n", + " <td>v1-16-0</td>\n", + " <td>géographie</td>\n", + " <td>Géographie</td>\n", + " <td>* AA ou AAS, s. ou Fontaine des Arquebusades. ...</td>\n", + " <td>* AA ou AAS, s. ou Fontaine des Arquebusades. ...</td>\n", + " <td>* AA ou AAS, s. ou Fontaine des Arquebusades. 
...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>33</th>\n", + " <td>1</td>\n", + " <td>35</td>\n", + " <td>AAS ou AASA</td>\n", + " <td>unclassified</td>\n", + " <td>unclassified</td>\n", + " <td>Diderot</td>\n", + " <td>v1-17-0</td>\n", + " <td>géographie</td>\n", + " <td>Géographie</td>\n", + " <td>* AAS ou AASA, Fort de Norwege dans le Baillia...</td>\n", + " <td>* AAS ou AASA, Fort de Norwege dans le Baillia...</td>\n", + " <td>* AAS ou AASA, Fort de Norwege dans le Baillia...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>34</th>\n", + " <td>1</td>\n", + " <td>36</td>\n", + " <td>AB</td>\n", + " <td>unclassified</td>\n", + " <td>unclassified</td>\n", + " <td>Mallet</td>\n", + " <td>v1-18-0</td>\n", + " <td></td>\n", + " <td></td>\n", + " <td>AB, s. m. onzieme mois de l'année civile des H...</td>\n", + " <td>AB, s. m. onzieme mois de l'année civile des H...</td>\n", + " <td>AB, s. m. onzieme mois de l'année civile des H...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>35</th>\n", + " <td>1</td>\n", + " <td>37</td>\n", + " <td>AB</td>\n", + " <td>unclassified</td>\n", + " <td>unclassified</td>\n", + " <td>Mallet</td>\n", + " <td>v1-19-0</td>\n", + " <td>calendrier</td>\n", + " <td>Histoire</td>\n", + " <td>AB, s. m. en Langue Syriaque est le nom du der...</td>\n", + " <td>AB, s. m. en Langue Syriaque est le nom du der...</td>\n", + " <td>AB, s. m. en Langue Syriaque est le nom du der...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>36</th>\n", + " <td>1</td>\n", + " <td>38</td>\n", + " <td>AB</td>\n", + " <td>unclassified</td>\n", + " <td>unclassified</td>\n", + " <td>Mallet</td>\n", + " <td>v1-20-0</td>\n", + " <td></td>\n", + " <td></td>\n", + " <td>AB, s.m. en hébreu signifie pere ; d'où les Ch...</td>\n", + " <td>AB, s.m. en hébreu signifie pere ; d'où les Ch...</td>\n", + " <td>AB, s.m. 
en hébreu signifie pere ; d'où les Ch...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>37</th>\n", + " <td>1</td>\n", + " <td>39</td>\n", + " <td>ABA</td>\n", + " <td>unclassified</td>\n", + " <td>unclassified</td>\n", + " <td>Diderot</td>\n", + " <td>v1-21-0</td>\n", + " <td>géographie</td>\n", + " <td>Géographie</td>\n", + " <td>*ABA, s. ville de la Phocide, bâtie par les Ab...</td>\n", + " <td>*ABA, s. ville de la Phocide, bâtie par les Ab...</td>\n", + " <td>*ABA, s. ville de la Phocide, bâtie par les Ab...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>38</th>\n", + " <td>1</td>\n", + " <td>40</td>\n", + " <td>ABACA</td>\n", + " <td>unclassified</td>\n", + " <td>unclassified</td>\n", + " <td>Diderot</td>\n", + " <td>v1-22-0</td>\n", + " <td></td>\n", + " <td></td>\n", + " <td>* ABACA, s. Il ne paroît pas qu'on sache bien ...</td>\n", + " <td>* ABACA, s. Il ne paroît pas qu'on sache bien ...</td>\n", + " <td>* ABACA, s. Il ne paroît pas qu'on sache bien ...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>39</th>\n", + " <td>1</td>\n", + " <td>41</td>\n", + " <td>ABACH</td>\n", + " <td>unclassified</td>\n", + " <td>unclassified</td>\n", + " <td>Diderot</td>\n", + " <td>v1-23-0</td>\n", + " <td>géographie</td>\n", + " <td>Géographie</td>\n", + " <td>* ABACH, s. petite ville d'Allemagne dans la b...</td>\n", + " <td>* ABACH, s. petite ville d'Allemagne dans la b...</td>\n", + " <td>* ABACH, s. petite ville d'Allemagne dans la b...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>40</th>\n", + " <td>1</td>\n", + " <td>42</td>\n", + " <td>ABACO</td>\n", + " <td>unclassified</td>\n", + " <td>unclassified</td>\n", + " <td>d'Alembert</td>\n", + " <td>v1-24-0</td>\n", + " <td>arithmétique</td>\n", + " <td>Mathématiques</td>\n", + " <td>ABACO, s. m. Quelques anciens Auteurs se serve...</td>\n", + " <td>ABACO, s. m. Quelques anciens Auteurs se serve...</td>\n", + " <td>ABACO, s. m. 
Quelques anciens Auteurs se serve...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>41</th>\n", + " <td>1</td>\n", + " <td>43</td>\n", + " <td>ABACOA</td>\n", + " <td>unclassified</td>\n", + " <td>unclassified</td>\n", + " <td>Diderot</td>\n", + " <td>v1-25-0</td>\n", + " <td>géographie</td>\n", + " <td>Géographie</td>\n", + " <td>* ABACOA, s. Isle de l'Amérique septentrionale...</td>\n", + " <td>* ABACOA, s. Isle de l'Amérique septentrionale...</td>\n", + " <td>* ABACOA, s. Isle de l'Amérique septentrionale...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>42</th>\n", + " <td>1</td>\n", + " <td>44</td>\n", + " <td>ABACOT</td>\n", + " <td>unclassified</td>\n", + " <td>unclassified</td>\n", + " <td>Diderot</td>\n", + " <td>v1-26-0</td>\n", + " <td></td>\n", + " <td></td>\n", + " <td>* ABACOT, s. m. nom de l'ancienne parure dè\\nt...</td>\n", + " <td>* ABACOT, s. m. nom de l'ancienne parure dè\\nt...</td>\n", + " <td>* ABACOT, s. m. nom de l'ancienne parure dè\\nt...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>43</th>\n", + " <td>1</td>\n", + " <td>45</td>\n", + " <td>ABADA</td>\n", + " <td>unclassified</td>\n", + " <td>unclassified</td>\n", + " <td>Diderot</td>\n", + " <td>v1-27-0</td>\n", + " <td>histoirenaturelle</td>\n", + " <td>Histoire naturelle</td>\n", + " <td>* ABADA, s. m. c'est, dit-on, un animal qui\\ns...</td>\n", + " <td>* ABADA, s. m. c'est, dit-on, un animal qui\\ns...</td>\n", + " <td>* ABADA, s. m. c'est, dit-on, un animal qui\\ns...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>44</th>\n", + " <td>1</td>\n", + " <td>46</td>\n", + " <td>ABADDON</td>\n", + " <td>unclassified</td>\n", + " <td>unclassified</td>\n", + " <td>Diderot</td>\n", + " <td>v1-28-0</td>\n", + " <td></td>\n", + " <td></td>\n", + " <td>* ABADDON, s. m. vient d'abad, perdre. C'est\\n...</td>\n", + " <td>* ABADDON, s. m. vient d'abad, perdre. C'est\\n...</td>\n", + " <td>* ABADDON, s. m. vient d'abad, perdre. 
C'est\\n...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>45</th>\n", + " <td>1</td>\n", + " <td>47</td>\n", + " <td>ABADIR ou ABADDIR</td>\n", + " <td>unclassified</td>\n", + " <td>unclassified</td>\n", + " <td>Mallet</td>\n", + " <td>v1-29-0</td>\n", + " <td></td>\n", + " <td></td>\n", + " <td>ABADIR ou ABADDIR, s. m. mot composé \\nde deux...</td>\n", + " <td>ABADIR ou ABADDIR, s. m. mot composé \\nde deux...</td>\n", + " <td>ABADIR ou ABADDIR, s. m. mot composé \\nde deux...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>46</th>\n", + " <td>1</td>\n", + " <td>48</td>\n", + " <td>ABACUZ</td>\n", + " <td>unclassified</td>\n", + " <td>unclassified</td>\n", + " <td>Diderot & Toussaint</td>\n", + " <td>v1-30-0</td>\n", + " <td></td>\n", + " <td></td>\n", + " <td>* ABACUZ, s. m. pris adject. ce sont les biens...</td>\n", + " <td>* ABACUZ, s. m. pris adject. ce sont les biens...</td>\n", + " <td>* ABACUZ, s. m. pris adject. ce sont les biens...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>47</th>\n", + " <td>1</td>\n", + " <td>49</td>\n", + " <td>ABAJOUR</td>\n", + " <td>unclassified</td>\n", + " <td>unclassified</td>\n", + " <td>Blondel</td>\n", + " <td>v1-31-0</td>\n", + " <td>architecture</td>\n", + " <td>Architecture</td>\n", + " <td>ABAJOUR, s. m. nom que les Architectes donnent...</td>\n", + " <td>ABAJOUR, s. m. nom que les Architectes donnent...</td>\n", + " <td>ABAJOUR, s. m. nom que les Architectes donnent...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>48</th>\n", + " <td>1</td>\n", + " <td>50</td>\n", + " <td>ABAISIR</td>\n", + " <td>unclassified</td>\n", + " <td>unclassified</td>\n", + " <td>Malouin</td>\n", + " <td>v1-32-0</td>\n", + " <td>alchimie</td>\n", + " <td>Chimie</td>\n", + " <td>ABAISIR, s.m. Quelques Alchimistes se sont ser...</td>\n", + " <td>ABAISIR, s.m. Quelques Alchimistes se sont ser...</td>\n", + " <td>ABAISIR, s.m. 
Quelques Alchimistes se sont ser...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>49</th>\n", + " <td>1</td>\n", + " <td>51</td>\n", + " <td>ABAISSE</td>\n", + " <td>unclassified</td>\n", + " <td>unclassified</td>\n", + " <td>Diderot</td>\n", + " <td>v1-33-0</td>\n", + " <td></td>\n", + " <td></td>\n", + " <td>* ABAISSE, s. f. c'est le nom que les Pâtissie...</td>\n", + " <td>* ABAISSE, s. f. c'est le nom que les Pâtissie...</td>\n", + " <td>* ABAISSE, s. f. c'est le nom que les Pâtissie...</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " volume numero head normClass \\\n", + "0 1 1 Title Page unclassified \n", + "1 1 2 A MONSEIGNEUR LE COMTE D'ARGENSON unclassified \n", + "2 1 3 DISCOURS PRÉLIMINAIRE DES EDITEURS unclassified \n", + "3 1 5 A, a & a Grammaire \n", + "4 1 6 A unclassified \n", + "5 1 7 A unclassified \n", + "6 1 8 A unclassified \n", + "7 1 9 A, lettre symbolique unclassified \n", + "8 1 10 A, numismatique ou monétaire unclassified \n", + "9 1 11 A, lapidaire unclassified \n", + "10 1 12 A, lettre de suffrage unclassified \n", + "11 1 13 A, signe d'absolution unclassified \n", + "12 1 14 A cognitionibus unclassified \n", + "13 1 15 A curâ amicorum unclassified \n", + "14 1 16 A Ecrivains modernes \n", + "15 1 17 A Calendrier Julien \n", + "16 1 18 A. D. 
pending \n", + "17 1 19 A unclassified \n", + "18 1 20 A, signe des passions unclassified \n", + "19 1 21 A unclassified \n", + "20 1 22 A A A Chimie \n", + "21 1 23 A, Ä, ou Ä Ä Médecine \n", + "22 1 24 A unclassified \n", + "23 1 25 A pending \n", + "24 1 26 A unclassified \n", + "25 1 27 AA unclassified \n", + "26 1 28 AABAM unclassified \n", + "27 1 29 AACH ou ACH unclassified \n", + "28 1 30 AAHUS unclassified \n", + "29 1 31 AAM unclassified \n", + "30 1 32 AAR unclassified \n", + "31 1 33 Aar unclassified \n", + "32 1 34 AA ou AAS unclassified \n", + "33 1 35 AAS ou AASA unclassified \n", + "34 1 36 AB unclassified \n", + "35 1 37 AB unclassified \n", + "36 1 38 AB unclassified \n", + "37 1 39 ABA unclassified \n", + "38 1 40 ABACA unclassified \n", + "39 1 41 ABACH unclassified \n", + "40 1 42 ABACO unclassified \n", + "41 1 43 ABACOA unclassified \n", + "42 1 44 ABACOT unclassified \n", + "43 1 45 ABADA unclassified \n", + "44 1 46 ABADDON unclassified \n", + "45 1 47 ABADIR ou ABADDIR unclassified \n", + "46 1 48 ABACUZ unclassified \n", + "47 1 49 ABAJOUR unclassified \n", + "48 1 50 ABAISIR unclassified \n", + "49 1 51 ABAISSE unclassified \n", + "\n", + " classEDdA author \\\n", + "0 unclassified unsigned \n", + "1 unclassified Diderot & d'Alembert \n", + "2 unclassified d'Alembert \n", + "3 ordre Encyclopéd. Entend. Science de l'homme, ... 
Dumarsais5 \n", + "4 unclassified Dumarsais5 \n", + "5 unclassified Dumarsais \n", + "6 unclassified Mallet \n", + "7 unclassified Mallet \n", + "8 unclassified Mallet \n", + "9 unclassified Mallet \n", + "10 unclassified Mallet \n", + "11 unclassified Mallet5 \n", + "12 unclassified Diderot \n", + "13 unclassified Diderot \n", + "14 dans les Ecrivains modernes Mallet \n", + "15 dans le calendrier Julien Mallet \n", + "16 épistolaire Mallet \n", + "17 unclassified Diderot \n", + "18 unclassified Diderot \n", + "19 unclassified unsigned \n", + "20 Chimistes Malouin5 \n", + "21 Medecine Vandenesse \n", + "22 unclassified Mallet \n", + "23 caractere alphabétique Diderot \n", + "24 unclassified Diderot \n", + "25 unclassified Diderot \n", + "26 unclassified Malouin \n", + "27 unclassified Diderot \n", + "28 unclassified Diderot \n", + "29 unclassified Diderot \n", + "30 unclassified Diderot \n", + "31 unclassified Diderot \n", + "32 unclassified Diderot \n", + "33 unclassified Diderot \n", + "34 unclassified Mallet \n", + "35 unclassified Mallet \n", + "36 unclassified Mallet \n", + "37 unclassified Diderot \n", + "38 unclassified Diderot \n", + "39 unclassified Diderot \n", + "40 unclassified d'Alembert \n", + "41 unclassified Diderot \n", + "42 unclassified Diderot \n", + "43 unclassified Diderot \n", + "44 unclassified Diderot \n", + "45 unclassified Mallet \n", + "46 unclassified Diderot & Toussaint \n", + "47 unclassified Blondel \n", + "48 unclassified Malouin \n", + "49 unclassified Diderot \n", + "\n", + " id_enccre domaine_enccre ensemble_domaine_enccre \\\n", + "0 \n", + "1 \n", + "2 \n", + "3 v1-1-0 grammaire Grammaire \n", + "4 v1-1-1 grammaire Grammaire \n", + "5 v1-1-2 grammaire Grammaire \n", + "6 v1-1-3 \n", + "7 v1-1-4 \n", + "8 v1-1-5 numismatique Médailles \n", + "9 v1-1-6 inscriptions Histoire \n", + "10 v1-1-7 \n", + "11 v1-1-8 \n", + "12 v1-2-0 \n", + "13 v1-3-0 \n", + "14 v1-4-0 caractères Caractères \n", + "15 v1-4-1 calendrier Histoire \n", + 
"16 v1-4-2 \n", + "17 v1-4-3 logique Philosophie \n", + "18 v1-4-4 \n", + "19 v1-4-5 \n", + "20 v1-5-0 chimie Chimie \n", + "21 v1-6-0 médecine Médecine - Chirurgie \n", + "22 v1-7-0 commerce Commerce \n", + "23 v1-8-0 ecriture Caractères \n", + "24 v1-9-0 géographie Géographie \n", + "25 v1-10-0 géographie Géographie \n", + "26 v1-11-0 alchimie Chimie \n", + "27 v1-12-0 géographie Géographie \n", + "28 v1-13-0 géographie Géographie \n", + "29 v1-14-0 \n", + "30 v1-15-0 géographie Géographie \n", + "31 v1-15-1 géographie Géographie \n", + "32 v1-16-0 géographie Géographie \n", + "33 v1-17-0 géographie Géographie \n", + "34 v1-18-0 \n", + "35 v1-19-0 calendrier Histoire \n", + "36 v1-20-0 \n", + "37 v1-21-0 géographie Géographie \n", + "38 v1-22-0 \n", + "39 v1-23-0 géographie Géographie \n", + "40 v1-24-0 arithmétique Mathématiques \n", + "41 v1-25-0 géographie Géographie \n", + "42 v1-26-0 \n", + "43 v1-27-0 histoirenaturelle Histoire naturelle \n", + "44 v1-28-0 \n", + "45 v1-29-0 \n", + "46 v1-30-0 \n", + "47 v1-31-0 architecture Architecture \n", + "48 v1-32-0 alchimie Chimie \n", + "49 v1-33-0 \n", + "\n", + " content \\\n", + "0 ENCYCLOPÉDIE,\\nDICTIONNAIRE RAISONNÉ\\nDES SCIE... \n", + "1 A MONSEIGNEUR\\nLE COMTE D'ARGENSON,\\nMINISTRE\\... \n", + "2 DISCOURS PRÉLIMINAIRE\\nDES EDITEURS.\\nL'Encycl... \n", + "3 A, a & a s.m. (ordre Encyclopéd.\\nEntend. Scie... \n", + "4 A, mot, est 1. la troisieme personne du présen... \n", + "5 A, préposition vient du latin à , à dextris, à ... \n", + "6 A, étoit une lettre numérale parmi les Anciens... \n", + "7 A, lettre symbolique, étoit un hiéroglyphe che... \n", + "8 A, numismatique ou monétaire, sur le revers de... \n", + "9 A, lapidaire, dans les anciennes inscriptions ... \n", + "10 A, lettre de suffrage ; les Romains se servoie... \n", + "11 A, signe d'absolution, chez les Romains dans l... \n", + "12 * A cognitionibus. Scorpus fameux Agitateur du... \n", + "13 * A curâ amicorum. On lit dans quelques inscri... 
\n", + "14 A, dans les Ecrivains modernes, veut dire auss... \n", + "15 A, dans le calendrier Julien, est aussi la pre... \n", + "16 A. D. épistolaire ; ces deux caracteres dans l... \n", + "17 * A désigne une proposition générale affirmati... \n", + "18 * A, signe des passions ; selon certains Auteu... \n", + "19 A, est aussi une abbréviation dont on se sert ... \n", + "20 A A A, chez les Chimistes, signifie une amalga... \n", + "21 A, Ä, ou Ä Ä; on se sert de cette abbréviation... \n", + "22 A. Les Marchands Négocians, Banquiers, & Teneu... \n", + "23 * A, caractere alphabétique. Après avoir donné... \n", + "24 * A, s. petite riviere de France, qui a sa sou... \n", + "25 * AA, s. f. riviere de France, qui prend sa so... \n", + "26 AABAM, s. m. Quelques Alchimistes se sont serv... \n", + "27 * AACH ou ACH, s. f. petite ville d'Allemagne\\... \n", + "28 * AAHUS, s. petite ville d'Allemagne dans le c... \n", + "29 * AAM, s. mesure des Liquides, en usage à Amst... \n", + "30 * AAR, s. grande riviere qui a sa source proch... \n", + "31 * Aar, s. riviere d'Allemagne qui a sa source ... \n", + "32 * AA ou AAS, s. ou Fontaine des Arquebusades. ... \n", + "33 * AAS ou AASA, Fort de Norwege dans le Baillia... \n", + "34 AB, s. m. onzieme mois de l'année civile des H... \n", + "35 AB, s. m. en Langue Syriaque est le nom du der... \n", + "36 AB, s.m. en hébreu signifie pere ; d'où les Ch... \n", + "37 *ABA, s. ville de la Phocide, bâtie par les Ab... \n", + "38 * ABACA, s. Il ne paroît pas qu'on sache bien ... \n", + "39 * ABACH, s. petite ville d'Allemagne dans la b... \n", + "40 ABACO, s. m. Quelques anciens Auteurs se serve... \n", + "41 * ABACOA, s. Isle de l'Amérique septentrionale... \n", + "42 * ABACOT, s. m. nom de l'ancienne parure dè\\nt... \n", + "43 * ABADA, s. m. c'est, dit-on, un animal qui\\ns... \n", + "44 * ABADDON, s. m. vient d'abad, perdre. C'est\\n... \n", + "45 ABADIR ou ABADDIR, s. m. mot composé \\nde deux... \n", + "46 * ABACUZ, s. m. pris adject. 
ce sont les biens... \n", + "47 ABAJOUR, s. m. nom que les Architectes donnent... \n", + "48 ABAISIR, s.m. Quelques Alchimistes se sont ser... \n", + "49 * ABAISSE, s. f. c'est le nom que les Pâtissie... \n", + "\n", + " contentWithoutClass \\\n", + "0 ENCYCLOPÉDIE,\\nDICTIONNAIRE RAISONNÉ\\nDES SCIE... \n", + "1 A MONSEIGNEUR\\nLE COMTE D'ARGENSON,\\nMINISTRE\\... \n", + "2 DISCOURS PRÉLIMINAIRE\\nDES EDITEURS.\\nL'Encycl... \n", + "3 A, a & a s.m. (ordre Encyclopéd.\\nEntend. Scie... \n", + "4 A, mot, est 1. la troisieme personne du présen... \n", + "5 A, préposition vient du latin à , à dextris, à ... \n", + "6 A, étoit une lettre numérale parmi les Anciens... \n", + "7 A, lettre symbolique, étoit un hiéroglyphe che... \n", + "8 A, numismatique ou monétaire, sur le revers de... \n", + "9 A, lapidaire, dans les anciennes inscriptions ... \n", + "10 A, lettre de suffrage ; les Romains se servoie... \n", + "11 A, signe d'absolution, chez les Romains dans l... \n", + "12 * A cognitionibus. Scorpus fameux Agitateur du... \n", + "13 * A curâ amicorum. On lit dans quelques inscri... \n", + "14 A, , veut dire aussi\\nl'an, comme A. D. anno D... \n", + "15 A, , est aussi la premiere\\ndes sept lettres d... \n", + "16 A. D. ; ces deux caracteres dans les\\nLettres... \n", + "17 * A désigne une proposition générale affirmati... \n", + "18 * A, signe des passions ; selon certains Auteu... \n", + "19 A, est aussi une abbréviation dont on se sert ... \n", + "20 A A A, chez les , signifie une amalgame,\\nou l... \n", + "21 A, Ä, ou Ä Ä; on se sert de cette abbréviation... \n", + "22 A. Les Marchands Négocians, Banquiers, & Teneu... \n", + "23 * A, . Après avoir donné les\\ndifférentes sign... \n", + "24 * A, s. petite riviere de France, qui a sa sou... \n", + "25 * AA, s. f. riviere de France, qui prend sa so... \n", + "26 AABAM, s. m. Quelques Alchimistes se sont serv... \n", + "27 * AACH ou ACH, s. f. petite ville d'Allemagne\\... \n", + "28 * AAHUS, s. 
petite ville d'Allemagne dans le c... \n", + "29 * AAM, s. mesure des Liquides, en usage à Amst... \n", + "30 * AAR, s. grande riviere qui a sa source proch... \n", + "31 * Aar, s. riviere d'Allemagne qui a sa source ... \n", + "32 * AA ou AAS, s. ou Fontaine des Arquebusades. ... \n", + "33 * AAS ou AASA, Fort de Norwege dans le Baillia... \n", + "34 AB, s. m. onzieme mois de l'année civile des H... \n", + "35 AB, s. m. en Langue Syriaque est le nom du der... \n", + "36 AB, s.m. en hébreu signifie pere ; d'où les Ch... \n", + "37 *ABA, s. ville de la Phocide, bâtie par les Ab... \n", + "38 * ABACA, s. Il ne paroît pas qu'on sache bien ... \n", + "39 * ABACH, s. petite ville d'Allemagne dans la b... \n", + "40 ABACO, s. m. Quelques anciens Auteurs se serve... \n", + "41 * ABACOA, s. Isle de l'Amérique septentrionale... \n", + "42 * ABACOT, s. m. nom de l'ancienne parure dè\\nt... \n", + "43 * ABADA, s. m. c'est, dit-on, un animal qui\\ns... \n", + "44 * ABADDON, s. m. vient d'abad, perdre. C'est\\n... \n", + "45 ABADIR ou ABADDIR, s. m. mot composé \\nde deux... \n", + "46 * ABACUZ, s. m. pris adject. ce sont les biens... \n", + "47 ABAJOUR, s. m. nom que les Architectes donnent... \n", + "48 ABAISIR, s.m. Quelques Alchimistes se sont ser... \n", + "49 * ABAISSE, s. f. c'est le nom que les Pâtissie... \n", + "\n", + " firstParagraph \n", + "0 ENCYCLOPÉDIE,\\nDICTIONNAIRE RAISONNÉ\\nDES SCIE... \n", + "1 A MONSEIGNEUR\\nLE COMTE D'ARGENSON,\\nMINISTRE\\... \n", + "2 DISCOURS PRÉLIMINAIRE\\nDES EDITEURS.\\nL'Encycl... \n", + "3 A, a & a s.m. (ordre Encyclopéd.\\nEntend. Scie... \n", + "4 A, mot, est 1. la troisieme personne du présen... \n", + "5 A, préposition vient du latin à , à dextris, à ... \n", + "6 A, étoit une lettre numérale parmi les Anciens... \n", + "7 A, lettre symbolique, étoit un hiéroglyphe che... \n", + "8 A, numismatique ou monétaire, sur le revers de... \n", + "9 A, lapidaire, dans les anciennes inscriptions ... 
\n", + "10 A, lettre de suffrage ; les Romains se servoie... \n", + "11 A, signe d'absolution, chez les Romains dans l... \n", + "12 * A cognitionibus. Scorpus fameux Agitateur du... \n", + "13 * A curâ amicorum. On lit dans quelques inscri... \n", + "14 A, , veut dire aussi\\nl'an, comme A. D. anno D... \n", + "15 A, , est aussi la premiere\\ndes sept lettres d... \n", + "16 A. D. ; ces deux caracteres dans les\\nLettres... \n", + "17 * A désigne une proposition générale affirmati... \n", + "18 * A, signe des passions ; selon certains Auteu... \n", + "19 A, est aussi une abbréviation dont on se sert ... \n", + "20 A A A, chez les , signifie une amalgame,\\nou l... \n", + "21 A, Ä, ou Ä Ä; on se sert de cette abbréviation... \n", + "22 A. Les Marchands Négocians, Banquiers, & Teneu... \n", + "23 * A, . Après avoir donné les\\ndifférentes sign... \n", + "24 * A, s. petite riviere de France, qui a sa sou... \n", + "25 * AA, s. f. riviere de France, qui prend sa so... \n", + "26 AABAM, s. m. Quelques Alchimistes se sont serv... \n", + "27 * AACH ou ACH, s. f. petite ville d'Allemagne\\... \n", + "28 * AAHUS, s. petite ville d'Allemagne dans le c... \n", + "29 * AAM, s. mesure des Liquides, en usage à Amst... \n", + "30 * AAR, s. grande riviere qui a sa source proch... \n", + "31 * Aar, s. riviere d'Allemagne qui a sa source ... \n", + "32 * AA ou AAS, s. ou Fontaine des Arquebusades. ... \n", + "33 * AAS ou AASA, Fort de Norwege dans le Baillia... \n", + "34 AB, s. m. onzieme mois de l'année civile des H... \n", + "35 AB, s. m. en Langue Syriaque est le nom du der... \n", + "36 AB, s.m. en hébreu signifie pere ; d'où les Ch... \n", + "37 *ABA, s. ville de la Phocide, bâtie par les Ab... \n", + "38 * ABACA, s. Il ne paroît pas qu'on sache bien ... \n", + "39 * ABACH, s. petite ville d'Allemagne dans la b... \n", + "40 ABACO, s. m. Quelques anciens Auteurs se serve... \n", + "41 * ABACOA, s. Isle de l'Amérique septentrionale... \n", + "42 * ABACOT, s. m. 
nom de l'ancienne parure dè\\nt... \n", + "43 * ABADA, s. m. c'est, dit-on, un animal qui\\ns... \n", + "44 * ABADDON, s. m. vient d'abad, perdre. C'est\\n... \n", + "45 ABADIR ou ABADDIR, s. m. mot composé \\nde deux... \n", + "46 * ABACUZ, s. m. pris adject. ce sont les biens... \n", + "47 ABAJOUR, s. m. nom que les Architectes donnent... \n", + "48 ABAISIR, s.m. Quelques Alchimistes se sont ser... \n", + "49 * ABAISSE, s. f. c'est le nom que les Pâtissie... " + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "\n", + "\n", + "df['id_enccre'], df['domaine_enccre'], df['ensemble_domaine_enccre'], df['content'], df['contentWithoutClass'], df['firstParagraph'] = df.apply(lambda row: getDomaineEnccre(row.volume, row.numero, row.classEDdA), axis=1).T.values\n", + "\n", + "#df['id_enccre'], df['domaine_enccre'], df['ensemble_domaine_enccre'], df['content'], df['contentWithoutClass'], df['firstParagraph'] = getDomaineEnccre(df.volume, df.numero, df.classEDdA)\n", + "df.head(50)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "daily-office", + "metadata": {}, + "outputs": [], + "source": [ + "# nombre d'articles non classés par ENCCRE (à partir de la correspondance automatique)\n", + "len(df.loc[(df['domaine_enccre']==\"\")])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "suited-methodology", + "metadata": {}, + "outputs": [], + "source": [ + "# nombre d'article non classés par ARTFL\n", + "len(df.loc[(df['normClass']==\"unclassified\")])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "special-investigation", + "metadata": {}, + "outputs": [], + "source": [ + "# nombre de classe ENCCRE\n", + "\n", + "classes_enccre = df.groupby(['domaine_enccre']).count()\n", + "classes_enccre.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 94, + "id": "legendary-independence", + "metadata": {}, + "outputs": [ + { + "data": { + 
"text/plain": [ + "312" + ] + }, + "execution_count": 94, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(classes_enccre)" + ] + }, + { + "cell_type": "code", + "execution_count": 96, + "id": "theoretical-marathon", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 37, + "id": "lonely-efficiency", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>volume</th>\n", + " <th>numero</th>\n", + " <th>head</th>\n", + " <th>normClass</th>\n", + " <th>classEDdA</th>\n", + " <th>author</th>\n", + " <th>id_enccre</th>\n", + " <th>domaine_enccre</th>\n", + " <th>ensemble_domaine_enccre</th>\n", + " <th>content</th>\n", + " <th>contentWithoutClass</th>\n", + " <th>firstParagraph</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>Title Page</td>\n", + " <td>unclassified</td>\n", + " <td>unclassified</td>\n", + " <td>unsigned</td>\n", + " <td></td>\n", + " <td></td>\n", + " <td></td>\n", + " <td>\\nM. DCC. L I.\\nAVEC APPROBATION ET PRIVILEGE ...</td>\n", + " <td>\\nM. DCC. L I.\\nAVEC APPROBATION ET PRIVILEGE ...</td>\n", + " <td>\\nM. DCC. 
L I.\\nAVEC APPROBATION ET PRIVILEGE ...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>1</td>\n", + " <td>2</td>\n", + " <td>A MONSEIGNEUR LE COMTE D'ARGENSON</td>\n", + " <td>unclassified</td>\n", + " <td>unclassified</td>\n", + " <td>Diderot & d'Alembert</td>\n", + " <td></td>\n", + " <td></td>\n", + " <td></td>\n", + " <td>\\nDIDEROT & D'ALEMBERT.\\n</td>\n", + " <td>\\nDIDEROT & D'ALEMBERT.\\n</td>\n", + " <td>\\nDIDEROT & D'ALEMBERT.\\n</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>1</td>\n", + " <td>3</td>\n", + " <td>DISCOURS PRÉLIMINAIRE DES EDITEURS</td>\n", + " <td>unclassified</td>\n", + " <td>unclassified</td>\n", + " <td>d'Alembert</td>\n", + " <td></td>\n", + " <td></td>\n", + " <td></td>\n", + " <td>\\nVoilà dans son ordre naturel, & sans démembr...</td>\n", + " <td>\\nVoilà dans son ordre naturel, & sans démembr...</td>\n", + " <td>\\nVoilà dans son ordre naturel, & sans démembr...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>1</td>\n", + " <td>5</td>\n", + " <td>A, a & a</td>\n", + " <td>Grammaire</td>\n", + " <td>ordre Encyclopéd. Entend. Science de l'homme, ...</td>\n", + " <td>Dumarsais5</td>\n", + " <td>v1-1-0</td>\n", + " <td>grammaire;</td>\n", + " <td>Grammaire</td>\n", + " <td>\\n3. On dit de quelqu'un qui n'a rien fait, ri...</td>\n", + " <td>\\n3. On dit de quelqu'un qui n'a rien fait, ri...</td>\n", + " <td>\\n3. On dit de quelqu'un qui n'a rien fait, ri...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>1</td>\n", + " <td>6</td>\n", + " <td>A</td>\n", + " <td>unclassified</td>\n", + " <td>unclassified</td>\n", + " <td>Dumarsais5</td>\n", + " <td>v1-1-1</td>\n", + " <td>grammaire;</td>\n", + " <td>Grammaire</td>\n", + " <td>\\n2. A, comme mot, est aussi une préposition, ...</td>\n", + " <td>\\n2. A, comme mot, est aussi une préposition, ...</td>\n", + " <td>\\n2. 
A, comme mot, est aussi une préposition, ...</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " volume numero head normClass \\\n", + "0 1 1 Title Page unclassified \n", + "1 1 2 A MONSEIGNEUR LE COMTE D'ARGENSON unclassified \n", + "2 1 3 DISCOURS PRÉLIMINAIRE DES EDITEURS unclassified \n", + "3 1 5 A, a & a Grammaire \n", + "4 1 6 A unclassified \n", + "\n", + " classEDdA author \\\n", + "0 unclassified unsigned \n", + "1 unclassified Diderot & d'Alembert \n", + "2 unclassified d'Alembert \n", + "3 ordre Encyclopéd. Entend. Science de l'homme, ... Dumarsais5 \n", + "4 unclassified Dumarsais5 \n", + "\n", + " id_enccre domaine_enccre ensemble_domaine_enccre \\\n", + "0 \n", + "1 \n", + "2 \n", + "3 v1-1-0 grammaire; Grammaire \n", + "4 v1-1-1 grammaire; Grammaire \n", + "\n", + " content \\\n", + "0 \\nM. DCC. L I.\\nAVEC APPROBATION ET PRIVILEGE ... \n", + "1 \\nDIDEROT & D'ALEMBERT.\\n \n", + "2 \\nVoilà dans son ordre naturel, & sans démembr... \n", + "3 \\n3. On dit de quelqu'un qui n'a rien fait, ri... \n", + "4 \\n2. A, comme mot, est aussi une préposition, ... \n", + "\n", + " contentWithoutClass \\\n", + "0 \\nM. DCC. L I.\\nAVEC APPROBATION ET PRIVILEGE ... \n", + "1 \\nDIDEROT & D'ALEMBERT.\\n \n", + "2 \\nVoilà dans son ordre naturel, & sans démembr... \n", + "3 \\n3. On dit de quelqu'un qui n'a rien fait, ri... \n", + "4 \\n2. A, comme mot, est aussi une préposition, ... \n", + "\n", + " firstParagraph \n", + "0 \\nM. DCC. L I.\\nAVEC APPROBATION ET PRIVILEGE ... \n", + "1 \\nDIDEROT & D'ALEMBERT.\\n \n", + "2 \\nVoilà dans son ordre naturel, & sans démembr... \n", + "3 \\n3. On dit de quelqu'un qui n'a rien fait, ri... \n", + "4 \\n2. A, comme mot, est aussi une préposition, ... 
" + ] + }, + "execution_count": 37, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "skilled-channel", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "least-practice", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "circular-service", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "possible-sleeping", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "streaming-savings", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 146, + "id": "fourth-involvement", + "metadata": {}, + "outputs": [], + "source": [ + "# enregistrement du dataframe dans un fichier tsv\n", + "df.to_csv('EDdA_dataframe_withContent.tsv',sep='\\t',index=False) " + ] + }, + { + "cell_type": "code", + "execution_count": 152, + "id": "framed-sodium", + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.read_csv('EDdA_dataframe_withContent.tsv', sep='\\t') " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "comparable-envelope", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "tutorial-savannah", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "74190" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(df)" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "id": "minus-waterproof", + "metadata": {}, + "outputs": [], + "source": [ + "df.dropna(subset = ['content', 'contentWithoutClass', 'firstParagraph', 'ensemble_domaine_enccre', 'domaine_enccre', 'normClass'], inplace= True)\n" 
+ ] + }, + { + "cell_type": "code", + "execution_count": 155, + "id": "scenic-sugar", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "61673" + ] + }, + "execution_count": 155, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(df)" + ] + }, + { + "cell_type": "code", + "execution_count": 156, + "id": "unavailable-indiana", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>volume</th>\n", + " <th>numero</th>\n", + " <th>head</th>\n", + " <th>normClass</th>\n", + " <th>classEDdA</th>\n", + " <th>author</th>\n", + " <th>id_enccre</th>\n", + " <th>domaine_enccre</th>\n", + " <th>ensemble_domaine_enccre</th>\n", + " <th>content</th>\n", + " <th>contentWithoutClass</th>\n", + " <th>firstParagraph</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>1</td>\n", + " <td>5</td>\n", + " <td>A, a & a</td>\n", + " <td>Grammaire</td>\n", + " <td>ordre Encyclopéd. Entend. Science de l'homme, ...</td>\n", + " <td>Dumarsais5</td>\n", + " <td>v1-1-0</td>\n", + " <td>grammaire</td>\n", + " <td>Grammaire</td>\n", + " <td>\\n3. On dit de quelqu'un qui n'a rien fait, ri...</td>\n", + " <td>\\n3. On dit de quelqu'un qui n'a rien fait, ri...</td>\n", + " <td>\\n3. 
On dit de quelqu'un qui n'a rien fait, ri...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>1</td>\n", + " <td>6</td>\n", + " <td>A</td>\n", + " <td>unclassified</td>\n", + " <td>unclassified</td>\n", + " <td>Dumarsais5</td>\n", + " <td>v1-1-1</td>\n", + " <td>grammaire</td>\n", + " <td>Grammaire</td>\n", + " <td>\\n2. A, comme mot, est aussi une préposition, ...</td>\n", + " <td>\\n2. A, comme mot, est aussi une préposition, ...</td>\n", + " <td>\\n2. A, comme mot, est aussi une préposition, ...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5</th>\n", + " <td>1</td>\n", + " <td>7</td>\n", + " <td>A</td>\n", + " <td>unclassified</td>\n", + " <td>unclassified</td>\n", + " <td>Dumarsais</td>\n", + " <td>v1-1-2</td>\n", + " <td>grammaire</td>\n", + " <td>Grammaire</td>\n", + " <td>\\nEn terme de Grammaire, & sur-tout de Grammai...</td>\n", + " <td>\\nEn terme de Grammaire, & sur-tout de Grammai...</td>\n", + " <td>\\nEn terme de Grammaire, & sur-tout de Grammai...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>8</th>\n", + " <td>1</td>\n", + " <td>10</td>\n", + " <td>A, numismatique ou monétaire</td>\n", + " <td>unclassified</td>\n", + " <td>unclassified</td>\n", + " <td>Mallet</td>\n", + " <td>v1-1-5</td>\n", + " <td>numismatique</td>\n", + " <td>Médailles</td>\n", + " <td>\\nA, numismatique ou monétaire, sur le revers ...</td>\n", + " <td>\\nA, numismatique ou monétaire, sur le revers ...</td>\n", + " <td>\\nA, numismatique ou monétaire, sur le revers ...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>9</th>\n", + " <td>1</td>\n", + " <td>11</td>\n", + " <td>A, lapidaire</td>\n", + " <td>unclassified</td>\n", + " <td>unclassified</td>\n", + " <td>Mallet</td>\n", + " <td>v1-1-6</td>\n", + " <td>inscriptions</td>\n", + " <td>Histoire</td>\n", + " <td>\\nA, lapidaire, dans les anciennes inscription...</td>\n", + " <td>\\nA, lapidaire, dans les anciennes inscription...</td>\n", + " <td>\\nA, lapidaire, dans les anciennes inscription...</td>\n", + " </tr>\n", + " 
</tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " volume numero head normClass \\\n", + "3 1 5 A, a & a Grammaire \n", + "4 1 6 A unclassified \n", + "5 1 7 A unclassified \n", + "8 1 10 A, numismatique ou monétaire unclassified \n", + "9 1 11 A, lapidaire unclassified \n", + "\n", + " classEDdA author id_enccre \\\n", + "3 ordre Encyclopéd. Entend. Science de l'homme, ... Dumarsais5 v1-1-0 \n", + "4 unclassified Dumarsais5 v1-1-1 \n", + "5 unclassified Dumarsais v1-1-2 \n", + "8 unclassified Mallet v1-1-5 \n", + "9 unclassified Mallet v1-1-6 \n", + "\n", + " domaine_enccre ensemble_domaine_enccre \\\n", + "3 grammaire Grammaire \n", + "4 grammaire Grammaire \n", + "5 grammaire Grammaire \n", + "8 numismatique Médailles \n", + "9 inscriptions Histoire \n", + "\n", + " content \\\n", + "3 \\n3. On dit de quelqu'un qui n'a rien fait, ri... \n", + "4 \\n2. A, comme mot, est aussi une préposition, ... \n", + "5 \\nEn terme de Grammaire, & sur-tout de Grammai... \n", + "8 \\nA, numismatique ou monétaire, sur le revers ... \n", + "9 \\nA, lapidaire, dans les anciennes inscription... \n", + "\n", + " contentWithoutClass \\\n", + "3 \\n3. On dit de quelqu'un qui n'a rien fait, ri... \n", + "4 \\n2. A, comme mot, est aussi une préposition, ... \n", + "5 \\nEn terme de Grammaire, & sur-tout de Grammai... \n", + "8 \\nA, numismatique ou monétaire, sur le revers ... \n", + "9 \\nA, lapidaire, dans les anciennes inscription... \n", + "\n", + " firstParagraph \n", + "3 \\n3. On dit de quelqu'un qui n'a rien fait, ri... \n", + "4 \\n2. A, comme mot, est aussi une préposition, ... \n", + "5 \\nEn terme de Grammaire, & sur-tout de Grammai... \n", + "8 \\nA, numismatique ou monétaire, sur le revers ... \n", + "9 \\nA, lapidaire, dans les anciennes inscription... 
" + ] + }, + "execution_count": 156, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "ahead-pendant", + "metadata": {}, + "outputs": [], + "source": [ + "def addNbWord(content):\n", + " return len(content.split(' '))" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "hearing-backup", + "metadata": {}, + "outputs": [], + "source": [ + "df['nb_word'] = df.apply(lambda row: addNbWord(row.content), axis=1).T.values\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "suffering-athletics", + "metadata": {}, + "outputs": [], + "source": [ + "df.loc[(df['nb_word']>=15)]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "needed-behavior", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "mature-norfolk", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "green-afternoon", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "suffering-puppy", + "metadata": {}, + "outputs": [], + "source": [ + "df_unclassified = df.loc[(df['normClass']==\"unclassified\")]\n", + "df_classified = df.loc[(df['normClass']!=\"unclassified\")]" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "disturbed-constitution", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "12685\n", + "61505\n" + ] + } + ], + "source": [ + "print(len(df_unclassified))\n", + "print(len(df_classified))" + ] + }, + { + "cell_type": "code", + "execution_count": 159, + "id": "fatty-bouquet", + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 163, + "id": "pharmaceutical-presence", + "metadata": {}, + "outputs": [], + 
"source": [] + }, + { + "cell_type": "code", + "execution_count": 26, + "id": "seasonal-suspect", + "metadata": {}, + "outputs": [], + "source": [ + "# enregistrement du dataframe dans un fichier tsv\n", + "df.to_csv('EDdA_dataframe_withContent.tsv',sep='\\t',index=False) " + ] + }, + { + "cell_type": "code", + "execution_count": 182, + "id": "opposed-binding", + "metadata": {}, + "outputs": [], + "source": [ + "######\n", + "df = pd.read_csv('EDdA_dataframe_withContent.tsv', sep='\\t') " + ] + }, + { + "cell_type": "code", + "execution_count": 185, + "id": "banner-beijing", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>volume</th>\n", + " <th>numero</th>\n", + " <th>head</th>\n", + " <th>normClass</th>\n", + " <th>classEDdA</th>\n", + " <th>author</th>\n", + " <th>id_enccre</th>\n", + " <th>domaine_enccre</th>\n", + " <th>ensemble_domaine_enccre</th>\n", + " <th>content</th>\n", + " <th>contentWithoutClass</th>\n", + " <th>firstParagraph</th>\n", + " <th>nb_word</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>1</td>\n", + " <td>5</td>\n", + " <td>A, a & a</td>\n", + " <td>Grammaire</td>\n", + " <td>ordre Encyclopéd. Entend. Science de l'homme, ...</td>\n", + " <td>Dumarsais5</td>\n", + " <td>v1-1-0</td>\n", + " <td>grammaire</td>\n", + " <td>Grammaire</td>\n", + " <td>\\n3. On dit de quelqu'un qui n'a rien fait, ri...</td>\n", + " <td>\\n3. On dit de quelqu'un qui n'a rien fait, ri...</td>\n", + " <td>\\n3. 
On dit de quelqu'un qui n'a rien fait, ri...</td>\n", + " <td>38</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>1</td>\n", + " <td>6</td>\n", + " <td>A</td>\n", + " <td>unclassified</td>\n", + " <td>unclassified</td>\n", + " <td>Dumarsais5</td>\n", + " <td>v1-1-1</td>\n", + " <td>grammaire</td>\n", + " <td>Grammaire</td>\n", + " <td>\\n2. A, comme mot, est aussi une préposition, ...</td>\n", + " <td>\\n2. A, comme mot, est aussi une préposition, ...</td>\n", + " <td>\\n2. A, comme mot, est aussi une préposition, ...</td>\n", + " <td>18</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>1</td>\n", + " <td>7</td>\n", + " <td>A</td>\n", + " <td>unclassified</td>\n", + " <td>unclassified</td>\n", + " <td>Dumarsais</td>\n", + " <td>v1-1-2</td>\n", + " <td>grammaire</td>\n", + " <td>Grammaire</td>\n", + " <td>\\nEn terme de Grammaire, & sur-tout de Grammai...</td>\n", + " <td>\\nEn terme de Grammaire, & sur-tout de Grammai...</td>\n", + " <td>\\nEn terme de Grammaire, & sur-tout de Grammai...</td>\n", + " <td>24</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>1</td>\n", + " <td>10</td>\n", + " <td>A, numismatique ou monétaire</td>\n", + " <td>unclassified</td>\n", + " <td>unclassified</td>\n", + " <td>Mallet</td>\n", + " <td>v1-1-5</td>\n", + " <td>numismatique</td>\n", + " <td>Médailles</td>\n", + " <td>\\nA, numismatique ou monétaire, sur le revers ...</td>\n", + " <td>\\nA, numismatique ou monétaire, sur le revers ...</td>\n", + " <td>\\nA, numismatique ou monétaire, sur le revers ...</td>\n", + " <td>112</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>1</td>\n", + " <td>11</td>\n", + " <td>A, lapidaire</td>\n", + " <td>unclassified</td>\n", + " <td>unclassified</td>\n", + " <td>Mallet</td>\n", + " <td>v1-1-6</td>\n", + " <td>inscriptions</td>\n", + " <td>Histoire</td>\n", + " <td>\\nA, lapidaire, dans les anciennes inscription...</td>\n", + " <td>\\nA, lapidaire, dans les anciennes inscription...</td>\n", + " 
<td>\\nA, lapidaire, dans les anciennes inscription...</td>\n", + " <td>80</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " volume numero head normClass \\\n", + "0 1 5 A, a & a Grammaire \n", + "1 1 6 A unclassified \n", + "2 1 7 A unclassified \n", + "3 1 10 A, numismatique ou monétaire unclassified \n", + "4 1 11 A, lapidaire unclassified \n", + "\n", + " classEDdA author id_enccre \\\n", + "0 ordre Encyclopéd. Entend. Science de l'homme, ... Dumarsais5 v1-1-0 \n", + "1 unclassified Dumarsais5 v1-1-1 \n", + "2 unclassified Dumarsais v1-1-2 \n", + "3 unclassified Mallet v1-1-5 \n", + "4 unclassified Mallet v1-1-6 \n", + "\n", + " domaine_enccre ensemble_domaine_enccre \\\n", + "0 grammaire Grammaire \n", + "1 grammaire Grammaire \n", + "2 grammaire Grammaire \n", + "3 numismatique Médailles \n", + "4 inscriptions Histoire \n", + "\n", + " content \\\n", + "0 \\n3. On dit de quelqu'un qui n'a rien fait, ri... \n", + "1 \\n2. A, comme mot, est aussi une préposition, ... \n", + "2 \\nEn terme de Grammaire, & sur-tout de Grammai... \n", + "3 \\nA, numismatique ou monétaire, sur le revers ... \n", + "4 \\nA, lapidaire, dans les anciennes inscription... \n", + "\n", + " contentWithoutClass \\\n", + "0 \\n3. On dit de quelqu'un qui n'a rien fait, ri... \n", + "1 \\n2. A, comme mot, est aussi une préposition, ... \n", + "2 \\nEn terme de Grammaire, & sur-tout de Grammai... \n", + "3 \\nA, numismatique ou monétaire, sur le revers ... \n", + "4 \\nA, lapidaire, dans les anciennes inscription... \n", + "\n", + " firstParagraph nb_word \n", + "0 \\n3. On dit de quelqu'un qui n'a rien fait, ri... 38 \n", + "1 \\n2. A, comme mot, est aussi une préposition, ... 18 \n", + "2 \\nEn terme de Grammaire, & sur-tout de Grammai... 24 \n", + "3 \\nA, numismatique ou monétaire, sur le revers ... 112 \n", + "4 \\nA, lapidaire, dans les anciennes inscription... 
80 " + ] + }, + "execution_count": 185, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "innocent-stability", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 183, + "id": "classical-receipt", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "7837" + ] + }, + "execution_count": 183, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(df.loc[(df['nb_word']<=15)])" + ] + }, + { + "cell_type": "code", + "execution_count": 184, + "id": "featured-tennis", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "1695" + ] + }, + "execution_count": 184, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(df.loc[(df['nb_word']<=15) & (df['ensemble_domaine_enccre']==\"Géographie\")])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "neither-idaho", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 175, + "id": "expanded-tunnel", + "metadata": {}, + "outputs": [], + "source": [ + "df_old = pd.read_csv('EDdA_dataframe_withContent_old.tsv', sep='\\t') " + ] + }, + { + "cell_type": "code", + "execution_count": 176, + "id": "valid-manor", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>volume</th>\n", + " <th>numero</th>\n", + " <th>head</th>\n", + " <th>author</th>\n", + " <th>normClass_artfl</th>\n", + " 
<th>id_enccre</th>\n", + " <th>domaine_enccre</th>\n", + " <th>ensemble_domaine_enccre</th>\n", + " <th>content</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>Title Page</td>\n", + " <td>unsigned</td>\n", + " <td>unclassified</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>\\nM. DCC. L I.\\nAVEC APPROBATION ET PRIVILEGE ...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>1</td>\n", + " <td>2</td>\n", + " <td>A MONSEIGNEUR LE COMTE D'ARGENSON</td>\n", + " <td>Diderot & d'Alembert</td>\n", + " <td>unclassified</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>\\nDIDEROT & D'ALEMBERT.\\n</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>1</td>\n", + " <td>3</td>\n", + " <td>DISCOURS PRÉLIMINAIRE DES EDITEURS</td>\n", + " <td>d'Alembert</td>\n", + " <td>unclassified</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>\\nVoilà dans son ordre naturel, & sans démembr...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>1</td>\n", + " <td>5</td>\n", + " <td>A, a & a</td>\n", + " <td>Dumarsais5</td>\n", + " <td>Grammaire</td>\n", + " <td>v1-1-0</td>\n", + " <td>grammaire;</td>\n", + " <td>Grammaire</td>\n", + " <td>\\n3. On dit de quelqu'un qui n'a rien fait, ri...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>1</td>\n", + " <td>6</td>\n", + " <td>A</td>\n", + " <td>Dumarsais5</td>\n", + " <td>unclassified</td>\n", + " <td>v1-1-1</td>\n", + " <td>grammaire;</td>\n", + " <td>Grammaire</td>\n", + " <td>\\n2. 
A, comme mot, est aussi une préposition, ...</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " volume numero head author \\\n", + "0 1 1 Title Page unsigned \n", + "1 1 2 A MONSEIGNEUR LE COMTE D'ARGENSON Diderot & d'Alembert \n", + "2 1 3 DISCOURS PRÉLIMINAIRE DES EDITEURS d'Alembert \n", + "3 1 5 A, a & a Dumarsais5 \n", + "4 1 6 A Dumarsais5 \n", + "\n", + " normClass_artfl id_enccre domaine_enccre ensemble_domaine_enccre \\\n", + "0 unclassified NaN NaN NaN \n", + "1 unclassified NaN NaN NaN \n", + "2 unclassified NaN NaN NaN \n", + "3 Grammaire v1-1-0 grammaire; Grammaire \n", + "4 unclassified v1-1-1 grammaire; Grammaire \n", + "\n", + " content \n", + "0 \\nM. DCC. L I.\\nAVEC APPROBATION ET PRIVILEGE ... \n", + "1 \\nDIDEROT & D'ALEMBERT.\\n \n", + "2 \\nVoilà dans son ordre naturel, & sans démembr... \n", + "3 \\n3. On dit de quelqu'un qui n'a rien fait, ri... \n", + "4 \\n2. A, comme mot, est aussi une préposition, ... " + ] + }, + "execution_count": 176, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_old.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 179, + "id": "focused-bulgarian", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "3654\n" + ] + } + ], + "source": [ + "def countDomaine(domaine):\n", + " return str(domaine).count(';')\n", + "\n", + "df_old['nb_domaine'] = df_old.apply(lambda row: countDomaine(row.ensemble_domaine_enccre), axis=1).T.values\n", + "\n", + "print(len(df_old.loc[(df_old['nb_domaine']>0)]))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "informative-chess", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "covered-spine", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 30, + "id": "endless-cathedral", + "metadata": {}, + "outputs": [ + { + 
"data": { + "text/plain": [ + "(66056, 13)" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "corrected-batman", + "metadata": {}, + "outputs": [], + "source": [ + "df = df.loc[(df['nb_word']>=15)]" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "id": "documentary-prince", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(66056, 13)" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "id": "opened-november", + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.model_selection import train_test_split\n", + "\n", + "\n", + "\n", + "train_x, validation_x, train_y, validation_y = train_test_split(df, df[\"ensemble_domaine_enccre\"], test_size=0.2, random_state=42, stratify = df[\"ensemble_domaine_enccre\"] )\n", + "\n", + "train, test_x, train_labels, test_y = train_test_split(train_x, train_x[\"ensemble_domaine_enccre\"], test_size=0.3, random_state=42, stratify = train_x[\"ensemble_domaine_enccre\"] )\n", + "\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "id": "noticed-evanescence", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(36990, 13)" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "train.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "id": "welcome-homework", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(13212, 13)" + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "validation_x.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "id": "returning-george", + "metadata": {}, + "outputs": [ + { + 
"data": { + "text/plain": [ + "(15854, 13)" + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "test_x.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "id": "thorough-senator", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>volume</th>\n", + " <th>numero</th>\n", + " <th>head</th>\n", + " <th>normClass</th>\n", + " <th>classEDdA</th>\n", + " <th>author</th>\n", + " <th>id_enccre</th>\n", + " <th>domaine_enccre</th>\n", + " <th>ensemble_domaine_enccre</th>\n", + " <th>content</th>\n", + " <th>contentWithoutClass</th>\n", + " <th>firstParagraph</th>\n", + " <th>nb_word</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>46001</th>\n", + " <td>11</td>\n", + " <td>2973</td>\n", + " <td>ORNIS</td>\n", + " <td>Commerce</td>\n", + " <td>Comm.</td>\n", + " <td>unsigned</td>\n", + " <td>v11-1767-0</td>\n", + " <td>commerce</td>\n", + " <td>Commerce</td>\n", + " <td>ORNIS, s. m. toile des Indes, (Comm.) sortes d...</td>\n", + " <td>ORNIS, s. m. toile des Indes, () sortes de\\nto...</td>\n", + " <td>ORNIS, s. m. toile des Indes, () sortes de\\nto...</td>\n", + " <td>45</td>\n", + " </tr>\n", + " <tr>\n", + " <th>15442</th>\n", + " <td>3</td>\n", + " <td>3525</td>\n", + " <td>COMPRENDRE</td>\n", + " <td>Philosophie</td>\n", + " <td>terme de Philosophie,</td>\n", + " <td>Diderot</td>\n", + " <td>v3-1722-0</td>\n", + " <td></td>\n", + " <td></td>\n", + " <td>* COMPRENDRE, v. act. 
terme de Philosophie,\\nc...</td>\n", + " <td>* COMPRENDRE, v. act. \\nc'est appercevoir la l...</td>\n", + " <td>* COMPRENDRE, v. act. \\nc'est appercevoir la l...</td>\n", + " <td>92</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2558</th>\n", + " <td>1</td>\n", + " <td>2560</td>\n", + " <td>ANCRE</td>\n", + " <td>Marine</td>\n", + " <td>Marine</td>\n", + " <td>d'Alembert & Diderot</td>\n", + " <td>v1-1865-0</td>\n", + " <td>marine</td>\n", + " <td>Marine</td>\n", + " <td>ANCRE, s. f. (Marine.) est un instrument de fe...</td>\n", + " <td>ANCRE, s. f. (.) est un instrument de fer\\nABC...</td>\n", + " <td>ANCRE, s. f. (.) est un instrument de fer\\nABC...</td>\n", + " <td>3327</td>\n", + " </tr>\n", + " <tr>\n", + " <th>70433</th>\n", + " <td>16</td>\n", + " <td>4241</td>\n", + " <td>VAKEBARO</td>\n", + " <td>Géographie moderne</td>\n", + " <td>Géog. mod.</td>\n", + " <td>unsigned</td>\n", + " <td>v16-2587-0</td>\n", + " <td>géographie</td>\n", + " <td>Géographie</td>\n", + " <td>VAKEBARO, (Géog. mod.) vallée du royaume\\nd'Es...</td>\n", + " <td>VAKEBARO, () vallée du royaume\\nd'Espagne dans...</td>\n", + " <td>VAKEBARO, () vallée du royaume\\nd'Espagne dans...</td>\n", + " <td>34</td>\n", + " </tr>\n", + " <tr>\n", + " <th>34129</th>\n", + " <td>8</td>\n", + " <td>3281</td>\n", + " <td>INSPECTEUR</td>\n", + " <td>Histoire ancienne</td>\n", + " <td>Hist. anc.</td>\n", + " <td>unsigned</td>\n", + " <td>v8-2533-0</td>\n", + " <td>histoire</td>\n", + " <td>Histoire</td>\n", + " <td>INSPECTEUR, s. m. inspector ; (Hist. anc.) cel...</td>\n", + " <td>INSPECTEUR, s. m. inspector ; () celui \\nà qui...</td>\n", + " <td>INSPECTEUR, s. m. inspector ; () celui \\nà qui...</td>\n", + " <td>102</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " volume numero head normClass classEDdA \\\n", + "46001 11 2973 ORNIS Commerce Comm. 
\n", + "15442 3 3525 COMPRENDRE Philosophie terme de Philosophie, \n", + "2558 1 2560 ANCRE Marine Marine \n", + "70433 16 4241 VAKEBARO Géographie moderne Géog. mod. \n", + "34129 8 3281 INSPECTEUR Histoire ancienne Hist. anc. \n", + "\n", + " author id_enccre domaine_enccre \\\n", + "46001 unsigned v11-1767-0 commerce \n", + "15442 Diderot v3-1722-0 \n", + "2558 d'Alembert & Diderot v1-1865-0 marine \n", + "70433 unsigned v16-2587-0 géographie \n", + "34129 unsigned v8-2533-0 histoire \n", + "\n", + " ensemble_domaine_enccre \\\n", + "46001 Commerce \n", + "15442 \n", + "2558 Marine \n", + "70433 Géographie \n", + "34129 Histoire \n", + "\n", + " content \\\n", + "46001 ORNIS, s. m. toile des Indes, (Comm.) sortes d... \n", + "15442 * COMPRENDRE, v. act. terme de Philosophie,\\nc... \n", + "2558 ANCRE, s. f. (Marine.) est un instrument de fe... \n", + "70433 VAKEBARO, (Géog. mod.) vallée du royaume\\nd'Es... \n", + "34129 INSPECTEUR, s. m. inspector ; (Hist. anc.) cel... \n", + "\n", + " contentWithoutClass \\\n", + "46001 ORNIS, s. m. toile des Indes, () sortes de\\nto... \n", + "15442 * COMPRENDRE, v. act. \\nc'est appercevoir la l... \n", + "2558 ANCRE, s. f. (.) est un instrument de fer\\nABC... \n", + "70433 VAKEBARO, () vallée du royaume\\nd'Espagne dans... \n", + "34129 INSPECTEUR, s. m. inspector ; () celui \\nà qui... \n", + "\n", + " firstParagraph nb_word \n", + "46001 ORNIS, s. m. toile des Indes, () sortes de\\nto... 45 \n", + "15442 * COMPRENDRE, v. act. \\nc'est appercevoir la l... 92 \n", + "2558 ANCRE, s. f. (.) est un instrument de fer\\nABC... 3327 \n", + "70433 VAKEBARO, () vallée du royaume\\nd'Espagne dans... 34 \n", + "34129 INSPECTEUR, s. m. inspector ; () celui \\nà qui... 
102 " + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "test_x.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "id": "hearing-moses", + "metadata": {}, + "outputs": [], + "source": [ + "train.to_csv('training_set.tsv',sep='\\t',index=False) \n", + "validation_x.to_csv('validation_set.tsv',sep='\\t',index=False) \n", + "test_x.to_csv('test_set.tsv',sep='\\t',index=False) " + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "id": "exterior-praise", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>ensemble_domaine_enccre</th>\n", + " <th>counts</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td></td>\n", + " <td>10053</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>Agriculture - Economie rustique</td>\n", + " <td>1077</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>Anatomie</td>\n", + " <td>1021</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>Antiquité</td>\n", + " <td>1336</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>Architecture</td>\n", + " <td>1357</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5</th>\n", + " <td>Arts et métiers</td>\n", + " <td>550</td>\n", + " </tr>\n", + " <tr>\n", + " <th>6</th>\n", + " <td>Beaux-arts</td>\n", + " <td>427</td>\n", + " </tr>\n", + " <tr>\n", + " <th>7</th>\n", + " <td>Belles-lettres - Poésie</td>\n", + " <td>1026</td>\n", + " </tr>\n", + " <tr>\n", + " <th>8</th>\n", + " <td>Blason</td>\n", + " 
<td>526</td>\n", + " </tr>\n", + " <tr>\n", + " <th>9</th>\n", + " <td>Caractères</td>\n", + " <td>113</td>\n", + " </tr>\n", + " <tr>\n", + " <th>10</th>\n", + " <td>Chasse</td>\n", + " <td>516</td>\n", + " </tr>\n", + " <tr>\n", + " <th>11</th>\n", + " <td>Chimie</td>\n", + " <td>478</td>\n", + " </tr>\n", + " <tr>\n", + " <th>12</th>\n", + " <td>Commerce</td>\n", + " <td>1823</td>\n", + " </tr>\n", + " <tr>\n", + " <th>13</th>\n", + " <td>Droit - Jurisprudence</td>\n", + " <td>6052</td>\n", + " </tr>\n", + " <tr>\n", + " <th>14</th>\n", + " <td>Economie domestique</td>\n", + " <td>131</td>\n", + " </tr>\n", + " <tr>\n", + " <th>15</th>\n", + " <td>Grammaire</td>\n", + " <td>2397</td>\n", + " </tr>\n", + " <tr>\n", + " <th>16</th>\n", + " <td>Géographie</td>\n", + " <td>11959</td>\n", + " </tr>\n", + " <tr>\n", + " <th>17</th>\n", + " <td>Histoire</td>\n", + " <td>3025</td>\n", + " </tr>\n", + " <tr>\n", + " <th>18</th>\n", + " <td>Histoire naturelle</td>\n", + " <td>4707</td>\n", + " </tr>\n", + " <tr>\n", + " <th>19</th>\n", + " <td>Jeu</td>\n", + " <td>279</td>\n", + " </tr>\n", + " <tr>\n", + " <th>20</th>\n", + " <td>Marine</td>\n", + " <td>1893</td>\n", + " </tr>\n", + " <tr>\n", + " <th>21</th>\n", + " <td>Maréchage - Manège</td>\n", + " <td>494</td>\n", + " </tr>\n", + " <tr>\n", + " <th>22</th>\n", + " <td>Mathématiques</td>\n", + " <td>681</td>\n", + " </tr>\n", + " <tr>\n", + " <th>23</th>\n", + " <td>Mesure</td>\n", + " <td>179</td>\n", + " </tr>\n", + " <tr>\n", + " <th>24</th>\n", + " <td>Militaire (Art) - Guerre - Arme</td>\n", + " <td>1265</td>\n", + " </tr>\n", + " <tr>\n", + " <th>25</th>\n", + " <td>Minéralogie</td>\n", + " <td>109</td>\n", + " </tr>\n", + " <tr>\n", + " <th>26</th>\n", + " <td>Monnaie</td>\n", + " <td>309</td>\n", + " </tr>\n", + " <tr>\n", + " <th>27</th>\n", + " <td>Musique</td>\n", + " <td>681</td>\n", + " </tr>\n", + " <tr>\n", + " <th>28</th>\n", + " <td>Médailles</td>\n", + " <td>116</td>\n", + " </tr>\n", + " <tr>\n", + 
" <th>29</th>\n", + " <td>Médecine - Chirurgie</td>\n", + " <td>2227</td>\n", + " </tr>\n", + " <tr>\n", + " <th>30</th>\n", + " <td>Métiers</td>\n", + " <td>5083</td>\n", + " </tr>\n", + " <tr>\n", + " <th>31</th>\n", + " <td>Pharmacie</td>\n", + " <td>311</td>\n", + " </tr>\n", + " <tr>\n", + " <th>32</th>\n", + " <td>Philosophie</td>\n", + " <td>478</td>\n", + " </tr>\n", + " <tr>\n", + " <th>33</th>\n", + " <td>Physique - [Sciences physico-mathématiques]</td>\n", + " <td>1286</td>\n", + " </tr>\n", + " <tr>\n", + " <th>34</th>\n", + " <td>Politique</td>\n", + " <td>114</td>\n", + " </tr>\n", + " <tr>\n", + " <th>35</th>\n", + " <td>Pêche</td>\n", + " <td>199</td>\n", + " </tr>\n", + " <tr>\n", + " <th>36</th>\n", + " <td>Religion</td>\n", + " <td>1623</td>\n", + " </tr>\n", + " <tr>\n", + " <th>37</th>\n", + " <td>Spectacle</td>\n", + " <td>47</td>\n", + " </tr>\n", + " <tr>\n", + " <th>38</th>\n", + " <td>Superstition</td>\n", + " <td>108</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " ensemble_domaine_enccre counts\n", + "0 10053\n", + "1 Agriculture - Economie rustique 1077\n", + "2 Anatomie 1021\n", + "3 Antiquité 1336\n", + "4 Architecture 1357\n", + "5 Arts et métiers 550\n", + "6 Beaux-arts 427\n", + "7 Belles-lettres - Poésie 1026\n", + "8 Blason 526\n", + "9 Caractères 113\n", + "10 Chasse 516\n", + "11 Chimie 478\n", + "12 Commerce 1823\n", + "13 Droit - Jurisprudence 6052\n", + "14 Economie domestique 131\n", + "15 Grammaire 2397\n", + "16 Géographie 11959\n", + "17 Histoire 3025\n", + "18 Histoire naturelle 4707\n", + "19 Jeu 279\n", + "20 Marine 1893\n", + "21 Maréchage - Manège 494\n", + "22 Mathématiques 681\n", + "23 Mesure 179\n", + "24 Militaire (Art) - Guerre - Arme 1265\n", + "25 Minéralogie 109\n", + "26 Monnaie 309\n", + "27 Musique 681\n", + "28 Médailles 116\n", + "29 Médecine - Chirurgie 2227\n", + "30 Métiers 5083\n", + "31 Pharmacie 311\n", + "32 Philosophie 478\n", + "33 Physique - 
[Sciences physico-mathématiques] 1286\n", + "34 Politique 114\n", + "35 Pêche 199\n", + "36 Religion 1623\n", + "37 Spectacle 47\n", + "38 Superstition 108" + ] + }, + "execution_count": 37, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.groupby(['ensemble_domaine_enccre']).size().reset_index(name='counts')" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "id": "unable-agenda", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>ensemble_domaine_enccre</th>\n", + " <th>counts</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td></td>\n", + " <td>5629</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>Agriculture - Economie rustique</td>\n", + " <td>603</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>Anatomie</td>\n", + " <td>572</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>Antiquité</td>\n", + " <td>748</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>Architecture</td>\n", + " <td>760</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5</th>\n", + " <td>Arts et métiers</td>\n", + " <td>308</td>\n", + " </tr>\n", + " <tr>\n", + " <th>6</th>\n", + " <td>Beaux-arts</td>\n", + " <td>239</td>\n", + " </tr>\n", + " <tr>\n", + " <th>7</th>\n", + " <td>Belles-lettres - Poésie</td>\n", + " <td>575</td>\n", + " </tr>\n", + " <tr>\n", + " <th>8</th>\n", + " <td>Blason</td>\n", + " <td>295</td>\n", + " </tr>\n", + " <tr>\n", + " <th>9</th>\n", + " <td>Caractères</td>\n", + " <td>63</td>\n", + " </tr>\n", 
+ " <tr>\n", + " <th>10</th>\n", + " <td>Chasse</td>\n", + " <td>289</td>\n", + " </tr>\n", + " <tr>\n", + " <th>11</th>\n", + " <td>Chimie</td>\n", + " <td>267</td>\n", + " </tr>\n", + " <tr>\n", + " <th>12</th>\n", + " <td>Commerce</td>\n", + " <td>1021</td>\n", + " </tr>\n", + " <tr>\n", + " <th>13</th>\n", + " <td>Droit - Jurisprudence</td>\n", + " <td>3389</td>\n", + " </tr>\n", + " <tr>\n", + " <th>14</th>\n", + " <td>Economie domestique</td>\n", + " <td>74</td>\n", + " </tr>\n", + " <tr>\n", + " <th>15</th>\n", + " <td>Grammaire</td>\n", + " <td>1343</td>\n", + " </tr>\n", + " <tr>\n", + " <th>16</th>\n", + " <td>Géographie</td>\n", + " <td>6697</td>\n", + " </tr>\n", + " <tr>\n", + " <th>17</th>\n", + " <td>Histoire</td>\n", + " <td>1694</td>\n", + " </tr>\n", + " <tr>\n", + " <th>18</th>\n", + " <td>Histoire naturelle</td>\n", + " <td>2636</td>\n", + " </tr>\n", + " <tr>\n", + " <th>19</th>\n", + " <td>Jeu</td>\n", + " <td>156</td>\n", + " </tr>\n", + " <tr>\n", + " <th>20</th>\n", + " <td>Marine</td>\n", + " <td>1060</td>\n", + " </tr>\n", + " <tr>\n", + " <th>21</th>\n", + " <td>Maréchage - Manège</td>\n", + " <td>277</td>\n", + " </tr>\n", + " <tr>\n", + " <th>22</th>\n", + " <td>Mathématiques</td>\n", + " <td>381</td>\n", + " </tr>\n", + " <tr>\n", + " <th>23</th>\n", + " <td>Mesure</td>\n", + " <td>100</td>\n", + " </tr>\n", + " <tr>\n", + " <th>24</th>\n", + " <td>Militaire (Art) - Guerre - Arme</td>\n", + " <td>708</td>\n", + " </tr>\n", + " <tr>\n", + " <th>25</th>\n", + " <td>Minéralogie</td>\n", + " <td>61</td>\n", + " </tr>\n", + " <tr>\n", + " <th>26</th>\n", + " <td>Monnaie</td>\n", + " <td>173</td>\n", + " </tr>\n", + " <tr>\n", + " <th>27</th>\n", + " <td>Musique</td>\n", + " <td>382</td>\n", + " </tr>\n", + " <tr>\n", + " <th>28</th>\n", + " <td>Médailles</td>\n", + " <td>65</td>\n", + " </tr>\n", + " <tr>\n", + " <th>29</th>\n", + " <td>Médecine - Chirurgie</td>\n", + " <td>1247</td>\n", + " </tr>\n", + " <tr>\n", + " <th>30</th>\n", + " 
<td>Métiers</td>\n", + " <td>2846</td>\n", + " </tr>\n", + " <tr>\n", + " <th>31</th>\n", + " <td>Pharmacie</td>\n", + " <td>174</td>\n", + " </tr>\n", + " <tr>\n", + " <th>32</th>\n", + " <td>Philosophie</td>\n", + " <td>267</td>\n", + " </tr>\n", + " <tr>\n", + " <th>33</th>\n", + " <td>Physique - [Sciences physico-mathématiques]</td>\n", + " <td>720</td>\n", + " </tr>\n", + " <tr>\n", + " <th>34</th>\n", + " <td>Politique</td>\n", + " <td>64</td>\n", + " </tr>\n", + " <tr>\n", + " <th>35</th>\n", + " <td>Pêche</td>\n", + " <td>111</td>\n", + " </tr>\n", + " <tr>\n", + " <th>36</th>\n", + " <td>Religion</td>\n", + " <td>909</td>\n", + " </tr>\n", + " <tr>\n", + " <th>37</th>\n", + " <td>Spectacle</td>\n", + " <td>27</td>\n", + " </tr>\n", + " <tr>\n", + " <th>38</th>\n", + " <td>Superstition</td>\n", + " <td>60</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " ensemble_domaine_enccre counts\n", + "0 5629\n", + "1 Agriculture - Economie rustique 603\n", + "2 Anatomie 572\n", + "3 Antiquité 748\n", + "4 Architecture 760\n", + "5 Arts et métiers 308\n", + "6 Beaux-arts 239\n", + "7 Belles-lettres - Poésie 575\n", + "8 Blason 295\n", + "9 Caractères 63\n", + "10 Chasse 289\n", + "11 Chimie 267\n", + "12 Commerce 1021\n", + "13 Droit - Jurisprudence 3389\n", + "14 Economie domestique 74\n", + "15 Grammaire 1343\n", + "16 Géographie 6697\n", + "17 Histoire 1694\n", + "18 Histoire naturelle 2636\n", + "19 Jeu 156\n", + "20 Marine 1060\n", + "21 Maréchage - Manège 277\n", + "22 Mathématiques 381\n", + "23 Mesure 100\n", + "24 Militaire (Art) - Guerre - Arme 708\n", + "25 Minéralogie 61\n", + "26 Monnaie 173\n", + "27 Musique 382\n", + "28 Médailles 65\n", + "29 Médecine - Chirurgie 1247\n", + "30 Métiers 2846\n", + "31 Pharmacie 174\n", + "32 Philosophie 267\n", + "33 Physique - [Sciences physico-mathématiques] 720\n", + "34 Politique 64\n", + "35 Pêche 111\n", + "36 Religion 909\n", + "37 Spectacle 27\n", + "38 Superstition 
60" + ] + }, + "execution_count": 38, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "train.groupby(['ensemble_domaine_enccre']).size().reset_index(name='counts')" + ] + }, + { + "cell_type": "code", + "execution_count": 208, + "id": "potential-friday", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>ensemble_domaine_enccre</th>\n", + " <th>counts</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>Agriculture - Economie rustique</td>\n", + " <td>212</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>Anatomie</td>\n", + " <td>187</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>Antiquité</td>\n", + " <td>263</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>Architecture</td>\n", + " <td>265</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>Arts et métiers</td>\n", + " <td>108</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5</th>\n", + " <td>Beaux-arts</td>\n", + " <td>84</td>\n", + " </tr>\n", + " <tr>\n", + " <th>6</th>\n", + " <td>Belles-lettres - Poésie</td>\n", + " <td>195</td>\n", + " </tr>\n", + " <tr>\n", + " <th>7</th>\n", + " <td>Blason</td>\n", + " <td>87</td>\n", + " </tr>\n", + " <tr>\n", + " <th>8</th>\n", + " <td>Caractères</td>\n", + " <td>22</td>\n", + " </tr>\n", + " <tr>\n", + " <th>9</th>\n", + " <td>Chasse</td>\n", + " <td>102</td>\n", + " </tr>\n", + " <tr>\n", + " <th>10</th>\n", + " <td>Chimie</td>\n", + " <td>94</td>\n", + " </tr>\n", + " <tr>\n", + " <th>11</th>\n", + " 
<td>Commerce</td>\n", + " <td>361</td>\n", + " </tr>\n", + " <tr>\n", + " <th>12</th>\n", + " <td>Droit - Jurisprudence</td>\n", + " <td>1181</td>\n", + " </tr>\n", + " <tr>\n", + " <th>13</th>\n", + " <td>Economie domestique</td>\n", + " <td>26</td>\n", + " </tr>\n", + " <tr>\n", + " <th>14</th>\n", + " <td>Grammaire</td>\n", + " <td>466</td>\n", + " </tr>\n", + " <tr>\n", + " <th>15</th>\n", + " <td>Géographie</td>\n", + " <td>2368</td>\n", + " </tr>\n", + " <tr>\n", + " <th>16</th>\n", + " <td>Histoire</td>\n", + " <td>592</td>\n", + " </tr>\n", + " <tr>\n", + " <th>17</th>\n", + " <td>Histoire naturelle</td>\n", + " <td>931</td>\n", + " </tr>\n", + " <tr>\n", + " <th>18</th>\n", + " <td>Jeu</td>\n", + " <td>54</td>\n", + " </tr>\n", + " <tr>\n", + " <th>19</th>\n", + " <td>Marine</td>\n", + " <td>363</td>\n", + " </tr>\n", + " <tr>\n", + " <th>20</th>\n", + " <td>Maréchage - Manège</td>\n", + " <td>97</td>\n", + " </tr>\n", + " <tr>\n", + " <th>21</th>\n", + " <td>Mathématiques</td>\n", + " <td>126</td>\n", + " </tr>\n", + " <tr>\n", + " <th>22</th>\n", + " <td>Mesure</td>\n", + " <td>35</td>\n", + " </tr>\n", + " <tr>\n", + " <th>23</th>\n", + " <td>Militaire (Art) - Guerre - Arme</td>\n", + " <td>247</td>\n", + " </tr>\n", + " <tr>\n", + " <th>24</th>\n", + " <td>Minéralogie</td>\n", + " <td>21</td>\n", + " </tr>\n", + " <tr>\n", + " <th>25</th>\n", + " <td>Monnaie</td>\n", + " <td>61</td>\n", + " </tr>\n", + " <tr>\n", + " <th>26</th>\n", + " <td>Musique</td>\n", + " <td>133</td>\n", + " </tr>\n", + " <tr>\n", + " <th>27</th>\n", + " <td>Médailles</td>\n", + " <td>23</td>\n", + " </tr>\n", + " <tr>\n", + " <th>28</th>\n", + " <td>Médecine - Chirurgie</td>\n", + " <td>428</td>\n", + " </tr>\n", + " <tr>\n", + " <th>29</th>\n", + " <td>Métiers</td>\n", + " <td>1006</td>\n", + " </tr>\n", + " <tr>\n", + " <th>30</th>\n", + " <td>Pharmacie</td>\n", + " <td>59</td>\n", + " </tr>\n", + " <tr>\n", + " <th>31</th>\n", + " <td>Philosophie</td>\n", + " <td>93</td>\n", 
+ " </tr>\n", + " <tr>\n", + " <th>32</th>\n", + " <td>Physique - [Sciences physico-mathématiques]</td>\n", + " <td>247</td>\n", + " </tr>\n", + " <tr>\n", + " <th>33</th>\n", + " <td>Politique</td>\n", + " <td>22</td>\n", + " </tr>\n", + " <tr>\n", + " <th>34</th>\n", + " <td>Pêche</td>\n", + " <td>39</td>\n", + " </tr>\n", + " <tr>\n", + " <th>35</th>\n", + " <td>Religion</td>\n", + " <td>319</td>\n", + " </tr>\n", + " <tr>\n", + " <th>36</th>\n", + " <td>Spectacle</td>\n", + " <td>9</td>\n", + " </tr>\n", + " <tr>\n", + " <th>37</th>\n", + " <td>Superstition</td>\n", + " <td>21</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " ensemble_domaine_enccre counts\n", + "0 Agriculture - Economie rustique 212\n", + "1 Anatomie 187\n", + "2 Antiquité 263\n", + "3 Architecture 265\n", + "4 Arts et métiers 108\n", + "5 Beaux-arts 84\n", + "6 Belles-lettres - Poésie 195\n", + "7 Blason 87\n", + "8 Caractères 22\n", + "9 Chasse 102\n", + "10 Chimie 94\n", + "11 Commerce 361\n", + "12 Droit - Jurisprudence 1181\n", + "13 Economie domestique 26\n", + "14 Grammaire 466\n", + "15 Géographie 2368\n", + "16 Histoire 592\n", + "17 Histoire naturelle 931\n", + "18 Jeu 54\n", + "19 Marine 363\n", + "20 Maréchage - Manège 97\n", + "21 Mathématiques 126\n", + "22 Mesure 35\n", + "23 Militaire (Art) - Guerre - Arme 247\n", + "24 Minéralogie 21\n", + "25 Monnaie 61\n", + "26 Musique 133\n", + "27 Médailles 23\n", + "28 Médecine - Chirurgie 428\n", + "29 Métiers 1006\n", + "30 Pharmacie 59\n", + "31 Philosophie 93\n", + "32 Physique - [Sciences physico-mathématiques] 247\n", + "33 Politique 22\n", + "34 Pêche 39\n", + "35 Religion 319\n", + "36 Spectacle 9\n", + "37 Superstition 21" + ] + }, + "execution_count": 208, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "validation_x.groupby(['ensemble_domaine_enccre']).size().reset_index(name='counts')" + ] + }, + { + "cell_type": "code", + "execution_count": 209, + "id": 
"fatty-pharmacy", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>ensemble_domaine_enccre</th>\n", + " <th>counts</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>Agriculture - Economie rustique</td>\n", + " <td>254</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>Anatomie</td>\n", + " <td>224</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>Antiquité</td>\n", + " <td>316</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>Architecture</td>\n", + " <td>318</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>Arts et métiers</td>\n", + " <td>129</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5</th>\n", + " <td>Beaux-arts</td>\n", + " <td>100</td>\n", + " </tr>\n", + " <tr>\n", + " <th>6</th>\n", + " <td>Belles-lettres - Poésie</td>\n", + " <td>235</td>\n", + " </tr>\n", + " <tr>\n", + " <th>7</th>\n", + " <td>Blason</td>\n", + " <td>105</td>\n", + " </tr>\n", + " <tr>\n", + " <th>8</th>\n", + " <td>Caractères</td>\n", + " <td>27</td>\n", + " </tr>\n", + " <tr>\n", + " <th>9</th>\n", + " <td>Chasse</td>\n", + " <td>122</td>\n", + " </tr>\n", + " <tr>\n", + " <th>10</th>\n", + " <td>Chimie</td>\n", + " <td>112</td>\n", + " </tr>\n", + " <tr>\n", + " <th>11</th>\n", + " <td>Commerce</td>\n", + " <td>433</td>\n", + " </tr>\n", + " <tr>\n", + " <th>12</th>\n", + " <td>Droit - Jurisprudence</td>\n", + " <td>1417</td>\n", + " </tr>\n", + " <tr>\n", + " <th>13</th>\n", + " <td>Economie domestique</td>\n", + " <td>31</td>\n", + " </tr>\n", + " 
<tr>\n", + " <th>14</th>\n", + " <td>Grammaire</td>\n", + " <td>560</td>\n", + " </tr>\n", + " <tr>\n", + " <th>15</th>\n", + " <td>Géographie</td>\n", + " <td>2842</td>\n", + " </tr>\n", + " <tr>\n", + " <th>16</th>\n", + " <td>Histoire</td>\n", + " <td>711</td>\n", + " </tr>\n", + " <tr>\n", + " <th>17</th>\n", + " <td>Histoire naturelle</td>\n", + " <td>1118</td>\n", + " </tr>\n", + " <tr>\n", + " <th>18</th>\n", + " <td>Jeu</td>\n", + " <td>65</td>\n", + " </tr>\n", + " <tr>\n", + " <th>19</th>\n", + " <td>Marine</td>\n", + " <td>435</td>\n", + " </tr>\n", + " <tr>\n", + " <th>20</th>\n", + " <td>Maréchage - Manège</td>\n", + " <td>116</td>\n", + " </tr>\n", + " <tr>\n", + " <th>21</th>\n", + " <td>Mathématiques</td>\n", + " <td>151</td>\n", + " </tr>\n", + " <tr>\n", + " <th>22</th>\n", + " <td>Mesure</td>\n", + " <td>42</td>\n", + " </tr>\n", + " <tr>\n", + " <th>23</th>\n", + " <td>Militaire (Art) - Guerre - Arme</td>\n", + " <td>296</td>\n", + " </tr>\n", + " <tr>\n", + " <th>24</th>\n", + " <td>Minéralogie</td>\n", + " <td>26</td>\n", + " </tr>\n", + " <tr>\n", + " <th>25</th>\n", + " <td>Monnaie</td>\n", + " <td>73</td>\n", + " </tr>\n", + " <tr>\n", + " <th>26</th>\n", + " <td>Musique</td>\n", + " <td>160</td>\n", + " </tr>\n", + " <tr>\n", + " <th>27</th>\n", + " <td>Médailles</td>\n", + " <td>28</td>\n", + " </tr>\n", + " <tr>\n", + " <th>28</th>\n", + " <td>Médecine - Chirurgie</td>\n", + " <td>513</td>\n", + " </tr>\n", + " <tr>\n", + " <th>29</th>\n", + " <td>Métiers</td>\n", + " <td>1207</td>\n", + " </tr>\n", + " <tr>\n", + " <th>30</th>\n", + " <td>Pharmacie</td>\n", + " <td>71</td>\n", + " </tr>\n", + " <tr>\n", + " <th>31</th>\n", + " <td>Philosophie</td>\n", + " <td>112</td>\n", + " </tr>\n", + " <tr>\n", + " <th>32</th>\n", + " <td>Physique - [Sciences physico-mathématiques]</td>\n", + " <td>296</td>\n", + " </tr>\n", + " <tr>\n", + " <th>33</th>\n", + " <td>Politique</td>\n", + " <td>26</td>\n", + " </tr>\n", + " <tr>\n", + " <th>34</th>\n", 
+ " <td>Pêche</td>\n", + " <td>47</td>\n", + " </tr>\n", + " <tr>\n", + " <th>35</th>\n", + " <td>Religion</td>\n", + " <td>383</td>\n", + " </tr>\n", + " <tr>\n", + " <th>36</th>\n", + " <td>Spectacle</td>\n", + " <td>11</td>\n", + " </tr>\n", + " <tr>\n", + " <th>37</th>\n", + " <td>Superstition</td>\n", + " <td>25</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " ensemble_domaine_enccre counts\n", + "0 Agriculture - Economie rustique 254\n", + "1 Anatomie 224\n", + "2 Antiquité 316\n", + "3 Architecture 318\n", + "4 Arts et métiers 129\n", + "5 Beaux-arts 100\n", + "6 Belles-lettres - Poésie 235\n", + "7 Blason 105\n", + "8 Caractères 27\n", + "9 Chasse 122\n", + "10 Chimie 112\n", + "11 Commerce 433\n", + "12 Droit - Jurisprudence 1417\n", + "13 Economie domestique 31\n", + "14 Grammaire 560\n", + "15 Géographie 2842\n", + "16 Histoire 711\n", + "17 Histoire naturelle 1118\n", + "18 Jeu 65\n", + "19 Marine 435\n", + "20 Maréchage - Manège 116\n", + "21 Mathématiques 151\n", + "22 Mesure 42\n", + "23 Militaire (Art) - Guerre - Arme 296\n", + "24 Minéralogie 26\n", + "25 Monnaie 73\n", + "26 Musique 160\n", + "27 Médailles 28\n", + "28 Médecine - Chirurgie 513\n", + "29 Métiers 1207\n", + "30 Pharmacie 71\n", + "31 Philosophie 112\n", + "32 Physique - [Sciences physico-mathématiques] 296\n", + "33 Politique 26\n", + "34 Pêche 47\n", + "35 Religion 383\n", + "36 Spectacle 11\n", + "37 Superstition 25" + ] + }, + "execution_count": 209, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "test_x.groupby(['ensemble_domaine_enccre']).size().reset_index(name='counts')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "indonesian-reach", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "divine-winner", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + 
"id": "tropical-research", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "younger-louisiana", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "demanding-essay", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "vanilla-italy", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "consistent-checklist", + "metadata": {}, + "outputs": [], + "source": [ + "## ajout dune colonne avec toutes les classes enccre\n", + "\n", + "def getDomaineEnccre2(volume, numero):\n", + " #print(volume, ' ', numero)\n", + "\n", + " ensemble_domaine = \"\"\n", + "\n", + " try : \n", + " #entreeid = df_correspondances.loc[(df_correspondances['tome']==volume) & (df_correspondances['article']==numero)]['entreeid'][0]\n", + " d = df_correspondances.loc[(df_correspondances['tome']==volume) & (df_correspondances['article']==numero)].reset_index(drop=True)\n", + " entreeid = d['entreeid'][0]\n", + "\n", + " json_url = urlopen(\"http://enccre.academie-sciences.fr/icefront/api/article/\" + entreeid)\n", + " data = json.loads(json_url.read())\n", + " #print(data['annotations']['constit'][0]['domgen'][0])\n", + " cpt = 0\n", + " try : \n", + " \n", + " # changer pour avoir tous les noms\n", + " for dom in data['annotations']['constit'][0]['domgen']:\n", + " val = get_key(dom)\n", + " if val is not None:\n", + " if cpt > 0:\n", + " ensemble_domaine += '|'\n", + " ensemble_domaine += get_key(dom)\n", + " cpt += 1\n", + "\n", + " \n", + " #print(ensemble_domaine)\n", + "\n", + " except KeyError:\n", + " pass\n", + " \n", + " except KeyError:\n", + " pass\n", + " \n", + " \n", + " \n", + " #ensemble_domaine_multi = ';'.join(list(set(ensemble_domaine)))\n", + " \n", + " #print(entreeid, domaine, ensemble_domaine, txtContent, 
txtContentWithoutClass, firstParagraph)\n", + " \n", + " return ensemble_domaine" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "coral-level", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>volume</th>\n", + " <th>numero</th>\n", + " <th>head</th>\n", + " <th>normClass</th>\n", + " <th>classEDdA</th>\n", + " <th>author</th>\n", + " <th>id_enccre</th>\n", + " <th>domaine_enccre</th>\n", + " <th>ensemble_domaine_enccre</th>\n", + " <th>content</th>\n", + " <th>contentWithoutClass</th>\n", + " <th>firstParagraph</th>\n", + " <th>nb_word</th>\n", + " <th>classification</th>\n", + " <th>class_is_true</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>1</td>\n", + " <td>5</td>\n", + " <td>A, a & a</td>\n", + " <td>Grammaire</td>\n", + " <td>ordre Encyclopéd. Entend. Science de l'homme, ...</td>\n", + " <td>Dumarsais5</td>\n", + " <td>v1-1-0</td>\n", + " <td>grammaire</td>\n", + " <td>Grammaire</td>\n", + " <td>A, a & a s.m. (ordre Encyclopéd.\\nEntend. Scie...</td>\n", + " <td>A, a & a s.m. (ordre Encyclopéd.\\nEntend. Scie...</td>\n", + " <td>A, a & a s.m. (ordre Encyclopéd.\\nEntend. Scie...</td>\n", + " <td>711</td>\n", + " <td>Grammaire</td>\n", + " <td>True</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>1</td>\n", + " <td>6</td>\n", + " <td>A</td>\n", + " <td>unclassified</td>\n", + " <td>unclassified</td>\n", + " <td>Dumarsais5</td>\n", + " <td>v1-1-1</td>\n", + " <td>grammaire</td>\n", + " <td>Grammaire</td>\n", + " <td>A, mot, est 1. 
la troisieme personne du présen...</td>\n", + " <td>A, mot, est 1. la troisieme personne du présen...</td>\n", + " <td>A, mot, est 1. la troisieme personne du présen...</td>\n", + " <td>238</td>\n", + " <td>Grammaire</td>\n", + " <td>True</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>1</td>\n", + " <td>7</td>\n", + " <td>A</td>\n", + " <td>unclassified</td>\n", + " <td>unclassified</td>\n", + " <td>Dumarsais</td>\n", + " <td>v1-1-2</td>\n", + " <td>grammaire</td>\n", + " <td>Grammaire</td>\n", + " <td>A, préposition vient du latin à , à dextris, à ...</td>\n", + " <td>A, préposition vient du latin à , à dextris, à ...</td>\n", + " <td>A, préposition vient du latin à , à dextris, à ...</td>\n", + " <td>1980</td>\n", + " <td>Grammaire</td>\n", + " <td>True</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>1</td>\n", + " <td>10</td>\n", + " <td>A, numismatique ou monétaire</td>\n", + " <td>unclassified</td>\n", + " <td>unclassified</td>\n", + " <td>Mallet</td>\n", + " <td>v1-1-5</td>\n", + " <td>numismatique</td>\n", + " <td>Médailles</td>\n", + " <td>A, numismatique ou monétaire, sur le revers de...</td>\n", + " <td>A, numismatique ou monétaire, sur le revers de...</td>\n", + " <td>A, numismatique ou monétaire, sur le revers de...</td>\n", + " <td>112</td>\n", + " <td>Médailles</td>\n", + " <td>True</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>1</td>\n", + " <td>11</td>\n", + " <td>A, lapidaire</td>\n", + " <td>unclassified</td>\n", + " <td>unclassified</td>\n", + " <td>Mallet</td>\n", + " <td>v1-1-6</td>\n", + " <td>inscriptions</td>\n", + " <td>Histoire</td>\n", + " <td>A, lapidaire, dans les anciennes inscriptions ...</td>\n", + " <td>A, lapidaire, dans les anciennes inscriptions ...</td>\n", + " <td>A, lapidaire, dans les anciennes inscriptions ...</td>\n", + " <td>80</td>\n", + " <td>Histoire</td>\n", + " <td>True</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " volume 
numero head normClass \\\n", + "0 1 5 A, a & a Grammaire \n", + "1 1 6 A unclassified \n", + "2 1 7 A unclassified \n", + "3 1 10 A, numismatique ou monétaire unclassified \n", + "4 1 11 A, lapidaire unclassified \n", + "\n", + " classEDdA author id_enccre \\\n", + "0 ordre Encyclopéd. Entend. Science de l'homme, ... Dumarsais5 v1-1-0 \n", + "1 unclassified Dumarsais5 v1-1-1 \n", + "2 unclassified Dumarsais v1-1-2 \n", + "3 unclassified Mallet v1-1-5 \n", + "4 unclassified Mallet v1-1-6 \n", + "\n", + " domaine_enccre ensemble_domaine_enccre \\\n", + "0 grammaire Grammaire \n", + "1 grammaire Grammaire \n", + "2 grammaire Grammaire \n", + "3 numismatique Médailles \n", + "4 inscriptions Histoire \n", + "\n", + " content \\\n", + "0 A, a & a s.m. (ordre Encyclopéd.\\nEntend. Scie... \n", + "1 A, mot, est 1. la troisieme personne du présen... \n", + "2 A, préposition vient du latin à , à dextris, à ... \n", + "3 A, numismatique ou monétaire, sur le revers de... \n", + "4 A, lapidaire, dans les anciennes inscriptions ... \n", + "\n", + " contentWithoutClass \\\n", + "0 A, a & a s.m. (ordre Encyclopéd.\\nEntend. Scie... \n", + "1 A, mot, est 1. la troisieme personne du présen... \n", + "2 A, préposition vient du latin à , à dextris, à ... \n", + "3 A, numismatique ou monétaire, sur le revers de... \n", + "4 A, lapidaire, dans les anciennes inscriptions ... \n", + "\n", + " firstParagraph nb_word classification \\\n", + "0 A, a & a s.m. (ordre Encyclopéd.\\nEntend. Scie... 711 Grammaire \n", + "1 A, mot, est 1. la troisieme personne du présen... 238 Grammaire \n", + "2 A, préposition vient du latin à , à dextris, à ... 1980 Grammaire \n", + "3 A, numismatique ou monétaire, sur le revers de... 112 Médailles \n", + "4 A, lapidaire, dans les anciennes inscriptions ... 
80 Histoire \n", + "\n", + " class_is_true \n", + "0 True \n", + "1 True \n", + "2 True \n", + "3 True \n", + "4 True " + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pandas as pd\n", + "filepath = '/Users/lmoncla/Nextcloud-LIRIS/GEODE/GEODE - Partage consortium/Classification domaines EDdA/results_classification/result_classification_sgdtfidf_21.11.24.csv'\n", + "df = pd.read_csv(filepath)\n", + "\n", + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "enormous-longer", + "metadata": {}, + "outputs": [], + "source": [ + " df['ensembles_domaine_enccre'] = df.apply(lambda row: getDomaineEnccre2(row.volume, row.numero), axis=1)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "incorporated-commons", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>volume</th>\n", + " <th>numero</th>\n", + " <th>head</th>\n", + " <th>normClass</th>\n", + " <th>classEDdA</th>\n", + " <th>author</th>\n", + " <th>id_enccre</th>\n", + " <th>domaine_enccre</th>\n", + " <th>ensemble_domaine_enccre</th>\n", + " <th>content</th>\n", + " <th>contentWithoutClass</th>\n", + " <th>firstParagraph</th>\n", + " <th>nb_word</th>\n", + " <th>classification</th>\n", + " <th>class_is_true</th>\n", + " <th>ensembles_domaine_enccre</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>1</td>\n", + " <td>5</td>\n", + " <td>A, a & a</td>\n", + " <td>Grammaire</td>\n", + " <td>ordre Encyclopéd. Entend. 
Science de l'homme, ...</td>\n", + " <td>Dumarsais5</td>\n", + " <td>v1-1-0</td>\n", + " <td>grammaire</td>\n", + " <td>Grammaire</td>\n", + " <td>A, a & a s.m. (ordre Encyclopéd.\\nEntend. Scie...</td>\n", + " <td>A, a & a s.m. (ordre Encyclopéd.\\nEntend. Scie...</td>\n", + " <td>A, a & a s.m. (ordre Encyclopéd.\\nEntend. Scie...</td>\n", + " <td>711</td>\n", + " <td>Grammaire</td>\n", + " <td>True</td>\n", + " <td>Grammaire</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>1</td>\n", + " <td>6</td>\n", + " <td>A</td>\n", + " <td>unclassified</td>\n", + " <td>unclassified</td>\n", + " <td>Dumarsais5</td>\n", + " <td>v1-1-1</td>\n", + " <td>grammaire</td>\n", + " <td>Grammaire</td>\n", + " <td>A, mot, est 1. la troisieme personne du présen...</td>\n", + " <td>A, mot, est 1. la troisieme personne du présen...</td>\n", + " <td>A, mot, est 1. la troisieme personne du présen...</td>\n", + " <td>238</td>\n", + " <td>Grammaire</td>\n", + " <td>True</td>\n", + " <td>Grammaire</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>1</td>\n", + " <td>7</td>\n", + " <td>A</td>\n", + " <td>unclassified</td>\n", + " <td>unclassified</td>\n", + " <td>Dumarsais</td>\n", + " <td>v1-1-2</td>\n", + " <td>grammaire</td>\n", + " <td>Grammaire</td>\n", + " <td>A, préposition vient du latin à , à dextris, à ...</td>\n", + " <td>A, préposition vient du latin à , à dextris, à ...</td>\n", + " <td>A, préposition vient du latin à , à dextris, à ...</td>\n", + " <td>1980</td>\n", + " <td>Grammaire</td>\n", + " <td>True</td>\n", + " <td>Grammaire</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>1</td>\n", + " <td>10</td>\n", + " <td>A, numismatique ou monétaire</td>\n", + " <td>unclassified</td>\n", + " <td>unclassified</td>\n", + " <td>Mallet</td>\n", + " <td>v1-1-5</td>\n", + " <td>numismatique</td>\n", + " <td>Médailles</td>\n", + " <td>A, numismatique ou monétaire, sur le revers de...</td>\n", + " <td>A, numismatique ou monétaire, sur le revers 
de...</td>\n", + " <td>A, numismatique ou monétaire, sur le revers de...</td>\n", + " <td>112</td>\n", + " <td>Médailles</td>\n", + " <td>True</td>\n", + " <td>Médailles|Monnaie</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>1</td>\n", + " <td>11</td>\n", + " <td>A, lapidaire</td>\n", + " <td>unclassified</td>\n", + " <td>unclassified</td>\n", + " <td>Mallet</td>\n", + " <td>v1-1-6</td>\n", + " <td>inscriptions</td>\n", + " <td>Histoire</td>\n", + " <td>A, lapidaire, dans les anciennes inscriptions ...</td>\n", + " <td>A, lapidaire, dans les anciennes inscriptions ...</td>\n", + " <td>A, lapidaire, dans les anciennes inscriptions ...</td>\n", + " <td>80</td>\n", + " <td>Histoire</td>\n", + " <td>True</td>\n", + " <td>Histoire</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " volume numero head normClass \\\n", + "0 1 5 A, a & a Grammaire \n", + "1 1 6 A unclassified \n", + "2 1 7 A unclassified \n", + "3 1 10 A, numismatique ou monétaire unclassified \n", + "4 1 11 A, lapidaire unclassified \n", + "\n", + " classEDdA author id_enccre \\\n", + "0 ordre Encyclopéd. Entend. Science de l'homme, ... Dumarsais5 v1-1-0 \n", + "1 unclassified Dumarsais5 v1-1-1 \n", + "2 unclassified Dumarsais v1-1-2 \n", + "3 unclassified Mallet v1-1-5 \n", + "4 unclassified Mallet v1-1-6 \n", + "\n", + " domaine_enccre ensemble_domaine_enccre \\\n", + "0 grammaire Grammaire \n", + "1 grammaire Grammaire \n", + "2 grammaire Grammaire \n", + "3 numismatique Médailles \n", + "4 inscriptions Histoire \n", + "\n", + " content \\\n", + "0 A, a & a s.m. (ordre Encyclopéd.\\nEntend. Scie... \n", + "1 A, mot, est 1. la troisieme personne du présen... \n", + "2 A, préposition vient du latin à , à dextris, à ... \n", + "3 A, numismatique ou monétaire, sur le revers de... \n", + "4 A, lapidaire, dans les anciennes inscriptions ... \n", + "\n", + " contentWithoutClass \\\n", + "0 A, a & a s.m. (ordre Encyclopéd.\\nEntend. Scie... 
\n", + "1 A, mot, est 1. la troisieme personne du présen... \n", + "2 A, préposition vient du latin à , à dextris, à ... \n", + "3 A, numismatique ou monétaire, sur le revers de... \n", + "4 A, lapidaire, dans les anciennes inscriptions ... \n", + "\n", + " firstParagraph nb_word classification \\\n", + "0 A, a & a s.m. (ordre Encyclopéd.\\nEntend. Scie... 711 Grammaire \n", + "1 A, mot, est 1. la troisieme personne du présen... 238 Grammaire \n", + "2 A, préposition vient du latin à , à dextris, à ... 1980 Grammaire \n", + "3 A, numismatique ou monétaire, sur le revers de... 112 Médailles \n", + "4 A, lapidaire, dans les anciennes inscriptions ... 80 Histoire \n", + "\n", + " class_is_true ensembles_domaine_enccre \n", + "0 True Grammaire \n", + "1 True Grammaire \n", + "2 True Grammaire \n", + "3 True Médailles|Monnaie \n", + "4 True Histoire " + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "pleasant-throat", + "metadata": {}, + "outputs": [], + "source": [ + "# enregistrement du dataframe dans un fichier tsv\n", + "df.to_csv('/Users/lmoncla/Nextcloud-LIRIS/GEODE/GEODE - Partage consortium/Classification domaines EDdA/results_classification/result_classification_sgdtfidf_21.11.25.csv',index=False) " + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "small-shore", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>volume</th>\n", + " <th>numero</th>\n", + " 
<th>head</th>\n", + " <th>normClass</th>\n", + " <th>classEDdA</th>\n", + " <th>author</th>\n", + " <th>id_enccre</th>\n", + " <th>domaine_enccre</th>\n", + " <th>ensemble_domaine_enccre</th>\n", + " <th>content</th>\n", + " <th>contentWithoutClass</th>\n", + " <th>firstParagraph</th>\n", + " <th>nb_word</th>\n", + " <th>classification</th>\n", + " <th>class_is_true</th>\n", + " <th>ensembles_domaine_enccre</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>1</td>\n", + " <td>5</td>\n", + " <td>A, a & a</td>\n", + " <td>Grammaire</td>\n", + " <td>ordre Encyclopéd. Entend. Science de l'homme, ...</td>\n", + " <td>Dumarsais5</td>\n", + " <td>v1-1-0</td>\n", + " <td>grammaire</td>\n", + " <td>Grammaire</td>\n", + " <td>A, a & a s.m. (ordre Encyclopéd.\\nEntend. Scie...</td>\n", + " <td>A, a & a s.m. (ordre Encyclopéd.\\nEntend. Scie...</td>\n", + " <td>A, a & a s.m. (ordre Encyclopéd.\\nEntend. Scie...</td>\n", + " <td>711</td>\n", + " <td>Grammaire</td>\n", + " <td>True</td>\n", + " <td>Grammaire</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>1</td>\n", + " <td>6</td>\n", + " <td>A</td>\n", + " <td>unclassified</td>\n", + " <td>unclassified</td>\n", + " <td>Dumarsais5</td>\n", + " <td>v1-1-1</td>\n", + " <td>grammaire</td>\n", + " <td>Grammaire</td>\n", + " <td>A, mot, est 1. la troisieme personne du présen...</td>\n", + " <td>A, mot, est 1. la troisieme personne du présen...</td>\n", + " <td>A, mot, est 1. 
la troisieme personne du présen...</td>\n", + " <td>238</td>\n", + " <td>Grammaire</td>\n", + " <td>True</td>\n", + " <td>Grammaire</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>1</td>\n", + " <td>7</td>\n", + " <td>A</td>\n", + " <td>unclassified</td>\n", + " <td>unclassified</td>\n", + " <td>Dumarsais</td>\n", + " <td>v1-1-2</td>\n", + " <td>grammaire</td>\n", + " <td>Grammaire</td>\n", + " <td>A, préposition vient du latin à , à dextris, à ...</td>\n", + " <td>A, préposition vient du latin à , à dextris, à ...</td>\n", + " <td>A, préposition vient du latin à , à dextris, à ...</td>\n", + " <td>1980</td>\n", + " <td>Grammaire</td>\n", + " <td>True</td>\n", + " <td>Grammaire</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>1</td>\n", + " <td>10</td>\n", + " <td>A, numismatique ou monétaire</td>\n", + " <td>unclassified</td>\n", + " <td>unclassified</td>\n", + " <td>Mallet</td>\n", + " <td>v1-1-5</td>\n", + " <td>numismatique</td>\n", + " <td>Médailles</td>\n", + " <td>A, numismatique ou monétaire, sur le revers de...</td>\n", + " <td>A, numismatique ou monétaire, sur le revers de...</td>\n", + " <td>A, numismatique ou monétaire, sur le revers de...</td>\n", + " <td>112</td>\n", + " <td>Médailles</td>\n", + " <td>True</td>\n", + " <td>Médailles|Monnaie</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>1</td>\n", + " <td>11</td>\n", + " <td>A, lapidaire</td>\n", + " <td>unclassified</td>\n", + " <td>unclassified</td>\n", + " <td>Mallet</td>\n", + " <td>v1-1-6</td>\n", + " <td>inscriptions</td>\n", + " <td>Histoire</td>\n", + " <td>A, lapidaire, dans les anciennes inscriptions ...</td>\n", + " <td>A, lapidaire, dans les anciennes inscriptions ...</td>\n", + " <td>A, lapidaire, dans les anciennes inscriptions ...</td>\n", + " <td>80</td>\n", + " <td>Histoire</td>\n", + " <td>True</td>\n", + " <td>Histoire</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " volume numero head 
normClass \\\n", + "0 1 5 A, a & a Grammaire \n", + "1 1 6 A unclassified \n", + "2 1 7 A unclassified \n", + "3 1 10 A, numismatique ou monétaire unclassified \n", + "4 1 11 A, lapidaire unclassified \n", + "\n", + " classEDdA author id_enccre \\\n", + "0 ordre Encyclopéd. Entend. Science de l'homme, ... Dumarsais5 v1-1-0 \n", + "1 unclassified Dumarsais5 v1-1-1 \n", + "2 unclassified Dumarsais v1-1-2 \n", + "3 unclassified Mallet v1-1-5 \n", + "4 unclassified Mallet v1-1-6 \n", + "\n", + " domaine_enccre ensemble_domaine_enccre \\\n", + "0 grammaire Grammaire \n", + "1 grammaire Grammaire \n", + "2 grammaire Grammaire \n", + "3 numismatique Médailles \n", + "4 inscriptions Histoire \n", + "\n", + " content \\\n", + "0 A, a & a s.m. (ordre Encyclopéd.\\nEntend. Scie... \n", + "1 A, mot, est 1. la troisieme personne du présen... \n", + "2 A, préposition vient du latin à , à dextris, à ... \n", + "3 A, numismatique ou monétaire, sur le revers de... \n", + "4 A, lapidaire, dans les anciennes inscriptions ... \n", + "\n", + " contentWithoutClass \\\n", + "0 A, a & a s.m. (ordre Encyclopéd.\\nEntend. Scie... \n", + "1 A, mot, est 1. la troisieme personne du présen... \n", + "2 A, préposition vient du latin à , à dextris, à ... \n", + "3 A, numismatique ou monétaire, sur le revers de... \n", + "4 A, lapidaire, dans les anciennes inscriptions ... \n", + "\n", + " firstParagraph nb_word classification \\\n", + "0 A, a & a s.m. (ordre Encyclopéd.\\nEntend. Scie... 711 Grammaire \n", + "1 A, mot, est 1. la troisieme personne du présen... 238 Grammaire \n", + "2 A, préposition vient du latin à , à dextris, à ... 1980 Grammaire \n", + "3 A, numismatique ou monétaire, sur le revers de... 112 Médailles \n", + "4 A, lapidaire, dans les anciennes inscriptions ... 
80 Histoire \n", + "\n", + " class_is_true ensembles_domaine_enccre \n", + "0 True Grammaire \n", + "1 True Grammaire \n", + "2 True Grammaire \n", + "3 True Médailles|Monnaie \n", + "4 True Histoire " + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "acute-basketball", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "verified-compression", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.10" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebooks/EDdA_Classification_BertFineTuning.ipynb b/notebooks/EDdA_Classification_BertFineTuning.ipynb new file mode 100644 index 0000000..dc0830e --- /dev/null +++ b/notebooks/EDdA_Classification_BertFineTuning.ipynb @@ -0,0 +1,4421 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "4YCMlsNwOWs0" + }, + "source": [ + "# BERT fine-tuning for EDdA classification" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Pz9VDIXUON97" + }, + "source": [ + "## Configuration" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ouU5usvXg4PA" + }, + "outputs": [], + "source": [ + "train_path = 'training_set.tsv'\n", + "validation_path = 'validation_set.tsv'\n", + "test_path = 'test_set.tsv'\n", + "\n", + "columnText = 'contentWithoutClass'\n", + "columnClass = 'ensemble_domaine_enccre'\n", + "\n", + "minOfInstancePerClass = 0\n", + "maxOfInstancePerClass = 10000\n", + "\n", + 
"#model_chosen = \"bert\"\n", + "model_chosen = \"camembert\"\n", + "\n", + "batch_size = 8 # 16 or 32 recommended\n", + "max_len = 512" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "6xdYI9moOQSv" + }, + "source": [ + "## Setup colab environment" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "WF0qFN_g3ekz", + "outputId": "445ffd96-843b-4ff1-a24d-c110964a63e4" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Your runtime has 27.3 gigabytes of available RAM\n", + "\n", + "You are using a high-RAM runtime!\n" + ] + } + ], + "source": [ + "from psutil import virtual_memory\n", + "ram_gb = virtual_memory().total / 1e9\n", + "print('Your runtime has {:.1f} gigabytes of available RAM\\n'.format(ram_gb))\n", + "\n", + "if ram_gb < 20:\n", + " print('Not using a high-RAM runtime')\n", + "else:\n", + " print('You are using a high-RAM runtime!')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "vL0S-s9Uofvn", + "outputId": "415b7bf1-d3fd-42b6-ee03-13601c953a4f" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Mounted at /content/drive\n" + ] + } + ], + "source": [ + "from google.colab import drive\n", + "drive.mount('/content/drive')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "8hzEGHl7gmzk" + }, + "source": [ + "## Setup GPU" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "dPOU-Efhf4ui", + "outputId": "fc873e0c-1254-4928-c8e9-e3eb093acc64" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "There are 1 GPU(s) available.\n", + "We will use the GPU: Tesla P100-PCIE-16GB\n" + ] + } + ], + "source": [ + "import torch\n", + "\n", + "# If there's 
a GPU available...\n", + "if torch.cuda.is_available(): \n", + "\n", + " # Tell PyTorch to use the GPU. \n", + " device = torch.device(\"cuda\")\n", + "\n", + " print('There are %d GPU(s) available.' % torch.cuda.device_count())\n", + "\n", + " print('We will use the GPU:', torch.cuda.get_device_name(0))\n", + "\n", + "# If not...\n", + "else:\n", + " print('No GPU available, using the CPU instead.')\n", + " device = torch.device(\"cpu\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Jr-S9yYIgGkA" + }, + "source": [ + "## Install packages" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "pwmZ5bBvgGNh", + "outputId": "e92404c6-af38-4bd8-8c99-20ec6b545b3f" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Collecting transformers==4.10.3\n", + " Downloading transformers-4.10.3-py3-none-any.whl (2.8 MB)\n", + "\u001b[K |████████████████████████████████| 2.8 MB 5.0 MB/s \n", + "\u001b[?25hCollecting tokenizers<0.11,>=0.10.1\n", + " Downloading tokenizers-0.10.3-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (3.3 MB)\n", + "\u001b[K |████████████████████████████████| 3.3 MB 38.8 MB/s \n", + "\u001b[?25hCollecting pyyaml>=5.1\n", + " Downloading PyYAML-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (596 kB)\n", + "\u001b[K |████████████████████████████████| 596 kB 58.6 MB/s \n", + "\u001b[?25hRequirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.7/dist-packages (from transformers==4.10.3) (2019.12.20)\n", + "Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.7/dist-packages (from transformers==4.10.3) (4.62.3)\n", + "Requirement already satisfied: requests in /usr/local/lib/python3.7/dist-packages (from transformers==4.10.3) (2.23.0)\n", + "Collecting huggingface-hub>=0.0.12\n", + " 
Downloading huggingface_hub-0.2.1-py3-none-any.whl (61 kB)\n", + "\u001b[K |████████████████████████████████| 61 kB 486 kB/s \n", + "\u001b[?25hRequirement already satisfied: filelock in /usr/local/lib/python3.7/dist-packages (from transformers==4.10.3) (3.4.0)\n", + "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.7/dist-packages (from transformers==4.10.3) (1.19.5)\n", + "Collecting sacremoses\n", + " Downloading sacremoses-0.0.46-py3-none-any.whl (895 kB)\n", + "\u001b[K |████████████████████████████████| 895 kB 43.3 MB/s \n", + "\u001b[?25hRequirement already satisfied: packaging in /usr/local/lib/python3.7/dist-packages (from transformers==4.10.3) (21.3)\n", + "Requirement already satisfied: importlib-metadata in /usr/local/lib/python3.7/dist-packages (from transformers==4.10.3) (4.8.2)\n", + "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.7/dist-packages (from huggingface-hub>=0.0.12->transformers==4.10.3) (3.10.0.2)\n", + "Requirement already satisfied: pyparsing!=3.0.5,>=2.0.2 in /usr/local/lib/python3.7/dist-packages (from packaging->transformers==4.10.3) (3.0.6)\n", + "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.7/dist-packages (from importlib-metadata->transformers==4.10.3) (3.6.0)\n", + "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests->transformers==4.10.3) (2.10)\n", + "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests->transformers==4.10.3) (1.24.3)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests->transformers==4.10.3) (2021.10.8)\n", + "Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests->transformers==4.10.3) (3.0.4)\n", + "Requirement already satisfied: click in /usr/local/lib/python3.7/dist-packages (from 
sacremoses->transformers==4.10.3) (7.1.2)\n", + "Requirement already satisfied: six in /usr/local/lib/python3.7/dist-packages (from sacremoses->transformers==4.10.3) (1.15.0)\n", + "Requirement already satisfied: joblib in /usr/local/lib/python3.7/dist-packages (from sacremoses->transformers==4.10.3) (1.1.0)\n", + "Installing collected packages: pyyaml, tokenizers, sacremoses, huggingface-hub, transformers\n", + " Attempting uninstall: pyyaml\n", + " Found existing installation: PyYAML 3.13\n", + " Uninstalling PyYAML-3.13:\n", + " Successfully uninstalled PyYAML-3.13\n", + "Successfully installed huggingface-hub-0.2.1 pyyaml-6.0 sacremoses-0.0.46 tokenizers-0.10.3 transformers-4.10.3\n", + "Collecting sentencepiece\n", + " Downloading sentencepiece-0.1.96-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.2 MB)\n", + "\u001b[K |████████████████████████████████| 1.2 MB 5.1 MB/s \n", + "\u001b[?25hInstalling collected packages: sentencepiece\n", + "Successfully installed sentencepiece-0.1.96\n" + ] + } + ], + "source": [ + "!pip install transformers==4.10.3\n", + "!pip install sentencepiece" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "wSqbrupGMc1M" + }, + "source": [ + "## Import librairies" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "SkErnwgMMbRj" + }, + "outputs": [], + "source": [ + "import pandas as pd \n", + "import numpy as np\n", + "import csv\n", + "from sklearn import preprocessing\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.metrics import *\n", + "\n", + "from transformers import BertTokenizer, CamembertTokenizer, BertForSequenceClassification, AdamW, BertConfig, CamembertForSequenceClassification\n", + "from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler\n", + "from transformers import get_linear_schedule_with_warmup\n", + "\n", + "import time\n", + "import datetime\n", + "\n", + "import random\n", + "\n", + 
"import matplotlib.pyplot as plt\n", + "from sklearn.metrics import plot_confusion_matrix\n", + "from sklearn.metrics import confusion_matrix\n", + "from sklearn.metrics import classification_report\n", + "import seaborn as sns" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "12SA-qPFgsVo" + }, + "source": [ + "## Utils functions" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "WkIVcabUgxIl" + }, + "outputs": [], + "source": [ + "def create_dict(df, classColumnName):\n", + " return dict(df[classColumnName].value_counts())\n", + "\n", + "\n", + "def remove_weak_classes(df, classColumnName, threshold):\n", + " dictOfClassInstances = create_dict(df,classColumnName)\n", + " dictionary = {k: v for k, v in dictOfClassInstances.items() if v >= threshold }\n", + " keys = [*dictionary]\n", + " df_tmp = df[~ df[classColumnName].isin(keys)]\n", + " df = pd.concat([df,df_tmp]).drop_duplicates(keep=False)\n", + " return df\n", + "\n", + "\n", + "def resample_classes(df, classColumnName, numberOfInstances):\n", + " #random numberOfInstances elements\n", + " replace = False # with replacement\n", + " fn = lambda obj: obj.loc[np.random.choice(obj.index, numberOfInstances if len(obj) > numberOfInstances else len(obj), replace),:]\n", + " return df.groupby(classColumnName, as_index=False).apply(fn)\n", + " \n", + "\n", + "# Function to calculate the accuracy of our predictions vs labels\n", + "def flat_accuracy(preds, labels):\n", + " pred_flat = np.argmax(preds, axis=1).flatten()\n", + " labels_flat = labels.flatten()\n", + " return np.sum(pred_flat == labels_flat) / len(labels_flat) \n", + "\n", + "\n", + "def format_time(elapsed):\n", + " '''\n", + " Takes a time in seconds and returns a string hh:mm:ss\n", + " '''\n", + " # Round to the nearest second.\n", + " elapsed_rounded = int(round((elapsed)))\n", + "\n", + " # Format as hh:mm:ss\n", + " return str(datetime.timedelta(seconds=elapsed_rounded))" + ] + }, + { + 
"cell_type": "markdown", + "metadata": { + "id": "c5QKcXulhNJ-" + }, + "source": [ + "## Load Data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "jdCdUVOTZrqh" + }, + "outputs": [], + "source": [ + "!wget https://projet.liris.cnrs.fr/geode/EDdA-Classification/datasets/training_set.tsv\n", + "!wget https://projet.liris.cnrs.fr/geode/EDdA-Classification/datasets/validation_set.tsv\n", + "!wget https://projet.liris.cnrs.fr/geode/EDdA-Classification/datasets/test_set.tsv" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "9d1IxD_bLEvp" + }, + "source": [ + "## Parameters" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "5u1acjunhoxe" + }, + "outputs": [], + "source": [ + "df_train = pd.read_csv(train_path, sep=\"\\t\")\n", + "df_train = resample_classes(df_train, columnClass, maxOfInstancePerClass)\n", + "\n", + "df_validation = pd.read_csv(validation_path, sep=\"\\t\")\n", + "df_validation = resample_classes(df_validation, columnClass, maxOfInstancePerClass)\n", + "\n", + "#df_train = remove_weak_classes(df, columnClass, minOfInstancePerClass)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "zj3JDoJNfx1f", + "outputId": "59262e3f-5fe0-49f5-bb55-8586653498ab" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(30650, 13)\n", + "(10947, 13)\n" + ] + } + ], + "source": [ + "print(df_train.shape)\n", + "print(df_validation.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "zrjZvs2dhzAy" + }, + "outputs": [], + "source": [ + "y_train = df_train[columnClass]\n", + "y_validation = df_validation[columnClass]\n", + "numberOfClasses = y_train.nunique()\n", + "\n", + "encoder = preprocessing.LabelEncoder()\n", + "\n", + "y_train = encoder.fit_transform(y_train)\n", + "y_validation = 
encoder.fit_transform(y_validation)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "u9AxxaA_h1CM" + }, + "outputs": [], + "source": [ + "#train_x, test_x, train_y, test_y = train_test_split(df, y, test_size=0.33, random_state=42, stratify = y )\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Xt_PhH_6h1_3" + }, + "outputs": [], + "source": [ + "sentences_train = df_train[columnText].values\n", + "labels_train = y_train.tolist()\n", + "\n", + "sentences_validation = df_validation[columnText].values\n", + "labels_validation = y_validation.tolist()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Dq_KF5WAsbpC", + "outputId": "ba91b953-abcb-4bed-a5c5-9e429e68239a" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([\"\\nESTAMPEUR, s. m. en , est une\\nsorte de pilon de bois, surmonté d'un manche d'environ \\ndeux piés & demi. On s'en sert pour estamper\\nles formes où l'on veut faire des vergeoises. Voyez\\nVergeoise & Estamper.\\n\",\n", + " \"\\nOn doit ébourgeonner les vignes, alors ce mot doit\\ns'entendre autrement que pour les arbres fruitiers:\\non ébourgeonne les vignes. non-seulement quand on\\nsupprime les bourgeons surnuméraires, mais encore\\nquand on arrête par-en-haut les bourgeons. Il en est\\nde même quand on détache en cassant les faux bourgeons \\nqui poussent d'ordinaire à chaque noeud à \\ncôté des yeux, à commencer par le bas. (K)\\n\",\n", + " \"\\nBois mort en pié, s'il est pourri sur pié, sans\\nsubstance, & bon seulement à brûler.\\n\",\n", + " ...,\n", + " \"\\nIl y a une hydatoscopie naturelle & permise ; elle\\nconsiste à prévoir & à prédire les orages & les tempêtes \\nsur certains signes qu'on remarque dans la mer,\\ndans l'air, & dans les nuages. Voyez Tems & Ouragans. Dict. de Trévoux.\\n\",\n", + " \"\\nMÉTÉOROMANCIE, s.f. 
() divination par\\nles météores ; & comme les météores ignés sont ceux\\nqui jettent le plus de crainte parmi les hommes, la\\nmétéoromancie désigne proprement la divination par\\nle tonnerre & les éclairs. Cette espece de divination\\npassa des Toscans aux Romains, sons rien perdre de\\nce qu'elle avoit de frivole. Seneque nous apprend\\nque deux auteurs graves, & qui avoient exercé des\\n\\nmagistratures, écrivoient à Rome sur cette matiere.\\nIl semble même que l'un d'eux l'épuisa entierement,\\ncar il donnoit une liste exacte des différentes especes\\nde tonnerres. Il circonstancioit & leurs noms & les\\nprognostics qui s'en pouvoient tirer ; le tout avec un\\nair de confiance plus surprenant encore que les choses\\nqu'il rapportoit. On eût dit, tant cette matiere météorologique lui étoit familiere, qu'il comptoit les tableaux \\nde sa galerie, ou qu'il faisoit la description\\ndes fleurs de son jardin. La plus ancienne maladie,\\nla plus invétérée, la plus incurable du genre humain,\\nc'est l'envie de connoître ce qui doit arriver.\\nNi le voile obscur qui nous cache notre destinée, ni\\nl'expérience journaliere, ni une infinité de tentatives \\nmalheureuses, n'ont pû guerir les hommes. Hé!\\nse dépréviennent-ils jamais d'une erreur agréablement \\nreçue? Nous sommes sur ce point aussi crédules\\nque nos ancêtres ; nous prêtons comme eux l'oreille\\nà toutes les impostures flatteuses. Pour avoir trompé\\ncent fois, elles n'ont point perdu le droit funeste de\\ntromper encore. (D. J.)\\n\",\n", + " \"\\nPENTACLE, s. m. () c'est le nom que la\\nmagie des exorcismes donne à un sceau imprimé ou\\nsur du parchemin vierge fait de peau de bouc, ou\\nsur quelque métal, or, argent, cuivre, étain, plomb,\\n&c. On ne peut faire aucune opération magique pour\\nexorciser les esprits, sans avoir ce sceau qui contient\\nles noms de Dieu. 
Le pentacle se fait en renfermant\\nun triangle dans deux cercles : on lit dans ce triangle \\nces trois mots ; formatio, reformatio, transformatio. A côté du triangle est le mot agla, qui est très puissant \\npour arrêter la malice des esprits. Il faut que\\nla peau sur laquelle on applique le sceau soit exorcisée \\n& bénite. On exorcise aussi l'encre & la plume,\\ndont on se sert pour écrire les mots dont on vient de\\nparler. Après cela on encense le pentacle ; on l'enferme \\ntrois jours & trois nuits dans un vase bien net ;\\nenfin, on le met dans un linge ou dans un livre que\\nl'on parfume & que l'on exorcise. Voilà les fadaises\\nqu'on lit dans le livre intitulé Encheiridion Leonis papae, ouvrage misérable, qui n'a servi qu'à gâter davantage \\nles esprits crédules & portés à la superstitition.\\n(D. J.)\\n\"],\n", + " dtype=object)" + ] + }, + "execution_count": 41, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sentences_train" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Gs4Agx_5h43M" + }, + "source": [ + "# Model\n", + "## Tokenisation & Input Formatting" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "YZ5PhEYZiCEA" + }, + "outputs": [], + "source": [ + "if model_chosen == \"bert\":\n", + " tokeniser_bert = 'bert-base-multilingual-cased'\n", + " model_bert = \"bert-base-multilingual-cased\"\n", + "elif model_chosen == \"camembert\":\n", + " tokeniser_bert = 'camembert-base'\n", + " model_bert = 'camembert-base'\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 131, + "referenced_widgets": [ + "06c6e7721b68449a9f3619ffdf18dfeb", + "5ec6a851b16c4339b51acb6129935f13", + "fd39a852133144e2b4aed474b204451f", + "0143df420df444e9aac5c8b39c342021", + "c61b6474b55948cb91a598e6b9aa10d2", + "a0d9ceaa8d3a4876ae65d877687bcf50", + "aa6ea92757df47eda1e41603cb109e79", + 
"41558bfcc0464711916c2d96337bef66", + "fdf05cea504c42f793f9c06e58ef995b", + "044fc1f96f8347ddb4a79d31edf32174", + "cf0d3320e06546789b5d5a2021dbc3ad", + "fba1d1d5c83b40659295a3457d74cb4e", + "f7224a1b831d459594852eece9f05543", + "185ae5ef7be646b797467086ad7d3a82", + "3ceaa994a3814d3c85e2051e37397342", + "e674e279b13b41fda3df3a6c89f5fcb1", + "3203783f58e54b0e856ab84503bf0d3c", + "0214f74b229a4232a9edf3cab751b90d", + "152afcb9245c416fae0fde257fa25e2e", + "fb3a174c597b47c7a527517004ba5f54", + "75073a0f673345728871dfb0346e7c1b", + "db8c94b4ed724f859d1ae8c153b01110", + "6a29c1c28ceb415f91ec55512da981c5", + "5879fadf430646f6af41b1a9b14864ff", + "340241453dab4db88043d372aaa88c2e", + "27e18e1fa3884c0fb0339764e0397990", + "2af1124092684f8bafab311cbe9bf22c", + "95a3332ba4634d1c930a7021eacce230", + "d53488432f8544de863210d9e8ee4e48", + "4422e64029184ba4ba30eecfdf2b4306", + "1d97e83c703f4071b9176ba7bf57cddf", + "17bf94188b844f649642d9c6e6a20373", + "d3aaecd7a6e34cc8918a689ac6299746" + ] + }, + "id": "C4bigx_3ibuN", + "outputId": "b8cef3f8-7a6c-47d1-9d37-7b3b6d08f00b" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Loading CamemBERT tokenizer...\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "06c6e7721b68449a9f3619ffdf18dfeb", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Downloading: 0%| | 0.00/811k [00:00<?, ?B/s]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "fba1d1d5c83b40659295a3457d74cb4e", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Downloading: 0%| | 0.00/1.40M [00:00<?, ?B/s]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "6a29c1c28ceb415f91ec55512da981c5", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Downloading: 0%| | 0.00/508 
[00:00<?, ?B/s]" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Load the BERT tokenizer.\n", + "if model_chosen == \"bert\":\n", + " print('Loading BERT tokenizer...')\n", + " tokenizer = BertTokenizer.from_pretrained(tokeniser_bert)\n", + "elif model_chosen == \"camembert\":\n", + " print('Loading CamemBERT tokenizer...')\n", + " tokenizer = CamembertTokenizer.from_pretrained(tokeniser_bert)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "5hNod5X9jDZN", + "outputId": "93b6e633-afb7-4bcc-be00-44388f801d64" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Token indices sequence length is longer than the specified maximum sequence length for this model (1263 > 512). Running this sequence through the model will result in indexing errors\n" + ] + } + ], + "source": [ + " # Tokenize all of the sentences and map the tokens to thier word IDs.\n", + "input_ids_train = []\n", + "\n", + "# For every sentence...\n", + "for sent in sentences_train:\n", + " # `encode` will:\n", + " # (1) Tokenize the sentence.\n", + " # (2) Prepend the `[CLS]` token to the start.\n", + " # (3) Append the `[SEP]` token to the end.\n", + " # (4) Map tokens to their IDs.\n", + " encoded_sent_train = tokenizer.encode(\n", + " str(sent), # Sentence to encode.\n", + " add_special_tokens = True, # Add '[CLS]' and '[SEP]'\n", + "\n", + " # This function also supports truncation and conversion\n", + " # to pytorch tensors, but I need to do padding, so I\n", + " # can't use these features.\n", + " #max_length = 128, # Truncate all sentences.\n", + " #return_tensors = 'pt', # Return pytorch tensors.\n", + " )\n", + " \n", + " # Add the encoded sentence to the list.\n", + " input_ids_train.append(encoded_sent_train)\n", + "\n", + "input_ids_validation = []\n", + "for sent in sentences_validation:\n", + " # `encode` will:\n", + " # 
(1) Tokenize the sentence.\n", + " # (2) Prepend the `[CLS]` token to the start.\n", + " # (3) Append the `[SEP]` token to the end.\n", + " # (4) Map tokens to their IDs.\n", + " encoded_sent_validation = tokenizer.encode(\n", + " str(sent), # Sentence to encode.\n", + " add_special_tokens = True, # Add '[CLS]' and '[SEP]'\n", + "\n", + " # This function also supports truncation and conversion\n", + " # to pytorch tensors, but I need to do padding, so I\n", + " # can't use these features.\n", + " #max_length = 128, # Truncate all sentences.\n", + " #return_tensors = 'pt', # Return pytorch tensors.\n", + " )\n", + " \n", + " # Add the encoded sentence to the list.\n", + " input_ids_validation.append(encoded_sent_validation)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "W9EWv5JvjGH3", + "outputId": "32cd417d-9a40-4086-d900-b81982407667" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Max sentence length train: 2253\n", + "Max sentence length validation: 3067\n" + ] + } + ], + "source": [ + "print('Max sentence length train: ', max([len(sen) for sen in input_ids_train]))\n", + "print('Max sentence length validation: ', max([len(sen) for sen in input_ids_validation])) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "xh1TQJyvjOx5" + }, + "outputs": [], + "source": [ + "\n", + "padded_train = []\n", + "for i in input_ids_train:\n", + "\n", + " if len(i) > max_len:\n", + " padded_train.extend([i[:max_len]])\n", + " else:\n", + " padded_train.extend([i + [0] * (max_len - len(i))])\n", + "\n", + "\n", + "padded_train = input_ids_train = np.array(padded_train)\n", + "\n", + "\n", + "padded_validation = []\n", + "for i in input_ids_validation:\n", + "\n", + " if len(i) > max_len:\n", + " padded_validation.extend([i[:max_len]])\n", + " else:\n", + " padded_validation.extend([i + [0] * (max_len - 
len(i))])\n", + "\n", + "\n", + "padded_validation = input_ids_train = np.array(padded_validation)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ZiwY6gn0jUkD" + }, + "outputs": [], + "source": [ + " # Create attention masks\n", + "attention_masks_train = []\n", + "\n", + "# For each sentence...\n", + "for sent in padded_train:\n", + " \n", + " # Create the attention mask.\n", + " # - If a token ID is 0, then it's padding, set the mask to 0.\n", + " # - If a token ID is > 0, then it's a real token, set the mask to 1.\n", + " att_mask = [int(token_id > 0) for token_id in sent]\n", + " \n", + " # Store the attention mask for this sentence.\n", + " attention_masks_train.append(att_mask)\n", + "\n", + "\n", + "attention_masks_validation = []\n", + "\n", + "# For each sentence...\n", + "for sent in padded_validation:\n", + " \n", + " # Create the attention mask.\n", + " # - If a token ID is 0, then it's padding, set the mask to 0.\n", + " # - If a token ID is > 0, then it's a real token, set the mask to 1.\n", + " att_mask = [int(token_id > 0) for token_id in sent]\n", + " \n", + " # Store the attention mask for this sentence.\n", + " attention_masks_validation.append(att_mask)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "oBTR5AfAjXJe" + }, + "outputs": [], + "source": [ + "# Use 70% for training and 30% for validation.\n", + "#train_inputs, validation_inputs, train_labels, validation_labels = train_test_split(padded, labels, \n", + "# random_state=2018, test_size=0.3, stratify = labels)\n", + "# Do the same for the masks.\n", + "#train_masks, validation_masks, _, _ = train_test_split(attention_masks, labels,\n", + "# random_state=2018, test_size=0.3, stratify = labels)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "b9Mw5kq3jhTb" + }, + "outputs": [], + "source": [ + "# Convert all inputs and labels into torch tensors, the required datatype 
\n", + "# for my model.\n", + "train_inputs = torch.tensor(padded_train)\n", + "validation_inputs = torch.tensor(padded_validation)\n", + "\n", + "train_labels = torch.tensor(labels_train)\n", + "validation_labels = torch.tensor(labels_validation)\n", + "\n", + "train_masks = torch.tensor(attention_masks_train)\n", + "validation_masks = torch.tensor(attention_masks_validation)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "UfFWzbENjnkw" + }, + "outputs": [], + "source": [ + "# The DataLoader needs to know the batch size for training, so I specify it here.\n", + "# For fine-tuning BERT on a specific task, the authors recommend a batch size of\n", + "# 16 or 32.\n", + "\n", + "# Create the DataLoader for training set.\n", + "train_data = TensorDataset(train_inputs, train_masks, train_labels)\n", + "train_sampler = RandomSampler(train_data)\n", + "train_dataloader = DataLoader(train_data, sampler=train_sampler, batch_size=batch_size)\n", + "\n", + "# Create the DataLoader for validation set.\n", + "validation_data = TensorDataset(validation_inputs, validation_masks, validation_labels)\n", + "validation_sampler = SequentialSampler(validation_data)\n", + "validation_dataloader = DataLoader(validation_data, sampler=validation_sampler, batch_size=batch_size)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "x45JNGqhkUn2" + }, + "source": [ + "## Training" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000, + "referenced_widgets": [ + "4873cc6c9e1d493c9a67d6536e4367a6", + "12aa3280d3284c07ac12e2fe842b40b0", + "1bcdb04d16dd4f9e9d86938e1d2def02", + "b5f86071b23c40bf9c96f74c613c2729", + "27a20a17123744948e0c1dbf49b51b27", + "f470af786c1c4d049de4f0a7f373379f", + "00bd66a81aad4cd7a10df4a67b52b14e", + "a5efb634a95c42a7abfaaf61e1c2c928", + "600e627de1f0403595f701381dc3b164", + "f3b7527bd4d04c81936d8392decee3ac", + 
"885f91c34b9c422889df8b556aad8ec0" + ] + }, + "id": "C7M2Er1ajsTf", + "outputId": "2c3f467d-ab09-4f8f-d464-a4e738333587" + }, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "4873cc6c9e1d493c9a67d6536e4367a6", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Downloading: 0%| | 0.00/445M [00:00<?, ?B/s]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Some weights of the model checkpoint at camembert-base were not used when initializing CamembertForSequenceClassification: ['lm_head.dense.weight', 'roberta.pooler.dense.bias', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.bias', 'roberta.pooler.dense.weight', 'lm_head.decoder.weight', 'lm_head.layer_norm.bias']\n", + "- This IS expected if you are initializing CamembertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. 
initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", + "- This IS NOT expected if you are initializing CamembertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n", + "Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at camembert-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.bias']\n", + "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n" + ] + }, + { + "data": { + "text/plain": [ + "CamembertForSequenceClassification(\n", + " (roberta): RobertaModel(\n", + " (embeddings): RobertaEmbeddings(\n", + " (word_embeddings): Embedding(32005, 768, padding_idx=1)\n", + " (position_embeddings): Embedding(514, 768, padding_idx=1)\n", + " (token_type_embeddings): Embedding(1, 768)\n", + " (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (encoder): RobertaEncoder(\n", + " (layer): ModuleList(\n", + " (0): RobertaLayer(\n", + " (attention): RobertaAttention(\n", + " (self): RobertaSelfAttention(\n", + " (query): Linear(in_features=768, out_features=768, bias=True)\n", + " (key): Linear(in_features=768, out_features=768, bias=True)\n", + " (value): Linear(in_features=768, out_features=768, bias=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (output): RobertaSelfOutput(\n", + " (dense): Linear(in_features=768, out_features=768, bias=True)\n", + " (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (intermediate): RobertaIntermediate(\n", + " (dense): Linear(in_features=768, out_features=3072, 
bias=True)\n", + " )\n", + " (output): RobertaOutput(\n", + " (dense): Linear(in_features=3072, out_features=768, bias=True)\n", + " (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (1): RobertaLayer(\n", + " (attention): RobertaAttention(\n", + " (self): RobertaSelfAttention(\n", + " (query): Linear(in_features=768, out_features=768, bias=True)\n", + " (key): Linear(in_features=768, out_features=768, bias=True)\n", + " (value): Linear(in_features=768, out_features=768, bias=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (output): RobertaSelfOutput(\n", + " (dense): Linear(in_features=768, out_features=768, bias=True)\n", + " (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (intermediate): RobertaIntermediate(\n", + " (dense): Linear(in_features=768, out_features=3072, bias=True)\n", + " )\n", + " (output): RobertaOutput(\n", + " (dense): Linear(in_features=3072, out_features=768, bias=True)\n", + " (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (2): RobertaLayer(\n", + " (attention): RobertaAttention(\n", + " (self): RobertaSelfAttention(\n", + " (query): Linear(in_features=768, out_features=768, bias=True)\n", + " (key): Linear(in_features=768, out_features=768, bias=True)\n", + " (value): Linear(in_features=768, out_features=768, bias=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (output): RobertaSelfOutput(\n", + " (dense): Linear(in_features=768, out_features=768, bias=True)\n", + " (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (intermediate): RobertaIntermediate(\n", + " (dense): Linear(in_features=768, out_features=3072, 
bias=True)\n", + " )\n", + " (output): RobertaOutput(\n", + " (dense): Linear(in_features=3072, out_features=768, bias=True)\n", + " (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (3): RobertaLayer(\n", + " (attention): RobertaAttention(\n", + " (self): RobertaSelfAttention(\n", + " (query): Linear(in_features=768, out_features=768, bias=True)\n", + " (key): Linear(in_features=768, out_features=768, bias=True)\n", + " (value): Linear(in_features=768, out_features=768, bias=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (output): RobertaSelfOutput(\n", + " (dense): Linear(in_features=768, out_features=768, bias=True)\n", + " (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (intermediate): RobertaIntermediate(\n", + " (dense): Linear(in_features=768, out_features=3072, bias=True)\n", + " )\n", + " (output): RobertaOutput(\n", + " (dense): Linear(in_features=3072, out_features=768, bias=True)\n", + " (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (4): RobertaLayer(\n", + " (attention): RobertaAttention(\n", + " (self): RobertaSelfAttention(\n", + " (query): Linear(in_features=768, out_features=768, bias=True)\n", + " (key): Linear(in_features=768, out_features=768, bias=True)\n", + " (value): Linear(in_features=768, out_features=768, bias=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (output): RobertaSelfOutput(\n", + " (dense): Linear(in_features=768, out_features=768, bias=True)\n", + " (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (intermediate): RobertaIntermediate(\n", + " (dense): Linear(in_features=768, out_features=3072, 
bias=True)\n", + " )\n", + " (output): RobertaOutput(\n", + " (dense): Linear(in_features=3072, out_features=768, bias=True)\n", + " (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (5): RobertaLayer(\n", + " (attention): RobertaAttention(\n", + " (self): RobertaSelfAttention(\n", + " (query): Linear(in_features=768, out_features=768, bias=True)\n", + " (key): Linear(in_features=768, out_features=768, bias=True)\n", + " (value): Linear(in_features=768, out_features=768, bias=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (output): RobertaSelfOutput(\n", + " (dense): Linear(in_features=768, out_features=768, bias=True)\n", + " (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (intermediate): RobertaIntermediate(\n", + " (dense): Linear(in_features=768, out_features=3072, bias=True)\n", + " )\n", + " (output): RobertaOutput(\n", + " (dense): Linear(in_features=3072, out_features=768, bias=True)\n", + " (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (6): RobertaLayer(\n", + " (attention): RobertaAttention(\n", + " (self): RobertaSelfAttention(\n", + " (query): Linear(in_features=768, out_features=768, bias=True)\n", + " (key): Linear(in_features=768, out_features=768, bias=True)\n", + " (value): Linear(in_features=768, out_features=768, bias=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (output): RobertaSelfOutput(\n", + " (dense): Linear(in_features=768, out_features=768, bias=True)\n", + " (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (intermediate): RobertaIntermediate(\n", + " (dense): Linear(in_features=768, out_features=3072, 
bias=True)\n", + " )\n", + " (output): RobertaOutput(\n", + " (dense): Linear(in_features=3072, out_features=768, bias=True)\n", + " (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (7): RobertaLayer(\n", + " (attention): RobertaAttention(\n", + " (self): RobertaSelfAttention(\n", + " (query): Linear(in_features=768, out_features=768, bias=True)\n", + " (key): Linear(in_features=768, out_features=768, bias=True)\n", + " (value): Linear(in_features=768, out_features=768, bias=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (output): RobertaSelfOutput(\n", + " (dense): Linear(in_features=768, out_features=768, bias=True)\n", + " (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (intermediate): RobertaIntermediate(\n", + " (dense): Linear(in_features=768, out_features=3072, bias=True)\n", + " )\n", + " (output): RobertaOutput(\n", + " (dense): Linear(in_features=3072, out_features=768, bias=True)\n", + " (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (8): RobertaLayer(\n", + " (attention): RobertaAttention(\n", + " (self): RobertaSelfAttention(\n", + " (query): Linear(in_features=768, out_features=768, bias=True)\n", + " (key): Linear(in_features=768, out_features=768, bias=True)\n", + " (value): Linear(in_features=768, out_features=768, bias=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (output): RobertaSelfOutput(\n", + " (dense): Linear(in_features=768, out_features=768, bias=True)\n", + " (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (intermediate): RobertaIntermediate(\n", + " (dense): Linear(in_features=768, out_features=3072, 
bias=True)\n", + " )\n", + " (output): RobertaOutput(\n", + " (dense): Linear(in_features=3072, out_features=768, bias=True)\n", + " (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (9): RobertaLayer(\n", + " (attention): RobertaAttention(\n", + " (self): RobertaSelfAttention(\n", + " (query): Linear(in_features=768, out_features=768, bias=True)\n", + " (key): Linear(in_features=768, out_features=768, bias=True)\n", + " (value): Linear(in_features=768, out_features=768, bias=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (output): RobertaSelfOutput(\n", + " (dense): Linear(in_features=768, out_features=768, bias=True)\n", + " (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (intermediate): RobertaIntermediate(\n", + " (dense): Linear(in_features=768, out_features=3072, bias=True)\n", + " )\n", + " (output): RobertaOutput(\n", + " (dense): Linear(in_features=3072, out_features=768, bias=True)\n", + " (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (10): RobertaLayer(\n", + " (attention): RobertaAttention(\n", + " (self): RobertaSelfAttention(\n", + " (query): Linear(in_features=768, out_features=768, bias=True)\n", + " (key): Linear(in_features=768, out_features=768, bias=True)\n", + " (value): Linear(in_features=768, out_features=768, bias=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (output): RobertaSelfOutput(\n", + " (dense): Linear(in_features=768, out_features=768, bias=True)\n", + " (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (intermediate): RobertaIntermediate(\n", + " (dense): Linear(in_features=768, out_features=3072, 
bias=True)\n", + " )\n", + " (output): RobertaOutput(\n", + " (dense): Linear(in_features=3072, out_features=768, bias=True)\n", + " (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (11): RobertaLayer(\n", + " (attention): RobertaAttention(\n", + " (self): RobertaSelfAttention(\n", + " (query): Linear(in_features=768, out_features=768, bias=True)\n", + " (key): Linear(in_features=768, out_features=768, bias=True)\n", + " (value): Linear(in_features=768, out_features=768, bias=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (output): RobertaSelfOutput(\n", + " (dense): Linear(in_features=768, out_features=768, bias=True)\n", + " (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (intermediate): RobertaIntermediate(\n", + " (dense): Linear(in_features=768, out_features=3072, bias=True)\n", + " )\n", + " (output): RobertaOutput(\n", + " (dense): Linear(in_features=3072, out_features=768, bias=True)\n", + " (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " )\n", + " )\n", + " )\n", + " (classifier): RobertaClassificationHead(\n", + " (dense): Linear(in_features=768, out_features=768, bias=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " (out_proj): Linear(in_features=768, out_features=38, bias=True)\n", + " )\n", + ")" + ] + }, + "execution_count": 51, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Load BertForSequenceClassification, the pretrained BERT model with a single \n", + "# linear classification layer on top.\n", + "\n", + "#model = CamembertForSequenceClassification.from_pretrained(\n", + "if model_chosen == \"bert\":\n", + " model = BertForSequenceClassification.from_pretrained(\n", + " model_bert, # Use the 
12-layer BERT model, with an uncased vocab.\n", + " num_labels = numberOfClasses, # The number of output labels--2 for binary classification.\n", + " # You can increase this for multi-class tasks. \n", + " output_attentions = False, # Whether the model returns attentions weights.\n", + " output_hidden_states = False, # Whether the model returns all hidden-states.\n", + " )\n", + "elif model_chosen == \"camembert\":\n", + " model = CamembertForSequenceClassification.from_pretrained(\n", + " model_bert, # Use the 12-layer BERT model, with an uncased vocab.\n", + " num_labels = numberOfClasses, # The number of output labels--2 for binary classification.\n", + " # You can increase this for multi-class tasks. \n", + " output_attentions = False, # Whether the model returns attentions weights.\n", + " output_hidden_states = False, # Whether the model returns all hidden-states.\n", + " )\n", + "\n", + "# Tell pytorch to run this model on the GPU.\n", + "model.cuda()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "xd_cG-8pj4Iw" + }, + "outputs": [], + "source": [ + "#Note: AdamW is a class from the huggingface library (as opposed to pytorch) \n", + "# I believe the 'W' stands for 'Weight Decay fix\"\n", + "optimizer = AdamW(model.parameters(),\n", + " lr = 2e-5, # args.learning_rate - default is 5e-5, our notebook had 2e-5\n", + " eps = 1e-8 # args.adam_epsilon - default is 1e-8.\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "65G-uHuLj4_6" + }, + "outputs": [], + "source": [ + "# Number of training epochs (authors recommend between 2 and 4)\n", + "epochs = 4\n", + "\n", + "# Total number of training steps is number of batches * number of epochs.\n", + "total_steps = len(train_dataloader) * epochs\n", + "\n", + "# Create the learning rate scheduler.\n", + "scheduler = get_linear_schedule_with_warmup(optimizer, \n", + " num_warmup_steps = 0, # Default value in run_glue.py\n", + " 
num_training_steps = total_steps)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "background_save": true, + "base_uri": "https://localhost:8080/" + }, + "id": "SbHBbYpwkKaA", + "outputId": "49f7f5f4-716d-44c2-e299-505086a89061" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "======== Epoch 1 / 4 ========\n", + "Training...\n", + " Batch 40 of 2,642. Elapsed: 0:00:18.\n", + " Batch 80 of 2,642. Elapsed: 0:00:36.\n", + " Batch 120 of 2,642. Elapsed: 0:00:55.\n", + " Batch 160 of 2,642. Elapsed: 0:01:13.\n", + " Batch 200 of 2,642. Elapsed: 0:01:31.\n", + " Batch 240 of 2,642. Elapsed: 0:01:49.\n", + " Batch 280 of 2,642. Elapsed: 0:02:08.\n", + " Batch 320 of 2,642. Elapsed: 0:02:26.\n", + " Batch 360 of 2,642. Elapsed: 0:02:44.\n", + " Batch 400 of 2,642. Elapsed: 0:03:02.\n", + " Batch 440 of 2,642. Elapsed: 0:03:20.\n", + " Batch 480 of 2,642. Elapsed: 0:03:39.\n", + " Batch 520 of 2,642. Elapsed: 0:03:57.\n", + " Batch 560 of 2,642. Elapsed: 0:04:15.\n", + " Batch 600 of 2,642. Elapsed: 0:04:33.\n", + " Batch 640 of 2,642. Elapsed: 0:04:51.\n", + " Batch 680 of 2,642. Elapsed: 0:05:10.\n", + " Batch 720 of 2,642. Elapsed: 0:05:28.\n", + " Batch 760 of 2,642. Elapsed: 0:05:46.\n", + " Batch 800 of 2,642. Elapsed: 0:06:04.\n", + " Batch 840 of 2,642. Elapsed: 0:06:22.\n", + " Batch 880 of 2,642. Elapsed: 0:06:41.\n", + " Batch 920 of 2,642. Elapsed: 0:06:59.\n", + " Batch 960 of 2,642. Elapsed: 0:07:17.\n", + " Batch 1,000 of 2,642. Elapsed: 0:07:35.\n", + " Batch 1,040 of 2,642. Elapsed: 0:07:54.\n", + " Batch 1,080 of 2,642. Elapsed: 0:08:12.\n", + " Batch 1,120 of 2,642. Elapsed: 0:08:30.\n", + " Batch 1,160 of 2,642. Elapsed: 0:08:48.\n", + " Batch 1,200 of 2,642. Elapsed: 0:09:06.\n", + " Batch 1,240 of 2,642. Elapsed: 0:09:25.\n", + " Batch 1,280 of 2,642. Elapsed: 0:09:43.\n", + " Batch 1,320 of 2,642. Elapsed: 0:10:01.\n", + " Batch 1,360 of 2,642. 
Elapsed: 0:10:19.\n", + " Batch 1,400 of 2,642. Elapsed: 0:10:37.\n", + " Batch 1,440 of 2,642. Elapsed: 0:10:56.\n", + " Batch 1,480 of 2,642. Elapsed: 0:11:14.\n", + " Batch 1,520 of 2,642. Elapsed: 0:11:32.\n", + " Batch 1,560 of 2,642. Elapsed: 0:11:50.\n", + " Batch 1,600 of 2,642. Elapsed: 0:12:08.\n", + " Batch 1,640 of 2,642. Elapsed: 0:12:27.\n", + " Batch 1,680 of 2,642. Elapsed: 0:12:45.\n", + " Batch 1,720 of 2,642. Elapsed: 0:13:03.\n", + " Batch 1,760 of 2,642. Elapsed: 0:13:21.\n", + " Batch 1,800 of 2,642. Elapsed: 0:13:39.\n", + " Batch 1,840 of 2,642. Elapsed: 0:13:58.\n", + " Batch 1,880 of 2,642. Elapsed: 0:14:16.\n", + " Batch 1,920 of 2,642. Elapsed: 0:14:34.\n", + " Batch 1,960 of 2,642. Elapsed: 0:14:52.\n", + " Batch 2,000 of 2,642. Elapsed: 0:15:11.\n", + " Batch 2,040 of 2,642. Elapsed: 0:15:29.\n", + " Batch 2,080 of 2,642. Elapsed: 0:15:47.\n", + " Batch 2,120 of 2,642. Elapsed: 0:16:05.\n", + " Batch 2,160 of 2,642. Elapsed: 0:16:23.\n", + " Batch 2,200 of 2,642. Elapsed: 0:16:42.\n", + " Batch 2,240 of 2,642. Elapsed: 0:17:00.\n", + " Batch 2,280 of 2,642. Elapsed: 0:17:18.\n", + " Batch 2,320 of 2,642. Elapsed: 0:17:36.\n", + " Batch 2,360 of 2,642. Elapsed: 0:17:54.\n", + " Batch 2,400 of 2,642. Elapsed: 0:18:13.\n", + " Batch 2,440 of 2,642. Elapsed: 0:18:31.\n", + " Batch 2,480 of 2,642. Elapsed: 0:18:49.\n", + " Batch 2,520 of 2,642. Elapsed: 0:19:07.\n", + " Batch 2,560 of 2,642. Elapsed: 0:19:26.\n", + " Batch 2,600 of 2,642. Elapsed: 0:19:44.\n", + " Batch 2,640 of 2,642. Elapsed: 0:20:02.\n", + "\n", + " Average training loss: 2.04\n", + " Training epoch took: 0:20:03\n", + "\n", + "Running Validation...\n", + " Accuracy: 0.75\n", + " Validation took: 0:03:09\n", + "\n", + "======== Epoch 2 / 4 ========\n", + "Training...\n", + " Batch 40 of 2,642. Elapsed: 0:00:18.\n", + " Batch 80 of 2,642. Elapsed: 0:00:36.\n", + " Batch 120 of 2,642. Elapsed: 0:00:55.\n", + " Batch 160 of 2,642. 
Elapsed: 0:01:13.\n", + " Batch 200 of 2,642. Elapsed: 0:01:31.\n", + " Batch 240 of 2,642. Elapsed: 0:01:49.\n", + " Batch 280 of 2,642. Elapsed: 0:02:07.\n", + " Batch 320 of 2,642. Elapsed: 0:02:26.\n", + " Batch 360 of 2,642. Elapsed: 0:02:44.\n", + " Batch 400 of 2,642. Elapsed: 0:03:02.\n", + " Batch 440 of 2,642. Elapsed: 0:03:20.\n", + " Batch 480 of 2,642. Elapsed: 0:03:38.\n", + " Batch 520 of 2,642. Elapsed: 0:03:57.\n", + " Batch 560 of 2,642. Elapsed: 0:04:15.\n", + " Batch 600 of 2,642. Elapsed: 0:04:33.\n", + " Batch 640 of 2,642. Elapsed: 0:04:51.\n", + " Batch 680 of 2,642. Elapsed: 0:05:10.\n", + " Batch 720 of 2,642. Elapsed: 0:05:28.\n", + " Batch 760 of 2,642. Elapsed: 0:05:46.\n", + " Batch 800 of 2,642. Elapsed: 0:06:04.\n", + " Batch 840 of 2,642. Elapsed: 0:06:22.\n", + " Batch 880 of 2,642. Elapsed: 0:06:41.\n", + " Batch 920 of 2,642. Elapsed: 0:06:59.\n", + " Batch 960 of 2,642. Elapsed: 0:07:17.\n", + " Batch 1,000 of 2,642. Elapsed: 0:07:35.\n", + " Batch 1,040 of 2,642. Elapsed: 0:07:53.\n", + " Batch 1,080 of 2,642. Elapsed: 0:08:12.\n", + " Batch 1,120 of 2,642. Elapsed: 0:08:30.\n", + " Batch 1,160 of 2,642. Elapsed: 0:08:48.\n", + " Batch 1,200 of 2,642. Elapsed: 0:09:06.\n", + " Batch 1,240 of 2,642. Elapsed: 0:09:24.\n", + " Batch 1,280 of 2,642. Elapsed: 0:09:43.\n", + " Batch 1,320 of 2,642. Elapsed: 0:10:01.\n", + " Batch 1,360 of 2,642. Elapsed: 0:10:19.\n", + " Batch 1,400 of 2,642. Elapsed: 0:10:37.\n", + " Batch 1,440 of 2,642. Elapsed: 0:10:55.\n", + " Batch 1,480 of 2,642. Elapsed: 0:11:14.\n", + " Batch 1,520 of 2,642. Elapsed: 0:11:32.\n", + " Batch 1,560 of 2,642. Elapsed: 0:11:50.\n", + " Batch 1,600 of 2,642. Elapsed: 0:12:08.\n", + " Batch 1,640 of 2,642. Elapsed: 0:12:27.\n", + " Batch 1,680 of 2,642. Elapsed: 0:12:45.\n", + " Batch 1,720 of 2,642. Elapsed: 0:13:03.\n", + " Batch 1,760 of 2,642. Elapsed: 0:13:21.\n", + " Batch 1,800 of 2,642. Elapsed: 0:13:39.\n", + " Batch 1,840 of 2,642. 
Elapsed: 0:13:58.\n", + " Batch 1,880 of 2,642. Elapsed: 0:14:16.\n", + " Batch 1,920 of 2,642. Elapsed: 0:14:34.\n", + " Batch 1,960 of 2,642. Elapsed: 0:14:52.\n", + " Batch 2,000 of 2,642. Elapsed: 0:15:10.\n", + " Batch 2,040 of 2,642. Elapsed: 0:15:29.\n", + " Batch 2,080 of 2,642. Elapsed: 0:15:47.\n", + " Batch 2,120 of 2,642. Elapsed: 0:16:05.\n", + " Batch 2,160 of 2,642. Elapsed: 0:16:23.\n", + " Batch 2,200 of 2,642. Elapsed: 0:16:41.\n", + " Batch 2,240 of 2,642. Elapsed: 0:17:00.\n", + " Batch 2,280 of 2,642. Elapsed: 0:17:18.\n", + " Batch 2,320 of 2,642. Elapsed: 0:17:36.\n", + " Batch 2,360 of 2,642. Elapsed: 0:17:54.\n", + " Batch 2,400 of 2,642. Elapsed: 0:18:12.\n", + " Batch 2,440 of 2,642. Elapsed: 0:18:31.\n", + " Batch 2,480 of 2,642. Elapsed: 0:18:49.\n", + " Batch 2,520 of 2,642. Elapsed: 0:19:07.\n", + " Batch 2,560 of 2,642. Elapsed: 0:19:25.\n", + " Batch 2,600 of 2,642. Elapsed: 0:19:44.\n", + " Batch 2,640 of 2,642. Elapsed: 0:20:02.\n", + "\n", + " Average training loss: 1.03\n", + " Training epoch took: 0:20:02\n", + "\n", + "Running Validation...\n", + " Accuracy: 0.79\n", + " Validation took: 0:03:09\n", + "\n", + "======== Epoch 3 / 4 ========\n", + "Training...\n", + " Batch 40 of 2,642. Elapsed: 0:00:18.\n", + " Batch 80 of 2,642. Elapsed: 0:00:36.\n", + " Batch 120 of 2,642. Elapsed: 0:00:55.\n", + " Batch 160 of 2,642. Elapsed: 0:01:13.\n", + " Batch 200 of 2,642. Elapsed: 0:01:31.\n", + " Batch 240 of 2,642. Elapsed: 0:01:49.\n", + " Batch 280 of 2,642. Elapsed: 0:02:07.\n", + " Batch 320 of 2,642. Elapsed: 0:02:26.\n", + " Batch 360 of 2,642. Elapsed: 0:02:44.\n", + " Batch 400 of 2,642. Elapsed: 0:03:02.\n", + " Batch 440 of 2,642. Elapsed: 0:03:20.\n", + " Batch 480 of 2,642. Elapsed: 0:03:38.\n", + " Batch 520 of 2,642. Elapsed: 0:03:57.\n", + " Batch 560 of 2,642. Elapsed: 0:04:15.\n", + " Batch 600 of 2,642. Elapsed: 0:04:33.\n", + " Batch 640 of 2,642. Elapsed: 0:04:51.\n", + " Batch 680 of 2,642. 
Elapsed: 0:05:09.\n", + " Batch 720 of 2,642. Elapsed: 0:05:28.\n", + " Batch 760 of 2,642. Elapsed: 0:05:46.\n", + " Batch 800 of 2,642. Elapsed: 0:06:04.\n", + " Batch 840 of 2,642. Elapsed: 0:06:22.\n", + " Batch 880 of 2,642. Elapsed: 0:06:41.\n", + " Batch 920 of 2,642. Elapsed: 0:06:59.\n", + " Batch 960 of 2,642. Elapsed: 0:07:17.\n", + " Batch 1,000 of 2,642. Elapsed: 0:07:35.\n", + " Batch 1,040 of 2,642. Elapsed: 0:07:53.\n", + " Batch 1,080 of 2,642. Elapsed: 0:08:12.\n", + " Batch 1,120 of 2,642. Elapsed: 0:08:30.\n", + " Batch 1,160 of 2,642. Elapsed: 0:08:48.\n", + " Batch 1,200 of 2,642. Elapsed: 0:09:06.\n", + " Batch 1,240 of 2,642. Elapsed: 0:09:24.\n", + " Batch 1,280 of 2,642. Elapsed: 0:09:43.\n", + " Batch 1,320 of 2,642. Elapsed: 0:10:01.\n", + " Batch 1,360 of 2,642. Elapsed: 0:10:19.\n", + " Batch 1,400 of 2,642. Elapsed: 0:10:37.\n", + " Batch 1,440 of 2,642. Elapsed: 0:10:55.\n", + " Batch 1,480 of 2,642. Elapsed: 0:11:14.\n", + " Batch 1,520 of 2,642. Elapsed: 0:11:32.\n", + " Batch 1,560 of 2,642. Elapsed: 0:11:50.\n", + " Batch 1,600 of 2,642. Elapsed: 0:12:08.\n", + " Batch 1,640 of 2,642. Elapsed: 0:12:26.\n", + " Batch 1,680 of 2,642. Elapsed: 0:12:45.\n", + " Batch 1,720 of 2,642. Elapsed: 0:13:03.\n", + " Batch 1,760 of 2,642. Elapsed: 0:13:21.\n", + " Batch 1,800 of 2,642. Elapsed: 0:13:39.\n", + " Batch 1,840 of 2,642. Elapsed: 0:13:57.\n", + " Batch 1,880 of 2,642. Elapsed: 0:14:16.\n", + " Batch 1,920 of 2,642. Elapsed: 0:14:34.\n", + " Batch 1,960 of 2,642. Elapsed: 0:14:52.\n", + " Batch 2,000 of 2,642. Elapsed: 0:15:10.\n", + " Batch 2,040 of 2,642. Elapsed: 0:15:28.\n", + " Batch 2,080 of 2,642. Elapsed: 0:15:47.\n", + " Batch 2,120 of 2,642. Elapsed: 0:16:05.\n", + " Batch 2,160 of 2,642. Elapsed: 0:16:23.\n", + " Batch 2,200 of 2,642. Elapsed: 0:16:41.\n", + " Batch 2,240 of 2,642. Elapsed: 0:17:00.\n", + " Batch 2,280 of 2,642. Elapsed: 0:17:18.\n", + " Batch 2,320 of 2,642. 
Elapsed: 0:17:36.\n", + " Batch 2,360 of 2,642. Elapsed: 0:17:54.\n", + " Batch 2,400 of 2,642. Elapsed: 0:18:12.\n", + " Batch 2,440 of 2,642. Elapsed: 0:18:31.\n", + " Batch 2,480 of 2,642. Elapsed: 0:18:49.\n", + " Batch 2,520 of 2,642. Elapsed: 0:19:07.\n", + " Batch 2,560 of 2,642. Elapsed: 0:19:25.\n", + " Batch 2,600 of 2,642. Elapsed: 0:19:43.\n", + " Batch 2,640 of 2,642. Elapsed: 0:20:02.\n", + "\n", + " Average training loss: 0.75\n", + " Training epoch took: 0:20:02\n", + "\n", + "Running Validation...\n", + " Accuracy: 0.79\n", + " Validation took: 0:03:09\n", + "\n", + "======== Epoch 4 / 4 ========\n", + "Training...\n", + " Batch 40 of 2,642. Elapsed: 0:00:18.\n", + " Batch 80 of 2,642. Elapsed: 0:00:36.\n", + " Batch 120 of 2,642. Elapsed: 0:00:55.\n", + " Batch 160 of 2,642. Elapsed: 0:01:13.\n", + " Batch 200 of 2,642. Elapsed: 0:01:31.\n", + " Batch 240 of 2,642. Elapsed: 0:01:49.\n", + " Batch 280 of 2,642. Elapsed: 0:02:07.\n", + " Batch 320 of 2,642. Elapsed: 0:02:26.\n", + " Batch 360 of 2,642. Elapsed: 0:02:44.\n", + " Batch 400 of 2,642. Elapsed: 0:03:02.\n", + " Batch 440 of 2,642. Elapsed: 0:03:20.\n", + " Batch 480 of 2,642. Elapsed: 0:03:39.\n", + " Batch 520 of 2,642. Elapsed: 0:03:57.\n", + " Batch 560 of 2,642. Elapsed: 0:04:15.\n", + " Batch 600 of 2,642. Elapsed: 0:04:33.\n", + " Batch 640 of 2,642. Elapsed: 0:04:51.\n", + " Batch 680 of 2,642. Elapsed: 0:05:10.\n", + " Batch 720 of 2,642. Elapsed: 0:05:28.\n", + " Batch 760 of 2,642. Elapsed: 0:05:46.\n", + " Batch 800 of 2,642. Elapsed: 0:06:04.\n", + " Batch 840 of 2,642. Elapsed: 0:06:22.\n", + " Batch 880 of 2,642. Elapsed: 0:06:41.\n", + " Batch 920 of 2,642. Elapsed: 0:06:59.\n", + " Batch 960 of 2,642. Elapsed: 0:07:17.\n", + " Batch 1,000 of 2,642. Elapsed: 0:07:35.\n", + " Batch 1,040 of 2,642. Elapsed: 0:07:53.\n", + " Batch 1,080 of 2,642. Elapsed: 0:08:12.\n", + " Batch 1,120 of 2,642. Elapsed: 0:08:30.\n", + " Batch 1,160 of 2,642. 
Elapsed: 0:08:48.\n", + " Batch 1,200 of 2,642. Elapsed: 0:09:06.\n", + " Batch 1,240 of 2,642. Elapsed: 0:09:24.\n", + " Batch 1,280 of 2,642. Elapsed: 0:09:43.\n", + " Batch 1,320 of 2,642. Elapsed: 0:10:01.\n", + " Batch 1,360 of 2,642. Elapsed: 0:10:19.\n", + " Batch 1,400 of 2,642. Elapsed: 0:10:37.\n", + " Batch 1,440 of 2,642. Elapsed: 0:10:55.\n", + " Batch 1,480 of 2,642. Elapsed: 0:11:14.\n", + " Batch 1,520 of 2,642. Elapsed: 0:11:32.\n", + " Batch 1,560 of 2,642. Elapsed: 0:11:50.\n", + " Batch 1,600 of 2,642. Elapsed: 0:12:08.\n", + " Batch 1,640 of 2,642. Elapsed: 0:12:26.\n", + " Batch 1,680 of 2,642. Elapsed: 0:12:45.\n", + " Batch 1,720 of 2,642. Elapsed: 0:13:03.\n", + " Batch 1,760 of 2,642. Elapsed: 0:13:21.\n", + " Batch 1,800 of 2,642. Elapsed: 0:13:39.\n", + " Batch 1,840 of 2,642. Elapsed: 0:13:57.\n", + " Batch 1,880 of 2,642. Elapsed: 0:14:16.\n", + " Batch 1,920 of 2,642. Elapsed: 0:14:34.\n", + " Batch 1,960 of 2,642. Elapsed: 0:14:52.\n", + " Batch 2,000 of 2,642. Elapsed: 0:15:10.\n", + " Batch 2,040 of 2,642. Elapsed: 0:15:28.\n", + " Batch 2,080 of 2,642. Elapsed: 0:15:46.\n", + " Batch 2,120 of 2,642. Elapsed: 0:16:05.\n", + " Batch 2,160 of 2,642. Elapsed: 0:16:23.\n", + " Batch 2,200 of 2,642. Elapsed: 0:16:41.\n", + " Batch 2,240 of 2,642. Elapsed: 0:16:59.\n", + " Batch 2,280 of 2,642. Elapsed: 0:17:17.\n", + " Batch 2,320 of 2,642. Elapsed: 0:17:36.\n", + " Batch 2,360 of 2,642. Elapsed: 0:17:54.\n", + " Batch 2,400 of 2,642. Elapsed: 0:18:12.\n", + " Batch 2,440 of 2,642. Elapsed: 0:18:30.\n", + " Batch 2,480 of 2,642. Elapsed: 0:18:48.\n", + " Batch 2,520 of 2,642. Elapsed: 0:19:07.\n", + " Batch 2,560 of 2,642. Elapsed: 0:19:25.\n", + " Batch 2,600 of 2,642. Elapsed: 0:19:43.\n", + " Batch 2,640 of 2,642. 
Elapsed: 0:20:01.\n", + "\n", + " Average training loss: 0.60\n", + " Training epoch took: 0:20:02\n", + "\n", + "Running Validation...\n", + " Accuracy: 0.80\n", + " Validation took: 0:03:09\n", + "\n", + "Training complete!\n" + ] + } + ], + "source": [ + "# This training code is based on the `run_glue.py` script here:\n", + "# https://github.com/huggingface/transformers/blob/5bfcd0485ece086ebcbed2d008813037968a9e58/examples/run_glue.py#L128\n", + "\n", + "# Set the seed value all over the place to make this reproducible.\n", + "seed_val = 42\n", + "\n", + "random.seed(seed_val)\n", + "np.random.seed(seed_val)\n", + "torch.manual_seed(seed_val)\n", + "torch.cuda.manual_seed_all(seed_val)\n", + "\n", + "# Store the average loss after each epoch so I can plot them.\n", + "loss_values = []\n", + "\n", + "# For each epoch...\n", + "for epoch_i in range(0, epochs):\n", + " \n", + " # ========================================\n", + " # Training\n", + " # ========================================\n", + " \n", + " # Perform one full pass over the training set.\n", + "\n", + " print(\"\")\n", + " print('======== Epoch {:} / {:} ========'.format(epoch_i + 1, epochs))\n", + " print('Training...')\n", + "\n", + " # Measure how long the training epoch takes.\n", + " t0 = time.time()\n", + "\n", + " # Reset the total loss for this epoch.\n", + " total_loss = 0\n", + "\n", + " # Put the model into training mode.\n", + " model.train()\n", + "\n", + " # For each batch of training data...\n", + " for step, batch in enumerate(train_dataloader):\n", + "\n", + " # Progress update every 40 batches.\n", + " if step % 40 == 0 and not step == 0:\n", + " # Calculate elapsed time in minutes.\n", + " elapsed = format_time(time.time() - t0)\n", + " \n", + " # Report progress.\n", + " print(' Batch {:>5,} of {:>5,}. Elapsed: {:}.'.format(step, len(train_dataloader), elapsed))\n", + "\n", + " # Unpack this training batch from the dataloader. 
\n", + " #\n", + " # As I unpack the batch, I'll also copy each tensor to the GPU using the \n", + " # `to` method.\n", + " #\n", + " # `batch` contains three pytorch tensors:\n", + " # [0]: input ids \n", + " # [1]: attention masks\n", + " # [2]: labels \n", + " b_input_ids = batch[0].to(device)\n", + " b_input_mask = batch[1].to(device)\n", + " b_labels = batch[2].to(device)\n", + "\n", + " # Always clear any previously calculated gradients before performing a\n", + " # backward pass. PyTorch doesn't do this automatically because \n", + " # accumulating the gradients is \"convenient while training RNNs\". \n", + " # (source: https://stackoverflow.com/questions/48001598/why-do-we-need-to-call-zero-grad-in-pytorch)\n", + " model.zero_grad() \n", + "\n", + " # Perform a forward pass (evaluate the model on this training batch).\n", + " # This will return the loss (rather than the model output) because I\n", + " # have provided the `labels`.\n", + " # The documentation for this `model` function is here: \n", + " # https://huggingface.co/transformers/v2.2.0/model_doc/bert.html#transformers.BertForSequenceClassification\n", + " outputs = model(b_input_ids, \n", + " token_type_ids=None, \n", + " attention_mask=b_input_mask, \n", + " labels=b_labels)\n", + " \n", + " # The call to `model` always returns a tuple, so I need to pull the \n", + " # loss value out of the tuple.\n", + " loss = outputs[0]\n", + "\n", + " # Accumulate the training loss over all of the batches so that I can\n", + " # calculate the average loss at the end. 
`loss` is a Tensor containing a\n", + " # single value; the `.item()` function just returns the Python value \n", + " # from the tensor.\n", + " total_loss += loss.item()\n", + "\n", + " # Perform a backward pass to calculate the gradients.\n", + " loss.backward()\n", + "\n", + " # Clip the norm of the gradients to 1.0.\n", + " # This is to help prevent the \"exploding gradients\" problem.\n", + " torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)\n", + "\n", + " # Update parameters and take a step using the computed gradient.\n", + " # The optimizer dictates the \"update rule\"--how the parameters are\n", + " # modified based on their gradients, the learning rate, etc.\n", + " optimizer.step()\n", + "\n", + " # Update the learning rate.\n", + " scheduler.step()\n", + "\n", + " # Calculate the average loss over the training data.\n", + " avg_train_loss = total_loss / len(train_dataloader) \n", + " \n", + " # Store the loss value for plotting the learning curve.\n", + " loss_values.append(avg_train_loss)\n", + "\n", + " print(\"\")\n", + " print(\" Average training loss: {0:.2f}\".format(avg_train_loss))\n", + " print(\" Training epoch took: {:}\".format(format_time(time.time() - t0)))\n", + " \n", + " # ========================================\n", + " # Validation\n", + " # ========================================\n", + " # After the completion of each training epoch, measure the performance on\n", + " # the validation set.\n", + "\n", + " print(\"\")\n", + " print(\"Running Validation...\")\n", + "\n", + " t0 = time.time()\n", + "\n", + " # Put the model in evaluation mode--the dropout layers behave differently\n", + " # during evaluation.\n", + " model.eval()\n", + "\n", + " # Tracking variables \n", + " eval_loss, eval_accuracy = 0, 0\n", + " nb_eval_steps, nb_eval_examples = 0, 0\n", + "\n", + " # Evaluate data for one epoch\n", + " for batch in validation_dataloader:\n", + " \n", + " # Add batch to GPU\n", + " batch = tuple(t.to(device) for t in 
batch)\n", + " \n", + " # Unpack the inputs from dataloader\n", + " b_input_ids, b_input_mask, b_labels = batch\n", + " \n", + " # Telling the model not to compute or store gradients, saving memory and\n", + " # speeding up validation\n", + " with torch.no_grad(): \n", + "\n", + " # Forward pass, calculate logit predictions.\n", + " # This will return the logits rather than the loss because we have\n", + " # not provided labels.\n", + " # token_type_ids is the same as the \"segment ids\", which \n", + " # differentiates sentence 1 and 2 in 2-sentence tasks.\n", + " # The documentation for this `model` function is here: \n", + " # https://huggingface.co/transformers/v2.2.0/model_doc/bert.html#transformers.BertForSequenceClassification\n", + " outputs = model(b_input_ids, \n", + " token_type_ids=None, \n", + " attention_mask=b_input_mask)\n", + " \n", + " # Get the \"logits\" output by the model. The \"logits\" are the output\n", + " # values prior to applying an activation function like the softmax.\n", + " logits = outputs[0]\n", + "\n", + " # Move logits and labels to CPU\n", + " logits = logits.detach().cpu().numpy()\n", + " label_ids = b_labels.to('cpu').numpy()\n", + " \n", + " # Calculate the accuracy for this batch of test sentences.\n", + " tmp_eval_accuracy = flat_accuracy(logits, label_ids)\n", + " \n", + " # Accumulate the total accuracy.\n", + " eval_accuracy += tmp_eval_accuracy\n", + "\n", + " # Track the number of batches\n", + " nb_eval_steps += 1\n", + "\n", + " # Report the final accuracy for this validation run.\n", + " print(\" Accuracy: {0:.2f}\".format(eval_accuracy/nb_eval_steps))\n", + " print(\" Validation took: {:}\".format(format_time(time.time() - t0)))\n", + "\n", + "print(\"\")\n", + "print(\"Training complete!\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "uEe7lPtVKpIY" + }, + "source": [ + "## Saving model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + 
"background_save": true + }, + "id": "AYCSVm_wKnuM" + }, + "outputs": [], + "source": [ + "model_path = \"drive/MyDrive/Classification-EDdA/model_\"+model_bert+\"_s\"+str(maxOfInstancePerClass)+\".pt\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "background_save": true + }, + "id": "qmsxrOqjCsGo" + }, + "outputs": [], + "source": [ + "torch.save(model, model_path)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pM9bSsckCndR" + }, + "source": [ + "## Loading model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "cEycmiS8Cnjw" + }, + "outputs": [], + "source": [ + "#model = torch.load(model_path)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "VJwyfmakkQyj" + }, + "source": [ + "## Evaluation" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "K9qdtYexIIvk" + }, + "outputs": [], + "source": [ + "def evaluate_bert(data, labels, model, batch_size):\n", + " # Tokenize all of the sentences and map the tokens to thier word IDs.\n", + " input_ids = []\n", + " # For every sentence...\n", + " for sent in data:\n", + " # `encode` will:\n", + " # (1) Tokenize the sentence.\n", + " # (2) Prepend the `[CLS]` token to the start.\n", + " # (3) Append the `[SEP]` token to the end.\n", + " # (4) Map tokens to their IDs.\n", + " encoded_sent = tokenizer.encode(\n", + " str(sent), # Sentence to encode.\n", + " add_special_tokens = True, # Add '[CLS]' and '[SEP]'\n", + " )\n", + " \n", + " input_ids.append(encoded_sent)\n", + "\n", + " # Pad our input tokens\n", + " padded = []\n", + " for i in input_ids:\n", + "\n", + " if len(i) > max_len:\n", + " padded.extend([i[:max_len]])\n", + " else:\n", + " padded.extend([i + [0] * (max_len - len(i))])\n", + " input_ids = np.array(padded)\n", + "\n", + " # Create attention masks\n", + " attention_masks = []\n", + "\n", + " # Create a mask of 1s for each token followed by 0s for 
padding\n", + " for seq in input_ids:\n", + " seq_mask = [float(i>0) for i in seq]\n", + " attention_masks.append(seq_mask) \n", + "\n", + " # Convert to tensors.\n", + " prediction_inputs = torch.tensor(input_ids)\n", + " prediction_masks = torch.tensor(attention_masks)\n", + " prediction_labels = torch.tensor(labels)\n", + "\n", + " # Create the DataLoader.\n", + " prediction_data = TensorDataset(prediction_inputs, prediction_masks, prediction_labels)\n", + " prediction_sampler = SequentialSampler(prediction_data)\n", + " prediction_dataloader = DataLoader(prediction_data, sampler=prediction_sampler, batch_size=batch_size)\n", + "\n", + " print('Predicting labels for {:,} test sentences...'.format(len(prediction_inputs)))\n", + "\n", + " # Put model in evaluation mode\n", + " model.eval()\n", + "\n", + " # Tracking variables \n", + " predictions , true_labels = [], []\n", + "\n", + " # Predict \n", + " for batch in prediction_dataloader:\n", + " # Add batch to GPU\n", + " batch = tuple(t.to(device) for t in batch)\n", + " \n", + " # Unpack the inputs from the dataloader\n", + " b_input_ids, b_input_mask, b_labels = batch\n", + " \n", + " # Telling the model not to compute or store gradients, saving memory and \n", + " # speeding up prediction\n", + " with torch.no_grad():\n", + " # Forward pass, calculate logit predictions\n", + " outputs = model(b_input_ids, token_type_ids=None, \n", + " attention_mask=b_input_mask)\n", + "\n", + " logits = outputs[0]\n", + " #print(logits)\n", + "\n", + " # Move logits and labels to CPU\n", + " logits = logits.detach().cpu().numpy()\n", + " label_ids = b_labels.to('cpu').numpy()\n", + " #print(logits)\n", + " \n", + " # Store predictions and true labels\n", + " predictions.append(logits)\n", + " true_labels.append(label_ids)\n", + "\n", + " print(' DONE.')\n", + "\n", + "\n", + " pred_labels = []\n", + "\n", + " # Evaluate each test batch using many matrics\n", + " print('Calculating the matrics for each batch...')\n", + "\n", 
+ " for i in range(len(true_labels)):\n", + " \n", + " # The predictions for this batch are a 2-column ndarray (one column for \"0\" \n", + " # and one column for \"1\"). Pick the label with the highest value and turn this\n", + " # in to a list of 0s and 1s.\n", + " pred_labels_i = np.argmax(predictions[i], axis=1).flatten()\n", + " pred_labels.append(pred_labels_i)\n", + "\n", + "\n", + " pred_labels_ = [item for sublist in pred_labels for item in sublist]\n", + " true_labels_ = [item for sublist in true_labels for item in sublist]\n", + "\n", + " return pred_labels_, true_labels_" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "AJ0suC8iMs8a" + }, + "outputs": [], + "source": [ + "dataset_name = [\"validation\", \"test\"]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "dPjV_5g8DDQy" + }, + "outputs": [], + "source": [ + "for dataset in dataset_name:\n", + " df_eval = pd.read_csv(dataset+\"_set.tsv\", sep=\"\\t\")\n", + " data_eval = df_eval[columnText].values\n", + "\n", + " y = df_eval[columnClass]\n", + " encoder = preprocessing.LabelEncoder()\n", + " y = encoder.fit_transform(y)\n", + " labels = y.tolist()\n", + "\n", + " pred_labels_, true_labels_ = evaluate_bert(data_eval, labels, model, batch_size)\n", + "\n", + "\n", + " report = classification_report( pred_labels_, true_labels_, output_dict = True)\n", + " \n", + " classes = [str(e) for e in encoder.transform(encoder.classes_)]\n", + " classesName = encoder.classes_\n", + "\n", + " precision = []\n", + " recall = []\n", + " f1 = []\n", + " support = []\n", + " dff = pd.DataFrame(columns= ['className', 'precision', 'recall', 'f1-score', 'support', 'FP', 'FN', 'TP', 'TN'])\n", + " for c in classes:\n", + " precision.append(report[c]['precision'])\n", + " recall.append(report[c]['recall'])\n", + " f1.append(report[c]['f1-score'])\n", + " support.append(report[c]['support'])\n", + "\n", + " accuracy = report['accuracy']\n", + " 
weighted_avg = report['weighted avg']\n", + " cnf_matrix = confusion_matrix(true_labels_, pred_labels_)\n", + " FP = cnf_matrix.sum(axis=0) - np.diag(cnf_matrix)\n", + " FN = cnf_matrix.sum(axis=1) - np.diag(cnf_matrix)\n", + " TP = np.diag(cnf_matrix)\n", + " TN = cnf_matrix.sum() - (FP + FN + TP)\n", + "\n", + " dff['className'] = classesName\n", + " dff['precision'] = precision\n", + " dff['recall'] = recall\n", + " dff['f1-score'] = f1\n", + " dff['support'] = support\n", + " dff['FP'] = FP\n", + " dff['FN'] = FN\n", + " dff['TP'] = TP\n", + " dff['TN'] = TN\n", + "\n", + " print(dataset+\"_\"+model_bert+\"_s\"+str(maxOfInstancePerClass))\n", + "\n", + " print(weighted_avg)\n", + " print(accuracy)\n", + " print(dff)\n", + "\n", + " dff.to_csv(\"drive/MyDrive/Classification-EDdA/report_\"+dataset+\"_\"+model_bert+\"_s\"+str(maxOfInstancePerClass)+\".csv\", index=False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "cVdM4eT6I8g2" + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "HzxyFO3knanV" + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "KDRPPw4Wnap7" + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "DX81R2dcnasF" + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "wgfqJFVeJMK1" + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "GqEf5_41JMNZ" + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "x_n57EvhJMQh" + }, + "outputs": [], + "source": [ + "model_path = \"drive/MyDrive/Classification-EDdA/model_bert-base-multilingual-cased_s10000.pt\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + 
"metadata": { + "id": "R3_9tA9MI8ju" + }, + "outputs": [], + "source": [ + "model = torch.load(model_path)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "_fzgS5USJeAF", + "outputId": "be4a5506-76ed-4eef-bb3c-fe2bb77c6e4d" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "--2021-09-30 19:38:22-- https://projet.liris.cnrs.fr/geode/files/datasets/EDdA/Classification/LGE_withContent.tsv\n", + "Resolving projet.liris.cnrs.fr (projet.liris.cnrs.fr)... 134.214.142.28\n", + "Connecting to projet.liris.cnrs.fr (projet.liris.cnrs.fr)|134.214.142.28|:443... connected.\n", + "HTTP request sent, awaiting response... 200 OK\n", + "Length: 356197 (348K) [text/tab-separated-values]\n", + "Saving to: ‘LGE_withContent.tsv’\n", + "\n", + "LGE_withContent.tsv 100%[===================>] 347.85K 567KB/s in 0.6s \n", + "\n", + "2021-09-30 19:38:24 (567 KB/s) - ‘LGE_withContent.tsv’ saved [356197/356197]\n", + "\n" + ] + } + ], + "source": [ + "!wget https://projet.liris.cnrs.fr/geode/files/datasets/EDdA/Classification/LGE_withContent.tsv" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "8WEJjQC7I8mP" + }, + "outputs": [], + "source": [ + "df_LGE = pd.read_csv(\"LGE_withContent.tsv\", sep=\"\\t\")\n", + "data_LGE = df_LGE[\"content\"].values\n", + "\n", + "\n", + "#pred_labels_, true_labels_ = evaluate_bert(data_eval, labels, model, batch_size)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 206 + }, + "id": "9qJDTU-6vzkk", + "outputId": "1b279f0e-7715-4d23-f524-08e8ba327f6c" + }, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + 
" }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>id</th>\n", + " <th>tome</th>\n", + " <th>rank</th>\n", + " <th>domain</th>\n", + " <th>remark</th>\n", + " <th>content</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>abrabeses-0</td>\n", + " <td>1</td>\n", + " <td>623</td>\n", + " <td>geography</td>\n", + " <td>NaN</td>\n", + " <td>ABRABESES. Village d’Espagne de la prov. de Za...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>accius-0</td>\n", + " <td>1</td>\n", + " <td>1076</td>\n", + " <td>biography</td>\n", + " <td>NaN</td>\n", + " <td>ACCIUS, L. ou L. ATTIUS (170-94 av. J.-C.), po...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>achenbach-2</td>\n", + " <td>1</td>\n", + " <td>1357</td>\n", + " <td>biography</td>\n", + " <td>NaN</td>\n", + " <td>ACHENBACH(Henri), administrateur prussien, né ...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>acireale-0</td>\n", + " <td>1</td>\n", + " <td>1513</td>\n", + " <td>geography</td>\n", + " <td>NaN</td>\n", + " <td>ACIREALE. Yille de Sicile, de la province et d...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>actée-0</td>\n", + " <td>1</td>\n", + " <td>1731</td>\n", + " <td>botany</td>\n", + " <td>NaN</td>\n", + " <td>ACTÉE(ActÅ“a L.). Genre de plantes de la famill...</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " id tome ... remark content\n", + "0 abrabeses-0 1 ... NaN ABRABESES. Village d’Espagne de la prov. de Za...\n", + "1 accius-0 1 ... NaN ACCIUS, L. ou L. ATTIUS (170-94 av. J.-C.), po...\n", + "2 achenbach-2 1 ... NaN ACHENBACH(Henri), administrateur prussien, né ...\n", + "3 acireale-0 1 ... NaN ACIREALE. Yille de Sicile, de la province et d...\n", + "4 actée-0 1 ... 
NaN ACTÉE(ActÅ“a L.). Genre de plantes de la famill...\n", + "\n", + "[5 rows x 6 columns]" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_LGE.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "71-fP61-OOwQ", + "outputId": "ef08b49e-0a9f-4653-e303-3163250af35b" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(310, 6)" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_LGE.shape" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "lFFed2EAI8oq" + }, + "outputs": [], + "source": [ + "def generate_prediction_dataloader(chosen_model, sentences_to_predict, batch_size = 8, max_len = 512):\n", + "\n", + " if chosen_model == 'bert-base-multilingual-cased' :\n", + " print('Loading Bert Tokenizer...')\n", + " tokenizer = BertTokenizer.from_pretrained(chosen_model)\n", + " elif chosen_model == 'camembert-base':\n", + " print('Loading Camembert Tokenizer...')\n", + " tokenizer = CamembertTokenizer.from_pretrained(chosen_model)\n", + "\n", + " # Tokenize all of the sentences and map the tokens to thier word IDs.\n", + " input_ids_test = []\n", + " # For every sentence...\n", + " for sent in sentences_to_predict:\n", + " # `encode` will:\n", + " # (1) Tokenize the sentence.\n", + " # (2) Prepend the `[CLS]` token to the start.\n", + " # (3) Append the `[SEP]` token to the end.\n", + " # (4) Map tokens to their IDs.\n", + " encoded_sent = tokenizer.encode(\n", + " sent, # Sentence to encode.\n", + " add_special_tokens = True, # Add '[CLS]' and '[SEP]'\n", + " )\n", + "\n", + " input_ids_test.append(encoded_sent)\n", + "\n", + " # Pad our input tokens\n", + " padded_test = []\n", + " for i in input_ids_test:\n", + "\n", + " if len(i) > max_len:\n", + " padded_test.extend([i[:max_len]])\n", + " else:\n", + 
"\n", + " padded_test.extend([i + [0] * (max_len - len(i))])\n", + " input_ids_test = np.array(padded_test)\n", + "\n", + " # Create attention masks\n", + " attention_masks = []\n", + "\n", + " # Create a mask of 1s for each token followed by 0s for padding\n", + " for seq in input_ids_test:\n", + " seq_mask = [float(i>0) for i in seq]\n", + " attention_masks.append(seq_mask)\n", + "\n", + " # Convert to tensors.\n", + " prediction_inputs = torch.tensor(input_ids_test)\n", + " prediction_masks = torch.tensor(attention_masks)\n", + " #set batch size\n", + "\n", + "\n", + " # Create the DataLoader.\n", + " prediction_data = TensorDataset(prediction_inputs, prediction_masks)\n", + " prediction_sampler = SequentialSampler(prediction_data)\n", + " prediction_dataloader = DataLoader(prediction_data, sampler=prediction_sampler, batch_size=batch_size)\n", + "\n", + " return prediction_dataloader\n", + "\n", + "\n", + "\n", + "def predict_class_bertFineTuning(model, sentences_to_predict_dataloader):\n", + "\n", + "\n", + " # If there's a GPU available...\n", + " if torch.cuda.is_available():\n", + "\n", + " # Tell PyTorch to use the GPU.\n", + " device = torch.device(\"cuda\")\n", + "\n", + " print('There are %d GPU(s) available.' 
% torch.cuda.device_count())\n", + "\n", + " print('We will use the GPU:', torch.cuda.get_device_name(0))\n", + "\n", + " # If not...\n", + " else:\n", + " print('No GPU available, using the CPU instead.')\n", + " device = torch.device(\"cpu\")\n", + "\n", + " # Put model in evaluation mode\n", + " model.eval()\n", + "\n", + " # Tracking variables\n", + " predictions_test , true_labels = [], []\n", + " pred_labels_ = []\n", + " # Predict\n", + " for batch in sentences_to_predict_dataloader:\n", + " # Add batch to GPU\n", + " batch = tuple(t.to(device) for t in batch)\n", + "\n", + " # Unpack the inputs from the dataloader\n", + " b_input_ids, b_input_mask = batch\n", + "\n", + " # Telling the model not to compute or store gradients, saving memory and\n", + " # speeding up prediction\n", + " with torch.no_grad():\n", + " # Forward pass, calculate logit predictions\n", + " outputs = model(b_input_ids, token_type_ids=None,\n", + " attention_mask=b_input_mask)\n", + "\n", + " logits = outputs[0]\n", + " #print(logits)\n", + "\n", + " # Move logits and labels to CPU\n", + " logits = logits.detach().cpu().numpy()\n", + " #print(logits)\n", + "\n", + " # Store predictions and true labels\n", + " predictions_test.append(logits)\n", + "\n", + " #print(' DONE.')\n", + "\n", + " pred_labels = []\n", + " \n", + " for i in range(len(predictions_test)):\n", + "\n", + " # The predictions for this batch are a 2-column ndarray (one column for \"0\"\n", + " # and one column for \"1\"). 
Pick the label with the highest value and turn this\n", + " # in to a list of 0s and 1s.\n", + " pred_labels_i = np.argmax(predictions_test[i], axis=1).flatten()\n", + " pred_labels.append(pred_labels_i)\n", + "\n", + " pred_labels_ += [item for sublist in pred_labels for item in sublist]\n", + " return pred_labels_\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "O9eer_kgI8rC", + "outputId": "94ea7418-14a8-4918-e210-caf0018f5989" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Loading Bert Tokenizer...\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Token indices sequence length is longer than the specified maximum sequence length for this model (1204 > 512). Running this sequence through the model will result in indexing errors\n" + ] + } + ], + "source": [ + "data_loader = generate_prediction_dataloader('bert-base-multilingual-cased', data_LGE)\n", + "#data_loader = generate_prediction_dataloader('camembert-base', data_LGE)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "sFpAwbrBwF2h", + "outputId": "8d210732-619d-41f0-b6e2-ad9d06a85069" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "There are 1 GPU(s) available.\n", + "We will use the GPU: Tesla P100-PCIE-16GB\n" + ] + } + ], + "source": [ + "p = predict_class_bertFineTuning( model, data_loader )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "51HF6-8UPSTc", + "outputId": "26bff792-eb8d-4e1a-efa4-a7a6c9d32bf9" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "310" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(p)" + ] + }, + { + 
"cell_type": "code", + "execution_count": null, + "metadata": { + "id": "rFFGhaCvQHfh" + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "qgJ-O4rcQHiI", + "outputId": "bfe93dd6-4d89-4d5c-be0d-45e1c98c6b14" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "LabelEncoder()" + ] + }, + "execution_count": 41, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Il faudrait enregistrer l'encoder, \n", + "# sinon on est obligé de le refaire à partir du jeu d'entrainement pour récupérer le noms des classes.\n", + "encoder" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "QuST9wJoQHnS" + }, + "outputs": [], + "source": [ + "p2 = list(encoder.inverse_transform(p))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "6ek7suq9QHqE", + "outputId": "6636983a-7eba-48c8-d884-f8fb437294dc" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "['Géographie',\n", + " 'Géographie',\n", + " 'Géographie',\n", + " 'Géographie',\n", + " 'Histoire naturelle',\n", + " 'Chimie',\n", + " 'Histoire naturelle',\n", + " 'Géographie',\n", + " 'Mathématiques',\n", + " 'Histoire',\n", + " 'Géographie',\n", + " 'Musique',\n", + " 'Commerce',\n", + " 'Commerce',\n", + " 'Géographie',\n", + " 'Géographie',\n", + " 'Histoire',\n", + " 'Géographie',\n", + " 'Histoire naturelle',\n", + " 'Géographie',\n", + " 'Physique - [Sciences physico-mathématiques]',\n", + " 'Histoire naturelle',\n", + " 'Chimie',\n", + " 'Histoire',\n", + " 'Physique - [Sciences physico-mathématiques]',\n", + " 'Commerce',\n", + " 'Géographie',\n", + " 'Géographie',\n", + " 'Géographie',\n", + " 'Géographie',\n", + " 'Géographie',\n", + " 'Histoire',\n", + " 'Histoire naturelle',\n", + " 'Médecine - Chirurgie',\n", + " 'Géographie',\n", + 
" 'Architecture',\n", + " 'Histoire naturelle',\n", + " 'Histoire naturelle',\n", + " 'Géographie',\n", + " 'Arts et métiers',\n", + " 'Géographie',\n", + " 'Histoire naturelle',\n", + " 'Marine',\n", + " 'Histoire',\n", + " 'Géographie',\n", + " 'Architecture',\n", + " 'Histoire naturelle',\n", + " 'Beaux-arts',\n", + " 'Commerce',\n", + " 'Géographie',\n", + " 'Géographie',\n", + " 'Géographie',\n", + " 'Géographie',\n", + " 'Géographie',\n", + " 'Géographie',\n", + " 'Beaux-arts',\n", + " 'Géographie',\n", + " 'Géographie',\n", + " 'Médecine - Chirurgie',\n", + " 'Géographie',\n", + " 'Histoire naturelle',\n", + " 'Chimie',\n", + " 'Géographie',\n", + " 'Commerce',\n", + " 'Géographie',\n", + " 'Religion',\n", + " 'Histoire naturelle',\n", + " 'Géographie',\n", + " 'Commerce',\n", + " 'Agriculture - Economie rustique',\n", + " 'Géographie',\n", + " 'Géographie',\n", + " 'Jeu',\n", + " 'Géographie',\n", + " 'Géographie',\n", + " 'Géographie',\n", + " 'Géographie',\n", + " 'Géographie',\n", + " 'Géographie',\n", + " 'Beaux-arts',\n", + " 'Géographie',\n", + " 'Géographie',\n", + " 'Beaux-arts',\n", + " 'Histoire naturelle',\n", + " 'Géographie',\n", + " 'Histoire naturelle',\n", + " 'Géographie',\n", + " 'Commerce',\n", + " 'Géographie',\n", + " 'Géographie',\n", + " 'Histoire naturelle',\n", + " 'Histoire',\n", + " 'Histoire naturelle',\n", + " 'Commerce',\n", + " 'Histoire',\n", + " 'Militaire (Art) - Guerre - Arme',\n", + " 'Histoire',\n", + " 'Géographie',\n", + " 'Commerce',\n", + " 'Géographie',\n", + " 'Histoire',\n", + " 'Géographie',\n", + " 'Religion',\n", + " 'Géographie',\n", + " 'Commerce',\n", + " 'Agriculture - Economie rustique',\n", + " 'Histoire',\n", + " 'Géographie',\n", + " 'Géographie',\n", + " 'Métiers',\n", + " 'Belles-lettres - Poésie',\n", + " 'Beaux-arts',\n", + " 'Religion',\n", + " 'Architecture',\n", + " 'Architecture',\n", + " 'Architecture',\n", + " 'Géographie',\n", + " 'Chimie',\n", + " 'Géographie',\n", + " 'Géographie',\n", + " 
'Beaux-arts',\n", + " 'Histoire naturelle',\n", + " 'Militaire (Art) - Guerre - Arme',\n", + " 'Géographie',\n", + " 'Histoire naturelle',\n", + " 'Médecine - Chirurgie',\n", + " 'Géographie',\n", + " 'Géographie',\n", + " 'Géographie',\n", + " 'Géographie',\n", + " 'Minéralogie',\n", + " 'Belles-lettres - Poésie',\n", + " 'Histoire naturelle',\n", + " 'Géographie',\n", + " 'Commerce',\n", + " 'Géographie',\n", + " 'Médecine - Chirurgie',\n", + " 'Géographie',\n", + " 'Géographie',\n", + " 'Grammaire',\n", + " 'Géographie',\n", + " 'Géographie',\n", + " 'Géographie',\n", + " 'Géographie',\n", + " 'Mathématiques',\n", + " 'Géographie',\n", + " 'Médecine - Chirurgie',\n", + " 'Blason',\n", + " 'Géographie',\n", + " 'Commerce',\n", + " 'Histoire naturelle',\n", + " 'Militaire (Art) - Guerre - Arme',\n", + " 'Géographie',\n", + " 'Antiquité',\n", + " 'Agriculture - Economie rustique',\n", + " 'Chimie',\n", + " 'Géographie',\n", + " 'Géographie',\n", + " 'Géographie',\n", + " 'Géographie',\n", + " 'Géographie',\n", + " 'Commerce',\n", + " 'Géographie',\n", + " 'Géographie',\n", + " 'Histoire naturelle',\n", + " 'Belles-lettres - Poésie',\n", + " 'Histoire',\n", + " 'Géographie',\n", + " 'Métiers',\n", + " 'Géographie',\n", + " 'Commerce',\n", + " 'Arts et métiers',\n", + " 'Géographie',\n", + " 'Géographie',\n", + " 'Géographie',\n", + " 'Commerce',\n", + " 'Géographie',\n", + " 'Géographie',\n", + " 'Géographie',\n", + " 'Géographie',\n", + " 'Géographie',\n", + " 'Beaux-arts',\n", + " 'Géographie',\n", + " 'Beaux-arts',\n", + " 'Géographie',\n", + " 'Commerce',\n", + " 'Musique',\n", + " 'Médecine - Chirurgie',\n", + " 'Religion',\n", + " 'Géographie',\n", + " 'Géographie',\n", + " 'Géographie',\n", + " 'Géographie',\n", + " 'Géographie',\n", + " 'Histoire',\n", + " 'Droit - Jurisprudence',\n", + " 'Histoire',\n", + " 'Médecine - Chirurgie',\n", + " 'Histoire',\n", + " 'Commerce',\n", + " 'Géographie',\n", + " 'Géographie',\n", + " 'Géographie',\n", + " 'Chimie',\n", 
+ " 'Antiquité',\n", + " 'Géographie',\n", + " 'Commerce',\n", + " 'Géographie',\n", + " 'Histoire',\n", + " 'Géographie',\n", + " 'Commerce',\n", + " 'Géographie',\n", + " 'Commerce',\n", + " 'Beaux-arts',\n", + " 'Histoire',\n", + " 'Géographie',\n", + " 'Histoire naturelle',\n", + " 'Antiquité',\n", + " 'Grammaire',\n", + " 'Géographie',\n", + " 'Géographie',\n", + " 'Géographie',\n", + " 'Commerce',\n", + " 'Géographie',\n", + " 'Commerce',\n", + " 'Géographie',\n", + " 'Géographie',\n", + " 'Beaux-arts',\n", + " 'Beaux-arts',\n", + " 'Géographie',\n", + " 'Commerce',\n", + " 'Commerce',\n", + " 'Géographie',\n", + " 'Géographie',\n", + " 'Géographie',\n", + " 'Commerce',\n", + " 'Géographie',\n", + " 'Géographie',\n", + " 'Géographie',\n", + " 'Géographie',\n", + " 'Géographie',\n", + " 'Géographie',\n", + " 'Histoire',\n", + " 'Architecture',\n", + " 'Commerce',\n", + " 'Antiquité',\n", + " 'Géographie',\n", + " 'Géographie',\n", + " 'Médecine - Chirurgie',\n", + " 'Histoire naturelle',\n", + " 'Histoire',\n", + " 'Commerce',\n", + " 'Géographie',\n", + " 'Géographie',\n", + " 'Commerce',\n", + " 'Anatomie',\n", + " 'Commerce',\n", + " 'Beaux-arts',\n", + " 'Géographie',\n", + " 'Géographie',\n", + " 'Commerce',\n", + " 'Histoire naturelle',\n", + " 'Géographie',\n", + " 'Beaux-arts',\n", + " 'Commerce',\n", + " 'Architecture',\n", + " 'Commerce',\n", + " 'Antiquité',\n", + " 'Géographie',\n", + " 'Commerce',\n", + " 'Géographie',\n", + " 'Géographie',\n", + " 'Médecine - Chirurgie',\n", + " 'Géographie',\n", + " 'Géographie',\n", + " 'Commerce',\n", + " 'Géographie',\n", + " 'Géographie',\n", + " 'Géographie',\n", + " 'Antiquité',\n", + " 'Géographie',\n", + " 'Géographie',\n", + " 'Commerce',\n", + " 'Géographie',\n", + " 'Géographie',\n", + " 'Géographie',\n", + " 'Géographie',\n", + " 'Histoire',\n", + " 'Commerce',\n", + " 'Géographie',\n", + " 'Géographie',\n", + " 'Commerce',\n", + " 'Géographie',\n", + " 'Géographie',\n", + " 'Antiquité',\n", + " 
'Géographie',\n", + " 'Religion',\n", + " 'Géographie',\n", + " 'Géographie',\n", + " 'Philosophie',\n", + " 'Géographie',\n", + " 'Chimie',\n", + " 'Géographie',\n", + " 'Géographie',\n", + " 'Géographie',\n", + " 'Beaux-arts',\n", + " 'Commerce',\n", + " 'Commerce',\n", + " 'Géographie',\n", + " 'Géographie']" + ] + }, + "execution_count": 44, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "p2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "XvdDj5PBQHtk" + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "t39Xs0j7QHXJ" + }, + "outputs": [], + "source": [ + "df_LGE['class_bert'] = p2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 206 + }, + "id": "-VZ7geRmQHaD", + "outputId": "350a4122-5b1f-43e2-e372-2f628f665c4a" + }, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>id</th>\n", + " <th>tome</th>\n", + " <th>rank</th>\n", + " <th>domain</th>\n", + " <th>remark</th>\n", + " <th>content</th>\n", + " <th>class_bert</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>abrabeses-0</td>\n", + " <td>1</td>\n", + " <td>623</td>\n", + " <td>geography</td>\n", + " <td>NaN</td>\n", + " <td>ABRABESES. Village d’Espagne de la prov. 
de Za...</td>\n", + " <td>Géographie</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>accius-0</td>\n", + " <td>1</td>\n", + " <td>1076</td>\n", + " <td>biography</td>\n", + " <td>NaN</td>\n", + " <td>ACCIUS, L. ou L. ATTIUS (170-94 av. J.-C.), po...</td>\n", + " <td>Géographie</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>achenbach-2</td>\n", + " <td>1</td>\n", + " <td>1357</td>\n", + " <td>biography</td>\n", + " <td>NaN</td>\n", + " <td>ACHENBACH(Henri), administrateur prussien, né ...</td>\n", + " <td>Géographie</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>acireale-0</td>\n", + " <td>1</td>\n", + " <td>1513</td>\n", + " <td>geography</td>\n", + " <td>NaN</td>\n", + " <td>ACIREALE. Yille de Sicile, de la province et d...</td>\n", + " <td>Géographie</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>actée-0</td>\n", + " <td>1</td>\n", + " <td>1731</td>\n", + " <td>botany</td>\n", + " <td>NaN</td>\n", + " <td>ACTÉE(ActÅ“a L.). Genre de plantes de la famill...</td>\n", + " <td>Histoire naturelle</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " id ... class_bert\n", + "0 abrabeses-0 ... Géographie\n", + "1 accius-0 ... Géographie\n", + "2 achenbach-2 ... Géographie\n", + "3 acireale-0 ... Géographie\n", + "4 actée-0 ... 
Histoire naturelle\n", + "\n", + "[5 rows x 7 columns]" + ] + }, + "execution_count": 46, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_LGE.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "3xkzdkrKQHwA" + }, + "outputs": [], + "source": [ + "df_LGE.to_csv(\"drive/MyDrive/Classification-EDdA/classification_LGE.tsv\", sep=\"\\t\")" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [], + "machine_shape": "hm", + "name": "EDdA-Classification_BertFineTuning.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.10" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "00bd66a81aad4cd7a10df4a67b52b14e": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "0143df420df444e9aac5c8b39c342021": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": 
"IPY_MODEL_044fc1f96f8347ddb4a79d31edf32174", + "placeholder": "​", + "style": "IPY_MODEL_cf0d3320e06546789b5d5a2021dbc3ad", + "value": " 811k/811k [00:00<00:00, 932kB/s]" + } + }, + "0214f74b229a4232a9edf3cab751b90d": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "044fc1f96f8347ddb4a79d31edf32174": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "06c6e7721b68449a9f3619ffdf18dfeb": { + "model_module": "@jupyter-widgets/controls", + 
"model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_5ec6a851b16c4339b51acb6129935f13", + "IPY_MODEL_fd39a852133144e2b4aed474b204451f", + "IPY_MODEL_0143df420df444e9aac5c8b39c342021" + ], + "layout": "IPY_MODEL_c61b6474b55948cb91a598e6b9aa10d2" + } + }, + "12aa3280d3284c07ac12e2fe842b40b0": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_f470af786c1c4d049de4f0a7f373379f", + "placeholder": "​", + "style": "IPY_MODEL_00bd66a81aad4cd7a10df4a67b52b14e", + "value": "Downloading: 100%" + } + }, + "152afcb9245c416fae0fde257fa25e2e": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + 
"grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "17bf94188b844f649642d9c6e6a20373": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "185ae5ef7be646b797467086ad7d3a82": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + 
"_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_152afcb9245c416fae0fde257fa25e2e", + "max": 1395301, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_fb3a174c597b47c7a527517004ba5f54", + "value": 1395301 + } + }, + "1bcdb04d16dd4f9e9d86938e1d2def02": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_a5efb634a95c42a7abfaaf61e1c2c928", + "max": 445032417, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_600e627de1f0403595f701381dc3b164", + "value": 445032417 + } + }, + "1d97e83c703f4071b9176ba7bf57cddf": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "27a20a17123744948e0c1dbf49b51b27": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + 
"_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "27e18e1fa3884c0fb0339764e0397990": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_17bf94188b844f649642d9c6e6a20373", + "placeholder": "​", + "style": "IPY_MODEL_d3aaecd7a6e34cc8918a689ac6299746", + "value": " 508/508 [00:00<00:00, 15.9kB/s]" + } + }, + "2af1124092684f8bafab311cbe9bf22c": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + 
"_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "3203783f58e54b0e856ab84503bf0d3c": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": 
null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "340241453dab4db88043d372aaa88c2e": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_4422e64029184ba4ba30eecfdf2b4306", + "max": 508, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_1d97e83c703f4071b9176ba7bf57cddf", + "value": 508 + } + }, + "3ceaa994a3814d3c85e2051e37397342": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_75073a0f673345728871dfb0346e7c1b", + "placeholder": "​", + "style": "IPY_MODEL_db8c94b4ed724f859d1ae8c153b01110", + "value": " 1.40M/1.40M [00:00<00:00, 2.81MB/s]" + } + }, + "41558bfcc0464711916c2d96337bef66": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + 
"align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "4422e64029184ba4ba30eecfdf2b4306": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + 
"top": null, + "visibility": null, + "width": null + } + }, + "4873cc6c9e1d493c9a67d6536e4367a6": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_12aa3280d3284c07ac12e2fe842b40b0", + "IPY_MODEL_1bcdb04d16dd4f9e9d86938e1d2def02", + "IPY_MODEL_b5f86071b23c40bf9c96f74c613c2729" + ], + "layout": "IPY_MODEL_27a20a17123744948e0c1dbf49b51b27" + } + }, + "5879fadf430646f6af41b1a9b14864ff": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_95a3332ba4634d1c930a7021eacce230", + "placeholder": "​", + "style": "IPY_MODEL_d53488432f8544de863210d9e8ee4e48", + "value": "Downloading: 100%" + } + }, + "5ec6a851b16c4339b51acb6129935f13": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_a0d9ceaa8d3a4876ae65d877687bcf50", + "placeholder": "​", + "style": 
"IPY_MODEL_aa6ea92757df47eda1e41603cb109e79", + "value": "Downloading: 100%" + } + }, + "600e627de1f0403595f701381dc3b164": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "6a29c1c28ceb415f91ec55512da981c5": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_5879fadf430646f6af41b1a9b14864ff", + "IPY_MODEL_340241453dab4db88043d372aaa88c2e", + "IPY_MODEL_27e18e1fa3884c0fb0339764e0397990" + ], + "layout": "IPY_MODEL_2af1124092684f8bafab311cbe9bf22c" + } + }, + "75073a0f673345728871dfb0346e7c1b": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + 
"grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "885f91c34b9c422889df8b556aad8ec0": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "95a3332ba4634d1c930a7021eacce230": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + 
"object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "a0d9ceaa8d3a4876ae65d877687bcf50": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "a5efb634a95c42a7abfaaf61e1c2c928": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, 
+ "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "aa6ea92757df47eda1e41603cb109e79": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "b5f86071b23c40bf9c96f74c613c2729": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_f3b7527bd4d04c81936d8392decee3ac", + "placeholder": "​", + "style": "IPY_MODEL_885f91c34b9c422889df8b556aad8ec0", + "value": " 445M/445M [00:12<00:00, 41.9MB/s]" + } + }, + "c61b6474b55948cb91a598e6b9aa10d2": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": 
"LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "cf0d3320e06546789b5d5a2021dbc3ad": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "d3aaecd7a6e34cc8918a689ac6299746": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": 
"StyleView", + "description_width": "" + } + }, + "d53488432f8544de863210d9e8ee4e48": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "db8c94b4ed724f859d1ae8c153b01110": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "e674e279b13b41fda3df3a6c89f5fcb1": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + 
"object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "f3b7527bd4d04c81936d8392decee3ac": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "f470af786c1c4d049de4f0a7f373379f": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, 
+ "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "f7224a1b831d459594852eece9f05543": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_3203783f58e54b0e856ab84503bf0d3c", + "placeholder": "​", + "style": "IPY_MODEL_0214f74b229a4232a9edf3cab751b90d", + "value": "Downloading: 100%" + } + }, + "fb3a174c597b47c7a527517004ba5f54": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "fba1d1d5c83b40659295a3457d74cb4e": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": 
"HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_f7224a1b831d459594852eece9f05543", + "IPY_MODEL_185ae5ef7be646b797467086ad7d3a82", + "IPY_MODEL_3ceaa994a3814d3c85e2051e37397342" + ], + "layout": "IPY_MODEL_e674e279b13b41fda3df3a6c89f5fcb1" + } + }, + "fd39a852133144e2b4aed474b204451f": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_41558bfcc0464711916c2d96337bef66", + "max": 810912, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_fdf05cea504c42f793f9c06e58ef995b", + "value": 810912 + } + }, + "fdf05cea504c42f793f9c06e58ef995b": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + } + } + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/notebooks/EDdA_Classification_ClassicModels.ipynb b/notebooks/EDdA_Classification_ClassicModels.ipynb new file mode 100644 index 0000000..fcb2ba0 --- /dev/null +++ 
b/notebooks/EDdA_Classification_ClassicModels.ipynb @@ -0,0 +1,861 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "name": "EDdA-Classification_ClassicModels.ipynb", + "provenance": [], + "collapsed_sections": [], + "machine_shape": "hm" + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "name": "python" + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "aXLlx8vXQlJw" + }, + "source": [ + "# Train supervised models for EDdA classification" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "3kYI_pq3Q1BT" + }, + "source": [ + "## Configuration" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "D_uwiuJq3pAM" + }, + "source": [ + "train_path = 'training_set.tsv'\n", + "validation_path = 'validation_set.tsv'\n", + "test_path = 'test_set.tsv'\n", + "\n", + "columnText = 'contentWithoutClass'\n", + "columnClass = 'ensemble_domaine_enccre'\n", + "\n", + "minOfInstancePerClass = 0\n", + "maxOfInstancePerClass = 10000\n", + "\n", + "\n", + "classifier_list = [\"bayes\"]\n", + "vectorizer_list = [\"bagofwords\", \"tf_idf\"]\n", + "\n", + "#classifier_list = [\"lr\", \"rfc\", \"sgd\", \"svm\"]\n", + "#vectorizer_list = [\"bagofwords\", \"tf_idf\", \"doc2vec\"]\n", + "\n", + "vectorization_max_df= 1.0\n", + "vectorization_min_df= 4\n", + "vectorization_numberOfFeatures= None\n", + "\n", + "doc2vec_vec_size = 700\n", + "max_epochs = 10\n", + "doc2vec_min_count = 12\n", + "doc2vec_dm = 0\n", + "doc2vec_workers = 8" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "P_L0rDhZQ6Fn" + }, + "source": [ + "## Setup colab environment" + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "FsAR4CsB3aUc", + "outputId": "a5e4efde-a5c9-45f9-ef1c-9223b4d52ac6" + }, + "source": [ + "from psutil import virtual_memory\n", + "ram_gb = 
virtual_memory().total / 1e9\n", + "print('Your runtime has {:.1f} gigabytes of available RAM\\n'.format(ram_gb))\n", + "\n", + "if ram_gb < 20:\n", + " print('Not using a high-RAM runtime')\n", + "else:\n", + " print('You are using a high-RAM runtime!')" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Your runtime has 27.3 gigabytes of available RAM\n", + "\n", + "You are using a high-RAM runtime!\n" + ] + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "h5MwRwL53aYY", + "outputId": "bc4c4c16-fb20-404a-e044-550fc4ca907d" + }, + "source": [ + "from google.colab import drive\n", + "drive.mount('/content/drive')" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Mounted at /content/drive\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "4z78CLYi75kV" + }, + "source": [ + "## Import libraries" + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "bcptSr6o3ac7", + "outputId": "19713482-dfeb-4be3-e63c-35b4253cb9e5" + }, + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "\n", + "from sklearn.naive_bayes import MultinomialNB\n", + "from sklearn.svm import SVC\n", + "from sklearn.ensemble import RandomForestClassifier\n", + "from sklearn.linear_model import LogisticRegression\n", + "from sklearn.linear_model import SGDClassifier\n", + "from sklearn.metrics import classification_report\n", + "from sklearn.metrics import confusion_matrix\n", + "from sklearn.model_selection import GridSearchCV\n", + "import pickle\n", + "\n", + "from sklearn.feature_extraction.text import CountVectorizer\n", + "from sklearn.feature_extraction.text import TfidfVectorizer\n", + "from nltk.stem.snowball import SnowballStemmer\n", + "from nltk.corpus import stopwords\n", + "from nltk.tokenize 
import word_tokenize\n", + "from gensim.models.doc2vec import Doc2Vec, TaggedDocument\n", + "from nltk.tokenize import word_tokenize\n", + "import spacy\n", + "import os\n", + "import nltk\n", + "import string\n", + "nltk.download('stopwords')\n", + "nltk.download('punkt')" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "[nltk_data] Downloading package stopwords to /root/nltk_data...\n", + "[nltk_data] Package stopwords is already up-to-date!\n", + "[nltk_data] Downloading package punkt to /root/nltk_data...\n", + "[nltk_data] Package punkt is already up-to-date!\n" + ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "True" + ] + }, + "metadata": {}, + "execution_count": 3 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "dwSVXDtWZB5H", + "outputId": "44e2aa14-726f-43af-aa6a-1b7899e1025b" + }, + "source": [ + "!python -m spacy download fr_core_news_sm" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Collecting fr_core_news_sm==2.2.5\n", + " Downloading https://github.com/explosion/spacy-models/releases/download/fr_core_news_sm-2.2.5/fr_core_news_sm-2.2.5.tar.gz (14.7 MB)\n", + "\u001b[K |████████████████████████████████| 14.7 MB 5.5 MB/s \n", + "\u001b[?25hRequirement already satisfied: spacy>=2.2.2 in /usr/local/lib/python3.7/dist-packages (from fr_core_news_sm==2.2.5) (2.2.4)\n", + "Requirement already satisfied: wasabi<1.1.0,>=0.4.0 in /usr/local/lib/python3.7/dist-packages (from spacy>=2.2.2->fr_core_news_sm==2.2.5) (0.8.2)\n", + "Requirement already satisfied: setuptools in /usr/local/lib/python3.7/dist-packages (from spacy>=2.2.2->fr_core_news_sm==2.2.5) (57.4.0)\n", + "Requirement already satisfied: plac<1.2.0,>=0.9.6 in /usr/local/lib/python3.7/dist-packages (from spacy>=2.2.2->fr_core_news_sm==2.2.5) (1.1.3)\n", + "Requirement 
already satisfied: preshed<3.1.0,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from spacy>=2.2.2->fr_core_news_sm==2.2.5) (3.0.6)\n", + "Requirement already satisfied: srsly<1.1.0,>=1.0.2 in /usr/local/lib/python3.7/dist-packages (from spacy>=2.2.2->fr_core_news_sm==2.2.5) (1.0.5)\n", + "Requirement already satisfied: requests<3.0.0,>=2.13.0 in /usr/local/lib/python3.7/dist-packages (from spacy>=2.2.2->fr_core_news_sm==2.2.5) (2.23.0)\n", + "Requirement already satisfied: catalogue<1.1.0,>=0.0.7 in /usr/local/lib/python3.7/dist-packages (from spacy>=2.2.2->fr_core_news_sm==2.2.5) (1.0.0)\n", + "Requirement already satisfied: blis<0.5.0,>=0.4.0 in /usr/local/lib/python3.7/dist-packages (from spacy>=2.2.2->fr_core_news_sm==2.2.5) (0.4.1)\n", + "Requirement already satisfied: cymem<2.1.0,>=2.0.2 in /usr/local/lib/python3.7/dist-packages (from spacy>=2.2.2->fr_core_news_sm==2.2.5) (2.0.6)\n", + "Requirement already satisfied: thinc==7.4.0 in /usr/local/lib/python3.7/dist-packages (from spacy>=2.2.2->fr_core_news_sm==2.2.5) (7.4.0)\n", + "Requirement already satisfied: tqdm<5.0.0,>=4.38.0 in /usr/local/lib/python3.7/dist-packages (from spacy>=2.2.2->fr_core_news_sm==2.2.5) (4.62.3)\n", + "Requirement already satisfied: numpy>=1.15.0 in /usr/local/lib/python3.7/dist-packages (from spacy>=2.2.2->fr_core_news_sm==2.2.5) (1.19.5)\n", + "Requirement already satisfied: murmurhash<1.1.0,>=0.28.0 in /usr/local/lib/python3.7/dist-packages (from spacy>=2.2.2->fr_core_news_sm==2.2.5) (1.0.6)\n", + "Requirement already satisfied: importlib-metadata>=0.20 in /usr/local/lib/python3.7/dist-packages (from catalogue<1.1.0,>=0.0.7->spacy>=2.2.2->fr_core_news_sm==2.2.5) (4.8.2)\n", + "Requirement already satisfied: typing-extensions>=3.6.4 in /usr/local/lib/python3.7/dist-packages (from importlib-metadata>=0.20->catalogue<1.1.0,>=0.0.7->spacy>=2.2.2->fr_core_news_sm==2.2.5) (3.10.0.2)\n", + "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.7/dist-packages (from 
importlib-metadata>=0.20->catalogue<1.1.0,>=0.0.7->spacy>=2.2.2->fr_core_news_sm==2.2.5) (3.6.0)\n", + "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests<3.0.0,>=2.13.0->spacy>=2.2.2->fr_core_news_sm==2.2.5) (2.10)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests<3.0.0,>=2.13.0->spacy>=2.2.2->fr_core_news_sm==2.2.5) (2021.10.8)\n", + "Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests<3.0.0,>=2.13.0->spacy>=2.2.2->fr_core_news_sm==2.2.5) (3.0.4)\n", + "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests<3.0.0,>=2.13.0->spacy>=2.2.2->fr_core_news_sm==2.2.5) (1.24.3)\n", + "Building wheels for collected packages: fr-core-news-sm\n", + " Building wheel for fr-core-news-sm (setup.py) ... \u001b[?25l\u001b[?25hdone\n", + " Created wheel for fr-core-news-sm: filename=fr_core_news_sm-2.2.5-py3-none-any.whl size=14727026 sha256=994d176b35663506dd047e65863238d29b9b60313ba0dee5997c107f116477aa\n", + " Stored in directory: /tmp/pip-ephem-wheel-cache-c8y7i3ag/wheels/c9/a6/ea/0778337c34660027ee67ef3a91fb9d3600b76777a912ea1c24\n", + "Successfully built fr-core-news-sm\n", + "Installing collected packages: fr-core-news-sm\n", + "Successfully installed fr-core-news-sm-2.2.5\n", + "\u001b[38;5;2m✔ Download and installation successful\u001b[0m\n", + "You can now load the model via spacy.load('fr_core_news_sm')\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "SuDZl6v48CBi" + }, + "source": [ + "## Utils functions" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "Tunf_CYi3afO" + }, + "source": [ + "def create_dict(df, classColumnName):\n", + " return dict(df[classColumnName].value_counts())\n", + "\n", + "def remove_weak_classes(df, classColumnName, threshold):\n", + " dictOfClassInstances = 
create_dict(df,classColumnName)\n", + " dictionary = {k: v for k, v in dictOfClassInstances.items() if v >= threshold }\n", + " keys = [*dictionary]\n", + " df_tmp = df[~ df[classColumnName].isin(keys)]\n", + " df = pd.concat([df,df_tmp]).drop_duplicates(keep=False)\n", + " return df\n", + "\n", + "\n", + "def resample_classes(df, classColumnName, numberOfInstances):\n", + " #random numberOfInstances elements\n", + " replace = False # with replacement\n", + " fn = lambda obj: obj.loc[np.random.choice(obj.index, numberOfInstances if len(obj) > numberOfInstances else len(obj), replace),:]\n", + " return df.groupby(classColumnName, as_index=False).apply(fn)\n", + "\n", + "\n", + "def count_vect(data, max_df= 1.0 , min_df= 1, numberOfFeatures= None ):\n", + " stop_words = set(stopwords.words('french'))\n", + " stemmer_fr = SnowballStemmer(\"french\")\n", + " analyzer = CountVectorizer().build_analyzer()\n", + " def stemmed_words_fr(doc):\n", + " return (stemmer_fr.stem(w) for w in analyzer(doc) if not w in stop_words)\n", + " return CountVectorizer(stop_words = 'french', analyzer = stemmed_words_fr, max_df= max_df, min_df = min_df, max_features = numberOfFeatures)\n", + "\n", + "\n", + "def tf_idf(data, max_df= 1.0 , min_df= 1, numberOfFeatures = None):\n", + " stop_words = set(stopwords.words('french'))\n", + " stemmer_fr = SnowballStemmer(\"french\")\n", + " analyzer = TfidfVectorizer().build_analyzer()\n", + " def stemmed_words_fr(doc):\n", + " return (stemmer_fr.stem(w) for w in analyzer(doc) if not w in stop_words)\n", + " return TfidfVectorizer(stop_words= 'french', analyzer=stemmed_words_fr, max_df= max_df, min_df = min_df, max_features= numberOfFeatures)\n", + "\n", + "\n", + "def tokenize_fr_text(sentence):\n", + " result = string.punctuation\n", + " doc = nlp(sentence)\n", + " return [X.text.lower() for X in doc if not X.text in stopWords and not X.text in result and not len(X.text) < 2]\n", + "\n", + "\n", + "def doc2vec(tagged_tr, max_epochs, 
doc2vec_vec_size, doc2vec_min_count , doc2vec_dm, doc2vec_workers):\n", + " \n", + " stopWords = set(stopwords.words('french'))\n", + " #tagged_tr = [TaggedDocument(words = tokenize_fr_text(_d),tags = [str(i)]) for i, _d in enumerate(data)]\n", + " model = Doc2Vec(vector_size=doc2vec_vec_size, min_count = doc2vec_min_count, dm = doc2vec_dm, workers = doc2vec_workers)\n", + " model.build_vocab(tagged_tr)\n", + " model.train(tagged_tr, total_examples=model.corpus_count, epochs = max_epochs)\n", + " return model\n", + " #return np.array([model.docvecs[str(i)] for i in range(len(tagged_tr))])\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Lc1DRh4b7mto" + }, + "source": [ + "## Load datasets" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "ybiJYL0h3ahh" + }, + "source": [ + "!wget https://projet.liris.cnrs.fr/geode/EDdA-Classification/datasets/training_set.tsv\n", + "!wget https://projet.liris.cnrs.fr/geode/EDdA-Classification/datasets/validation_set.tsv\n", + "!wget https://projet.liris.cnrs.fr/geode/EDdA-Classification/datasets/test_set.tsv" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "LRKJzWmf3pCg" + }, + "source": [ + "df_train = pd.read_csv(train_path, sep=\"\\t\")\n", + "df_train = resample_classes(df_train, columnClass, maxOfInstancePerClass)\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nkRUCjiR84Qr" + }, + "source": [ + "## Vectorization\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "6QQXybaQ3pE9" + }, + "source": [ + "data_train = df_train[columnText].tolist()\n", + "vectorizer_dic = {}\n", + "\n", + "\n", + "nlp = spacy.load(\"fr_core_news_sm\")\n", + "stop_words = set(stopwords.words('french'))\n", + "\n", + "stemmer = SnowballStemmer('french').stem\n", + "def stem_tokenize(text):\n", + " return [stemmer(i) for i in word_tokenize(text) if 
not i in stop_words]\n", + "\n", + "for vectorizer_name in vectorizer_list:\n", + "\n", + " vec_file_name = vectorizer_name + '_s' + str(maxOfInstancePerClass) +\".pkl\"\n", + " if os.path.isfile(\"drive/MyDrive/Classification-EDdA/\"+vec_file_name):\n", + " \n", + " # load existing vectorizers \n", + " with open(\"drive/MyDrive/Classification-EDdA/\"+vec_file_name, 'rb') as file:\n", + " vectorizer = pickle.load(file)\n", + " \n", + " else :\n", + "\n", + " if vectorizer_name == \"bagofwords\" :\n", + " #vectorizer = count_vect(data_train, vectorization_max_df, vectorization_min_df, vectorization_numberOfFeatures)\n", + " vectorizer = CountVectorizer(analyzer = \"word\", lowercase=True, token_pattern='[a-zA-Z0-9]+', strip_accents='unicode',tokenizer=stem_tokenize)\n", + " vectorizer.fit(data_train)\n", + "\n", + " if vectorizer_name == \"tf_idf\" :\n", + " #vectorizer = tf_idf(data_train, vectorization_max_df, vectorization_min_df, vectorization_numberOfFeatures) \n", + " vectorizer = TfidfVectorizer(analyzer='word', lowercase=True, token_pattern='[a-zA-Z0-9]+', strip_accents='unicode',tokenizer=stem_tokenize)\n", + " vectorizer.fit(data_train)\n", + "\n", + " if vectorizer_name == \"doc2vec\" :\n", + " stopWords = set(stopwords.words('french'))\n", + " tagged_tr = [TaggedDocument(words = tokenize_fr_text(_d),tags = [str(i)]) for i, _d in enumerate(data_train)]\n", + " vectorizer = doc2vec(tagged_tr, max_epochs, doc2vec_vec_size, doc2vec_min_count, doc2vec_dm, doc2vec_workers)\n", + " \n", + " # saving vectorizer\n", + " with open(\"drive/MyDrive/Classification-EDdA/\"+vec_file_name, 'wb') as file:\n", + " pickle.dump(vectorizer, file)\n", + " \n", + " vectorizer_dic[vectorizer_name] = vectorizer " + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "wntk5s8c88w5" + }, + "source": [ + "## Training classifier" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "rx_0eV-M3pHc" + }, + "source": [ + 
"classifier_dic = {}\n", + "grid_param = {}\n", + "\n", + "for classifier_name in classifier_list:\n", + " if classifier_name == \"bayes\":\n", + " classifier_dic[classifier_name] = MultinomialNB()\n", + " elif classifier_name == \"lr\":\n", + " classifier_dic[classifier_name] = LogisticRegression()\n", + " grid_param[classifier_name] = {\"C\":np.logspace(-3,3,7)}\n", + " elif classifier_name == \"sgd\":\n", + " classifier_dic[classifier_name] = SGDClassifier()\n", + " grid_param[classifier_name] = { \"loss\" : [\"log\", \"modified_huber\"]}\n", + " elif classifier_name == \"svm\":\n", + " classifier_dic[classifier_name] = SVC()\n", + " grid_param[classifier_name] = {'kernel':['linear','rbf']}\n", + " elif classifier_name == \"rfc\":\n", + " classifier_dic[classifier_name] = RandomForestClassifier()\n", + " grid_param[classifier_name] = { 'max_features': ['sqrt', 'log2'], 'max_depth' : [4,5,6,7,8]}\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "pO7oyeAF7KPK" + }, + "source": [ + "for clf_name, clf in classifier_dic.items():\n", + " if clf_name != 'bayes' :\n", + " clf = GridSearchCV(clf, grid_param[clf_name], refit = True, verbose = 3, n_jobs=-1)\n", + "\n", + " for vec_name, vectorizer in vectorizer_dic.items():\n", + "\n", + " if vec_name != 'doc2vec' :\n", + " vec_data = vectorizer.transform(data_train)\n", + " else : \n", + " vec_data = np.array([vectorizer.docvecs[str(i)] for i in range(len(tagged_tr))])\n", + "\n", + " clf.fit(vec_data, df_train[columnClass])\n", + "\n", + " clf_file_name = clf_name + '_' + vec_name + '_s' + str(maxOfInstancePerClass) +\".pkl\"\n", + "\n", + " # saving classifier\n", + " with open(\"drive/MyDrive/Classification-EDdA/\"+clf_file_name, 'wb') as file:\n", + " pickle.dump(clf, file)\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_evrNjmZ9E0e" + }, + "source": [ + "## Evaluation\n" + ] + }, + { + 
"cell_type": "code", + "metadata": { + "id": "TfKAjtVFblYe" + }, + "source": [ + "dataset_name = [\"validation\", \"test\"]" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "h8vZar8c7KRq", + "outputId": "83511c89-9219-43d1-9e5a-820e75012166" + }, + "source": [ + "for dataset in dataset_name:\n", + " df_eval = pd.read_csv(dataset+\"_set.tsv\", sep=\"\\t\")\n", + " data_eval = df_eval[columnText].tolist()\n", + "\n", + " for classifier_name in classifier_list:\n", + "\n", + " for vectorizer_name in vectorizer_list:\n", + "\n", + " clf_file_name = classifier_name + '_' + vectorizer_name + '_s' + str(maxOfInstancePerClass) +\".pkl\"\n", + " with open(\"drive/MyDrive/Classification-EDdA/\"+clf_file_name, 'rb') as file:\n", + " clf = pickle.load(file)\n", + "\n", + " vec_file_name = vectorizer_name + '_s' + str(maxOfInstancePerClass) +\".pkl\"\n", + " with open(\"drive/MyDrive/Classification-EDdA/\"+vec_file_name, 'rb') as file:\n", + " vectorizer = pickle.load(file)\n", + "\n", + " if vectorizer_name != 'doc2vec' :\n", + " vec_data = vectorizer.transform(data_eval)\n", + " else : \n", + " tagged_test = [TaggedDocument(words=tokenize_fr_text(_d), tags = [str(i)]) for i, _d in enumerate(data_eval)]\n", + " vec_data = np.array([vectorizer.infer_vector(tagged_test[i][0]) for i in range(len(tagged_test))])\n", + "\n", + "\n", + " y_pred = clf.predict(vec_data)\n", + "\n", + "\n", + " report = classification_report(y_pred, df_eval[columnClass], output_dict = True)\n", + " precision = []\n", + " recall = []\n", + " f1 = []\n", + " support = []\n", + " dff = pd.DataFrame(columns= ['class', 'precision', 'recall', 'f1-score', 'support', 'FP', 'FN', 'TP', 'TN'])\n", + " for c in df_eval[columnClass].unique() :\n", + " precision.append(report[c]['precision'])\n", + " recall.append(report[c]['recall'])\n", + " f1.append(report[c]['f1-score'])\n", + " 
support.append(report[c]['support'])\n", + "\n", + " accuracy = report['accuracy']\n", + " weighted_avg = report['weighted avg']\n", + " cnf_matrix = confusion_matrix(df_eval[columnClass], y_pred)\n", + " FP = cnf_matrix.sum(axis=0) - np.diag(cnf_matrix)\n", + " FN = cnf_matrix.sum(axis=1) - np.diag(cnf_matrix)\n", + " TP = np.diag(cnf_matrix)\n", + " TN = cnf_matrix.sum() - (FP + FN + TP)\n", + "\n", + " dff['class'] = df_eval[columnClass].unique()\n", + " dff['precision'] = precision\n", + " dff['recall'] = recall\n", + " dff['f1-score'] = f1\n", + " dff['support'] = support\n", + " dff['FP'] = FP\n", + " dff['FN'] = FN\n", + " dff['TP'] = TP\n", + " dff['TN'] = TN\n", + "\n", + "\n", + " print(dataset+\"_\"+classifier_name+'_' + vectorizer_name+\"_s\"+str(maxOfInstancePerClass))\n", + "\n", + " print(weighted_avg)\n", + " print(accuracy)\n", + " print(dff)\n", + "\n", + " dff.to_csv(\"drive/MyDrive/Classification-EDdA/report_\"+dataset+\"_\"+classifier_name+'_' + vectorizer_name+\"_s\"+str(maxOfInstancePerClass)+\".csv\", index=False)\n", + "\n" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/local/lib/python3.7/dist-packages/sklearn/metrics/_classification.py:1308: UndefinedMetricWarning: Recall and F-score are ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.\n", + " _warn_prf(average, modifier, msg_start, len(result))\n", + "/usr/local/lib/python3.7/dist-packages/sklearn/metrics/_classification.py:1308: UndefinedMetricWarning: Recall and F-score are ill-defined and being set to 0.0 in labels with no true samples. 
Use `zero_division` parameter to control this behavior.\n", + " _warn_prf(average, modifier, msg_start, len(result))\n", + "/usr/local/lib/python3.7/dist-packages/sklearn/metrics/_classification.py:1308: UndefinedMetricWarning: Recall and F-score are ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.\n", + " _warn_prf(average, modifier, msg_start, len(result))\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "validation_bayes_bagofwords_s10000\n", + "{'precision': 0.8377945389222964, 'recall': 0.619530464967571, 'f1-score': 0.6842670335331308, 'support': 10947}\n", + "0.619530464967571\n", + " class precision ... TP TN\n", + "0 Droit - Jurisprudence 0.963590 ... 5 10735\n", + "1 Grammaire 0.321888 ... 46 10760\n", + "2 Histoire naturelle 0.938776 ... 55 10665\n", + "3 Commerce 0.310249 ... 42 10679\n", + "4 Géographie 0.958193 ... 0 10839\n", + "5 Architecture 0.158491 ... 0 10863\n", + "6 Monnaie 0.000000 ... 4 10751\n", + "7 Médecine - Chirurgie 0.735981 ... 3 10860\n", + "8 Métiers 0.917495 ... 0 10925\n", + "9 Militaire (Art) - Guerre - Arme 0.182186 ... 1 10845\n", + "10 Anatomie 0.245989 ... 1 10853\n", + "11 Jeu 0.000000 ... 112 10553\n", + "12 Pharmacie 0.000000 ... 1138 9191\n", + "13 Antiquité 0.209125 ... 0 10921\n", + "14 Belles-lettres - Poésie 0.020513 ... 150 10358\n", + "15 Agriculture - Economie rustique 0.023585 ... 2269 8114\n", + "16 Mathématiques 0.142857 ... 357 9728\n", + "17 Beaux-arts 0.000000 ... 874 9278\n", + "18 Physique - [Sciences physico-mathématiques] 0.364372 ... 0 10893\n", + "19 Marine 0.410468 ... 149 10579\n", + "20 Chasse 0.009804 ... 5 10850\n", + "21 Arts et métiers 0.000000 ... 18 10819\n", + "22 Religion 0.526646 ... 0 10912\n", + "23 Blason 0.034483 ... 45 10699\n", + "24 Pêche 0.025641 ... 0 10926\n", + "25 Histoire 0.603041 ... 0 10886\n", + "26 Maréchage - Manège 0.051546 ... 11 10814\n", + "27 Mesure 0.000000 ... 
0 10924\n", + "28 Economie domestique 0.000000 ... 315 10264\n", + "29 Philosophie 0.000000 ... 923 8722\n", + "30 Superstition 0.000000 ... 0 10888\n", + "31 Chimie 0.010638 ... 0 10854\n", + "32 Médailles 0.000000 ... 90 10659\n", + "33 Musique 0.082707 ... 0 10925\n", + "34 Caractères 0.000000 ... 1 10908\n", + "35 Spectacle 0.000000 ... 168 10570\n", + "36 Minéralogie 0.000000 ... 0 10938\n", + "37 Politique 0.000000 ... 0 10926\n", + "\n", + "[38 rows x 9 columns]\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/local/lib/python3.7/dist-packages/sklearn/metrics/_classification.py:1308: UndefinedMetricWarning: Recall and F-score are ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.\n", + " _warn_prf(average, modifier, msg_start, len(result))\n", + "/usr/local/lib/python3.7/dist-packages/sklearn/metrics/_classification.py:1308: UndefinedMetricWarning: Recall and F-score are ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.\n", + " _warn_prf(average, modifier, msg_start, len(result))\n", + "/usr/local/lib/python3.7/dist-packages/sklearn/metrics/_classification.py:1308: UndefinedMetricWarning: Recall and F-score are ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.\n", + " _warn_prf(average, modifier, msg_start, len(result))\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "validation_bayes_tf_idf_s10000\n", + "{'precision': 0.9361172330822201, 'recall': 0.48853567187357266, 'f1-score': 0.6289575972884817, 'support': 10947}\n", + "0.48853567187357266\n", + " class precision ... TP TN\n", + "0 Droit - Jurisprudence 0.922100 ... 0 10735\n", + "1 Grammaire 0.000000 ... 7 10760\n", + "2 Histoire naturelle 0.888292 ... 0 10684\n", + "3 Commerce 0.036011 ... 1 10682\n", + "4 Géographie 0.995777 ... 
0 10839\n", + "5 Architecture 0.003774 ... 0 10863\n", + "6 Monnaie 0.000000 ... 0 10752\n", + "7 Médecine - Chirurgie 0.221963 ... 0 10860\n", + "8 Métiers 0.903579 ... 0 10925\n", + "9 Militaire (Art) - Guerre - Arme 0.004049 ... 0 10845\n", + "10 Anatomie 0.037433 ... 0 10853\n", + "11 Jeu 0.000000 ... 13 10585\n", + "12 Pharmacie 0.000000 ... 1089 9047\n", + "13 Antiquité 0.000000 ... 0 10921\n", + "14 Belles-lettres - Poésie 0.000000 ... 0 10481\n", + "15 Agriculture - Economie rustique 0.000000 ... 2358 5636\n", + "16 Mathématiques 0.000000 ... 14 10349\n", + "17 Beaux-arts 0.000000 ... 827 9314\n", + "18 Physique - [Sciences physico-mathématiques] 0.004049 ... 0 10893\n", + "19 Marine 0.088154 ... 32 10583\n", + "20 Chasse 0.000000 ... 0 10850\n", + "21 Arts et métiers 0.000000 ... 0 10821\n", + "22 Religion 0.003135 ... 0 10912\n", + "23 Blason 0.000000 ... 1 10700\n", + "24 Pêche 0.000000 ... 0 10926\n", + "25 Histoire 0.023649 ... 0 10886\n", + "26 Maréchage - Manège 0.000000 ... 0 10814\n", + "27 Mesure 0.000000 ... 0 10924\n", + "28 Economie domestique 0.000000 ... 95 10502\n", + "29 Philosophie 0.000000 ... 909 8731\n", + "30 Superstition 0.000000 ... 0 10888\n", + "31 Chimie 0.000000 ... 0 10854\n", + "32 Médailles 0.000000 ... 1 10700\n", + "33 Musique 0.000000 ... 0 10925\n", + "34 Caractères 0.000000 ... 0 10908\n", + "35 Spectacle 0.000000 ... 1 10628\n", + "36 Minéralogie 0.000000 ... 0 10938\n", + "37 Politique 0.000000 ... 0 10926\n", + "\n", + "[38 rows x 9 columns]\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/local/lib/python3.7/dist-packages/sklearn/metrics/_classification.py:1308: UndefinedMetricWarning: Recall and F-score are ill-defined and being set to 0.0 in labels with no true samples. 
Use `zero_division` parameter to control this behavior.\n", + " _warn_prf(average, modifier, msg_start, len(result))\n", + "/usr/local/lib/python3.7/dist-packages/sklearn/metrics/_classification.py:1308: UndefinedMetricWarning: Recall and F-score are ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.\n", + " _warn_prf(average, modifier, msg_start, len(result))\n", + "/usr/local/lib/python3.7/dist-packages/sklearn/metrics/_classification.py:1308: UndefinedMetricWarning: Recall and F-score are ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.\n", + " _warn_prf(average, modifier, msg_start, len(result))\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "test_bayes_bagofwords_s10000\n", + "{'precision': 0.8343333806034451, 'recall': 0.6158940397350994, 'f1-score': 0.6801987597575112, 'support': 13137}\n", + "0.6158940397350994\n", + " class precision ... TP TN\n", + "0 Histoire 0.579466 ... 3 12882\n", + "1 Droit - Jurisprudence 0.953423 ... 44 12913\n", + "2 Géographie 0.953906 ... 58 12804\n", + "3 Métiers 0.922949 ... 48 12815\n", + "4 Architecture 0.150943 ... 0 13008\n", + "5 Médecine - Chirurgie 0.744639 ... 0 13037\n", + "6 Mathématiques 0.225166 ... 2 12900\n", + "7 Grammaire 0.305357 ... 4 13032\n", + "8 Monnaie 0.000000 ... 0 13110\n", + "9 Commerce 0.327945 ... 1 13015\n", + "10 Anatomie 0.196429 ... 2 13025\n", + "11 Physique - [Sciences physico-mathématiques] 0.331081 ... 142 12652\n", + "12 Philosophie 0.000000 ... 1351 11028\n", + "13 Belles-lettres - Poésie 0.008511 ... 0 13106\n", + "14 Militaire (Art) - Guerre - Arme 0.199324 ... 171 12399\n", + "15 Antiquité 0.183544 ... 2711 9779\n", + "16 Maréchage - Manège 0.008621 ... 412 11633\n", + "17 Chasse 0.008197 ... 1054 11199\n", + "18 Agriculture - Economie rustique 0.011811 ... 0 13072\n", + "19 Histoire naturelle 0.942755 ... 
185 12697\n", + "20 Religion 0.535248 ... 1 13021\n", + "21 Mesure 0.000000 ... 34 12983\n", + "22 Musique 0.062500 ... 0 13095\n", + "23 Arts et métiers 0.000000 ... 59 12838\n", + "24 Marine 0.425287 ... 0 13111\n", + "25 Blason 0.038095 ... 0 13064\n", + "26 Chimie 0.017857 ... 10 12976\n", + "27 Economie domestique 0.000000 ... 0 13109\n", + "28 Beaux-arts 0.000000 ... 382 12312\n", + "29 Jeu 0.000000 ... 1114 10375\n", + "30 Pêche 0.000000 ... 0 13066\n", + "31 Politique 0.000000 ... 0 13025\n", + "32 Minéralogie 0.000000 ... 98 12817\n", + "33 Pharmacie 0.000000 ... 0 13111\n", + "34 Superstition 0.000000 ... 0 13090\n", + "35 Caractères 0.000000 ... 205 12686\n", + "36 Médailles 0.000000 ... 0 13126\n", + "37 Spectacle 0.000000 ... 0 13112\n", + "\n", + "[38 rows x 9 columns]\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/local/lib/python3.7/dist-packages/sklearn/metrics/_classification.py:1308: UndefinedMetricWarning: Recall and F-score are ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.\n", + " _warn_prf(average, modifier, msg_start, len(result))\n", + "/usr/local/lib/python3.7/dist-packages/sklearn/metrics/_classification.py:1308: UndefinedMetricWarning: Recall and F-score are ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.\n", + " _warn_prf(average, modifier, msg_start, len(result))\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "test_bayes_tf_idf_s10000\n", + "{'precision': 0.9374431375624079, 'recall': 0.4883915658065007, 'f1-score': 0.6291194809131295, 'support': 13137}\n", + "0.4883915658065007\n", + " class precision ... TP TN\n", + "0 Histoire 0.018284 ... 0 12883\n", + "1 Droit - Jurisprudence 0.928017 ... 3 12913\n", + "2 Géographie 0.997185 ... 0 12821\n", + "3 Métiers 0.906379 ... 0 12819\n", + "4 Architecture 0.000000 ... 
0 13008\n", + "5 Médecine - Chirurgie 0.230019 ... 0 13037\n", + "6 Mathématiques 0.000000 ... 0 12902\n", + "7 Grammaire 0.000000 ... 0 13032\n", + "8 Monnaie 0.000000 ... 0 13110\n", + "9 Commerce 0.036952 ... 0 13015\n", + "10 Anatomie 0.013393 ... 0 13025\n", + "11 Physique - [Sciences physico-mathématiques] 0.003378 ... 16 12701\n", + "12 Philosophie 0.000000 ... 1315 10852\n", + "13 Belles-lettres - Poésie 0.000000 ... 0 13106\n", + "14 Militaire (Art) - Guerre - Arme 0.003378 ... 0 12577\n", + "15 Antiquité 0.000000 ... 2834 6749\n", + "16 Maréchage - Manège 0.000000 ... 13 12422\n", + "17 Chasse 0.000000 ... 978 11227\n", + "18 Agriculture - Economie rustique 0.000000 ... 0 13072\n", + "19 Histoire naturelle 0.874776 ... 42 12702\n", + "20 Religion 0.002611 ... 0 13021\n", + "21 Mesure 0.000000 ... 0 12986\n", + "22 Musique 0.000000 ... 0 13095\n", + "23 Arts et métiers 0.000000 ... 1 12841\n", + "24 Marine 0.096552 ... 0 13111\n", + "25 Blason 0.000000 ... 0 13064\n", + "26 Chimie 0.000000 ... 0 12977\n", + "27 Economie domestique 0.000000 ... 0 13109\n", + "28 Beaux-arts 0.000000 ... 118 12608\n", + "29 Jeu 0.000000 ... 1094 10439\n", + "30 Pêche 0.000000 ... 0 13066\n", + "31 Politique 0.000000 ... 0 13025\n", + "32 Minéralogie 0.000000 ... 1 12840\n", + "33 Pharmacie 0.000000 ... 0 13111\n", + "34 Superstition 0.000000 ... 0 13090\n", + "35 Caractères 0.000000 ... 1 12754\n", + "36 Médailles 0.000000 ... 0 13126\n", + "37 Spectacle 0.000000 ... 0 13112\n", + "\n", + "[38 rows x 9 columns]\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/local/lib/python3.7/dist-packages/sklearn/metrics/_classification.py:1308: UndefinedMetricWarning: Recall and F-score are ill-defined and being set to 0.0 in labels with no true samples. 
Use `zero_division` parameter to control this behavior.\n", + " _warn_prf(average, modifier, msg_start, len(result))\n" + ] + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "mMiQo_sR7KWn" + }, + "source": [ + "" + ], + "execution_count": null, + "outputs": [] + } + ] +} \ No newline at end of file diff --git a/notebooks/EDdA_Classification_DeepLearning.ipynb b/notebooks/EDdA_Classification_DeepLearning.ipynb new file mode 100644 index 0000000..d8e9ea6 --- /dev/null +++ b/notebooks/EDdA_Classification_DeepLearning.ipynb @@ -0,0 +1,1351 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "name": "EDdA-Classification_DeepLearning.ipynb", + "provenance": [], + "collapsed_sections": [] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "name": "python" + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "0yFsoHXX8Iyy" + }, + "source": [ + "# Deep learning for EDdA classification" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "EyksTV6277Jv" + }, + "source": [ + "## Configuration" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "G5LT5n9O7SLt" + }, + "source": [ + "train_path = 'training_set.tsv'\n", + "validation_path = 'validation_set.tsv'\n", + "test_path = 'test_set.tsv'\n", + "\n", + "columnText = 'contentWithoutClass'\n", + "columnClass = 'ensemble_domaine_enccre'\n", + "\n", + "minOfInstancePerClass = 0\n", + "maxOfInstancePerClass = 1500\n", + "\n", + "\n", + "batch_size = 64\n", + "max_len = 512 # \n", + "epochs = 20\n", + "embedding_dim = 300 " + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "tFlUCDL2778i" + }, + "source": [ + "## Setup colab environment" + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Sp8d_Uus7SHJ", + "outputId": "82929364-d0a1-4962-fcb4-47224a48e6cf" + }, + "source": [ + 
"from google.colab import drive\n", + "drive.mount('/content/drive')" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Mounted at /content/drive\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "jQBu-p6hBU-j" + }, + "source": [ + "## Install packages" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "bTIXsF6kBUdh" + }, + "source": [ + "#!pip install zeugma\n", + "#!pip install plot_model" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "56-04SNF8BMx" + }, + "source": [ + "## Import librairies" + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "HwWkSznz7SEv", + "outputId": "02ecbbf8-556f-4567-b57d-6e13a4ca28ff" + }, + "source": [ + "from nltk.tokenize import word_tokenize\n", + "import nltk\n", + "from nltk.corpus import stopwords\n", + "nltk.download('stopwords')\n", + "nltk.download('punkt')\n", + "\n", + "import keras\n", + "from keras import optimizers\n", + "from keras import backend as K\n", + "from keras import regularizers\n", + "from keras.models import Sequential\n", + "from keras.layers import Dense, Activation, Dropout, Flatten\n", + "from keras.layers import Embedding, Conv1D, MaxPooling1D, GlobalMaxPooling1D\n", + "from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout, Bidirectional\n", + "#from keras.utils import plot_model\n", + "from keras.preprocessing import sequence\n", + "from keras.preprocessing.text import Tokenizer\n", + "from keras.callbacks import EarlyStopping\n", + "\n", + "import string\n", + "import tensorflow as tf\n", + "#from zeugma import TextsToSequences\n", + "import pandas as pd\n", + "import numpy as np\n", + "from sklearn import preprocessing\n", + "from sklearn.metrics import classification_report\n", + "\n", + "\n", + "\n", + "from tqdm import tqdm\n", + "import requests, zipfile, io\n", 
+ "import os, re, csv, math, codecs" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "[nltk_data] Downloading package stopwords to /root/nltk_data...\n", + "[nltk_data] Unzipping corpora/stopwords.zip.\n", + "[nltk_data] Downloading package punkt to /root/nltk_data...\n", + "[nltk_data] Unzipping tokenizers/punkt.zip.\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "xrekV6W978l4" + }, + "source": [ + "## Utils functions" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "4LJ5blQR7PUe" + }, + "source": [ + "\n", + "def resample_classes(df, classColumnName, numberOfInstances):\n", + " #random numberOfInstances elements\n", + " replace = False # with replacement\n", + " fn = lambda obj: obj.loc[np.random.choice(obj.index, numberOfInstances if len(obj) > numberOfInstances else len(obj), replace),:]\n", + " return df.groupby(classColumnName, as_index=False).apply(fn)\n", + " \n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "-Rh3JMDh7zYd" + }, + "source": [ + "" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MtLr35eM753e" + }, + "source": [ + "## Load Data" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "FnbNT4NF7zal" + }, + "source": [ + "!wget https://projet.liris.cnrs.fr/geode/EDdA-Classification/datasets/training_set.tsv\n", + "!wget https://projet.liris.cnrs.fr/geode/EDdA-Classification/datasets/validation_set.tsv\n", + "!wget https://projet.liris.cnrs.fr/geode/EDdA-Classification/datasets/test_set.tsv" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "WNqDms64lfaS" + }, + "source": [ + "# download FastText\n", + "zip_file_url = \"https://dl.fbaipublicfiles.com/fasttext/vectors-english/crawl-300d-2M.vec.zip\"\n", + "r = requests.get(zip_file_url)\n", + "z = 
zipfile.ZipFile(io.BytesIO(r.content))\n", + "z.extractall()" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "PGMIi0CAmqSd", + "outputId": "09c034fd-f689-43a9-fd75-5923906d89bf" + }, + "source": [ + "print('loading word embeddings...')\n", + "\n", + "embeddings_index = {}\n", + "f = codecs.open('crawl-300d-2M.vec', encoding='utf-8')\n", + "\n", + "for line in tqdm(f):\n", + " values = line.rstrip().rsplit(' ')\n", + " word = values[0]\n", + " coefs = np.asarray(values[1:], dtype='float32')\n", + " embeddings_index[word] = coefs\n", + "f.close()\n", + "\n", + "print('found %s word vectors' % len(embeddings_index))" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "loading word embeddings...\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "1999996it [03:40, 9087.22it/s]" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "found 1999996 word vectors\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\n" + ] + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "nRLaQUO97zcq" + }, + "source": [ + "df_train = pd.read_csv(train_path, sep=\"\\t\")\n", + "df_train = resample_classes(df_train, columnClass, maxOfInstancePerClass)\n", + "\n", + "df_validation = pd.read_csv(validation_path, sep=\"\\t\")\n", + "df_validation = resample_classes(df_validation, columnClass, maxOfInstancePerClass)\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "vGWAgBH87ze8" + }, + "source": [ + "y_train = df_train[columnClass]\n", + "y_validation = df_validation[columnClass]\n", + "numberOfClasses = y_train.nunique()\n", + "\n", + "encoder = preprocessing.LabelEncoder()\n", + "\n", + "y_train = encoder.fit_transform(y_train)\n", + "y_validation = 
encoder.fit_transform(y_validation)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 452 + }, + "id": "7OYjo_uhoqcX", + "outputId": "79c4ff25-0476-4e12-d6ff-a8e073ee3f6c" + }, + "source": [ + "df_validation.head()" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th></th>\n", + " <th>volume</th>\n", + " <th>numero</th>\n", + " <th>head</th>\n", + " <th>normClass</th>\n", + " <th>classEDdA</th>\n", + " <th>author</th>\n", + " <th>id_enccre</th>\n", + " <th>domaine_enccre</th>\n", + " <th>ensemble_domaine_enccre</th>\n", + " <th>content</th>\n", + " <th>contentWithoutClass</th>\n", + " <th>firstParagraph</th>\n", + " <th>nb_word</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th rowspan=\"5\" valign=\"top\">0</th>\n", + " <th>10449</th>\n", + " <td>14</td>\n", + " <td>2879</td>\n", + " <td>Sabler une allée</td>\n", + " <td>Jardinage</td>\n", + " <td>terme de Jardinier.</td>\n", + " <td>Jaucourt</td>\n", + " <td>v14-1651-1</td>\n", + " <td>jardinage</td>\n", + " <td>Agriculture - Economie rustique</td>\n", + " <td>\\nSabler une allée, (terme de Jardinier.) 
c'es...</td>\n", + " <td>\\nSabler une allée, () c'est couvrir \\navec ar...</td>\n", + " <td>\\nSabler une allée, () c'est couvrir \\navec ar...</td>\n", + " <td>70</td>\n", + " </tr>\n", + " <tr>\n", + " <th>8134</th>\n", + " <td>17</td>\n", + " <td>1598</td>\n", + " <td>Volée</td>\n", + " <td>Jardinage</td>\n", + " <td>Jardin.</td>\n", + " <td>Jaucourt</td>\n", + " <td>v17-842-3</td>\n", + " <td>jardinage</td>\n", + " <td>Agriculture - Economie rustique</td>\n", + " <td>\\nVolée, (Jardin.) c'est le nom qu'on donne au...</td>\n", + " <td>\\nVolée, () c'est le nom qu'on donne au travai...</td>\n", + " <td>\\nVolée, () c'est le nom qu'on donne au travai...</td>\n", + " <td>48</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5308</th>\n", + " <td>13</td>\n", + " <td>2051</td>\n", + " <td>PRUNELLIER</td>\n", + " <td>Jardinage</td>\n", + " <td>Jardinage.</td>\n", + " <td>unsigned</td>\n", + " <td>v13-1146-0</td>\n", + " <td>jardinage</td>\n", + " <td>Agriculture - Economie rustique</td>\n", + " <td>\\nPRUNELLIER, s. m. (Jardinage.) arbrisseau ép...</td>\n", + " <td>\\nPRUNELLIER, s. m. () arbrisseau épineux qui ...</td>\n", + " <td>\\nPRUNELLIER, s. m. () arbrisseau épineux qui ...</td>\n", + " <td>275</td>\n", + " </tr>\n", + " <tr>\n", + " <th>10064</th>\n", + " <td>9</td>\n", + " <td>3775</td>\n", + " <td>MACQUE</td>\n", + " <td>Economie rustique</td>\n", + " <td>Econ. rustiq.</td>\n", + " <td>unsigned</td>\n", + " <td>v9-2286-0</td>\n", + " <td>economierustique</td>\n", + " <td>Agriculture - Economie rustique</td>\n", + " <td>\\nMACQUE, s. f. (Econ. rustiq.) instrument de\\...</td>\n", + " <td>\\nMACQUE, s. f. () instrument de\\nbois dont on...</td>\n", + " <td>\\nMACQUE, s. f. 
() instrument de\\nbois dont on...</td>\n", + " <td>23</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5981</th>\n", + " <td>9</td>\n", + " <td>3262</td>\n", + " <td>LOQUE</td>\n", + " <td>Jardinage</td>\n", + " <td>Jardinage.</td>\n", + " <td>unsigned</td>\n", + " <td>v9-1905-0</td>\n", + " <td>jardinage</td>\n", + " <td>Agriculture - Economie rustique</td>\n", + " <td>\\nLOQUE, s. f. (Jardinage.) terme de jardinage...</td>\n", + " <td>\\nLOQUE, s. f. () terme de jardinage,\\nqui n'e...</td>\n", + " <td>\\nLOQUE, s. f. () terme de jardinage,\\nqui n'e...</td>\n", + " <td>61</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " volume ... nb_word\n", + "0 10449 14 ... 70\n", + " 8134 17 ... 48\n", + " 5308 13 ... 275\n", + " 10064 9 ... 23\n", + " 5981 9 ... 61\n", + "\n", + "[5 rows x 13 columns]" + ] + }, + "metadata": {}, + "execution_count": 10 + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "HuUVfklf-dSR" + }, + "source": [ + "## Training models" + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "NTNh6kMTp_eU", + "outputId": "3c1eb88c-7f1d-48f1-92bc-bc671f5e1bc1" + }, + "source": [ + "#https://github.com/emmanuellaanggi/disaster_tweet_sentiment/blob/master/(Medium)_Text_Classification_Disaster_Tweet_.ipynb\n", + "\n", + "raw_docs_train = df_train[columnText].tolist()\n", + "raw_docs_validation = df_validation[columnText].tolist() \n", + "\n", + "\n", + "print(\"pre-processing train data...\")\n", + "\n", + "stop_words = set(stopwords.words('french'))\n", + "\n", + "processed_docs_train = []\n", + "for doc in tqdm(raw_docs_train):\n", + " tokens = word_tokenize(doc, language='french')\n", + " filtered = [word for word in tokens if word not in stop_words]\n", + " processed_docs_train.append(\" \".join(filtered))\n", + "#end for\n", + "\n", + "processed_docs_validation = []\n", + "for doc in tqdm(raw_docs_validation):\n", + " tokens = 
word_tokenize(doc)\n", + " filtered = [word for word in tokens if word not in stop_words]\n", + " processed_docs_validation.append(\" \".join(filtered))\n", + "#end for\n", + "\n", + "print(\"tokenizing input data...\")\n", + "tokenizer = Tokenizer(num_words=max_len, lower=True, char_level=False)\n", + "tokenizer.fit_on_texts(processed_docs_train + processed_docs_validation) #leaky\n", + "word_seq_train = tokenizer.texts_to_sequences(processed_docs_train)\n", + "word_seq_validation = tokenizer.texts_to_sequences(processed_docs_validation)\n", + "word_index = tokenizer.word_index\n", + "print(\"dictionary size: \", len(word_index))\n", + "\n", + "#pad sequences\n", + "word_seq_train = sequence.pad_sequences(word_seq_train, maxlen=max_len)\n", + "word_seq_validation = sequence.pad_sequences(word_seq_validation, maxlen=max_len)" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "pre-processing train data...\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "100%|██████████| 21129/21129 [00:15<00:00, 1359.31it/s]\n", + "100%|██████████| 10079/10079 [00:07<00:00, 1378.11it/s]\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "tokenizing input data...\n", + "dictionary size: 95254\n" + ] + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Wj8RkOhT_e2c", + "outputId": "56152da7-47b7-4b07-84e7-8c499671d53e" + }, + "source": [ + "word_seq_validation" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "array([[ 0, 0, 0, ..., 293, 8, 7],\n", + " [ 0, 0, 0, ..., 112, 8, 7],\n", + " [ 0, 0, 0, ..., 498, 212, 4],\n", + " ...,\n", + " [ 0, 0, 0, ..., 1, 28, 45],\n", + " [ 0, 0, 0, ..., 67, 12, 460],\n", + " [ 0, 0, 0, ..., 188, 213, 37]], dtype=int32)" + ] + }, + "metadata": {}, + "execution_count": 12 + } + ] + }, + { + 
"cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "wGjQI0YgpQAS", + "outputId": "43a3d902-5a8d-4159-a21e-419b5ee35d7d" + }, + "source": [ + "#embedding matrix\n", + "\n", + "print('preparing embedding matrix...')\n", + "\n", + "words_not_found = []\n", + "nb_words = min(max_len, len(word_index)+1)\n", + "embedding_matrix = np.zeros((nb_words, embedding_dim))\n", + "\n", + "for word, i in word_index.items():\n", + " if i >= nb_words:\n", + " continue\n", + " embedding_vector = embeddings_index.get(word)\n", + " if (embedding_vector is not None) and len(embedding_vector) > 0:\n", + " # words not found in embedding index will be all-zeros.\n", + " embedding_matrix[i] = embedding_vector\n", + " else:\n", + " words_not_found.append(word)\n", + "print('number of null word embeddings: %d' % np.sum(np.sum(embedding_matrix, axis=1) == 0))" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "preparing embedding matrix...\n", + "number of null word embeddings: 70\n" + ] + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "hjaeYIZCtGca", + "outputId": "5ab4dd1a-a500-479f-e289-892242c83de8" + }, + "source": [ + "print(\"sample words not found: \", np.random.choice(words_not_found, 10))" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "sample words not found: ['especes' \"d'argent\" \"d'où\" \"d'argent\" \"qu'elle\" \"qu'elle\" \"c'étoit\"\n", + " 'différens' 'faisoit' 'faisoit']\n" + ] + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "4O0gnsX8pNVU", + "outputId": "46feba64-b608-4b53-de15-b586dc24b880" + }, + "source": [ + "from keras.layers import BatchNormalization\n", + "import tensorflow as tf\n", + "\n", + "model = tf.keras.Sequential()\n", + "\n", + 
"model.add(Embedding(nb_words,embedding_dim,input_length=max_len, weights=[embedding_matrix],trainable=False))\n", + "model.add(Bidirectional(LSTM(100)))\n", + "model.add(Dense(64,activation='relu'))\n", + "model.add(Dropout(0.2))\n", + "#model.add(Dense(numberOfClasses,activation='sigmoid'))\n", + "model.add(Dense(numberOfClasses,activation='softmax'))\n", + "model.summary()" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Model: \"sequential\"\n", + "_________________________________________________________________\n", + " Layer (type) Output Shape Param # \n", + "=================================================================\n", + " embedding (Embedding) (None, 512, 300) 153600 \n", + " \n", + " bidirectional (Bidirectiona (None, 200) 320800 \n", + " l) \n", + " \n", + " dense (Dense) (None, 64) 12864 \n", + " \n", + " dropout (Dropout) (None, 64) 0 \n", + " \n", + " dense_1 (Dense) (None, 38) 2470 \n", + " \n", + "=================================================================\n", + "Total params: 489,734\n", + "Trainable params: 336,134\n", + "Non-trainable params: 153,600\n", + "_________________________________________________________________\n" + ] + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "GcfMJl8f-cBA" + }, + "source": [ + "\n", + "#model = NN_withEmbeddings(longueur_dict, embedding_dim, max_len, numberOfClasses)\n", + "\n", + "model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])\n", + "#model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=[tf.keras.metrics.AUC(multi_label=True)])" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "OTQTH5VDuA3I", + "outputId": "b8286232-4938-4591-b483-6b6d1bdc015e" + }, + "source": [ + "#model.fit(padded, np.array(y_train), epochs=epochs, batch_size = 
batch_size) \n", + "model.fit(word_seq_train, y_train, batch_size=256, epochs=epochs, validation_data=(word_seq_validation, y_validation), shuffle=True)" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Epoch 1/20\n", + "83/83 [==============================] - 530s 6s/step - loss: 3.0575 - accuracy: 0.1886 - val_loss: 2.2493 - val_accuracy: 0.4315\n", + "Epoch 2/20\n", + "83/83 [==============================] - 525s 6s/step - loss: 2.4420 - accuracy: 0.3559 - val_loss: 1.9674 - val_accuracy: 0.4978\n", + "Epoch 3/20\n", + "83/83 [==============================] - 538s 6s/step - loss: 2.1828 - accuracy: 0.4177 - val_loss: 1.8540 - val_accuracy: 0.5212\n", + "Epoch 4/20\n", + "83/83 [==============================] - 515s 6s/step - loss: 2.0359 - accuracy: 0.4555 - val_loss: 1.7155 - val_accuracy: 0.5439\n", + "Epoch 5/20\n", + "83/83 [==============================] - 533s 6s/step - loss: 1.9296 - accuracy: 0.4800 - val_loss: 1.6698 - val_accuracy: 0.5502\n", + "Epoch 6/20\n", + "83/83 [==============================] - 521s 6s/step - loss: 1.8527 - accuracy: 0.4990 - val_loss: 1.6268 - val_accuracy: 0.5634\n", + "Epoch 7/20\n", + "83/83 [==============================] - 517s 6s/step - loss: 1.7960 - accuracy: 0.5127 - val_loss: 1.6098 - val_accuracy: 0.5664\n", + "Epoch 8/20\n", + "83/83 [==============================] - 506s 6s/step - loss: 1.7429 - accuracy: 0.5213 - val_loss: 1.5687 - val_accuracy: 0.5741\n", + "Epoch 9/20\n", + "83/83 [==============================] - 524s 6s/step - loss: 1.6994 - accuracy: 0.5328 - val_loss: 1.5799 - val_accuracy: 0.5761\n", + "Epoch 10/20\n", + "83/83 [==============================] - 531s 6s/step - loss: 1.6568 - accuracy: 0.5426 - val_loss: 1.5366 - val_accuracy: 0.5874\n", + "Epoch 11/20\n", + "83/83 [==============================] - 515s 6s/step - loss: 1.6147 - accuracy: 0.5525 - val_loss: 1.5965 - val_accuracy: 0.5639\n", + "Epoch 12/20\n", + "83/83 
[==============================] - 506s 6s/step - loss: 1.5833 - accuracy: 0.5601 - val_loss: 1.5263 - val_accuracy: 0.5880\n", + "Epoch 13/20\n", + "83/83 [==============================] - 505s 6s/step - loss: 1.5477 - accuracy: 0.5694 - val_loss: 1.5200 - val_accuracy: 0.5889\n", + "Epoch 14/20\n", + "83/83 [==============================] - 498s 6s/step - loss: 1.5119 - accuracy: 0.5776 - val_loss: 1.5272 - val_accuracy: 0.5887\n", + "Epoch 15/20\n", + "83/83 [==============================] - 500s 6s/step - loss: 1.4732 - accuracy: 0.5852 - val_loss: 1.5367 - val_accuracy: 0.5897\n", + "Epoch 16/20\n", + "83/83 [==============================] - 501s 6s/step - loss: 1.4471 - accuracy: 0.5914 - val_loss: 1.5411 - val_accuracy: 0.5832\n", + "Epoch 17/20\n", + "83/83 [==============================] - 501s 6s/step - loss: 1.4036 - accuracy: 0.6039 - val_loss: 1.5438 - val_accuracy: 0.5893\n", + "Epoch 18/20\n", + "83/83 [==============================] - 501s 6s/step - loss: 1.3778 - accuracy: 0.6075 - val_loss: 1.5547 - val_accuracy: 0.5825\n", + "Epoch 19/20\n", + "83/83 [==============================] - 502s 6s/step - loss: 1.3452 - accuracy: 0.6159 - val_loss: 1.5920 - val_accuracy: 0.5753\n", + "Epoch 20/20\n", + "83/83 [==============================] - 501s 6s/step - loss: 1.3247 - accuracy: 0.6223 - val_loss: 1.5850 - val_accuracy: 0.5773\n" + ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "<keras.callbacks.History at 0x7f4269526a90>" + ] + }, + "metadata": {}, + "execution_count": 17 + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Uw6YR76p_AF0" + }, + "source": [ + "## Saving models" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "ykTp9lyRaAma" + }, + "source": [ + "model.save(\"drive/MyDrive/Classification-EDdA/lstm_fasttext_s\"+str(maxOfInstancePerClass)+\".h5\")\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "5J4xDoqRUSfS" + }, 
+ "source": [ + "# save embeddings\n", + "\n", + "# saving embeddings index \n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "HHlEtipG_Cp0" + }, + "source": [ + "## Loading models" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "fKt8ft1t_Cxx" + }, + "source": [ + "model = keras.models.load_model(\"drive/MyDrive/Classification-EDdA/lstm_fasttext_s\"+str(maxOfInstancePerClass)+\".h5\")\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "zbS4poso-3k7" + }, + "source": [ + "## Evaluation" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "G9pjdMdNW_KS" + }, + "source": [ + "predictions = model.predict(word_seq_validation)\n", + "predictions = np.argmax(predictions,axis=1)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "IHpVJ79IW_M0", + "outputId": "78e2a1aa-d35c-428c-e6c3-0ad332abcdfd" + }, + "source": [ + "report = classification_report(predictions, y_validation, output_dict = True)\n", + "\n", + "accuracy = report['accuracy']\n", + "weighted_avg = report['weighted avg']\n", + "\n", + "print(accuracy, weighted_avg)" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "0.5773390217283461 {'precision': 0.5977985581006744, 'recall': 0.5773390217283461, 'f1-score': 0.5808733866443131, 'support': 10079}\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/local/lib/python3.7/dist-packages/sklearn/metrics/_classification.py:1308: UndefinedMetricWarning: Recall and F-score are ill-defined and being set to 0.0 in labels with no true samples. 
Use `zero_division` parameter to control this behavior.\n", + " _warn_prf(average, modifier, msg_start, len(result))\n", + "/usr/local/lib/python3.7/dist-packages/sklearn/metrics/_classification.py:1308: UndefinedMetricWarning: Recall and F-score are ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.\n", + " _warn_prf(average, modifier, msg_start, len(result))\n", + "/usr/local/lib/python3.7/dist-packages/sklearn/metrics/_classification.py:1308: UndefinedMetricWarning: Recall and F-score are ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.\n", + " _warn_prf(average, modifier, msg_start, len(result))\n" + ] + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "9SKjWffUW_PC" + }, + "source": [ + "" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "LpgkGq-fW_RN" + }, + "source": [ + "" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "4gGNaPY1iuXD" + }, + "source": [ + "df_test = pd.read_csv(test_path, sep=\"\\t\")\n", + "\n", + "encoder = preprocessing.LabelEncoder()\n", + "y_test = encoder.fit_transform(df_test[columnClass])\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "P67p7BUZiuZV", + "outputId": "f958a063-ee95-4157-fcd9-796991615f03" + }, + "source": [ + "raw_docs_test = df_test[columnText].tolist()\n", + "\n", + "print(\"pre-processing test data...\")\n", + "\n", + "stop_words = set(stopwords.words('french'))\n", + "\n", + "processed_docs_test = []\n", + "for doc in tqdm(raw_docs_test):\n", + " tokens = word_tokenize(doc, language='french')\n", + " filtered = [word for word in tokens if word not in stop_words]\n", + " processed_docs_test.append(\" \".join(filtered))\n", + "#end for\n", 
+ "\n", + "print(\"tokenizing input data...\")\n", + "#tokenizer = Tokenizer(num_words=max_len, lower=True, char_level=False)\n", + "#tokenizer.fit_on_texts(processed_docs_train + processed_docs_validation) #leaky\n", + "word_seq_test = tokenizer.texts_to_sequences(processed_docs_test)\n", + "\n", + "#pad sequences\n", + "word_seq_test = sequence.pad_sequences(word_seq_test, maxlen=max_len)" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "pre-processing test data...\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "100%|██████████| 13137/13137 [00:09<00:00, 1317.07it/s]\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "tokenizing input data...\n" + ] + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "czeIqlD5iudH" + }, + "source": [ + "predictions = model.predict(word_seq_test)\n", + "predictions = np.argmax(predictions,axis=1)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Q9eYqi5SW_Ta", + "outputId": "3682a42a-7c07-446e-d913-3d20640fb2bf" + }, + "source": [ + "report = classification_report(predictions, y_test, output_dict = True)\n", + "\n", + "accuracy = report['accuracy']\n", + "weighted_avg = report['weighted avg']\n", + "\n", + "print(accuracy, weighted_avg)" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "0.5957220065463956 {'precision': 0.6075119377257042, 'recall': 0.5957220065463956, 'f1-score': 0.59493432234528, 'support': 13137}\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/local/lib/python3.7/dist-packages/sklearn/metrics/_classification.py:1308: UndefinedMetricWarning: Recall and F-score are ill-defined and being set to 0.0 in labels with no true samples. 
Use `zero_division` parameter to control this behavior.\n", + " _warn_prf(average, modifier, msg_start, len(result))\n", + "/usr/local/lib/python3.7/dist-packages/sklearn/metrics/_classification.py:1308: UndefinedMetricWarning: Recall and F-score are ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.\n", + " _warn_prf(average, modifier, msg_start, len(result))\n", + "/usr/local/lib/python3.7/dist-packages/sklearn/metrics/_classification.py:1308: UndefinedMetricWarning: Recall and F-score are ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.\n", + " _warn_prf(average, modifier, msg_start, len(result))\n" + ] + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "ra4FOHVniwUI", + "outputId": "cbe576f6-ce14-49ef-9aba-2d26f76cab92" + }, + "source": [ + "from sklearn.metrics import confusion_matrix\n", + "\n", + "classesName = encoder.classes_\n", + "classes = [str(e) for e in encoder.transform(encoder.classes_)]\n", + "\n", + "precision = []\n", + "recall = []\n", + "f1 = []\n", + "support = []\n", + "dff = pd.DataFrame(columns= ['className', 'precision', 'recall', 'f1-score', 'support', 'FP', 'FN', 'TP', 'TN'])\n", + "for c in classes:\n", + " precision.append(report[c]['precision'])\n", + " recall.append(report[c]['recall'])\n", + " f1.append(report[c]['f1-score'])\n", + " support.append(report[c]['support'])\n", + "\n", + "accuracy = report['accuracy']\n", + "weighted_avg = report['weighted avg']\n", + "\n", + "\n", + "cnf_matrix = confusion_matrix(y_test, predictions)\n", + "FP = cnf_matrix.sum(axis=0) - np.diag(cnf_matrix)\n", + "FN = cnf_matrix.sum(axis=1) - np.diag(cnf_matrix)\n", + "TP = np.diag(cnf_matrix)\n", + "TN = cnf_matrix.sum() - (FP + FN + TP)\n", + "\n", + "dff['className'] = classesName\n", + "dff['precision'] = precision\n", + "dff['recall'] = 
recall\n", + "dff['f1-score'] = f1\n", + "dff['support'] = support\n", + "dff['FP'] = FP\n", + "dff['FN'] = FN\n", + "dff['TP'] = TP\n", + "dff['TN'] = TN\n", + "\n", + "print(\"test_lstm_s\"+str(maxOfInstancePerClass))\n", + "\n", + "print(weighted_avg)\n", + "print(accuracy)\n", + "print(dff)\n", + "\n", + "dff.to_csv(\"drive/MyDrive/Classification-EDdA/report_test_lstm_s\"+str(maxOfInstancePerClass)+\".csv\", index=False)" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "test_lstm_s1500\n", + "{'precision': 0.6075119377257042, 'recall': 0.5957220065463956, 'f1-score': 0.59493432234528, 'support': 13137}\n", + "0.5957220065463956\n", + " className precision ... TP TN\n", + "0 Agriculture - Economie rustique 0.259843 ... 66 12780\n", + "1 Anatomie 0.446429 ... 100 12818\n", + "2 Antiquité 0.525316 ... 166 12425\n", + "3 Architecture 0.518868 ... 165 12597\n", + "4 Arts et métiers 0.007752 ... 1 13002\n", + "5 Beaux-arts 0.020000 ... 2 13016\n", + "6 Belles-lettres - Poésie 0.200000 ... 47 12667\n", + "7 Blason 0.466667 ... 49 12908\n", + "8 Caractères 0.074074 ... 2 13110\n", + "9 Chasse 0.262295 ... 32 12929\n", + "10 Chimie 0.348214 ... 39 12952\n", + "11 Commerce 0.524249 ... 227 12442\n", + "12 Droit - Jurisprudence 0.750176 ... 1063 11473\n", + "13 Economie domestique 0.000000 ... 0 13106\n", + "14 Grammaire 0.587500 ... 329 12094\n", + "15 Géographie 0.830753 ... 2361 10167\n", + "16 Histoire 0.459916 ... 327 11749\n", + "17 Histoire naturelle 0.687835 ... 769 11871\n", + "18 Jeu 0.415385 ... 27 13034\n", + "19 Marine 0.708046 ... 308 12497\n", + "20 Maréchage - Manège 0.784483 ... 91 12991\n", + "21 Mathématiques 0.450331 ... 68 12922\n", + "22 Mesure 0.333333 ... 14 13078\n", + "23 Militaire (Art) - Guerre - Arme 0.510135 ... 151 12719\n", + "24 Minéralogie 0.000000 ... 0 13111\n", + "25 Monnaie 0.041096 ... 3 13057\n", + "26 Musique 0.525000 ... 84 12922\n", + "27 Médailles 0.000000 ... 
0 13109\n", + "28 Médecine - Chirurgie 0.584795 ... 300 12279\n", + "29 Métiers 0.592378 ... 715 11248\n", + "30 Pharmacie 0.014085 ... 1 13065\n", + "31 Philosophie 0.160714 ... 18 12934\n", + "32 Physique - [Sciences physico-mathématiques] 0.533784 ... 158 12690\n", + "33 Politique 0.000000 ... 0 13111\n", + "34 Pêche 0.127660 ... 6 13067\n", + "35 Religion 0.357702 ... 137 12580\n", + "36 Spectacle 0.000000 ... 0 13126\n", + "37 Superstition 0.000000 ... 0 13112\n", + "\n", + "[38 rows x 9 columns]\n" + ] + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "x03FC0D-iwWP" + }, + "source": [ + "" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "gSVqcywgiwYH" + }, + "source": [ + "" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "-T5LfFtwiwaV" + }, + "source": [ + "" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "Yjd5c70_iwcY" + }, + "source": [ + "" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "2UNjiHYliwes" + }, + "source": [ + "" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "vLGTnit_W_V8" + }, + "source": [ + "" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "R-3lBXjDD9wE" + }, + "source": [ + "def predict(data, max_len):\n", + " \n", + " pad_sequ_test, _ = prepare_sequence(data, max_len)\n", + " pred_labels_ = model.predict(pad_sequ_test)\n", + "\n", + " return np.argmax(pred_labels_,axis=1)\n", + "\n", + "\n", + "def eval(data, labels, max_len):\n", + " \n", + " pred_labels_ = predict(data, max_len)\n", + " report = classification_report(pred_labels_, labels, output_dict = True)\n", + "\n", + " accuracy = report['accuracy']\n", + " weighted_avg = report['weighted avg']\n", + " \n", + " print(accuracy, 
weighted_avg)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "6T3kAvKvExgc", + "outputId": "c6d4560e-fc64-4579-9adb-79c2e36d2386" + }, + "source": [ + "# evaluation sur le jeu de validation\n", + "eval(df_validation[columnText], y_validation, max_len)" + ], + "execution_count": null, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.7/dist-packages/zeugma/keras_transformers.py:33: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray\n", + " return np.array(self.texts_to_sequences(texts))\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0.06925290207361841 {'precision': 0.09108131158125257, 'recall': 0.06925290207361841, 'f1-score': 0.06099084715237025, 'support': 10079}\n" + ] + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "pTDJA03_-8yu", + "outputId": "d8bcdf73-c4c3-4c88-b063-90bd1cad5122" + }, + "source": [ + "# evaluation sur le jeu de test\n", + "df_test = pd.read_csv(test_path, sep=\"\\t\")\n", + "#df_test = resample_classes(df_test, columnClass, maxOfInstancePerClass)\n", + "\n", + "y_test = df_test[columnClass]\n", + "encoder = preprocessing.LabelEncoder()\n", + "y_test = encoder.fit_transform(y_test)\n", + "\n", + "eval(df_test[columnText], y_test, max_len)\n" + ], + "execution_count": null, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.7/dist-packages/zeugma/keras_transformers.py:33: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with 
different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray\n", + " return np.array(self.texts_to_sequences(texts))\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0.07231483595950369 {'precision': 0.081194635559303, 'recall': 0.07231483595950369, 'f1-score': 0.06322383877903374, 'support': 13137}\n" + ] + } + ] + } + ] +} \ No newline at end of file diff --git a/notebooks/EDdA_Classification_DeepLearning_2.ipynb b/notebooks/EDdA_Classification_DeepLearning_2.ipynb new file mode 100644 index 0000000..444fc9a --- /dev/null +++ b/notebooks/EDdA_Classification_DeepLearning_2.ipynb @@ -0,0 +1,1349 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "name": "EDdA-Classification_DeepLearning_2.ipynb", + "provenance": [], + "collapsed_sections": [] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "name": "python" + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "0yFsoHXX8Iyy" + }, + "source": [ + "# Deep learning for EDdA classification" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "EyksTV6277Jv" + }, + "source": [ + "## Configuration" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "G5LT5n9O7SLt" + }, + "source": [ + "train_path = 'training_set.tsv'\n", + "validation_path = 'validation_set.tsv'\n", + "test_path = 'test_set.tsv'\n", + "\n", + "columnText = 'contentWithoutClass'\n", + "columnClass = 'ensemble_domaine_enccre'\n", + "\n", + "minOfInstancePerClass = 0\n", + "maxOfInstancePerClass = 10000\n", + "\n", + "\n", + "batch_size = 64\n", + "max_len = 512 # \n", + "epochs = 20\n", + "embedding_dim = 300 " + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "tFlUCDL2778i" + }, + "source": [ + "## Setup colab environment" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": 
"Sp8d_Uus7SHJ", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "20e599da-b04f-4ed9-95b0-ce22c094eff0" + }, + "source": [ + "from google.colab import drive\n", + "drive.mount('/content/drive')" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Mounted at /content/drive\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "jQBu-p6hBU-j" + }, + "source": [ + "## Install packages" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "bTIXsF6kBUdh" + }, + "source": [ + "#!pip install zeugma\n", + "#!pip install plot_model" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "56-04SNF8BMx" + }, + "source": [ + "## Import librairies" + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "HwWkSznz7SEv", + "outputId": "046fd487-180e-4c50-ae33-d5ccc122ef46" + }, + "source": [ + "from nltk.tokenize import word_tokenize\n", + "import nltk\n", + "from nltk.corpus import stopwords\n", + "nltk.download('stopwords')\n", + "nltk.download('punkt')\n", + "\n", + "import keras\n", + "from keras import optimizers\n", + "from keras import backend as K\n", + "from keras import regularizers\n", + "from keras.models import Sequential\n", + "from keras.layers import Dense, Activation, Dropout, Flatten\n", + "from keras.layers import Embedding, Conv1D, MaxPooling1D, GlobalMaxPooling1D\n", + "from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout, Bidirectional\n", + "#from keras.utils import plot_model\n", + "from keras.preprocessing import sequence\n", + "from keras.preprocessing.text import Tokenizer\n", + "from keras.callbacks import EarlyStopping\n", + "\n", + "import string\n", + "import tensorflow as tf\n", + "#from zeugma import TextsToSequences\n", + "import pandas as pd\n", + "import numpy as np\n", + "from sklearn import 
preprocessing\n", + "from sklearn.metrics import classification_report\n", + "\n", + "\n", + "\n", + "from tqdm import tqdm\n", + "import requests, zipfile, io\n", + "import os, re, csv, math, codecs" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "[nltk_data] Downloading package stopwords to /root/nltk_data...\n", + "[nltk_data] Unzipping corpora/stopwords.zip.\n", + "[nltk_data] Downloading package punkt to /root/nltk_data...\n", + "[nltk_data] Unzipping tokenizers/punkt.zip.\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "xrekV6W978l4" + }, + "source": [ + "## Utils functions" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "4LJ5blQR7PUe" + }, + "source": [ + "\n", + "def resample_classes(df, classColumnName, numberOfInstances):\n", + " #random numberOfInstances elements\n", + " replace = False # with replacement\n", + " fn = lambda obj: obj.loc[np.random.choice(obj.index, numberOfInstances if len(obj) > numberOfInstances else len(obj), replace),:]\n", + " return df.groupby(classColumnName, as_index=False).apply(fn)\n", + " \n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "-Rh3JMDh7zYd" + }, + "source": [ + "" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MtLr35eM753e" + }, + "source": [ + "## Load Data" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "FnbNT4NF7zal" + }, + "source": [ + "!wget https://projet.liris.cnrs.fr/geode/EDdA-Classification/datasets/training_set.tsv\n", + "!wget https://projet.liris.cnrs.fr/geode/EDdA-Classification/datasets/validation_set.tsv\n", + "!wget https://projet.liris.cnrs.fr/geode/EDdA-Classification/datasets/test_set.tsv" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "WNqDms64lfaS" + }, + "source": [ + "# download FastText\n", + 
# Encode the string class labels of both splits as integer ids.
y_train = df_train[columnClass]
y_validation = df_validation[columnClass]
numberOfClasses = y_train.nunique()

encoder = preprocessing.LabelEncoder()

y_train = encoder.fit_transform(y_train)
# BUG FIX: the original called fit_transform on the validation labels too,
# re-fitting the encoder on a different label set. If the validation split
# is missing any class, every subsequent id silently stops matching the
# training ids. Fit once on train, then only transform.
y_validation = encoder.transform(y_validation)
<td>177</td>\n", + " <td>MANIER</td>\n", + " <td>Grammaire</td>\n", + " <td>Gramm.</td>\n", + " <td>unsigned</td>\n", + " <td>v10-112-0</td>\n", + " <td>grammaire</td>\n", + " <td>Grammaire</td>\n", + " <td>\\nMANIER, v. act. (Gramm.) c'est ou toucher de...</td>\n", + " <td>\\nMANIER, v. act. () c'est ou toucher de\\nla m...</td>\n", + " <td>\\nMANIER, v. act. () c'est ou toucher de\\nla m...</td>\n", + " <td>109</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>7</td>\n", + " <td>1357</td>\n", + " <td>GALAIQUE, galaïcos</td>\n", + " <td>Histoire naturelle</td>\n", + " <td>Hist. nat.</td>\n", + " <td>d'Holbach5</td>\n", + " <td>v7-606-0</td>\n", + " <td>histoirenaturelle</td>\n", + " <td>Histoire naturelle</td>\n", + " <td>\\nGALAIQUE, galaïcos, s. f. (Hist. nat.) nom d...</td>\n", + " <td>\\nGALAIQUE, galaïcos, s. f. () nom donné \\npar...</td>\n", + " <td>\\nGALAIQUE, galaïcos, s. f. () nom donné \\npar...</td>\n", + " <td>33</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>3</td>\n", + " <td>3198</td>\n", + " <td>Commis ambulant</td>\n", + " <td>unclassified</td>\n", + " <td>unclassified</td>\n", + " <td>unsigned</td>\n", + " <td>v3-1623-2</td>\n", + " <td>commerce</td>\n", + " <td>Commerce</td>\n", + " <td>\\nCommis ambulant, est un commis dont l'emploi...</td>\n", + " <td>\\nCommis ambulant, est un commis dont l'emploi...</td>\n", + " <td>\\nCommis ambulant, est un commis dont l'emploi...</td>\n", + " <td>43</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>17</td>\n", + " <td>3047</td>\n", + " <td>ZURMENTUM</td>\n", + " <td>Géographie ancienne</td>\n", + " <td>Géog. anc.</td>\n", + " <td>Jaucourt</td>\n", + " <td>v17-2047-0</td>\n", + " <td>géographie</td>\n", + " <td>Géographie</td>\n", + " <td>\\nZURMENTUM, (Géog. anc.) ville de l'Afrique\\n...</td>\n", + " <td>\\nZURMENTUM, () ville de l'Afrique\\npropre. Pt...</td>\n", + " <td>\\nZURMENTUM, () ville de l'Afrique\\npropre. 
Pt...</td>\n", + " <td>27</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " volume numero ... firstParagraph nb_word\n", + "0 3 3723 ... \\nCondition de Droit ou légale, est celle que\\... 72\n", + "1 10 177 ... \\nMANIER, v. act. () c'est ou toucher de\\nla m... 109\n", + "2 7 1357 ... \\nGALAIQUE, galaïcos, s. f. () nom donné \\npar... 33\n", + "3 3 3198 ... \\nCommis ambulant, est un commis dont l'emploi... 43\n", + "4 17 3047 ... \\nZURMENTUM, () ville de l'Afrique\\npropre. Pt... 27\n", + "\n", + "[5 rows x 13 columns]" + ] + }, + "metadata": {}, + "execution_count": 60 + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "HuUVfklf-dSR" + }, + "source": [ + "## Training models" + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "NTNh6kMTp_eU", + "outputId": "1ff499d7-a98e-47f9-815e-cbb13b5f307f" + }, + "source": [ + "#https://github.com/emmanuellaanggi/disaster_tweet_sentiment/blob/master/(Medium)_Text_Classification_Disaster_Tweet_.ipynb\n", + "\n", + "raw_docs_train = df_train[columnText].tolist()\n", + "raw_docs_validation = df_validation[columnText].tolist() \n", + "\n", + "\n", + "print(\"pre-processing train data...\")\n", + "\n", + "stop_words = set(stopwords.words('french'))\n", + "\n", + "processed_docs_train = []\n", + "for doc in tqdm(raw_docs_train):\n", + " tokens = word_tokenize(doc, language='french')\n", + " filtered = [word for word in tokens if word not in stop_words]\n", + " processed_docs_train.append(\" \".join(filtered))\n", + "#end for\n", + "\n", + "processed_docs_validation = []\n", + "for doc in tqdm(raw_docs_validation):\n", + " tokens = word_tokenize(doc)\n", + " filtered = [word for word in tokens if word not in stop_words]\n", + " processed_docs_validation.append(\" \".join(filtered))\n", + "#end for\n", + "\n", + "print(\"tokenizing input data...\")\n", + "tokenizer = Tokenizer(num_words=max_len, 
lower=True, char_level=False)\n", + "tokenizer.fit_on_texts(processed_docs_train + processed_docs_validation) #leaky\n", + "word_seq_train = tokenizer.texts_to_sequences(processed_docs_train)\n", + "word_seq_validation = tokenizer.texts_to_sequences(processed_docs_validation)\n", + "word_index = tokenizer.word_index\n", + "print(\"dictionary size: \", len(word_index))\n", + "\n", + "#pad sequences\n", + "word_seq_train = sequence.pad_sequences(word_seq_train, maxlen=max_len)\n", + "word_seq_validation = sequence.pad_sequences(word_seq_validation, maxlen=max_len)" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "pre-processing train data...\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "100%|██████████| 30650/30650 [00:23<00:00, 1324.19it/s]\n", + "100%|██████████| 10947/10947 [00:08<00:00, 1355.66it/s]\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "tokenizing input data...\n", + "dictionary size: 115205\n" + ] + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Wj8RkOhT_e2c", + "outputId": "7f486466-bf76-4b82-ed32-56c31ae6dc2f" + }, + "source": [ + "word_seq_validation" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "array([[ 0, 0, 0, ..., 9, 64, 116],\n", + " [ 0, 0, 0, ..., 301, 57, 313],\n", + " [ 0, 0, 0, ..., 9, 285, 6],\n", + " ...,\n", + " [ 0, 0, 0, ..., 26, 142, 6],\n", + " [ 0, 0, 0, ..., 333, 198, 2],\n", + " [ 0, 0, 0, ..., 24, 335, 1]], dtype=int32)" + ] + }, + "metadata": {}, + "execution_count": 62 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "wGjQI0YgpQAS", + "outputId": "b2856dc6-793f-491e-8a90-bd5553f71933" + }, + "source": [ + "#embedding matrix\n", + "\n", + "print('preparing embedding matrix...')\n", + 
"\n", + "words_not_found = []\n", + "nb_words = min(max_len, len(word_index)+1)\n", + "embedding_matrix = np.zeros((nb_words, embedding_dim))\n", + "\n", + "for word, i in word_index.items():\n", + " if i >= nb_words:\n", + " continue\n", + " embedding_vector = embeddings_index.get(word)\n", + " if (embedding_vector is not None) and len(embedding_vector) > 0:\n", + " # words not found in embedding index will be all-zeros.\n", + " embedding_matrix[i] = embedding_vector\n", + " else:\n", + " words_not_found.append(word)\n", + "print('number of null word embeddings: %d' % np.sum(np.sum(embedding_matrix, axis=1) == 0))" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "preparing embedding matrix...\n", + "number of null word embeddings: 73\n" + ] + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "hjaeYIZCtGca", + "outputId": "3ce480ec-21fa-4a94-f21d-586fd44c51bf" + }, + "source": [ + "print(\"sample words not found: \", np.random.choice(words_not_found, 10))" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "sample words not found: ['ptolomée' \"l'amérique\" \"l'une\" \"qu'on\" \"lorsqu'il\" \"aujourd'hui\"\n", + " \"c'étoit\" \"qu'elle\" \"l'une\" 'lieues']\n" + ] + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "4O0gnsX8pNVU", + "outputId": "28807df5-3c6f-4b62-fe32-a8ae250ddb7b" + }, + "source": [ + "from keras.layers import BatchNormalization\n", + "import tensorflow as tf\n", + "\n", + "model = tf.keras.Sequential()\n", + "\n", + "model.add(Embedding(nb_words,embedding_dim,input_length=max_len, weights=[embedding_matrix],trainable=False))\n", + "#model.add(Bidirectional(LSTM(100)))\n", + "model.add(Conv1D(64,5,activation='relu'))\n", + "model.add(MaxPooling1D(pool_size=(max_len - 5 + 1)))\n", + 
"model.add(Flatten())\n", + "model.add(Dense(numberOfClasses,activation='softmax'))\n", + "model.summary()" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Model: \"sequential_2\"\n", + "_________________________________________________________________\n", + " Layer (type) Output Shape Param # \n", + "=================================================================\n", + " embedding_2 (Embedding) (None, 512, 300) 153600 \n", + " \n", + " conv1d_2 (Conv1D) (None, 508, 64) 96064 \n", + " \n", + " max_pooling1d_2 (MaxPooling (None, 1, 64) 0 \n", + " 1D) \n", + " \n", + " flatten_2 (Flatten) (None, 64) 0 \n", + " \n", + " dense_2 (Dense) (None, 38) 2470 \n", + " \n", + "=================================================================\n", + "Total params: 252,134\n", + "Trainable params: 98,534\n", + "Non-trainable params: 153,600\n", + "_________________________________________________________________\n" + ] + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "GcfMJl8f-cBA" + }, + "source": [ + "\n", + "#model = NN_withEmbeddings(longueur_dict, embedding_dim, max_len, numberOfClasses)\n", + "\n", + "model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])\n", + "#model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=[tf.keras.metrics.AUC(multi_label=True)])" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "OTQTH5VDuA3I", + "outputId": "f01b4a29-6599-49b0-b1ed-52d241a68b19" + }, + "source": [ + "#model.fit(padded, np.array(y_train), epochs=epochs, batch_size = batch_size) \n", + "model.fit(word_seq_train, y_train, batch_size=256, epochs=epochs, validation_data=(word_seq_validation, y_validation), shuffle=True)" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ 
+ "Epoch 1/20\n", + "120/120 [==============================] - 184s 2s/step - loss: 2.4656 - accuracy: 0.3793 - val_loss: 2.1042 - val_accuracy: 0.4652\n", + "Epoch 2/20\n", + "120/120 [==============================] - 183s 2s/step - loss: 1.9110 - accuracy: 0.5068 - val_loss: 1.8333 - val_accuracy: 0.5262\n", + "Epoch 3/20\n", + "120/120 [==============================] - 184s 2s/step - loss: 1.6637 - accuracy: 0.5682 - val_loss: 1.6986 - val_accuracy: 0.5556\n", + "Epoch 4/20\n", + "120/120 [==============================] - 184s 2s/step - loss: 1.5183 - accuracy: 0.6033 - val_loss: 1.6377 - val_accuracy: 0.5657\n", + "Epoch 5/20\n", + "120/120 [==============================] - 184s 2s/step - loss: 1.4169 - accuracy: 0.6247 - val_loss: 1.5928 - val_accuracy: 0.5782\n", + "Epoch 6/20\n", + "120/120 [==============================] - 184s 2s/step - loss: 1.3342 - accuracy: 0.6436 - val_loss: 1.5676 - val_accuracy: 0.5847\n", + "Epoch 7/20\n", + "120/120 [==============================] - 184s 2s/step - loss: 1.2657 - accuracy: 0.6595 - val_loss: 1.5651 - val_accuracy: 0.5860\n", + "Epoch 8/20\n", + "120/120 [==============================] - 183s 2s/step - loss: 1.2061 - accuracy: 0.6747 - val_loss: 1.5505 - val_accuracy: 0.5917\n", + "Epoch 9/20\n", + "120/120 [==============================] - 183s 2s/step - loss: 1.1518 - accuracy: 0.6897 - val_loss: 1.5586 - val_accuracy: 0.5873\n", + "Epoch 10/20\n", + "120/120 [==============================] - 183s 2s/step - loss: 1.1022 - accuracy: 0.7027 - val_loss: 1.5791 - val_accuracy: 0.5850\n", + "Epoch 11/20\n", + "120/120 [==============================] - 183s 2s/step - loss: 1.0543 - accuracy: 0.7150 - val_loss: 1.5675 - val_accuracy: 0.5873\n", + "Epoch 12/20\n", + "120/120 [==============================] - 184s 2s/step - loss: 1.0111 - accuracy: 0.7260 - val_loss: 1.5801 - val_accuracy: 0.5852\n", + "Epoch 13/20\n", + "120/120 [==============================] - 184s 2s/step - loss: 0.9718 - accuracy: 0.7358 
- val_loss: 1.5925 - val_accuracy: 0.5855\n", + "Epoch 14/20\n", + "120/120 [==============================] - 183s 2s/step - loss: 0.9371 - accuracy: 0.7463 - val_loss: 1.5984 - val_accuracy: 0.5864\n", + "Epoch 15/20\n", + "120/120 [==============================] - 183s 2s/step - loss: 0.9032 - accuracy: 0.7556 - val_loss: 1.6136 - val_accuracy: 0.5816\n", + "Epoch 16/20\n", + "120/120 [==============================] - 183s 2s/step - loss: 0.8684 - accuracy: 0.7655 - val_loss: 1.6376 - val_accuracy: 0.5775\n", + "Epoch 17/20\n", + "120/120 [==============================] - 184s 2s/step - loss: 0.8394 - accuracy: 0.7744 - val_loss: 1.6575 - val_accuracy: 0.5781\n", + "Epoch 18/20\n", + "120/120 [==============================] - 184s 2s/step - loss: 0.8105 - accuracy: 0.7831 - val_loss: 1.6596 - val_accuracy: 0.5779\n", + "Epoch 19/20\n", + "120/120 [==============================] - 183s 2s/step - loss: 0.7826 - accuracy: 0.7910 - val_loss: 1.6774 - val_accuracy: 0.5741\n", + "Epoch 20/20\n", + "120/120 [==============================] - 184s 2s/step - loss: 0.7560 - accuracy: 0.7996 - val_loss: 1.6946 - val_accuracy: 0.5727\n" + ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "<keras.callbacks.History at 0x7f6ca8a6d890>" + ] + }, + "metadata": {}, + "execution_count": 67 + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Uw6YR76p_AF0" + }, + "source": [ + "## Saving models" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "ykTp9lyRaAma" + }, + "source": [ + "model.save(\"drive/MyDrive/Classification-EDdA/cnn_fasttext_s\"+str(maxOfInstancePerClass)+\".h5\")\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "5J4xDoqRUSfS" + }, + "source": [ + "# save embeddings\n", + "\n", + "# saving embeddings index \n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "HHlEtipG_Cp0" + }, + "source": [ + 
"## Loading models" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "fKt8ft1t_Cxx" + }, + "source": [ + "model = keras.models.load_model(\"drive/MyDrive/Classification-EDdA/cnn_fasttext_s\"+str(maxOfInstancePerClass)+\".h5\")\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "zbS4poso-3k7" + }, + "source": [ + "## Evaluation" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "G9pjdMdNW_KS" + }, + "source": [ + "predictions = model.predict(word_seq_validation)\n", + "predictions = np.argmax(predictions,axis=1)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "IHpVJ79IW_M0", + "outputId": "2e1657b3-04d1-42f1-ea8b-9bbcd4744108" + }, + "source": [ + "report = classification_report(predictions, y_validation, output_dict = True)\n", + "\n", + "accuracy = report['accuracy']\n", + "weighted_avg = report['weighted avg']\n", + "\n", + "print(accuracy, weighted_avg)" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "0.5726683109527725 {'precision': 0.6118028288513718, 'recall': 0.5726683109527725, 'f1-score': 0.5870482221489528, 'support': 10947}\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/local/lib/python3.7/dist-packages/sklearn/metrics/_classification.py:1308: UndefinedMetricWarning: Recall and F-score are ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.\n", + " _warn_prf(average, modifier, msg_start, len(result))\n", + "/usr/local/lib/python3.7/dist-packages/sklearn/metrics/_classification.py:1308: UndefinedMetricWarning: Recall and F-score are ill-defined and being set to 0.0 in labels with no true samples. 
# Load the held-out test split and encode its labels.
df_test = pd.read_csv(test_path, sep="\t")

# NOTE(review): this re-fits a fresh LabelEncoder on the test labels; the
# ids only match training because LabelEncoder sorts classes and the test
# split happens to contain every class — prefer reusing the train encoder.
encoder = preprocessing.LabelEncoder()
y_test = encoder.fit_transform(df_test[columnClass])

raw_docs_test = df_test[columnText].tolist()

print("pre-processing test data...")

stop_words = set(stopwords.words('french'))

processed_docs_test = []
for doc in tqdm(raw_docs_test):
    tokens = word_tokenize(doc, language='french')
    filtered = [word for word in tokens if word not in stop_words]
    processed_docs_test.append(" ".join(filtered))

print("tokenizing input data...")
# Reuse the tokenizer fitted on train+validation so token ids stay aligned.
word_seq_test = tokenizer.texts_to_sequences(processed_docs_test)

# pad sequences
word_seq_test = sequence.pad_sequences(word_seq_test, maxlen=max_len)

predictions = model.predict(word_seq_test)
predictions = np.argmax(predictions, axis=1)

# BUG FIX: classification_report expects (y_true, y_pred); the original
# passed the predictions first, swapping precision and recall.
report = classification_report(y_test, predictions, output_dict=True)

accuracy = report['accuracy']
weighted_avg = report['weighted avg']

print(accuracy, weighted_avg)
from sklearn.metrics import confusion_matrix

# Build a per-class report: one row per class with the sklearn metrics plus
# FP/FN/TP/TN counts derived from the confusion matrix.
classesName = encoder.classes_
classes = [str(e) for e in encoder.transform(encoder.classes_)]

# Pull the per-class metrics out of the classification_report dict.
precision = [report[c]['precision'] for c in classes]
recall = [report[c]['recall'] for c in classes]
f1 = [report[c]['f1-score'] for c in classes]
support = [report[c]['support'] for c in classes]

accuracy = report['accuracy']
weighted_avg = report['weighted avg']

# Column sums give predicted counts, row sums give true counts, the diagonal
# the hits; everything else follows.
cnf_matrix = confusion_matrix(y_test, predictions)
TP = np.diag(cnf_matrix)
FP = cnf_matrix.sum(axis=0) - TP
FN = cnf_matrix.sum(axis=1) - TP
TN = cnf_matrix.sum() - (FP + FN + TP)

dff = pd.DataFrame({
    'className': classesName,
    'precision': precision,
    'recall': recall,
    'f1-score': f1,
    'support': support,
    'FP': FP,
    'FN': FN,
    'TP': TP,
    'TN': TN,
})

print("test_cnn_s" + str(maxOfInstancePerClass))

print(weighted_avg)
print(accuracy)
print(dff)

dff.to_csv("drive/MyDrive/Classification-EDdA/report_test_cnn_s" + str(maxOfInstancePerClass) + ".csv", index=False)
0 13110\n", + "34 Pêche 0.170213 ... 8 13069\n", + "35 Religion 0.326371 ... 125 12488\n", + "36 Spectacle 0.000000 ... 0 13121\n", + "37 Superstition 0.000000 ... 0 13112\n", + "\n", + "[38 rows x 9 columns]\n" + ] + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "BqJ1_hUUqqx5" + }, + "source": [ + "" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "bhfuGNwIqrOQ" + }, + "source": [ + "" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "NkL3MopyqrQk" + }, + "source": [ + "" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "XLHl-pvzqjjI" + }, + "source": [ + "" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "lLR_Xvi9qjlo" + }, + "source": [ + "" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "8cGcLOFTqjoP" + }, + "source": [ + "" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "vLGTnit_W_V8" + }, + "source": [ + "" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "R-3lBXjDD9wE" + }, + "source": [ + "def predict(data, max_len):\n", + " \n", + " pad_sequ_test, _ = prepare_sequence(data, max_len)\n", + " pred_labels_ = model.predict(pad_sequ_test)\n", + "\n", + " return np.argmax(pred_labels_,axis=1)\n", + "\n", + "\n", + "def eval(data, labels, max_len):\n", + " \n", + " pred_labels_ = predict(data, max_len)\n", + " report = classification_report(pred_labels_, labels, output_dict = True)\n", + "\n", + " accuracy = report['accuracy']\n", + " weighted_avg = report['weighted avg']\n", + " \n", + " print(accuracy, weighted_avg)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": 
"https://localhost:8080/" + }, + "id": "6T3kAvKvExgc", + "outputId": "c6d4560e-fc64-4579-9adb-79c2e36d2386" + }, + "source": [ + "# evaluation sur le jeu de validation\n", + "eval(df_validation[columnText], y_validation, max_len)" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/local/lib/python3.7/dist-packages/zeugma/keras_transformers.py:33: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray\n", + " return np.array(self.texts_to_sequences(texts))\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "0.06925290207361841 {'precision': 0.09108131158125257, 'recall': 0.06925290207361841, 'f1-score': 0.06099084715237025, 'support': 10079}\n" + ] + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "pTDJA03_-8yu", + "outputId": "d8bcdf73-c4c3-4c88-b063-90bd1cad5122" + }, + "source": [ + "# evaluation sur le jeu de test\n", + "df_test = pd.read_csv(test_path, sep=\"\\t\")\n", + "#df_test = resample_classes(df_test, columnClass, maxOfInstancePerClass)\n", + "\n", + "y_test = df_test[columnClass]\n", + "encoder = preprocessing.LabelEncoder()\n", + "y_test = encoder.fit_transform(y_test)\n", + "\n", + "eval(df_test[columnText], y_test, max_len)\n" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/local/lib/python3.7/dist-packages/zeugma/keras_transformers.py:33: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. 
If you meant to do this, you must specify 'dtype=object' when creating the ndarray\n", + " return np.array(self.texts_to_sequences(texts))\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "0.07231483595950369 {'precision': 0.081194635559303, 'recall': 0.07231483595950369, 'f1-score': 0.06322383877903374, 'support': 13137}\n" + ] + } + ] + } + ] +} \ No newline at end of file diff --git a/notebooks/EDdA_Classification_Generate_ConfusionMatrix.ipynb b/notebooks/EDdA_Classification_Generate_ConfusionMatrix.ipynb new file mode 100644 index 0000000..14a33f7 --- /dev/null +++ b/notebooks/EDdA_Classification_Generate_ConfusionMatrix.ipynb @@ -0,0 +1,1181 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "name": "EDdA-Classification_Generate_ConfusionMatrix.ipynb", + "provenance": [], + "collapsed_sections": [] + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + } + }, + "cells": [ + { + "cell_type": "code", + "metadata": { + "id": "F-x2Ei_TdhSs" + }, + "source": [ + "train_path = 'training_set.tsv'\n", + "validation_path = 'validation_set.tsv'\n", + "test_path = 'test_set.tsv'\n", + "\n", + "columnText = 'contentWithoutClass'\n", + "columnClass = 'ensemble_domaine_enccre'\n", + "\n", + "minOfInstancePerClass = 0\n", + "maxOfInstancePerClass = 10000" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "En632UWohZBW" + }, + "source": [ + "## Setup colab environment" + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "WoNGyMbFdsh1", + "outputId": "c5542219-0412-4e16-9779-122d5f99a1e2" + }, + "source": [ + "from google.colab import drive\n", + "drive.mount('/content/drive')" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Mounted at /content/drive\n" + ] + } + ] + }, + { 
+ "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "1LXBuRs9kOOc", + "outputId": "1f5fe407-4a46-4b96-8124-1a0c334616df" + }, + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "import pickle\n", + "import matplotlib.pyplot as plt\n", + "from sklearn.metrics import plot_confusion_matrix\n", + "\n", + "from nltk.stem.snowball import SnowballStemmer\n", + "from nltk.corpus import stopwords\n", + "from nltk.tokenize import word_tokenize\n", + "import nltk\n", + "nltk.download('stopwords')\n", + "nltk.download('punkt')" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "[nltk_data] Downloading package stopwords to /root/nltk_data...\n", + "[nltk_data] Unzipping corpora/stopwords.zip.\n", + "[nltk_data] Downloading package punkt to /root/nltk_data...\n", + "[nltk_data] Unzipping tokenizers/punkt.zip.\n" + ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "True" + ] + }, + "metadata": {}, + "execution_count": 4 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "FNPXtQ19kbco" + }, + "source": [ + "def resample_classes(df, classColumnName, numberOfInstances):\n", + " #random numberOfInstances elements\n", + " replace = False # with replacement\n", + " fn = lambda obj: obj.loc[np.random.choice(obj.index, numberOfInstances if len(obj) > numberOfInstances else len(obj), replace),:]\n", + " return df.groupby(classColumnName, as_index=False).apply(fn)\n", + " " + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "jHyc3VeFhrxs" + }, + "source": [ + "## Load data" + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "8-1HRF3Vhr3y", + "outputId": "bd5f5881-363f-41a9-ade7-33bbd1158adb" + }, + "source": [ + "!wget 
https://projet.liris.cnrs.fr/geode/EDdA-Classification/datasets/training_set.tsv\n", + "!wget https://projet.liris.cnrs.fr/geode/EDdA-Classification/datasets/validation_set.tsv\n", + "!wget https://projet.liris.cnrs.fr/geode/EDdA-Classification/datasets/test_set.tsv" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "--2021-11-26 08:17:56-- https://projet.liris.cnrs.fr/geode/EDdA-Classification/datasets/training_set.tsv\n", + "Resolving projet.liris.cnrs.fr (projet.liris.cnrs.fr)... 134.214.142.28\n", + "Connecting to projet.liris.cnrs.fr (projet.liris.cnrs.fr)|134.214.142.28|:443... connected.\n", + "HTTP request sent, awaiting response... 200 OK\n", + "Length: 189925180 (181M) [text/tab-separated-values]\n", + "Saving to: ‘training_set.tsv’\n", + "\n", + "training_set.tsv 100%[===================>] 181.13M 31.9MB/s in 6.3s \n", + "\n", + "2021-11-26 08:18:02 (28.9 MB/s) - ‘training_set.tsv’ saved [189925180/189925180]\n", + "\n", + "--2021-11-26 08:18:03-- https://projet.liris.cnrs.fr/geode/EDdA-Classification/datasets/validation_set.tsv\n", + "Resolving projet.liris.cnrs.fr (projet.liris.cnrs.fr)... 134.214.142.28\n", + "Connecting to projet.liris.cnrs.fr (projet.liris.cnrs.fr)|134.214.142.28|:443... connected.\n", + "HTTP request sent, awaiting response... 200 OK\n", + "Length: 67474385 (64M) [text/tab-separated-values]\n", + "Saving to: ‘validation_set.tsv’\n", + "\n", + "validation_set.tsv 100%[===================>] 64.35M 24.4MB/s in 2.6s \n", + "\n", + "2021-11-26 08:18:06 (24.4 MB/s) - ‘validation_set.tsv’ saved [67474385/67474385]\n", + "\n", + "--2021-11-26 08:18:06-- https://projet.liris.cnrs.fr/geode/EDdA-Classification/datasets/test_set.tsv\n", + "Resolving projet.liris.cnrs.fr (projet.liris.cnrs.fr)... 134.214.142.28\n", + "Connecting to projet.liris.cnrs.fr (projet.liris.cnrs.fr)|134.214.142.28|:443... connected.\n", + "HTTP request sent, awaiting response... 
200 OK\n", + "Length: 79961640 (76M) [text/tab-separated-values]\n", + "Saving to: ‘test_set.tsv’\n", + "\n", + "test_set.tsv 100%[===================>] 76.26M 25.5MB/s in 3.0s \n", + "\n", + "2021-11-26 08:18:09 (25.5 MB/s) - ‘test_set.tsv’ saved [79961640/79961640]\n", + "\n" + ] + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "P_Psa_NhhyAA" + }, + "source": [ + "\n", + "df_test = pd.read_csv(test_path, sep=\"\\t\")\n", + "df_test = resample_classes(df_test, columnClass, maxOfInstancePerClass)\n", + "#df_test.dropna(subset = ['content', 'contentWithoutClass', 'firstParagraph', 'ensemble_domaine_enccre', 'domaine_enccre', 'normClass'], inplace=True)\n", + "\n", + "\n", + "data_eval = df_test[columnText].tolist()\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "AfsjFx1L_ddl" + }, + "source": [ + "" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "iPQmgaSw_dnw" + }, + "source": [ + "## Test sur l'ensemble du corpus" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "I-BT_jRs74tI" + }, + "source": [ + "!wget https://projet.liris.cnrs.fr/geode/EDdA-Classification/datasets/EDdA_dataframe_withContent.tsv" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "0NrbzDu66-k3" + }, + "source": [ + "\n", + "## test sortie pour Katie avec la classification de tous les articles\n", + "df = pd.read_csv(\"EDdA_dataframe_withContent.tsv\", sep=\"\\t\")\n", + "df.dropna(subset = ['content', 'contentWithoutClass', 'firstParagraph', 'ensemble_domaine_enccre', 'domaine_enccre', 'normClass'], inplace=True)\n", + "\n", + "\n", + "data_eval = df[columnText].tolist()" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "k07oOrFyhPJ-" + }, + "source": [ + "## Load model" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": 
"SHCqMPk8iPZS" + }, + "source": [ + "classifier_name = \"sgd\" # sgd | lr | rfc | svm | bayes | bert-base-multilingual | camembert-base\n", + "vectorizer_name = \"tf_idf\" # bagofwords | tf_idf | doc2vec" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "oJ2xKgoVSQFC" + }, + "source": [ + "# récupérer les modèles depuis le serveur\n", + "\n", + "\n", + "# récupéréer les modèles depuis Google Drive\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "xI_4exathQdd" + }, + "source": [ + "if classifier_name in [\"sgd\", \"lr\", \"rfc\", \"svm\", \"bayes\"]:\n", + "\n", + " stop_words = set(stopwords.words('french'))\n", + " stemmer = SnowballStemmer('french').stem\n", + " def stem_tokenize(text):\n", + " return [stemmer(i) for i in word_tokenize(text) if not i in stop_words]\n", + "\n", + " vec_file_name = vectorizer_name + '_s' + str(maxOfInstancePerClass) +\".pkl\"\n", + " with open(\"drive/MyDrive/Classification-EDdA/\"+vec_file_name, 'rb') as file:\n", + " vectorizer = pickle.load(file)\n", + "\n", + " clf_file_name = classifier_name + '_' + vectorizer_name + '_s' + str(maxOfInstancePerClass) +\".pkl\"\n", + " with open(\"drive/MyDrive/Classification-EDdA/\"+clf_file_name, 'rb') as file:\n", + " clf = pickle.load(file)\n", + "\n", + " if vectorizer_name != 'doc2vec' :\n", + " vec_data = vectorizer.transform(data_eval)\n", + " else : \n", + " tagged_test = [TaggedDocument(words=tokenize_fr_text(_d), tags = [str(i)]) for i, _d in enumerate(data_eval)]\n", + " vec_data = np.array([vectorizer.infer_vector(tagged_test[i][0]) for i in range(len(tagged_test))])\n", + "\n", + "elif classifier_name in [\"bert-base-multilingual\", \"camembert-base\"]:\n", + "\n", + " clf_file_name = \"drive/MyDrive/Classification-EDdA/model_\"+classifier_name + '_s' + str(maxOfInstancePerClass) +\".pt\"\n", + "\n", + " model = torch.load(clf_file_name)\n", + "\n" + ], + 
"execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "jJjCGPTFjC78", + "outputId": "099e267e-8f5e-4c85-ef8e-b6bb60104c8d" + }, + "source": [ + "df_test[columnClass]" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "0 Commerce\n", + "1 NaN\n", + "2 Marine\n", + "3 Géographie\n", + "4 Histoire\n", + " ... \n", + "15849 Géographie\n", + "15850 NaN\n", + "15851 Arts et métiers\n", + "15852 Anatomie\n", + "15853 NaN\n", + "Name: ensemble_domaine_enccre, Length: 15854, dtype: object" + ] + }, + "metadata": {}, + "execution_count": 13 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 853 + }, + "id": "o2J8mU_djDsm", + "outputId": "aa2784b6-623d-4605-cdfb-93e2b6adb3c1" + }, + "source": [ + "plot_confusion_matrix(clf, vec_data, df_test[columnClass], normalize=\"true\", include_values=False, xticks_rotation=\"vertical\", cmap=plt.cm.Blues)\n", + "name = classifier_name + '_' +vectorizer_name + '_s' + str(maxOfInstancePerClass) +\".png\"\n", + "\n", + "print(name)\n", + "pathSave = \"drive/MyDrive/Classification-EDdA/\" + name\n", + "plt.rcParams[\"figure.figsize\"] = (10,10)\n", + "plt.rcParams[\"font.size\"] = 10\n", + "\n", + "plt.savefig(pathSave, bbox_inches='tight')" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/local/lib/python3.7/dist-packages/sklearn/utils/deprecation.py:87: FutureWarning: Function plot_confusion_matrix is deprecated; Function `plot_confusion_matrix` is deprecated in 1.0 and will be removed in 1.2. 
Use one of the class methods: ConfusionMatrixDisplay.from_predictions or ConfusionMatrixDisplay.from_estimator.\n", + " warnings.warn(msg, category=FutureWarning)\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "sgd_tf_idf_s10000.png\n" + ] + }, + { + "output_type": "display_data", + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAxYAAAL5CAYAAAAt27JEAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAgAElEQVR4nOzde7yd453//9c7O5GkEtQEdUgEpQ5BJDtOQRM07dRM0EYjVa3Shk5RbSkz/AzaDkbbIVQ1/ExoDYpqU1VxjEOKnI+KToXWUE1CS4pI9v58/7ivxW1l7cO9s9daOzvv5+OxHvu+r/tzX/d1r0OyrnWdFBGYmZmZmZmtix71LoCZmZmZma3/XLEwMzMzM7N15oqFmZmZmZmtM1cszMzMzMxsnbliYWZmZmZm68wVCzMzMzMzW2c9610AMzMzM7PurmGT7SPWvFWTa8Vby6ZFxCdqcrEcVyzMzMzMzKos1rxF7498pibXenv+DwfU5EJl3BXKzMzMzMzWmVsszMzMzMyqTqDu/Zt+9747MzMzMzOrCbdYmJmZmZlVmwCp3qWoKrdYmJmZmZnZOnOLhZmZmZlZLXiMhZmZmZmZWevcYmFmZmZmVgseY2FmZmZmZtY6t1iYmZmZmVWd17EwMzMzMzNrk1sszMzMzMxqwWMszMzMzMzMWucWCzMzMzOzahMeY2FmZmZmZtYWVyzMzMzMzGyduSuUmZmZmVnVyYO3zczMzMzM2uIWCzMzMzOzWvDgbTMzMzMzs9a5xcLMzMzMrBY8xsLMzMzMzKx1brEwMzMzM6s6eYyFmZmZmZlZW9xiYWZmZmZWbcJjLMzMzMzMzNriFgszMzMzs1rwGAszMzMzM7PWucXCzMzMzKzqPCuUmZmZmZlZm9xiYWZmZmZWCz08K5SZmZmZmVmrXLEwMzMzM7N15q5QZmZmZmbVJjx428zMzMzMrC1usTAzMzMzqwV58LaZmZmZmVmr3GJhZmZmZlZ1XiDPzMzMzMysTW6xMDMzMzOrBY+xMDMzMzMza51bLMzMzMzMasFjLMzMzMzMzFrnFgszMzMzs2qTPMbCzMzMzMysLW6xMDMzMzOrBY+xMDMzMzMza51bLMwMgIa+m0TPTbZqd/yQ7TatYmmsHqLK+RftWVykPNXutby6qdiz06uhe/ejtrY1RbH3TEMV+94X/WxvKO/eF154nuXLl9f2drv5GAtXLMwMgJ6bbMWHjv1Bu+NnfP+fq1gaq4co+EWoYDg9ehT7D3X1muZ2x/bqWd0G+GWvryoUv8UmvatUEltfvP7W6kLxm/TtVaWSQHNzsQ9r0c/q+mrkfo31LkK3465QdSbpKEkhadcOnn+RpMM7eO7zkgZI2kzSv3Qkj3WVyrBI0vz0mFSPclQiaRtJt9fwekMlfTK3P1bSObW6vpmZmdm6cItF/U0AHkt//73IiZIaIuL8TijDZsC/AFd34PpNnXD90RGxvBPy6VQR8RIwriPnShKgiGj/T64wFGgE7k7XnwpM7cj1zczMrKuRB29b9UjqBxwEnAQcm0vvIelqSU9Luk/S3ZLGpWPPS7pU0lzgGElTcsdGSPqtpAWSZkrqL+kESVfl8r5L0qiyolwC7JRaDC6TNErSXblzrpJ0QgvXHyPp
cUlzJd2W7qkznpsPS7o/3ctcSTspc5mkxamVY3yKHSVpuqTb03N2U/pij6TDJM1L8ddL6p27j4vTPc+WNEzSNEl/kHRKihksaXHabkjXniVpoaSTK5R5sKRnJN0ILAYGSlqZOz5O0pS0fUy6jwWSHpG0EXARMD6VaXz+tZO0Q3qeF0n6TinfNl6r4ZIeljQn3dvWnfHamJmZmVXiikV9HQncExHPAiskDU/pnwIGA7sDxwMHlJ23IiKGRcQtpYT0xfRW4GsRsTdwOPBWO8txDvCHiBgaEWe1I35FRAwD7gfOAw5P+7OBb7TzmnkP5bpCfT2l3QT8MN3LgcDLZM/LUKB0f5flvizvA5xB9pztCIyU1AeYAoyPiD3JWui+krvuHyNiKPBoihsH7A9cWKGMJwF/i4gRwAjgy5J2qBC3M3B1ROwRES+0cs/nAx9P9zc2It5Jabem1+HWsvgrgB+l+3i5lXwBkNQLuBIYFxHDgeuB77Z1npmZmVVRaZG8aj/qxF2h6msC2RdGgFvS/hyyVozbUjeaP0t6qOy88i+dAB8BXo6IWQAR8TqAqvPmKl1/f7Iv8jPSdTYCHu9Afu/rCiWpP7BtRNwJEBFvp/SDgJtT96tXJD1M9iX/dWBmRLyY4uaTVczeAJamihvADcBXgcvTfqmb0SKgX0S8AbwhaZWkzcrKOAbYq9Q6BGxKVolYWhb3QkQ80Y57ngFMkfQz4OftiB8JfDpt/wS4tI34jwBDgPvSa9NAhQqJpInARICG/lu0oxhmZmZmlbliUSeSNgcOBfaUFGRf/EJSe1oM/l7gUmt4f8tUn044p3R9AfdFxISWMpI0EPhV2r0mIq5px/U7Ij9lSxPte2+XzmkuO7+5wvkCTouIaW3kWf7a5KfiePd5jIhTJO0HHAHMybVWtabStB4tvVYClkREeWvX+zOMmAxMBui91c7Vnm3UzMxswyU8xsKqZhzwk4jYPiIGR8RAsl+/Dyb7NfvTysZabAWMakd+zwBbSxoB2a/+knoCzwNDU14DgX0rnPsG0D+3/wKwu6Te6Zf7w1q45hNkXY4+nK65saRd8gER8afUtWdoeysVqeXgRUlHpXx7S/oAWZel8Wm8wxbAIcDMVrJ6BhhcKh9Zt7KH21OGCqYBX0ldjJC0i6SN23HeK5J2k9QDOLqUKGmniHgyDb5fBgxk7dchbwbvjcM5Lpfe0mv1DLCFpAPS9XpJ2qNdd2pmZmbWAW6xqJ8JrN2d5Y6U/lWyL4hPAX8C5gJ/ay2ziHgnDWa+UlJfsvEVh5N9IV2a8vpdyqv83BWSZqSByr+JiLNSF53F6dx5LVxzWRoofHNpUDTZmItnK8W34iFJpdmlFkbE58kqAT+WdBGwGjgGuJNsvMkCsl/vvxURf1YLU/VGxNuSvgjclipZs4COtphcR9a9am4aGL4MOKod550D3JXiZwOlwe2XSdqZ7PeLB9I9/RE4J3Xlurgsn68B/yPpbOCXpcSI+FOl1yq9H8YBkyRtSvZZvxxYUvC+zczMrFN0/1mhVHRBJKsNSf0iYqWkfyD7VX5kRPy53uWyrkHSyojolBm4SnpvtXMUWSDvGS+Q1+14gbyWeYE8K8oL5HV9I/drZM6c2TW72R6bbR+9Dz67Jtd6+66vzomImq8A6BaLruuu1LVlI+DbrlRYtQ3ZbtNCq2l/8Ijvtzv21buKTRZWpUkHNkjV/PGo2l8+GgrkX+0vTv/Qb6NC8UUUfY2Kfj6qnf+G4u3VxZZt6te763zF8kvahXTzF6PrvOvtfSJiVL3LYF1XZ7dWmJmZma2r7t3Ry6xKJB0lKVoa39HOPE6QtE0nlWcbSbd3Rl5mZmZWJepRm0eduGJh1jETgMfS3446AeiUikVEvBQR49qONDMzM6sOVyzMCpLUj2wRw5NIU8BKGiVpuqTbJT0t6aY0exSSzpc0S9JiSZOVGQc0AjelFcf7SjpM0jxJiyRdX5ppS9Lzki5OcbMlDZM0TdIf
JJ2SYganWb1I0/Felq65UNLJdXiazMzMrFw3X3nbFQuz4o4E7kkreq/ILW63D3AG2WrkO5Ktlg1wVUSMiIghQF/gnyLidrLpZ4+LiKFk0+dOAcZHxJ5k45++krvmH1PcoyluHNnK5xdWKN9JwN8iYgTZyuRflrRDp9y5mZmZWQtcsTArbgJwS9q+hfe6Q82MiBcjohmYT7buBcBoSU9KWkS22nqlheo+AixNlRWAG8gWACyZmv4uAp6MiDciYhmwKs0eljcG+HxaD+NJ4B+AnSvdiKSJqRVk9rLly9q8cTMzM+sgqduPsfCsUGYFSNqcrHKwp6QAGshaG34N5Ce6bwJ6SuoDXA00psXsLgD6dODSpbyby67TzNqfYwGnRcS0tjKNiMnAZIDhwxu9qI2ZmZl1mFsszIoZB/wkIraPiMERMZBsxeuDW4gvVSKWp7EZ+QHWbwD90/YzwGBJH077xwMPd7CM04CvSOoFIGkXSRt3MC8zMzPrLB5jYWY5E4A7y9LuoIXZoSLir8C1wGKyL/yzcoenANekLksCvgjclrpMNQPXdLCM1wFPAXPTgO4f49ZJMzMzqzJ/2TArICJGV0ibBEwqSzs1t30ecF6F8+4gq5SUPEA2ALw8bnBuewpZhaT82HJgSEprBv4tPczMzMxqwhULMzMzM7MaUB27KdWCKxZm1iGv/fqb7Y7d8vgbC+X9yo3HF4rv7v9Qr4v1+bnp0aP9ZX9z1ZpCeTc1F5uroH/fXoXii6j2a7Q+vwe6kj69GupdBLMuz2MszLopST0lnVpaaM/MzMzqR2QV/Vo86sUVC7N1IOkoSSFp13bEniHpA7n9uyusQdHe6zZKmpS2R0k6sOy4gMuBhRGxqlIeZmZmZp3JFQuzdTMBeIwWZoUqcwbwbsUiIj6ZZo0qLCJmR8TpaXcUcGDZ8YiIUyPikY7kb2ZmZp1MNXzUiSsWZh2U1qU4CDgJODaljZI0XdLtkp6WdJMypwPbAA9JeijFPi9pQNo+V9Kzkh6TdLOkM1P6dEmNaXuApOdz17lL0mDgFODrkuZLOljSFpLukDQrPUbW8nkxMzOzDZMHb5t13JHAPRHxrKQVkoan9H2APYCXgBnAyIiYJOkbwOiIWJ7PJJ13LDCU7DM5F5jTngJExPOSrgFWRsT3Un7/A/xXRDwmaRDZ+hm7revNmpmZ2bqo7/iHWnDFwqzjJgBXpO1b0v5dwMyIeBEgLX43mKy7VEsOBu6MiDfTOVPXsVyHA7vn/vHaRFK/iFhZHihpIjARYOCgQet4WTMzM9uQuWJh1gGSNgcOBfaUFEADEMCvgfxg6SbW7XO2hve6LPZp5zk9gP0j4u22AiNiMjAZYPjwxmLzb5qZmVkh3b3FwmMszDpmHPCTiNg+IgZHxEBgKVnrQ0veAPpXSH8EOEpSX0n9gX/OHXseKHWxGtfOfO8FTivtSBra2o2YmZmZdQZXLMw6ZgJwZ1naHbQ+O9Rk4J7S4O2SiJgL3AosAH4DzMod/h7wFUnzgAEt5Psr4OjS4G3gdKBR0kJJT5EN7jYzM7M66+7rWLgrlFkHRMToCmmTgEllaafmtq8ErsztD85tfxf4LoCkC3LpTwN75bI8L6VPB6an7WfLYgDGF7kfMzMzs3XlioWZAdkAkabm9g+zaOjR/l9EXrnx+EJl2fJzNxaKX3bTFwrFW+eIKDYsp+ivaEXy792roVDeBd6+673mAp9rgB4Fn5wir1N371+e9/pbqwvFb9K3V5VKAgU/qmxAL1PNdffPgCsWZl1MRFxQ7zKYmZmZFeWKhZmZmZlZtdV5Vexa8OBt2+BJOkpSSNq14Hnvrpxdlj5W0jm5vHfvYLk2k/QvHTnXzMzMrNZcsTDLZnJ6jAozOkkq3KoXEVMj4pK0exTQoYoFsBlQuGIhqVhnczMzM7NO4IqFbdAk9QMOAk4Cjk1poyQ9mlbAfkpSg6TvSVqcpnA9LZfFaZLmSlpUavGQdIKkqyQdCIwF
LktTwe6UHvdImpOuUTpnK0l3SlqQHgcClwA7pXMvS+W6K1f2qySdkLafl3SppLnAMZLGSHo8le22dJ9mZmZWJ6I2U816ulmz+jkSuCcinpW0QlJpMbphwJCIWCrpK8BgYGhErEmrbpcsj4hhqcvSmcCXSgci4repcnJXRNwOIOkB4JSI+L2k/YCryVbwngQ8HBFHpxaHfsA5qQxD07mj2riXFaksA4CfA4dHxN8lnQ18A7ioo0+SmZmZWVtcsbAN3QTgirR9S9q/C5gZEUtT+uHANRGxBiAiXs2d//P0dw7wqdYulFoNDgRuy/2a0Dv9PRT4fMq/CfibpA8WvJdb09/9ybpfzUjX2Qh4vIUyTQQmAgwcNKjg5czMzKwITzdr1k2llodDgT0lBdBAtpzDr4G/tzObVelvE21/nnoAfy21QHTAGt7ffbFP2fFSmQXcFxGtrQIOQERMJlsRnGHDGwvOdG5mZmb2Ho+xsA3ZOOAnEbF9RAyOiIHAUuDgsrj7gJNLA7nLukK15Q2gP0BEvA4slXRMykeS9k5xDwBfSekNkjbNn5u8AOwuqbekzYDDWrjmE8BISR9O+W0saZcCZTYzM7Mq6O5jLFyxsA3ZBODOsrQ7WHt2qOuAPwILJS0APlvgGrcAZ0maJ2kn4DjgpJTPErIxHgBfA0ZLWkTWrWr3iFhB1p1psaTLIuJPwM+AxenvvEoXjIhlwAnAzZIWknWDKjSVrpmZmVlR7gplG6yIGF0hbRLZQOp82hqywc/fKEsfnNueDYxK21OAKWl7BmtPN/uJCtd9hfcqGfn0z5btfwv4VoW4wWX7DwIjyuPMzMysfjzGwsw2CAIaerT/H7yI9g/JKPoP6bKbvlAo/oGnXykUf+hHtiwU393/Iyhpai42zKbIewCgZ0Ox57FIcYqWRT2KNdivaWouFN+zoet0COhR4HO9viv8Pijw2S6a95lTnyoUP3n83m0HdVC1B9BV83m39YsrFmZmZmZm1ab06Ma6zk8qZhs4SSMlHVLvcpiZmZl1hCsW1q1IOkpSlFa0biFms7SgXWdc7wRJ23RCPvsAXyS33oSkfyuL+e26XsfMzMzqx7NCma1fJgCPsfbMTgCkKWM3AzqlYkE2+9I6VywiYl5EfCkiVueS/60s5sD25pemsvXn28zMzGrGXzys20grWx8EnAQcm0sfJelRSVOBp4BLgJ0kzZd0maStJT2S9hdLKl/HAknDJT0saY6kaemccUAjcFM6t2/ZOdMl/Zek2ZJ+J2mEpJ9L+r2k7+TiPidpZsrjx2kdi0uAvintphS3MnfOWZJmSVoo6cKUNljSM5JuJJuSdqCkKemeFkn6eqc92WZmZlaIqE1rRT1bLDx427qTI4F7IuJZSSskDY+IOenYMGBIRCyVNDhtDwWQ9E1gWkR8V1ID8IF8ppJ6AVcCR0bEMknjge9GxImSTgXOTNPNVvJORDRK+hrwS2A48CrwB0n/BWwJjAdGRsRqSVcDx0XEOZJOrbRKt6QxwM7AvmTDwKYqG5vxx5T+hYh4QtJwYNuIGJLO26zwM2pmZmbWTq5YWHcyAbgibd+S9ksVi5kRsbSF82YB16cKxC8iYn7Z8Y8AQ4D70q8ADcDL7SzT1PR3EbAkIl4GkPQcMJCshWU4MCvl3Rf4Sxt5jkmP0gJ5/cgqFH8EXoiIJ1L6c8COkq4Efg3cW56RpInARICBgwa185bMzMysI+rZmlBO0ifIvjc1ANdFxCVlxwcBN5B1IW8AzomIu1vL0xUL6xYkbQ4cCuwpKcg+ACHprBTy95bOjYhH0i/+RwBTJP0gIm7MZ09WKTigA0Vblf4257ZL+z1T3jdExL8WyFPAxRHx4/clZi0x795nRLwmaW/g48ApwGeAE/PnRMRkYDLA8OGN1Z7q3MzMzLqA1EPjh8DHgBfJfuCcGhH5BVjOA34WET+StDtwNzC4tXw9xsK6i3HATyJi+4gYHBEDgaXAWuMlgDeA/qUdSdsD
r0TEtcB1ZN2m8p4BtpB0QIrvJWmPSnl1wAPAOElbprw3T+UBWJ1aUcpNA05MY0qQtG3p/DxJA4AeEXEH2T8O5fdlZmZmG6Z9gf+NiOci4h2ynh5HlsUEsEna3hR4qa1M3WJh3cUE4NKytDtS+q35xIhYIWmGpMXAb8gGOp8laTWwEvh8Wfw7aaD2JEmbkn1uLgeWAFOAayS9BRwQEW8VKXREPCXpPOBeZbM4rQa+CrxA1pKwUNLciDgud869knYDHk9NqiuBzwFNZdlvC/y33psdqkiriJmZmXW2rtMTalvgT7n9F4H9ymIuIPt+chqwMXB4W5m6YmHdQkSMrpA2Kbc7vezYZ8vCb2gj//nAWovXpdaAO1o4Z1Rue3q+DGXHbqWs8pPSzwbOzu33y21fwXvjSfKG5GIW4FYKMzOzDdEASfmJZSan7s9FTACmRMT3U6+Nn0gaEhHNLZ3gioWZdUgUGJERRYKB5oLxh+26VaH4gRPXqse16k+TxxeKX1819Cj2U1pzi/+1dI4ipelKAyI3NF3pua9mWQr+s8QlR+xWnYIkzc3tL1DRz3ZRXek90KWpps/V8ohobOX4/5FNIlOyXUrLOwn4BEBEPC6pDzCAViaZ8RgLMzMzM7MNyyxgZ0k7SNqIbP2vqWUxfwQOA0hdsPsAy1rL1BULsxZIakoL1C2QNFdSu1e+rhdJZ0j6QNuRZmZmVmtdZYG8iFgDnEo2IczvyGZ/WiLpIkljU9g3gS9LWgDcDJwQbXRBcFcos5a9lVtE7+PAxcBH61uklqWp484Afgq8WefimJmZWReW1qS4uyzt/Nz2U8DIInm6xcKsfTYBXivtSDpL0ixJCyVdmEv/haQ5kpakxedK6Stz2+MkTUnbv5T0+bR9sqSbKl28tXwlfT/9mnAusA3wkKSHJDVImiJpsaRFkr7eac+GmZmZFdZVWiyqxS0WZi3rK2k+WZ/CrckW4EPSGLKVrvclG186VdIhEfEIcGJEvCqpL9liM3dExIpWrjERmCFpKVmT4/4txLWU78bAkxHxzVS2E4HREbFc0nBg24gYko5ttk7PhpmZmVkrXLEwa1m+K9QBwI2ShgBj0mNeiutHVtF4BDhd0tEpfWBKb7FiERGvSDofeAg4OiJebSG0pXybaGG6W+A5YEdJVwK/Bu4tD0itHxMBBg4a1FIxzczMbB2J+rYm1IIrFmbtkKZZGwBsQdZKcXFE/DgfI2kU2eIxB0TEm5Kmk7V2QLZ6ZUkf3m9PskrCNimfgcCv0rFrgKdbyfftiChfGK9U5tck7Q18HDgF+AxwYlnMZLKF+Bg+vLHgZIpmZmZm7/EYC7N2kLQr0EBWAZgGnCipXzq2raQtyZa7fy19+d+V93drekXSbspWwT46l+++wD8C+wBnStohIv4UEUPT45o28i33BtA/5T0A6JEW8TsPL5ZnZmZWX6rRo07cYmHWstIYC8g+pl9IrQP3KpvP+fHUpLkS+BxwD3CKpN8BzwBP5PI6B7iLbP7n2UA/Sb2Ba4EvRsRLkr4JXC/p0LLp3FrLt9xk4B5JL5HNEPXfqTID8K8dexrMzMzM2uaKhVkLIqKhlWNXAFdUOPSPLcTfDtxe4dDeuZiprL04DRGxqpV8+5XtXwlcmUtyK4WZmVlXUNuVt+vCXaHMzMzMzGyducXCzDqkR4/2/+rypxXF1uvbbvO+RYtTyJ8mjy8UP2bSY+2Ovff0g4oWp5A2Fj1dy/r861hzgXtd3VT0eSlWlp4F3u8Azc3tL0/RWRMaCpal2oq8J7va+7Ho56mID27cq2p5Q7HPB83F8i7y73tHFPl8VLsstdbVPgOdzS0WZusBSdtIOr7e5TAzMzNriSsWts4kNUmaL2mBpLmSDmzHOSvT38GSFndCGUZJuquNmKGSPll2Tptl7UySTpC0LD1fT0n6cjvO2Qz4AfBAG3FjJZ3TWWU1MzMzK8Jdoawz5BeS+zhwMfDR+hapoqFAI3B32h9FNqPT
b8sDJfWMiDVVKsetEXFqmqJ2iaSpEfFKS8ER8Vfg2LYybWnwt5mZmXUN7gplVswmwGulHUlnSZolaaGkC1s7UVKDpMty8Sen9K0lPZJ+5V8s6eA28tlY0vWSZkqaJ+lISRsBFwHjUz5nky0a9/W0f7CkKZKukfQk8J+SdpJ0j6Q5kh5Na0gg6ZhUjgWSHunoExURfwH+AGwv6bBU1kWp7L3TtYZLejiVYZqkrVP66anFY6GkW1LaCZKuSttbSLojPZezJI3saDnNzMzM2sMtFtYZSus99AG2Bg4FkDQG2BnYl2wdiKmSDomIlr6MnwT8LSJGpC/WMyTdC3wKmBYR35XUAHygjfKcCzwYESembkQzgfuB84HGiDg1la8vsDIivpf2TwK2Aw6MiCZJDwCnRMTvJe0HXJ3u7Xzg4xHxfyn/DpG0I7Aj8CLwJHBYRDwr6UbgK5J+SDZ17JERsUzSeOC7ZKtnnwPsEBGrWijDFcB/RcRjkgaRLeq3W0fLamZmZp2gezdYuGJhnSLfFeoA4EZJQ4Ax6TEvxfUjq2i0VLEYA+wlaVza3zTFzyJbOK4X8IuImN/C+fl8xko6M+33AQa1815uS5WKfsCBwG25Zsve6e8MYIqknwE/b2e+eeMlHQSsAk4GtgCWRsSz6fgNwFfJKkNDgPtSGRqAl1PMQuAmSb8AflHhGocDu+fKvomkfhGxMh8kaSIwEWDgoPY+RWZmZmZrc8XCOlVEPC5pANmXZQEXR8SP23m6gNMiYtpaB6RDgCPIvtD/AHgD+Pd0+EsV8vl0RDxTlsd+7SjD39PfHsBfSxWmvIg4JeV1BDBH0vCIWJG7znfTMSqdTxpjkYvfu0JM6T6WRMQBFY4dARwC/DNwrqQ9y473APaPiLdbyLt0L5PJVutm+PDG6s27aGZmZh5jYVZEGofQAKwg635zYvr1H0nbpgHLLZlG1gWoV4rfJY2X2B54JSKuBa4DhkXEnRExND1mV8jnNKVPr6R9UvobQP9cXPn+uyLidWCppGNSHipVACTtFBFPRsT5wDJgYNm555bK1sq95j0DDJb04bR/PPBwSt8itQIhqZekPST1AAZGxEPA2WQtO/3K8rwXOK20I6m9ZTEzMzPrELdYWGcojbGA7Ff2L0REE3CvpN2Ax9N3/JXA54C/tJDPdcBgYG6qFCwDjiKbveksSatTHp9vozzfBi4HFqYv4UuBfwIeAs5JZb0Y+BVwu6QjyX0JzzkO+JGk84BewC3AAuAySTune30gpXVYRLwt6Ytk3a56knX9uiYi3kndwiZJ2pTs83o58Czw05QmYFJE/LXsV5DTgR9KWpjOe4RssLqZmZnVgaRu32Khaq46aWbrj+HDG2PGk+WNP52j2itvV/sfaq+8XVmR1XOh+Aq6a5rav1xwV1t5u0eBC3jl7fopUvaiX5eK3mrR56bI56PI+xE2nJW3R1deqhkAACAASURBVO7XyJw5s2v2puy91c7xofE/qMm1/njl2DkR0ViTi+W4xcLMqm7bD3atikJRRSoLHzzyykJ5v/bLSo1lLetqz001NRX48tF3o4ZCea9e0/4vZdC1vphVu0JX1Pr8nixS9qK3WfQ91qtnsQv0bFh/e7NX+z3Zla3Pn5f2WH/flWZmZmZm1mW4xcLMzMzMrAbcYmFmnUJSk7JVvhdImivpwJQ+WNLiepfPzMzMbF24xcKsdvILCX6cbGaqj9a3SGZmZlYz3bvBwi0WZnWyCfBaeWJqvXg0tWjkWzW2lvRIavFYLOnglD5B0qKUdmkun5WSvptaR56QtFXN7szMzMw2SK5YmNVO31QxeJpszY5vV4j5C/CxiBgGjAcmpfTPAtNSi8fewHxJ2wCXAocCQ4ERko5K8RsDT0TE3mRrWHy5WjdlZmZm7VNay6Laj3pxxcKsdt5KK3LvCnwCuFFrf/p7AddKWgTcBuye0mcBX5R0AbBnRLwBjACmR8SyiFgD3AQckuLfAe5K23PIFh5c
i6SJkmZLmr1s+bJOuUkzMzPbMLliYVYHEfE4MADYouzQ14FXyFolGoGNUvwjZJWG/wOmSGpr9fHV8d7KT020MJ4qIiZHRGNENG4xoLwoZmZmZu3nioVZHUjaFWgAVpQd2hR4OSKageNTDJK2B16JiGvJulENA2YCH5U0QFIDMAF4uEa3YGZmZkWo+3eF8qxQZrXTV9L8tC3gCxHRVPYPwNXAHalF4h7g7yl9FHCWpNXASuDzEfGypHOAh1J+v46IX9bgPszMzMzW4oqFWY1EREML6c8DQ9L274G9cofPTuk3ADdUOPdm4OYK6f1y27cDt69D0c3MzGwdCejm6+O5YmFmmeaAt1c3tTu+T6+K9aSKevTo5v+S5rz2y9MKxZ/682JrI171qSGF4qup2q9rtB3yriLv3Y5oaipSGujTo/2fj6KKPu9Fn5sin+2i+RfNe3328l/fLhQ/aMAHqlSS6r8HivJ7pvtyxcLMzMzMrOrqO/6hFjx426yLkvSZNGjbzMzMrMtzxcKsBZI+JOkWSX+QNEfS3ZJ2qdK1hkr6ZFnyg8AlkjwPrJmZWTcg1eZRL65YmFWQFq67k2wBup0iYjjwr8BW7TlXUtHP1lDgfRWLiFgeERMiouLKdZLcldHMzMy6DFcszCobTbbI3DWlhIhYAMyT9ICkuZIWSToSQNJgSc9IuhFYDAyU9KO0qvUSSReW8pE0QtJvJS2QNFPSpsBFwHhJ8yWNl7SxpOslzUppR6VzT5A0VdKDwAO5uJmS5uXKs0dKmy9poaSda/bMmZmZWUVex8JswzQEmFMh/W3g6Ih4XdIA4AlJU9OxncnWpngCQNK5EfFqWrzuAUl7AU8DtwLjI2KWpE2AN4HzgcaIODWd+x/AQxFxoqQPAjMl3ZeuMwzYK+X9H8CDKW6zFHc/cApwRUTcJGkj0kJ7ZmZmZtXiioVZMQL+Q9IhQDOwLe91j3qhVKlIPiNpItnnbGtgd7IZNF+OiFkAEfE6UOnXhTFkq2qflPabgIFp+76IeDUXN1bSmWm/DzAIeBw4V9J2wM/T+hhr30xWvokA2w0c1O4nwczMzAqq8/iHWnDFwqyyJcC4CunHAVsAwyNitaTnyb7Mw3urZCNpB+BMYEREvCZpSi6uPQScFBFPvy9R2j9/nRT36Yh4puz830l6EjgCuFvSyRHxYPlFImIyMBlgn2GNxSbqNzMzM8vxGAuzyh4Eeqdf9AFIXZm2B/6SKhWj034lm5BVAP4maSvgH1P6M8DWkkakPPunQdhvAP1z508DTk2DyJE0vIXrTANOy8Xtk/7uCDwXEZOAX/L+1bzNzMysxkS2wGUtHvXiioVZBRERwNHA4Wm62SXAxcDdQKOkRcDnycZMVDp/ATAvHf8fYEZKfwcYD1wpaQFwH1lLxkPA7qXB28C3gV7AwnTtC9e+ClSI+3ZK/wywWNJ8svEiN3b4yTAzMzNrB3eFMmtBRLxE9gW93AEtnDKk7PwTWsh3FrB/hUMjyvZPrnDuFGBKbv+tFuIuAS5poZxmZmZWBx5jYWYbBAk2anAjZq1d9akhbQflfHDspELxr009vVB8EavXNBeK79Wz2PurT6/qTWbW3FxsSFFzdJ0hSGuaij3v1Xwea5H/+mqrTXvXuwjv6mqvUVcrj3Uef4swMzMzM7N15hYLsyqT9CHgcrKuTn8FXgF+AYyNiH+qZ9nMzMysduq5eF0tuMXCrIrSbE13AtMjYqeIGA78K++tfWFmZmbWLbhiYVZdo4HVEXFNKSHNGPUo0E/S7ZKelnRTbsrY8yXNkrRY0uRc+umSnpK0UNItKe2jaSap+ZLmSeqf0s9KeSyU1NKMUmZmZlYraYG8WjzqxRULs+oaAsxp4dg+wBlkK3LvCIxM6VdFxIiIGAL0BUrdpc4B9omIvYBTUtqZwFcjYihwMPCWpDHAzsC+wFBgeFop3MzMzKxqXLEwq5+ZEfFiRDQD84HBKX20pCfTWhmHAnuk9IXATZI+
B6xJaTOAH0g6HdgsItYAY9JjHjAX2JWsorEWSRMlzZY0e/nyZZ1/h2ZmZgZkC+RJqsmjXlyxMKuuJUBLq2avym03AT0l9QGuBsZFxJ7AtWQL6AEcAfwQGAbMktQzrVfxJbKWjRmSdiX7t+viiBiaHh+OiP+/UgEiYnJENEZE44ABW6zjrZqZmdmGzBULs+p6EOgtaWIpQdJeZN2WKilVIpZL6geMS+f0AAZGxEPA2cCmZGM0doqIRRFxKTCLrHViGnBiOh9J20rasgr3ZmZmZu1Wm9aKerZYeLpZsyqKiJB0NHC5pLOBt4HnyaabrRT/V0nXAouBP5NVFgAagJ9K2pSsRWJSiv22pNFAM1nryG8iYpWk3YDH0z8uK4HPAX+p1n2amZmZuWJhVmUR8RLwmQqHrs3FnJrbPg84r0L8QRXyPq2Fa14BXFG4sGZmZlY13XwZC3eFMjMzMzOzdecWCzMDsv5VPXp0859SkubmKBTflZ6X16aeXih+/+880O7YJ847rFDeDVV+Xoq8TkVfo6Lxb77dVCi+X0P7f7cr+n7sWSDvWqjm69SVPqsvvfZWofhNP9CrUHzvQtHFn5siutK/ed2NV942MzMzMzNrgysWZjUi6UOSbpH0B0lzJN2d1pG4q4X46yTtXiD/RkmTOq/EZmZm1mk2gJW33RXKrAaUtX3eCdwQEcemtL2BsS2dExFfKnKNiJgNzF6XcpqZmZl1lFsszGpjNLA6Iq4pJUTEAuBRsvUobpf0tKSbUiUESdMlNabtlZIuk7RE0v2S9k3Hn5M0NsWMKrV+SNpY0vWSZkqaJ+nImt+xmZmZvcsrb5tZZxkCzGnh2D7AGcDuwI7AyAoxGwMPRsQewBvAd4CPAUcDF1WIPzfF70tWqblM0sbrdAdmZmZmrXDFwqz+ZkbEixHRDMwHBleIeQe4J20vAh6OiNVpu1L8GOAcSfOB6WQreg8qD0pjPGZLmr1s+bJ1vQ8zMzPbgHmMhVltLAHGtXBsVW67icqfy9URUZpbsLl0TkQ0S6oUL+DTEfFMa4WKiMnAZIDhwxurN3ehmZmZeYE8M+sUDwK9JU0sJUjaCzi4StebBpyWG6+xT5WuY2ZmZga4YmFWE6m14Wjg8DTd7BLgYuDPVbrkt4FewMJ0rW9X6TpmZmbWTt198La7QpnVSES8BHymwqFrczGn5rZH5bb75bYvKMu3X/o7nWw8BRHxFnByZ5TbzMzMrD1csTCzDU6PHt28k2vOE+cd1u7YLY+/sVDeL/735wrFb1TweS8y6Oe9IUjV0Xejhqrlvb73ud5QPk/bfLBvofjX31pdpZJkNpTnvbtZ3z/vbXFXKDMzMzMzW2dusTAzMzMzqzZR1/EPteAWC7NWSPqQpFvSgOs5ku6WtEu9y2VmZmbW1bjFwqwFaarWO4EbIuLYlLY3sBXwbD3LViKpISKa6l0OMzMza53wGAuzDdlosoXpriklRMQC4DFJl0laLGmRpPEAkkZJeljSLyU9J+kSScdJmpnidkpxUyT9SNITKW6UpOsl/U7SlNK1JI2R9LikuZJuk9QvpT8v6VJJc4FjJH0ixSyQ9ECK2TjlOVPSPElH1u5pMzMzsw2RWyzMWjYEmFMh/VPAUGBvYAAwS9Ij6djewG7Aq8BzwHURsa+krwGnAWekuA8CBwBjganASOBLKa+hwIvAecDhEfF3SWcD3wAuSueviIhhkrYA5gKHRMRSSZun4+cCD0bEiZI2A2ZKuj8i/t4Jz4uZmZkVVt81JmrBFQuz4g4Cbk5dkF6R9DAwAngdmBURLwNI+gNwbzpnEVkLSMmvIiIkLQJeiYhF6ZwlwGBgO2B3YEb6R2gj4PHc+bemv/sDj0TEUoCIeDWljwHGSjoz7fcBBgG/y99IWgl8IsDAQYM69GSYmZmZgSsWZq1ZAowreM6q3HZzbr+Z93/eVlWIycc1AfdFxIQWrtNWy4OAT0fEM60FRcRkYDLA8OGN1V0IwMzMbAPX
zRssPMbCrBUPAr3Tr/oASNoL+CswXlJD6op0CDCzk6/9BDBS0ofTdTduYTaqJ4BDJO2Q4kpdoaYBp6UB6Ejap5PLZ2ZmZvY+brEwa0HqqnQ0cHka4/A28DzZOIl+wAKyxYG/FRF/lrRrJ157maQTgJsl9U7J51E2G1WKmwj8XFIP4C/Ax4BvA5cDC1P6UuCfOqt8ZmZmVpzHWJhtwCLiJeAzFQ6dlR752OnA9Nz+qErHIuKEXPrzZIPEqXDsQbKxG+VlGly2/xvgN2VpbwEnV7onMzMzs2pwxcLMzAB45cbjC8VvftC3CsW/NuOyQvENPar3y15EsSFF1SxLd/8Fc130qOLzXm39+/grlm14/K43MzMzM6s2efC2mZmZmZlZm1yxsKqQ1CRpvqQlaUXob6ZBxEXyGCvpnLR9lKTdO1CO6ZIaO3rdWpG0spbXMzMzs9oSWdfHWjzqxV2hrFreioihAJK2BP4H2AT493yQpJ4RsaZSBhExlWxVaoCjgLuAp6pW4vfKk79uR/MRoIho7pySmZmZmXVtbrGwqouIv5Ct7nyqMidImirpQeABSZtL+oWkhZKeSGtFkOKuknQgMBa4LLWC7NSRcuRbBSSNkzQlbU+RdI2kJ4H/LF03HTtG0uLU6vJIrly/TK0hv5f07yl9sKRnJN0ILAYGtnLNHSQ9LmmRpO+UlfMsSbPS83FhLu/fSbo2tQLdK6lvOvZhSfenMs4tPT+V8jEzM7P66e4tFq5YWE1ExHNAA7BlShoGjIuIjwIXAvMiYi/g34Aby879LVkLwlkRMTQi/lCFIm4HHBgR3yhLPx/4eETsTVa5KdkX+DSwF3BMrrvVzsDVEbFHRLzQyvWuAH4UEXsCL5cSJY1JeewLDAWGSzokl/cPI2IPskX6Pp3Sb0rpewMHAi+3kc+7JE2UNFvS7GXLl7VSXDMzM7PWuWJh9XJfRLyatg8CfgLvrt3wD5I2qXF5bouIpgrpM4Apkr5MVjEquS8iVqT1In5Odg8AL0TEE+243kjg5rT9k1z6mPSYB8wFdiWrIAAsjYj5aXsOMFhSf2DbiLgTICLejog328jnXRExOSIaI6JxiwFbtKPYZmZm1lFSbR714jEWVhOSdgSayFaGBvh7J+XbQPYlG2BqRJzfSnh+4vo+ZccqliciTpG0H3AEMEfS8Ap55ffL82ntmpUm0hdwcUT8+H2J0mBgVS6pCehbqcyt5WNmZmZWLW6xsKqTtAVwDXBVVF6V6lHguBQ7ClgeEa+XxbwB9C8/MSKaUveooW1UKgBekbSbstmpjm5n2XeKiCdT3suAgenQx9LYkL5kA8tnFLzmDODYtH1cLn0acKKkfun626bB7xVFxBvAi5KOSvG9JX2gaD5mZmZWfR5jYdYxfZWmmwXuB+4lG0tRyQVkYwAWApcAX6gQcwtwlqR5KjZ4uyfv/dJ/DtnMUr8lN66hDZelAdaL03kLUvpM4A5gIXBHRMxu4fyWrvk14KuSFgHblhIj4l6yGbQeT8dup0KFqszxwOnp+fst8KEO5mNmZmbWYar8A7LZ+k9Sb+B/gSER8bdOzPcEoDEiTu2sPLuC4cMbY8aTLdWPbENQ9P+DzQ/6VqH412ZcVii+moreaz1/AbT1k99jXd/I/RqZM2d2zZ74/oN2jcZvXl+Ta00/Y+SciCi0jldn8BgL65bSLE0/IZuhqdMqFbZhWrW60rj+ynr3amg7qIsq+sWmaEVh7I/bM6/Be276/PC2g5L+fXsVyrvovRZ5D0Cx90E187bOU/R1emH5m4Xid9najcq2/nPFwrql1DVptyrlPQWYUo28zczMrHsS9R3/UAseY2EdIqkpjaEoPc6pd5lKJDVKmrQO50+RNK4zy9TO656RBl6X9u+WtFmty2FmZmbWEW6xsI56KyKG1rsQlaTWivVxsMAZwE+BNwEi4pP1LY6ZmZl1pm7eYOEWC+tckkZI+q2kBZJmSuovqY+k/06zK82TNDrFniDp
55LukfR7Sf+Zy2dCaTYmSZfm0ldKukzSEkn3S9pX0nRJz0kam2JGSborbW8s6fpUlnmSjqxQZkm6StIzku7nvdXBkXRYOm9Ryqd3Sn9e0sWptWa2pGGSpkn6g6RTcuefJWmWpIWSLsyV6dfpOVosabyk04FtgIckPZS7xoC0fa6kZyU9JulmSWem9OlpPAmSBkh6Pm03pOepdO2TO+P1NTMzM2uJKxbWUaXpZEuP8ZI2Am4FvhYRewOHA28BXwUiIvYEJgA3SCotFjcUGA/sCYyXNFDSNsClwKHp+AildRqAjYEHI2IPsrUtvgN8jGyNiIsqlPPcFL8vMJps+tiNy2KOBj4C7A58HjgQIJVxCjA+lb0n8JXceX9MrTaPprhxwP6kaXUljSFb7XrfdB/DJR0CfAJ4KSL2joghwD0RMQl4CRgdEaPzhVO2KN+xKY9PAiMq3Ge5k4C/RcSIFP9lSTu04zwzMzOzDnFXKOuotbpCSdoTeDkiZgGUFrmTdBBwZUp7WtILwC7ptAdKszZJegrYHvgHYHpELEvpNwGHAL8A3gHuSecuAlZFxOq0VsPgCuUcA4wt/cJPtvr1IOB3uZhDgJsjogl4SdKDKf0jwNKIeDbt30BWSbo87U/NlaNfWqzuDUmr0tiIMekxL8X1I6toPAp8P7XE3BURj1Yod97BwJ0R8WZ6Pqa2EV+6771yY0U2Tddemg+SNBGYCDBw0KB2ZGtmZmYd1aOb94VyxcLqbVVuu4m235Orc6t3N5fOj4hmSZXOFfDpiHhmnUu6tlLZm3n/fTST3YeAiyPix2sVShpG1vrwHUkPRESl1pb2WMN7LY99cukCTouIaa2dHBGTgcmQrWPRwTKYmZmZuSuUdapngK0ljQBI4yt6kv1Cf1xK24WsxaC1L/ozgY+mMQMNZN2nHu5gmaYBp0nZTwSS9qkQ8whZN6wGSVuTdZkq3c9gSR9O+8cXLMc04ERJ/dK1t5W0Zerq9WZE/BS4DBiW4t+g8urYjwBHSeorqT/wz7ljzwOlyf7zM1lNA74iqVe69i4VuoCZmZlZDUm1edSLWyyso/pKmp/bvycizpE0HrhSUl+y8RWHA1cDP0rdldYAJ0TEKrXwzo+Il5VNX/sQ2S/vv46IX3awnN8m67q0UFIPsq5A/1QWcyfZeI6ngD8Cj6dyvC3pi8BtqYI0C7imvReOiHsl7QY8nu51JfA54MNkYz2agdW8N25jMnCPpJfy4ywiYq6kW4EFwF9SOUq+B/wsdWn6dS79OrKuYXNTpWoZcBRmZmZmVaKiS86bWX1JugBYGRHf68x8hw9vjBlPro+z9FbfhrLydrV1pZW3i/LK2+aVt7ufkfs1MmfO7Jr9vr/p9rvF/mdPqcm17v3q/nMiorG1GEmfAK4AGoDrIuKSCjGfAS4AAlgQEZ9tLU+3WJiZmZmZbUBSV/Mfks2s+SIwS9LUiHgqF7Mz8K/AyIh4TdKWlXN7jysWZuuZiLig3mXY0PRs2DCGozU3F2vBfvXv7xSKn3ry/oXi9/n/Wp174H3mXDimUN6r1jQXiu/VUL0fNavdAlG0Z0JL3VTXB9W816L/Duy4ZXWHtRW51672mq5pav/nr7v9+9uj67wU+wL/GxHPAUi6BTiSrFt4yZeBH0bEawAR8Ze2Mu1er5aZmZmZmbVlW+BPuf0XU1reLsAukmZIeiJ1nWqVWyzMykjaCvgvssXuXiNbO+M/I+LOuhasjKSLgEci4v56l8XMzMzaVsPWowGS8gMnJ6cp5ovoSbYG1ihgO+ARSXtGxF9bO8HMkjSD0i+AG0oDlCRtD4wti+sZEWvqUMR3RcT5ldIlNaTF/szMzGzDtLyNwdv/BwzM7W+X0vJeBJ6MiNXAUknPklU0ZtECd4Uye79DgXci4t1pZSPihYi4UtIJkqamlbkfkNRP0gOS5kpaJOlIAEmDJT0taYqkZyXdJOnw1JT4e0n7prgLJN0g6VFJL0j6lKT/THndk1uD
4nxJsyQtljQ5tybHlNLK2pKel3SppLnAMZLGSHo8le220loaZmZmVj9daB2LWcDOknaQtBFwLDC1LOYXZK0VSBpA1jXqudYydcXC7P32AOa2cnwYMC4iPgq8DRwdEcPIFtX7fulLP9laFd8Hdk2PzwIHAWcC/5bLbyeyysxY4KfAQxGxJ9kaIEekmKsiYkREDAH6svY6HCUrUlnuB84DDk/7s4FvtPP+zczMrJtLvS5OJVtQ93fAzyJiiaSLJJV6aUwDVkh6imxtsbMiYkVr+borlFkrJP2QrELwDtm0bPdFxKulw8B/SDoEaCYb9LRVOrY0IhalPJYAD0REpEUCB+cu8ZuIWJ3SG4B7Uno+brSkbwEfADYHlgC/qlDcW9Pf/YHdgRmpnrMRadG/Cvc3Efh/7N15nFxVnf//17uTkISEffuCQwgiewgh3UEhikEhLjigsoQA40TUgOOg4wx+xYHBADqo4E9BZYlOJijKJsJEyJeAQBQhhOwbi44sgqAECEhiCCH9+f1xT5Gborq7bndV9ZL3M4969L3nnnvu51ZXderU2SYD7D5sWEdPh5mZmXWSANFzpoWKiJnAzLK083PbQfbFZNVfTrpiYbapFcDxpZ2I+Fxq/isNgFqTy3sqsBPQnCoHTwKD0rF1uXytuf1WNn3frUvXaZW0PjbOH9gK9Jc0iGzl8paIeDotjjeIykqxiawCNLGjm00DuaZCtkBeR/nNzMzM2uKuUGabugcYJOmzubQt28i7DfB8qlQcCexRh3hKlYgX0jiJE6o450FgrKR3AEgaImmfOsRmZmZmBTSpMY/u4hYLs5zUXemjwHdS96OVZC0BXyYb35D3U+CXqRvTfODROsTzsqQfAsuBP9POTAy5c1ZKmgRcJ2lgSj4P+F2t4zMzMzMrccXCrExEPEc2O0Il03P5XgAOayPfiFy+SbntJ0vHylfQjoihue0pue3zyCoG5XHmyx1eduweYEwbsZmZmZnVnCsWZmYd6Ned7coN1FTwPnfcamDHmbpg0UUfqDrvdmP+uVDZq+Z9v2g4vVYDF+TqdvW81+J/B+r7vPfm32v/fptpT3ypV//eqrGZ/mbNzMzMzKyWXLEwK0DSKZI8L6uZmZkV1oMWyKsLVyzMEkm7SPqZpMclLUgrV38sd/xTwM4R8cdujHGSpIp9OCTNlLRto2MyMzMzA4+xMAMgrZh9K3BNRJyS0vYgWxEbgIj4rxpfs19EbKhVeRHx4VqVZWZmZrUloMljLMw2C+8DXo+Iq0oJEfFURHxPUj9Jl0iaJ2mppDMgq4yk9OWSlkmakNKbJF0h6VFJd6WWhBPSsSclfVPSQuBESZ9J5S6RdLOkLVO+6ZKukjRf0u8kfSQX626S7pD0e0nfKiWmsndM26dJekjSYklXS+pX92fQzMzMNmtusTDLHAgsbOPYp4BXImJMWhfifkl3AqOBUcDBwI7APEm/AcYCw4EDgJ2BR4BpufJejIjRAJJ2iIgfpu2vpWt9L+UbDhwK7AXcW1rwLl3zELJVux+T9L2IeLpUuKT9gQnA2LR43xVkq4T/uDNPjJmZmdVGH2+wcMXCrBJJPwDeDbwOPAWMLLU6kK24vXc6fl3qzvQXSb8mWzvi3cBNEdEK/FnSvWXF35DbHpEqFNsCQ4FZuWM3pjJ+L+lxYL+UfndEvJLifJhsxe+nc+e9H2gmq+hAtrDf823c52RgMsDuwzwm3czMzDrPFQuzzArg+NJORHwudSuaD/wROCsi8h/6kfShTl5rTW57OvDRiFiSVsselzsWZeeV9tfl0jbw1vexyMaKfKWjQCJiKjAVoLm5pfx6ZmZmVkNex8Js83APMEjSZ3NpW6afs4DPShoAIGkfSUOA+4AJaQzGTsARwEPA/cDxaazFLmxaWSi3FfBcKvvUsmMnpjL2At4OPFblvdwNnCBp5xTv9mkgupmZmVnduMXCDIiIkPRR4DuS/i+wkqxl4cvATWTjHRam2aNWAh8F
bgEOA5aQtSb834j4s6SbybojPUzWRWkh8Eobl/4PYG4qcy5ZRaPkj2QVla2BMyPitWq+6YiIhyWdB9wpqQlYD3yOrEuXmZmZdYPuXmOiEVyxMEsi4jng5DYO/3t6lPtSeuTLaZV0dkSslrQDWeVgWTo2vCzvlcCVbVzzVxFxZln+6WTdp0r7H8ltD89t38CmYznMzMzM6soVC7P6uC0tVrcFcFFE/Lm7A+pO69YXW65ji/7Femn29T6rjdLaWmyYzV/Xri+Uf9shWxTKv6FAPM/df1mhsg8+945C+RdcOL5Q/v79qn8NRxR73jen13tPem56UixF9bTY17/RWnXeAQX/P+jp+vo6Fq5YmNVBRIzr4vmTahOJmZmZWWO4YmFmZmZm1gB9u73CPngBlAAAIABJREFUs0KZ1Y2k1WX7kyR9P22fKekT7Zw7TtLhVVyj3XLMzMzMGsUtFmbdICKu6iDLOGA18EBnypHUPyLe6Fx0ZmZmVg89aexNPbjFwqwbSJoi6ey0/XlJD0taKul6ScOBM4EvSlos6T2Shku6J+W5W9KwCuXMlvRdSfOBL0hqlvRrSQskzZK0azfdrpmZmW0G3GJhVj+DJS3O7W8PzKiQ7xxgz4hYJ2nbiHhZ0lXA6oi4FEDSL8lW075G0unA5WRraZTbIiJa0oJ7vwaOi4iVkiYAXwdOz2eWNBmYDLD7sGFdu1szMzPbrLliYVY/ayNiVGlH0iSgpUK+pcBPJd0K3NpGWYcBH0/bPwG+1Ua+0toV+wIjgLtSs2s/4LnyzBExFZgK0NzcUmw+QjMzM6uagKa+3RPKFQuzHuAY4Ajg74FzJR3UhbLWpJ8CVkTEYV0NzszMzKwaHmNh1o0kNQG7R8S9wJeBbYChwKvAVrmsD7BxVfBTgfs6KPoxYCdJh6XrDJB0YC1jNzMzswIk1KBHd3HFwqx79QOulbQMWARcHhEvA78EPlYavA2cBXxS0lLgH4AvtFdoRLwOnAB8U9ISYDHQ4fS1ZmZmZp3lrlBmdRIRQ8v2pwPT0/aU3KF3Vzj3d8DIsuT3Vcg3Jbc9ruzYYrIuVmZmZtYD9PHZZl2xMLNMABHVj98u0tQ6cEC/TkRktVDkd9pUcFThtkO2KBpOIf0KxNM0oFgD/JKvf7BQ/u0O/7dC+Vc98O2q8/b1ee27oic9Nz0plqJ6WuwD+rvDTF/VZsVC0vfIPmtUFBGfr0tEZmZmZmZ9UE+r5NVaey0W8xsWhVkFklbnuxOVpmuNiH+WdCbwt4j4cRvnjgNej4h2V67uqJxak7QtcEpEXFHDMmcDZ0fEfElPkj1HL9SqfDMzM7NqtFmxiIhr8vuStoyIv9U/JLOORcRVHWQZB6wmm02pcDmS+kfEG52Lrl3bAv8EFKpYSOoXERvqEI+ZmZk1wOawjkWHndwkHSbpYeDRtH+wpJp922rWGZKmSDo7bX9e0sOSlkq6XtJw4Ezgi6VZlSQNl3RPynO3pGEVypkt6buS5gNfkNQs6deSFkiaJWnXCnFMl3S5pAckPS7phJQ+NF1noaRlko5Lp3wD2CvFdYmkcZJuy5X3/dQyg6QnJX1T0kLgREnjJc1JZd4kaWh5PGWxnSbpoXStqyV5oIOZmZnVTTWDt78LfACYARARSyR5phlrhMGSFuf2tye9DsucA+wZEeskbRsRL0u6ClgdEZcCSPolcE1EXCPpdOBy4KMVytoiIlokDQB+DRwXESslTQC+Dpxe4ZxdyWZ22i/F93PgNeBjEfFXSTsCD0qakWIdUVqRO3XZas+LETE6lfEL4KiIWCPpy8C/AhdWOknS/sAEYGxErE9fBpwKNKTLl5mZmb3V5jzG4k0R8XTZE+EuGdYIa0sfwGHjGIsK+ZYCP5V0K3BrG2UdBnw8bf8E+FYb+W5IP/cFRgB3pdd+P+C5Ns65NSJagYcl7VIKF/jPVAlvBd4G7NLG+e0pxfMu4ADg/hTPFsCcds57P9AMzEv5BwPPl2eSNBmY
DLD7sGGdCM/MzMwsU03F4mlJhwORvsX9AvBIfcMyK+QYsvUa/h44V9JBXShrTfopYEVEHFbFOety26Ua+KnATkBzajF4EhhU4dw32LRLYnmefDx3RcTEKuIp5b8mIr7SXqaImApMBRjd3FL9vKRmZmZWWN9ur6hu5e0zgc+RfeP6LDAq7Zt1O0lNwO4RcS/wZWAbYCjwKrBVLusDwMlp+1Tgvg6KfgzYSdJh6ToDJB1YILRtgOdTpeJIYI+UXh7XU8ABkgamGaPe30Z5DwJjJb0jxTNE0j7tXP9u4ARJO6f820vao538ZmZmZl3SYYtFmrby1AbEYtYZ/YBrJW1D9kXA5WmMxS+Bn6dB02elx39L+hKwEvhke4VGxOtpIPblqez+ZOONVlQZ10+BX0paRjZ186Op3Bcl3S9pOfD/IuJLkm4ElgNPAIvaiGdl6gp2naSBKfk84Hdt5H9Y0nnAnanytZ7sC4GnqozfzMzMakiCpj4+xkIdrcoq6e3AZWR9vIOsX/cXI+Lx+odnZo0yurkl7n9wXtX5+/oAtL6iXqup9zRF7hOK32s9V942s+4x9p0tLFgwv2F/+Hba68A47j9v6DhjDfzXyQctiIhK41LrqpquUD8DbiSb+WY34CbgunoGZWZmZmZmvUs1g7e3jIif5PavTd1JzKwPEb37G+t62tBav3Ht/eq8WlKR32k97xOK32uReOpZNhRvgdjt9J9VnffZaacUKruoovda79fk5uKVv60vlH+bLQfUKRLrSfr6f7NtViwkbZ82/5+kc4DrybpCTQBmNiA2MzMzMzPrJdprsVhAVpEo1a3OyB0LoN1pLM2sfiStjoh2V942MzOznqWv9wxos2IREXs2MhAzMzMzM+u9qlp5W9IIslV/31y8KyJ+XK+gzKw6abzTScBA4JaI+Kqk4cBtETEi5TkbGBoRU7orTjMzM9uMx1iUSPoqMI6sYjET+BDwW8AVC7NuJGk8sDdwKFmXxRmSjgD+2K2BmZmZ2WapmhaLE4CDgUUR8UlJuwDX1jcsM6vC+PQoLao3lKyiUXXFQtJkYDLA7sOG1To+MzMzS4T6/AJ51VQs1kZEq6Q3JG0NPA/sXue4zKxjAi6OiKs3SZT+jk3XqBlEGyJiKjAVoLm5pb5zjZqZmVmfVs0CefMlbQv8kGymqIVkq2+bWfeaBZwuaSiApLdJ2hn4C7CzpB0kDQQ+0p1BmpmZGaBsjEUjHt2lwxaLiPintHmVpDuArSNiaX3DMrO2SOoPrIuIOyXtD8xJ09etBk6LiOclXQg8BPwJeLT7ojUzM7PNRXsL5I1u71hELKxPSGbWgQOBPwBExGXAZeUZIuJy4PIGx2VmZmbt2GzXsQC+3c6xAN5X41jMrAOSzgQ+D/xLd8eyOenX1Lf/IyjpaXdZ5GmPKDZEqN6/02ennVJ13u0+cHGhslfNKrY+bb3vtbW1+ue+aTN5LwEM3qJfd4dg1nDtLZB3ZCMDMbOORcRVwFXdHYeZmZkVV83g5t6sr9+fmZmZmZk1gCsWZg0iKSRdm9vvL2mlpNsKlrObpJ/XPkIzMzOrF5GNsWjEo7u4YmHWOGuAEZIGp/2jyWZtqpqk/hHxbEScUPPozMzMzLqgw4qFMqdJOj/tD5N0aP1DM+uTZgLHpO2JwHWlA5IOlTRH0iJJD0jaN6VPkjRD0j3A3ZKGS1qeO/YLSXdI+r2kb+XKG5/KWyjpptJ6F2ZmZmb1UE2LxRXAYWQfggBeBX5Qt4jM+rbrgZMlDQJGAnNzxx4F3hMRhwDnA/+ZOzYaOCEi3luhzFHABOAgYIKk3SXtCJwHHBURo4H5wL/W/G7MzMysak1qzKO7dLhAHvDOiBgtaRFARKyStEWd4zLrkyJiqaThZBX1mWWHtwGukbQ32ZTOA3LH7oqIl9oo9u6IeAVA0sPAHsC2wAHA/amv5RbAnPITJU0GJgPsPmxY527KzMzMjOoqFusl9SP7oIOknYDWukZl1rfN
AC4FxgE75NIvAu6NiI+lysfs3LE17ZS3Lre9gex9LbLKyMTKp2QiYiowFaC5uaXYQgBmZmZWSF9fyqWarlCXA7cAO0v6OvBbNu2iYWbFTAMuiIhlZenbsHEw96QuXuNBYKykdwBIGiJpny6WaWZmZtamDlssIuKnkhYA7yf7FvSjEfFI3SMz66Mi4hmyCnu5b5F1hToPuL2L11gpaRJwnaSBKfk84HddKdfMzMw6R6Jbp4JthA4rFpKGAX8DfplPi4g/1jMws74mIt4yK1NEzCZ1eYqIOUC+VeG8lD4dmJ4750lgRBvHPpLbvgcYU5vozczMzNpXzRiL28nGVwgYBOwJPAYcWMe4zKzBAmhtrc8wi6a+3qm0gYr+joo89z3t9xQFbrWnxV7EqllfKZR/u2O+Xaz82/+tUP6ievNzX8T6N4oNL40iL2DbbPT1t0s1XaEOyu9LGg38U90iMjMzMzOzXqfwytsRsRB4Zx1iMatI0hmStuvuOLqTpKGSzlRf75xpZmbWh2XjLOr/6C7VjLHIL6rVRLZQ17N1i8h6NUkB/DQiTkv7/YHngLn5/v8FyjsfeDQiVnWQ70mgJSJeKB51baVYno6I9+TSFgP9I2JEZ8qMiNWSnga+Bpxbk0DNzMzMaqiaMRZb5bbfIBtzcXN9wrE+YA0wQtLgiFgLHM3GKVSrIql/RLwBEBEX1iHGRthK0u4R8bSk/WtRYETcThdnizIzM7PuIaCpj3c8aLcrVFoYb6uIuCA9vh4RP42I1xoUn/VOM4Fj0vZE4LrSAUmHSpojaZGkByTtm9InSZoh6R7g7rTuwjRJD6W8x6V8/SRdKmm5pKWSzspd9yxJCyUtk7RfB9fbUtKNkh6WdIukuZJa0rHx6ZyFkm6S9JbZnKpwIzChjedguKT7UvkLJR2e0sdJmi3p55IelfTTUtcnSc2Sfp3yz5K0a0ofk56HxZIukbQ89zxdImleOn5GJ+7BzMzMrGptVizSt8YbgLENjMf6huuBkyUNAkYCc3PHHgXeExGHAOez6WKLo4ETIuK9ZN197omIQ4EjgUskDQEmA8OBURExEvhp7vwXImI0cCVwdgfX+ydgVUQcAPwH0AwgaUeyaV6PSmXNB/LdAat1M/DxtP335KZrBp4Hjk7lT2DTNS0OAf4FOAB4O9kidwOA7wMnpnOm5+7jv4EzImIU2arbJZ8CXomIMWRTzn5G0p7lQUqaLGm+pPkvvLCyE7dpZmZm1Wpq0KO7tNcV6iGyD3qLJc0AbiLr5gJARPyizrFZLxURSyUNJ/umfmbZ4W3IFoHbm2yG0wG5Y3dFxEtpezxwrKRSBWEQMAw4Crgq11Xqpdz5pdfkAjZ+qG/reu8GLktlLJe0NKW/i+xD/f2psWALYE6R+09eBFZJOhl4hGwtmJIBwPcllSoD+bUrHkoL6JXGZQwHXk55bkwx9QdelrQtWYtiKb6fAaVxLOOBkZJOyD0PewNP5IOMiKnAVIDRzS2eG9HMzMw6rZoxFoPIPiS9j43rWQQbP8SZVTIDuBQYB+yQS78IuDciPpYqH7Nzx9bktgUcHxGP5QvtYFKkdennBja+ttu7XiUiq+BMbDODtDsbWyCuioir2sh6A/ADYFJZ+heBvwAHk32xkO9auC63XboPAb+PiHFlcWzbwX2cFRGz2sljZmZmDdTHh1i021qyc5oRajmwLP1ckX4ub0Bs1rtNAy6IiGVl6duwcTD3pHbOn0U2ZqI0xuCQlH4XcEaabQpJ23cQR1vXux84KZVxAFBar+VBsu5H70jHhkjKtygQEU9HxKj0aKtSAXAL8K10L+UxPRcRrcA/AP06uIfHgB0lHZZiGiDpoIh4GXhVUmn655Nz58wCPpu6USFpn9SVzMzMzKwu2qtY9AOGpsdWue3Sw6xNEfFMRFxe4dC3gIslLaL9FrOLyLoMLZW0Iu0D/Aj4Y0pfApzSQShtXe8KYCdJD5NN4bqCbEzCSrIKyHWpe9QcYL8O
rlFRRLwaEd+MiNfLDl0B/GOKfz82bampVM7rwAnAN9M5i8m6bEE2luKHqdvUEOCVlP4j4GFgYRrQfTXVtVCamZmZdYraWnJe0sI0UNSsz0kzng2IiNck7QX8Cti3QiWgR5M0NCJWp+1zgF0j4gudKWt0c0v8ds68msZX0tTUx9t+G6i1tdhQmN783Be51958n0Vtd8y3C+Vfdfu/1SmSzcv6N1oL5W9t4/NVWwYO6Kjx2mpt7DtbWLBgfsP+eOy2z0HxqcsbM5Lgax/aZ0FEtDTkYjntfYO5+fyVts3RlsC9qauQgH/qbZWK5BhJXyF7Lz9F+93L2iV6zoezDQU/PPfrIXE3Qk/5HUH9f09F+iL3tNdMW1/aVdLB2LG3KFpR2P7kaYXyv3jdJwvlLxp/bzWgf7G5doq+JosqUv7a1zd0nCln6CA3cFvntPfKeX/DojBrsIh4FWh4Tb7WIuIGskHiZmZm1sP19Xp4m9Xvsmk8zTZrksZKOqK74zAzMzPrqbpzDQ2zLpEUkq7N7feXtFLSbR2cN0rSh3P7U3LrZVTKfwjwSTq3nkWl8oZLOiW33yKp0kB3MzMz60Oa1JhHt91f913arMvWACMkDU77R7Nxatn2jAI+3GGuJCIWRcSnI2J9J2KsZDi52awiYn5EfL5GZZuZmZl1C1csrLebCRyTticC15UOSDpU0hxJiyQ9IGlfSVsAFwITJC2WNCFlP0DSbEmPS/p8rozTJD2U8l6dZpNC0mpJl0haIelX6Vql849NeYZLuk/SwvQ4PBX7DeA9qcwvShpXamWRtIOkO1O5P5L0lKQdU1nLc3GdLWlK2t5L0h2SFqTr7ZfST5S0XNISSb+p+TNvZmZmVRPQJDXk0V1csbDe7nrgZEmDgJHA3NyxR4H3RMQhwPnAf6aZn84HbkgL3JUGPu8HfAA4FPhqWoRuf2ACMDYiRpGthH1qyj8EuCciDgReJVsL42jgY2QVF4DngaPTtM0TgFJ3p3OA+9L1v1N2P18FfpvKvQUYVsVzMJVsle1m4GyydTJI9/mBiDgYOLbSiZImS5ovaf7KF1ZWcSkzMzOzyjyfmPVqEbFU0nCy1oqZZYe3Aa6RtDcQZAvuteX2iFgHrJP0PLAL2cxozcC8NJ3iYLLKAsDrwB1pexmwLiLWS1pG1tWJdL3vSypVSjZZwbsNRwAfT/d2u6RV7WWWNBQ4HLgpN+XjwPTzfmC6pBuBihNnR8RUsooJzc0t9Z0b0czMbDPX12eFcsXC+oIZwKXAOGCHXPpFwL0R8bFU+ZjdThnrctsbyN4bAq6JiK9UyL8+Nk5U31o6PyJaJZXeV18E/gIcTNY6+FrVd/RWb7BpC+Og9LMJeDm1qGwiIs6U9E6yrmILJDVHxItdiMHMzMysTe4KZX3BNOCCiFhWlr4NGwdzT8qlvwpsVUW5dwMnSNoZQNL2kvYoENc2wHMR0Qr8A1BaVrW96/+GNLBb0oeA7VL6X4Cd0xiMgcBHACLir8ATkk5M50jSwWl7r4iYGxHnAyuB3QvEbmZmZrXUoBmhPCuUWRdExDMRUWm61m8BF0taxKatc/eSDdbOD96uVO7DwHnAnZKWAncBuxYI7QrgHyUtIRvDsSalLwU2pEHVXyw75wLgCEkryLpE/THFsp5s7MZDKY5Hc+ecCnwqXWcFcFxKv0TSsjTo+wFgSYHYzczMzApxVyjrtSJiaIW02aQuTxExh03HNZyX0l8CxrRT7ojcdsWVrfPXjogplY5FxO/JBpSXfDmlrwfeV1ZkKeYXgfGlRElP5sq9nI0DwPPXewL4YIX0j7/l5szMzKzbiL49yMIVCzOruzc2tNb5Cn37D3Wj3P+/LxTKP/YdO9YpkowKjHJ8ff2GQmWvL/ia3Hpwe3M/vFWR2OvtpetPL5T/jBuXFsp/9UkjO860Gdo4DK9axV4z/Qr0dxk6yB/3rDH8SjPrwSJieHfHYGZmZl2XrWPR3VHUl8dYmNWZpJB0bW6/
v6SVpUXxzMzMzPoCVyzM6m8NMELS4LR/NBtnq6qL3JS3ZmZmZg3hioVZY8wkW08CssX8risdkDRE0jRJD0laJOm4lH5gSlssaamkvSUNT7M8lc49W9KUtD1b0nclzQe+IKlZ0q8lLZA0S1KRGa3MzMysxjzdrJnVwvXAyZIGkc0UNTd37Fzgnog4FDiSbJrYIcCZwGVp8bsW4JkqrrNFRLSQzR71PeCEiGgmW+vj6zW7GzMzM7My7i5h1gARsTSt/j2RrPUibzxwrKSz0/4gYBgwBzhX0t8Bv4iI31cx00xpatx9gRHAXemcfsBz5ZklTQYmA+w+bFixmzIzM7NCetKMcfXgioVZ48wALgXGATvk0gUcHxGPleV/RNJcsi5UMyWdAfyOTVsaB5WdU1qET8CKiDisvYAiYiowFaC5uaXo3IhmZmZmb3JXKLPGmQZcEBHLytJnAWcpfY0h6ZD08+3A42lhvP8h60L1F2BnSTtIGgh8pI1rPQbsJOmwVNYASQfW/I7MzMysKqXpZj3Gwsy6LCKeSZWEchcBA4ClklakfYCTgOWSFpN1a/pxWrX7QuAh4C7g0Tau9TpwAvBNSUuAxcDhtbwfMzMzszx3hTKrs4gYWiFtNjA7ba8FzqiQ5xvANyqkX042OLs8fVzZ/mLgiM5FbWZmZjUl6ONDLNxiYWZmZmZmXecWCzOru/79/B1GbzD2HTt2dwidNniLfoXyN62vUyCdEFFs3oR6zypz9UkjC+Xf7iPfqTrvqtu+WDScXmtDa7Hfa/9iL+FCisbSrzs76fdxTX28ycL/25uZmZmZWZe5YtFLSApJ1+b2+0taKem2tH+spHPS9pTSmgiSLpR0VNr+F0lbVnGtH0k6oIvx/jzNalTaH5Xu4YMdnPfvue0tJP1GUuGWNUmHppWofy9poaTbJR1UtJxGSr+f1yRt092xmJmZWW31tFmhJH1Q0mOS/rf0GbKNfMenz3AtHZXpikXvsQYYIWlw2j8a+FPpYETMSIN9NxER50fEr9LuvwAdViwi4tMR8XB5uqSqGmrTtKb9IuLxXPJE4LfpZ6VzJKkJeLNikWY2uhuYUM11c2XtAtwI/HtE7B0Ro4GLgb2KlNNBnBX3u2giMA/4eBvXdtdFMzMz67L0me4HwIeAA4CJlb5UlrQV8AVgbjXlumLRu8wkWywNsg+h15UOSJok6fvlJ0iaLukESZ8HdgPulXRvOnalpPmSVki6IHfO7FKtVNJqSd9OU5YeJuk0SQ9JWizp6jYqG6eSrbtQKk/AicAk4GhJg1L68FRT/jGwHPgvYHAq+6fp9FtTeUX8M3BNRDxQSoiI30bErfnnJBff6tz2lyTNk7S09JxUiPM9Zfu7VzqvCEl7AUOB88hVvtLvdYake4C70/6tku6S9KSkf5b0r5IWSXpQ0val8iTdIWmBpPsk7Vc0JjMzM6stqTGPKhwK/G9EPJ6+yL0eOK5CvouAbwKvVVOoKxa9y/XAyemD+UiqrD3Cm1OUPgscGRFHpuRzI6IllfVeSZVG7A0B5kbEwcCLZK0HYyNiFLCByh/6xwILcvuHA09ExB/Iplg9Jndsb+CKiDgwIj4JrI2IURFRKnc5MKba+0wOBBYWPAdJ41M8hwKjgGZJpela34wTeKpsf992zqvWyWS/3/uAfVOrS8lo4ISIeG/aH0HWqjEG+Drwt4g4BJgDfCLlmQqcFRHNwNnAFQXjMTMzs77rbcDTuf1nUtqbJI0Gdo+I26st1F0repGIWCppONk32jNrUORJkiaTvQ52JWsKW1qWZwNwc9p+P9AMzMsaIRgMPF+h3F2Blbn9iWQfmkk/P5Er86mIeLCtACNig6TXJW0VEa9We2N5kuYCWwN3RsQX2sk6Pj0Wpf2hZBWGP1aIM7/f1nm/KRDmROBjEdEq6WayFp5SC9RdEfFSLu+96bl4VdIrwC9T+jJgpKShZJW5m7Txa4uBlS6afv+TAXYfNqxAuGZmZlaMaKJh
s0LtKGl+bn9qREyt9uTUzfv/I+ttUjVXLHqfGcClwDhgh84WImlPsm+yx0TEKknTgUEVsr4WERtKp5F1MfpKB8WvLZWVukodDxwn6dxUxg6pzx5kY0c6MpCyJjhJnwM+k3Y/HBHP5g6vIPuW/38AIuKdqevTR9LxN0itdemNs0Xu/i6OiKvLrjW8Qpz5/YrnlZXxMeCraffTETE/d+wgsorIXakisAXwBBsrFuXXXpfbbs3tt5K9p5uAl1OrUrvSH5mpAM3NLcXmIzQzM7Oe6oXUK6UtfwJ2z+3/Hbmxu8BWZD0kZqfPJv8HmCHp2PxnmHLuCtX7TAMuiIhlnTj3VbIXCmTf4K8BXkndbj5Uxfl3AydI2hlA0vaS9qiQ7xHgHWn7/cDSiNg9IoZHxB5krRUfa+Ma6yUNKO1I2oHszbHJrPMR8YPUZWpUWaUCssFIkyQdnkvLD1p/kqzlBeBYoHS9WcDp6Rt/JL2tdK8d6PC8iLglF2/5G3IiMCU9P8MjYjdgtzae2w5FxF+BJySdmOKRpIM7U5aZmZn1SfOAvSXtKWkLsi7ZM0oHI+KViNix9NkEeBBot1IBrlj0OhHxTBov0RlTgTsk3RsRS8i67jwK/Ay4v4prP0w2uPhOSUuBu8i6PZW7naxFBbIPzbeUHb+ZNmaHSjEuzQ3ePjKVV7WI+DPZWJCLlU2h9gBwAhtbAH5INqZkCXAYqUUgIu4key7mSFoG/JyNFbH2rtep83JO5q3P0S0pvbNOBT6V7nEFlQdkmZmZWYOInjN4OyLeIJvsZhbZF8I3RsQKZcsUHNvpeyy64qdZR5RNiXsv2SDvDR3l76CsXwDnRMTvahKctam5uSXun9vuFxFmfca69cX+NA0cUL9lkXvayttFeeXtynrSa8wrb1c29p0tLFgwv2E3u8d+I+PL02Z0nLEGPjd2zwUddIWqC4+xsJqLiLWSvko2u8AfO1tOapq71ZUKs8575W/rO86UDBxQrBF7UB0/CNVb0Q9xD/7hxUL537VX9UPgelpFoagilYXJNywpVPbUCb23F+dr61sL5a9nxWJzqSj0eAUWr+utXLGwuoiIWTUo43XgxzUIx8zMzMzqzBULszqQdAzwdESUT99rZmZmm6mmXt5C2REP3jYDJIWka3P7/SWtlHRb2j9W0jlVlvVB4L1k60oUjePNVc87ce6Zkj7RcU4zMzOz2nOLhVlmDTBC0uCIWAscTW4+54iYQW4atvZExB3AHZWOKetMrYgo1vm2uuteVesyzczMrDZKs0L1ZW6xMNtoJnBM2p4IXFc6IGmSpO+n7emSLpf0gKTH0+J7pXxfkjRxNN1sAAAgAElEQVRP0lJJF6S04ZIek/RjYDmwu6QrJc2XtKKUr5ykiZKWSVou6Zu59E9J+p2khyT9MBfXFElnp+29JN0haYGk+yTtV9NnyszMzKyMKxZmG10PnCxpEDASmNtO3l2Bd5Ot5v0NAEnjyVbQPhQYBTRLOiLl3xu4IiIOjIingHPTNHAjydbUGJkvXNJuwDeB96Wyxkj6aEr/D+BdwFigrQrDVOCsiGgmW2H9ikqZJE1OFZz5K19Y2c7tmpmZWVc1SQ15dBd3hTJLImKppOFkrRUzO8h+a+rO9HBauRxgfHosSvtDySoUfwSeiogHc+efJGky2XtwV+AAID/QewwwOyJWAqQFA0uVlF9HxEsp/SZgn3xgaQXww4GbctNYDmzjnqeSVUJobm7xojZmZmbWaa5YmG1qBnAp2crh7U1Evy63rdzPiyPi6nzGVFlZk9vfk6wVYUxErJI0HRjUxbjzmoCXI2JUDcs0MzOzLvIYC7PNyzTggogoPKMTMAs4PbUYIOltknaukG9rsorGK6m140MV8jxE1kVqR0n9yFpRfg3MS+nbSeoPHF9+YkT8FXhC0okpDknqvatMmZmZWa/gFguznIh4Bri8k+feKWl/YE7qgrQaOA3YUJZviaRFwKPA08D9Fcp6Lk1vey9Z
S8jtEfE/AJL+k6zi8VIq45UK4ZwKXCnpPGAA2fiRYkvempmZWc2Ivv+NvisWZkBEDK2QNhuYnbanA9PT9qS2zo2Iy4DLKlxiRNk5kyrkISLG5bavIzczVc7PImJqarG4Bbg15Z+SO/cJ4IOVrmFmZmZWD65YmPU+UyQdRTYu405SxcKskm22HFB13r+uXV+o7AH9in331q+p93Yuftde7Q256poNrfWdN6EnPe9TJxTrlbnHmTcVyv/UVScWyl/P576v96W3ThCoj78wXLEw62Ui4uzujsHMzMysXF/v6mXWEJJC0rW5/f6SVkq6rYExnCnpE426npmZmVmeWyzMamMNMELS4IhYCxwN/KmRAUTEVY28npmZmRXTtztCucXCrJZmAsek7YnkBl5L2l7SrZKWSnqwtNK2pCmSpkmaLelxSZ9P6cMlPSLph5JWSLpT0uB07DOS5klaIulmSVvmyjo7be8l6Q5JCyTdJ6mtFbrNzMzMasIVC7PauR44WdIgYCQwN3fsAmBRRIwE/h34ce7YfsAHgEOBr0oqjbbdG/hBRBwIvMzGNSt+ERFjIuJg4BHgUxVimQqcFRHNZIvxXVGLGzQzM7POEdAkNeTRXdwVyqxGImJpWmV7IlnrRd67SRWDiLhH0g6Stk7Hbo+IdcA6Sc8Du6T0JyJicdpeAAxP2yMkfQ3YFhhKtjDfm9ICfYcDN+VmnxhYKWZJk4HJALsPG1bkds3MzMw24YqFWW3NAC4FxgHVzk+5Lre9gY3vy/L0wWl7OvDRtNDepHStvCbg5YgY1dGFI2IqWesGzc0t9Z3z0szMbDPnMRZmVsQ04IKIWFaWfh/ZathIGge8EBF/7eQ1tgKeS12mTi0/mMp9QtKJ6XqSVGzyeDMzM7OC3GJhVkMR8QxweYVDU4BpkpYCfwP+sQuX+Q+y8Rsr08+tKuQ5FbhS0nnAALLxH0u6cE0zMzProj6+Pp4rFma1EBFDK6TNBman7ZeAj1bIM6Vsf0Rud0Qu/dLc9pXAle2VFRFPAB+s+gbMzMzMusgVCzMzMzOzuhPq400WrliYWd1FFBsX3tf/8PZUWw8e0HGmnA2t9R3vX+R1U/Q105Nek/2a/Hpvy1NXnVgo/3ZHnl8o/6p7LyyUv4ghA3vOR6ye9HqH+r63rXv1nFe9mZmZmVkfJfr+rEl9/f7MGkZSSLo2t99f0kpJt3WyvB9JOqB2EZqZmZnVj1sszGpnDdnidYMjYi1wNPCnzhYWEZ+uWWRmZmbW7fp61y63WJjV1kzgmLQ9EbiudEDSFEln5/aXSxouaYik2yUtSWkT0vHZklrS9icl/U7SQ5J+KOn7KX26pBNyZa7ObX9J0jxJSyVdUNe7NjMzs82eKxZmtXU9cLKkQcBIsnUmOvJB4NmIODhNN3tH/qCkXYELgLHAu4EOu0dJGg/sDRwKjAKaJR1R5EbMzMysttSgR3dxxcKshiJiKTCcrLViZpWnLQOOlvRNSe+JiFfKjr8TmB0RKyPideCGKsocnx6LgIXAfmQVjU1ImixpvqT5K19YWWW4ZmZmZm/lioVZ7c0ALiXXDSp5g03fc4MAIuJ3wGiyCsbXJBWZL/HNMiU1AVukdAEXR8So9HhHRPxX+ckRMTUiWiKiZacddypwWTMzM7NNuWJhVnvTgAsiYllZ+pNkFQgkjQb2TNu7AX+LiGuBS0p5cuYC75W0g6QBQH5i9yeB5rR9LFBaiGAWcLqkoekab5O0c9dvzczMzDpF2eDtRjy6i2eFMquxiHgGuLzCoZuBT0haQVZZ+F1KPwi4RFIrsB74bFl5z0maAswBXgYW5w7/EPgfSUvIxmasSefcKWl/YE76A7MaOA14vhb3aGZmZlbOFQuzGomIoRXSZgOz0/ZasnEP5Z4ka2EoP3dcbvu/gf8GkDQJaEnpfwHelTvty7lzLgMuK3YXZmZmVg9eIM/MzMzMzKwKbrEw62UiYjowvZvDKGTlq68Xyr/z1gPrFMnm
57X1G6rOO2hAv0Jl92uqbz/eevYT7uuLVPUVRV6/AKvuvbBQ/t1O/1nVeZ+ddkqhsuv9/iiip73ee1o8jdTX790tFmZ1JukMSdt1dxxmZmZm9eSKhVknSApJ1+b2+0taKem2snznA6siYlUb5by5unY71/qRpAPS9pOSdkzbq9s7z8zMzHqWvr5AnrtCmXXOGmCEpMFpUPbRwJ/KM0VEsXb5CiLi010tw8zMzKze3GJh1nkzgWPS9kRyC+JJGiJpmqSHJC2SdFxKHyzpekmPSLoFGJw758q0CvYKSRfk0qtp1fiSpHmSlpbOTTHcLmmJpOWSJtTu1s3MzKwoqTGP7uIWC7POux44P3V/Gkm2MN570rFzgXsi4nRJ2wIPSfoVcAbZYnj7SxoJLMyVd25EvCSpH3C3pJERsbSjICSNB/YGDiVrAZ0h6QhgJ+DZiDgm5dumFjdtZmZmVolbLMw6KX3oH07WWjGz7PB44BxJi8nWsRgEDAOOAK7NnZ+vOJwkaSGwCDgQOKDKUManxyKyisp+ZBWNZcDRkr4p6T0R8Ur5iZImp1aS+StfWFnl5czMzKyobB0LNeTRXdxiYdY1M4BLgXHADrl0AcdHxGP5zG1NMydpT+BsYExErJI0nawyUg0BF0fE1RXKHQ18GPiapLvLx3xExFRgKkBzc0tUeT0zMzOzt3CLhVnXTAMuiIhlZemzgLOUahKSDknpvwFOSWkjyLpQAWxNNiD8FUm7AB8qEMMs4HRJQ1O5b5O0s6TdyLpdXQtcAowufHdmZmZWMx5jYWZtiohngMsrHLoI+C6wVFIT8ATwEeBK4L8lPQI8AixI5SyRtAh4FHgauL9ADHdK2h+Yk+oxq4HTgHcAl0hqBdYDn+3UTZqZmZlVwRULs06IiKEV0maTjacgTUF7RoU8a4GT2yhzUhvp43LbwyvFEBGXAZeVnfoHstYMMzMz63ZC3brKRP25YmFmnfLGhtaq8+689cA6RmLtGTSgX3eH0GkR1Q/7aWv8Uq2sfu2NQvmHDvJ/r7VQ79fvs9NOqTrvdsdfVajsF296y3dL7Wpq6tsfOG3z4DEWZmZmZmbWZa5YWLeSdIak7bo7DjMzM7N66+uDt12xsJqTFJKuze33l7QyLSSXz3c+sCoiVrVRTocrTrdxXoukSgOqa0rS/0mraP9B0gJJMyXtI2lc+b3mzvmRpGrXp+hMTA/Uq2wzMzOz9rgTqNXDGmCEpMFpsPLRwJ/KM5WvqVArETEfmF+PskvSNLK3ANdExMkp7WBglw5i+3Qb5fWLiA1VXLd/RLTZ2TsiDu+oDDMzM2u80gJ5fZlbLKxeZgLHpO2JwHWlA5KGSJom6SFJiyQdl9IHpxaARyTdAgzOnTNe0hxJCyXdlFuzYYykByQtSeVtlW8xkDQlXWu2pMclfT5X5mnpnMWSrpZUZJTgkcD6iHhzNF9ELImI+9LuUEk/l/SopJ/m1rN4sxVG0mpJ35a0BDhM0pOSdkzHWiTNzt3DTyTdD/xE0k6S7pK0IrWAPJU7b3Xu/r4kaZ6kpZIuKHBvZmZmZoW5YmH1cj1wsqRBZIvAzc0dOxe4JyIOJfuAfomkIWTrLPwtIvYHvgo0A6QPzecBR0XEaLLWiH+VtAVwA/CFiDgYOApYWyGW/YAPAIcCX5U0IK37MAEYGxGjgA3AqQXubwRpDYo2HAL8C3AA8HZgbIU8Q4C5EXFwRPy2g+sdQHb/E8mem3si4kDg58Cw8sySxgN7k93zKKBZ0hEV8k2WNF/S/JUvrOwgBDMzM+u0Bo2v8AJ51udExFJJw8laK2aWHR4PHCvp7LQ/iOzD8RGkxebS+UvT8XeRfbC+P33xvwUwB9gXeC4i5qVz/goVp528PSLWAeskPU/WXen9ZBWXeSn/YOD5rt53zkNp8TwkLQaGA+WVhw3AzVWWNyN1KwN4N/AxgIi4Q1KlMSrj02NR2h9KVtH4TT5TREwFpgI0N7dUP7enmZmZWRlXLKye
ZgCXAuOAHXLpAo6PiMfymduZh17AXenb+nz+g6qMY11uewPZ615k4yO+0uZFpXcCV6fd8yNiRu7wCuCEgtcs91rZuIo32NiKOKgs75p2rlWJgIsj4uoOc5qZmVlDdGdrQiO4K5TV0zTggohYVpY+CzgrN+7gkJT+G+CUlDaCrAsVwIPAWEnvSMeGSNoHeAzYVdKYlL6VpGory3cDJ0jaOZ27vaQ98hkiYm5EjEqPGWXn3wMMlDS5lCBppKT3VHn9Sp4kdf8Cjm8n3/3ASema44FK0/XOAk7PjUV5W+lezczMzOrBFQurm4h4JiIqTft6ETAAWCppRdoHuJJs0PMjwIWkMQwRsRKYBFyXukfNAfaLiNfJxkl8Lw2Avou3ftPfVmwPk43buDOVeRewa4F7C7LuSEel6WZXABcDf662jAouAC6TNJ+slaO9fOMlLQdOTNd8tSy+O4GfAXMkLSMbi7FVF2IzMzOzLlKD/nXb/WWfj8yst5A0ENgQEW9IOgy4Mg1A75Lm5pa4f271s/S+saG16rz9+/k7DCuuyP9P7XSlrInVr7U5y3NFQwe5p3Ffs93xV3WcKefFm84olL+pqY/3kemBxr6zhQUL5jfsid9nxKj4wU2/asi1xh+w04KIKLwWWFf5L59Z7zMMuFFSE/A68JnuCKKelYXW1mJfePg/5Noo+kVT0e+levPvacjAIrNRF1P0ea93JcoqW3XzmYXyb3f014qVf9d5hfJb7yOgF/8ZrIorFmY9hKQzgBvbWom8JCJ+TzadrZmZmVmP4f4JZnUkKSRdm9vvL2llaQG/XPr5wKpSpULSKEkfzh0/VtI5DQvczMzMaq6vj7Fwi4VZfa0BRkganNahOBr4U3mmiLiwLGkU0EJaAyTNSlU+M1WbJPWPiGKdws3MzMy6wC0WZvU3EzgmbU8ErisdSFPnTpP0kKRFko5LK4pfCEyQtFjSBEmTJH0/nbOTpJslzUuPsSl9iqSfSLof+ImkA1O5iyUtlbR3Y2/bzMzM8vr6ytuuWJjV3/XAyZIGka3NMTd37Fzgnog4FDgSuIRsKt7zgRvSGho3lJV3GfCdiBhDtt7Fj3LHDgCOSosJnglclmaMagGeqf2tmZmZmWXcFcqsziJiqaThZK0VM8sOjweOlXR22h9ENutTe44CDsjNDLN1aSE8YEbqcgXZeh/nSvo74Bdp0Pcm0gJ/kwF2H9bRZc3MzMza5oqFWWPMAC4FxgE75NIFHB8Rj+UzS3pnO2U1Ae+KiNfKzoFsTAcAEfEzSXPJumHNlHRGRNyTPycipgJTIVvHouA9mZmZWQHdObC6EdwVyqwxpgEXRMSysvRZwFlKtQJJpWlkX6XtlbLvBM4q7UiquDiepLcDj6fVz/+HrBuWmZmZWV24YmHWABHxTPqAX+4isjEVSyWtSPsA95J1d1osaULZOZ8HWtKA7IfJxlJUchKwXNJiYATw4y7fiJmZmXVKaYG8Rjy6i7tCmdVRRAytkDYbmJ221wJnVMjzEjCmLHl6OvYCUF7ZICKmlO1/A/hGZ+I2MzMzK8oVCzMzMzOzuuvexesawRULMwMggA2t1Y/f7lfHttam7mzH7WPWv9Fadd6iz3s9XwPw5oQEPcLv/7y6UP59dm1riNRb9aT7tLbN/cNLhfKvuuu8QvnP/uUjhfJf+vf7F8pv1giuWJiZmZmZ1Vs3L17XCB68bdYOSRvSAOrlkm6StKWk4ZKWd3ds5SS1SKo0QNzMzMys7lyxMGvf2rT69QjgddqegalqkurSUhgR8yPi8/Uo28zMzLpODXp0F1cszKp3H/COtN1P0g8lrZB0p6TBAJI+I2mepCWSbpa0ZUqfLumqtGDdt9L+lZIelPS4pHGSpkl6RNL00gVTnvnpOhfk0sdIeiBd5yFJW6UybkvHh6TyHpK0SNJxjXqSzMzMbPPkioVZFVIrw4eA0gJ3ewM/iIgDgZeB41P6LyJiTEQcDDwCfCpXzN8Bh0fEv6b9
7YDDgC+Srcz9HeBA4KDconfnRkQL2eJ275U0UtIWwA3AF9J1jgLWloV8LnBPRBwKHAlcImlIl58IMzMz65RsHQs15NFdXLEwa9/gtMDcfOCPwH+l9CciYnHaXgAMT9sjJN0naRlwKllFoeSmiNiQ2/9lRARZZeUvEbEsIlqBFbnyTpK0EFiUyjoA2Bd4LiLmAUTEXyPijbK4xwPnpNhnA4OAYeU3J2lyahGZ/8ILK6t+UszMzMzKeVYos/atjYhR+YQ0NeS6XNIGYHDang58NCKWSJoEjMvlW1NWdqmM1rLyWoH+kvYEzgbGRMSq1EVqUJVxCzg+Ih5rL1NETAWmAoxubql+rlkzMzMrrI9PCuUWC7Ma2wp4TtIAshaLrtiarDLyiqRdyLpiATwG7CppDEAaX1H+JcEs4CylWpCkQ7oYi5mZmVm73GJhVlv/AcwFVqaf1a+SVSa1eiwCHgWeBu5P6a9LmgB8Lw0aX0s2ziLvIuC7wFJJTcATwEc6G4uZmZnVQB9vsnDFwqwdETG0QtqTwIjc/qW57SuBKyucM6mt/QrlTaq0XXb+POBdZcmz04OIWAucUelcMzMzs3pwxcLMzMzMrAHUx5ssXLEwMyBrne3XVJ8/eK2txcaFN9Upjs3RgP71G0q3bv2GjjPlDBzQr1D+Z1eVz6Lctt22G9xxppyir8m37+zZmjd3Y/bcrlD+oq+xS/9+/0L5R503q+q886YcXajsev7dgGJ/O4r+3bD/n70zj9tsrv//82WMZhiGIiFSiCSmmSEkqaS0UFGSFkuhRduXH1JopdIiWpAlQpKS7EsNsmRmGGNpt6SUpSxjN+P1++PzOXOf+7rPOdd17rk3vJ/343rc1znX+3zO55zrnHN93p/3NrpE8HYQBEEQBEEQBItMKBZBUELSAklzJN0o6XRJS0paXdKNNfJfkrRlfj9D0vQR6OPBkvau+ezK4d5/EARBEASDQxqZ12gRikUQ9OdR21Nsrwc8AezZJGz7QNsXj0zXumN709HuQxAEQRAEz05CsQiCei4H1szvx0k6RtJNki7MaV6RdIKk7Ts3lLSjpBuy5ePred24LH9j/uwzef0USVdLmivpV5KWy+tnSDq8ZEHZqLSLdfPnt0j6ZGm/D5Xe7yNpZm73i0N+doIgCIIgaIVG6DVahGIRBBXkgnNbAzfkVWsB37f9cuB+YLuGbVcGvg68HpgCbCjpHfn9KrbXs/0K4Pi8yYnAvrbXz/s7qNTckrny98eA40rr1wHeBGwEHJQL8pX7sFXu80Z5v9Mkbd7uLARBEARBEPROKBZB0J+JkuYAs4B/AMfm9bfanpPfzwZWb2hjQ2CG7XtszwdOBjYHbgFeIukISW8GHpQ0GVjW9qV5259k2YJTAWxfBiwjadm8/hzbj9u+F7gbWLGjD1vl13XAtSRFZK3OjkraXdIsSbPuufeehkMKgiAIgmCReYabLCLdbBD059FsIViIUhTU46VVC4B2uS0B2/dJ2oBkadgTeA/wmW6b1Sx39qfzXhZwiO2juvTpaOBogGnTprfLjRgEQRAEQVAiLBZBMPRcA7xW0vKSxgE7ApdKWh5YzPYZwOeBqbYfAO6T9Jq87QeAS0tt7QAgaTPggSzfCxcAu0qalLdfRdLzF/nIgiAIgiAYFMmYMDJ/o0VYLIJgiLH9b0n7Ab8jPUfOsf3rbK04XlKh0O+f/38I+JGkJUnuUruUmntM0nXAeGDXFn24UNLLgKuyxeUh4P0kt6kgCIIgCIIhJxSLIChhe1LFutuA9UrLh5Xe71x6v0Xp/ank+IjSuuuBqRXtzwE2runST21/ukP+4I7lct8mld4fDhxe024QBEEQBCPJKNeYGAlCsQiCAEjBGwue6j3MYtxivT8dF2shC2APb7iHWj7Z2/SnbdtPZ54zftywtr/ycq1DmXqm9dc0jJdk2+u97TU2f8FTreQXH9fOS/rZcn+07Xqb5ynAYi3dV+Z85U09yy63w7Hd
hUrcd9pureTbMtzPjmD0CMUiCMYoZQtIEARBEARPf56+qnVvRPB2EFQg6eWSthntfgRBEARBEDxdGDbFQtKCUsXg0yUtKWl1STcOQdt7SvrgUPRzkPsvjm3lvLxrrqQ8Nx/vtg3brizpFyPX28FRV1F6EO1cORT9WcQ+rC7pfaXlnSUd2SC/GnAAMGOI9r+spI+VlkfsGpC0Rr5WH+ouHQRBEATBsPIMr2MxnBaLR21PyYGlT5Dy9g8Jtn9k+8Sham8QFMd2p6QXkgahm+XKyRsDc+s2tH2n7UUesD9dsL3paPeBVMzufd2ECmz/w/b7bD84RPtfllQ5u2h/xK4B23/vrMsRBEEQBEEwHIyUK9TlwJr5/ThJx0i6SdKFkibmWdVrC2FJaxXLkg6VdHO2BhyW1x0sae/8fpqk6/Prm4VFpHNWWtLZkrbI77eSdJWka7M1ZUAmoBY8H5hHSueJ7Yds35r3s6aki3Pfrs3HuXqpj+Nyn2fm49sjr99C0gxJv5D0J0knK0e8SdpQ0pW5zWskLd3QzkqSLitZjl7T2XlJt0n6Rra4XCNpzdLHm+d93VJYLySdKOkdpe1PlrRtdh26Ju9rrqS18ucPlWT3zfu5XtKhed0USVfnbX4labmqkyzpoXyMN+VzulE+R7couyzlc3t5PtfXSiqUmkOB1+S+FQXpVpZ0vqS/SvpGaT+V10Y+T4fkNmZJmirpAkl/l7Rnlpkk6ZK87Q3qs1wdChSWg292XAMTJf1M0h/z8f9B0vSKc7e9pBPy+xUknZG/75mSXp3XvzbvY46k6yQtXXUugyAIgiAYDUaqisXomSyGXbGQtDiwNXBDXrUW8H3bLwfuB7az/XfgAUnFzOoupHz/zwPeCbw8WwO+UrGL44G9bG/QY3+WJxUn29L2VGAW8NnBHR0A1wN3AbdKOl7S20ufnUw61g2ATYF/d2y7G6no2YbAhsBHJL04f/ZK4NPAusBLgFdLWgI4DfhUbnNL4NGGdt4HXJBnrDcA5tQcwwO2XwEcCXy3tH4lYDPgbaTBMcCxwM4Akibn4zqHZJE6PO9rOvDP8g4kbQ1sC7wq970YzJ8I7Ju/3xuAg2r6uBTw23zdzCNdC28kXR9fyjJ3A2/M3+sOwPfy+v2Ay7OV6Tt53ZQs8wpgB0mr9nBt/CMf3+XACcD2JAvVF/PnjwHvzNu+DvhWVgj3A/6e979Px3F9FHjE9svysU+rOf4yhwPfyd/3dsCP8/q9gY/nPr6GdG00Imn3rCjNuvfee3rYdRAEQRAEzwQkvVnSnyX9Tan+Vufnn1Xf5P4lkl7Urc3hzAo1UVIxkL2cNCBdGbg15+0HmE1yU4E0ONpF0mdJA76NgAdIg7VjJZ0NnF3egaRlgWVtX5ZXnURSYprYmDRYvyKN+VgCuGowBwhge4GkN5MG9G8AviNpGvAtYBXbv8pyj+U+lzffClhffbEMk0mK1xPANbb/mbeZQzpPDwD/tj0zt/lg/ryunZnAcZLGA2eWznsnp5b+f6e0/kzbTwE3S1ox7/NSST+QtAJpUHuG7fmSrgIOUHIN+6Xtv3bsY0vgeNuP5Hb+lxWTZW0XlaZ/Apxe08cngPPz+xuAx20/KekG+q6h8cCRWUFdALy0pi2AS4oq1pJuBl5EcllqujbOKu1/ku15wDxJj+dr8WHga5I2B54CVgFWbOgDwOZkBcj2XEm1bnQltgTWLV1Ly2TLyhXAtyWdTPoO/lnXQIHto4GjAaZOmz68OV6DIAiCIBgTSBoHfJ80SftPYKaks2zfXBK7Dphu+xFJHyVNCu/Q1O5wKhaPdvp254HQ46VVC4AiUfkZpBnb3wKzbf83b7MRacC+PfAJ4PU97n8+/S0yE4puABfZ3rFuQ0mrAr/Jiz+y/aOmHTkl8b4GuEbSRSQryrd66KNI1pYLOva/BQPPU9N3VdlObmtz4K3ACZK+XROb4pr35T6UNaITSVWc30uuEm37FEl/yPs6V9Ietn/b
0Of6g0kX++y8eJbtA4En3Zcs/amib7afylYxgM+QrEcbkL77xxp2U3V+u10bxTZP0X/7p/L2OwErANOy0nMbfdfdYCh/F+V2FgM2LpTVEodKOgd4C0k5epPtPy3C/oMgCIIgGELGUCmXjYC/2b4FQNLPSJ4lCxUL278ryV9NGvs1MmbSzeZB0gXAD0kDc/Is7GTb55IGjRt0bHM/cL+kzfKqnUof3wZMkbRYVhQ2yuuvJrkVrZn3sZSkfjPbtu/IbitTuikVShl+ytWUpxh8QbIAACAASURBVAC359nsfyrHI0h6jqQlOza/APhotigg6aWSlmrY3Z+BlSRtmOWXzoPqynayyeou28eQLEIDqj5ndij978V6cwLJTYtCs5X0EuAW298Dfg2s37HNRSSL1JJZ/rnZYnCf+mI/PgBcantB6fwf2EN/CiaTLDpP5baKCjzzgF7iDbpeGz3s/+6sVLyOZAXptv/LyIHlktaj/3m7S9LLJC1GcvkquBDYq1goXAglrWH7BttfJ1mr1mnR9yAIgiAInj2sAtxRWv5nXlfHbsB53RodawXyTiYNoC7My0sDv5Y0gTSbXBULsQvJ3cel7SC5hdxK0rz+CFwLYPseSTsDp0p6Tpb9PPCXQfZ5PHCYUurZx4B76MuA9QHgKElfAp4E3k2a3S74McmN59rsi38P8A5qsP2EpB2AIyRNJPnQb9nQzhbAPpKeJAWX16XoXS674DwO1FpySv24S9IfgTNLq98DfCDv6z/A1zq2OT8PgGdJegI4F/gc8CHgR1nhuIVsARkkPwDOUEpFfD7JNQlSlq4Fkq4nKUX31RzXol4bJwO/ye5Zs4A/5Xb/K+kKpYDt80imx4IfkuKJ/ki6TmeXPtuP5P53T26vSDLwSeD7+TtbnKSc7Al8Ois0TwE30cMDIAiCIAiCkWGEM8EuL2lWafno7P7cGknvJ8XPvrarbJ93yeijlOlpsu0vDHL71YGzc4rbYUPSQ7YXJZPUmCG760y3fW+LbZYkxRlMLeIUgqFB0gxgb9uzusm2bLfrNTt12nRfftXMntsct9jwPR6H+7mklrboNv1p23YwOrS9xhY81U5+8XG9OwS07Uvba2z+gqe6C5Vo03d49twfY+maactyOxzbSv6+03Ybpp6MLV79qunMnj1rxC7Kl68/1aecfWl3wSFgyouWmW17et3nkjYBDrb9pry8P4DtQzrktgSOAF5r++5u+x0zFgtJvwLWoPcYitHkwRxQ/Rbbd452Z0aSfIEdS8pKFErFGEfSGqT4pbu6yjK8ykIbxtrgY6z1Z7h44JEnW8nf8d9HWsmvt+rkVvKPPbmglXwb7n7g8e5CJVZbvtOTdeh4fH67gf+E8eO6C5UYzgErPHvuj7bH+fDj81vJT16y3ff01//0Xve0raLQpm2AtV7wjJhrHRnGzu0yE1hLKYvov0hxs/1qfkl6JXAU8OZelAoYQ4qF7Xd2l+raxm3AsFor8n5WHu59jBS2V28pfzF9sQPBEGN7iyFu7++kuJ8gCIIgCAIAckbPT5DidMcBx9m+Kbvvz7J9FvBNkhv26Vmx/oftbZraHTOKRRA8nZG0gOQetjgpVuJDRWrdCtmdSe5nn1AqrveI7RPz+gsLK5ikHwPf7kj9FgRBEATB05TRLF7XSU6OdG7HugNL77ds2+aYyQoVBE9zHs1ZrNYj1dzYs9sGALZ/VEoBvDOp1kvx2YdDqQiCIAiC4OlCKBZBMPRcDqwp6bmSzlSqWHm1pM4UvEg6WNLeSsUNpwMnS5ojaaKkGZKmZ7ldJP1F0jWSjpF0ZF5/gvoKIyLpodL7fSTNzPv/Yue+gyAIgiAYWaSReY0WoVgEwRCS64psTXKL+iJwne31Sal1q4oTAmD7F6SUsjtly8ejpTZXym29GtiMVB28Wz+2IlVf34gUYzFNqVhiEARBEATBsBAxFkEwNEzMmcIgWSyOBf4AbAdg+7eSnidpmUG0/Spghu17ACSd
BnQr3LdVfl2XlyeRFI3LykKSdgd2B1h1tdUG0bUgCIIgCHpl7ERYDA+hWATB0PCo7X7Zl0YoBeN8suUxV+heotg9cIjto5o2zsVyjgaYNm362ClqEwRBEATB045whQqC4eNyYCcASVsA99p+sEF+HqnafCd/AF6bLR7jSRXcC24DpuX325AqwUNKH7erpEl5/6tIev4gjyMIgiAIgkVFI/gaJcJiEQTDx8HAcZLmAo8AH+oifwLwI0mPApsUK23/W9LBwFXA/cCc0jbHAL+WdD1wPvBw3uZCSS8DrsqWk4eA9wM9FbgJgiAIgiBoSygWQTAE2B5QdtT2/4B3VKw/gaREYPvg0vozSFWyC7YofXY8cDz01cHI6+8CNi5ts29pm8OBw9seSxAEQRAEw8NYqmMxHIQrVBAEQRAEQRAEi0xYLILgaUbZ4hEEQ8nkJcd3F+onP3mYepKYMH5cz7LzFzzVqu3Vll+ybXeGjTbHGTx9eM7iwzt3u9YLBhjKR63t5d7QrlTSfZcc1Eo+ePoQFosgGINI+rikNUa7H0EQBEEQDA0iCuQFQTBMSFqQq2zfKOl0SUvm9R8H/gd8QdJSXdq4TdLyI9HfIAiCIAiCJkKxCILR49FcZXs94AlgTwDb37d9qu2dbT88ul0MgiAIgmCoeIZnmw3FIgjGCJcDa0paStJxkq6RdJ2kbQEkjZN0WLZuzJW0V2nbvSRdK+kGSetk+cp2giAIgiAIhosI3g6CUUbS4sDWpDoUBwC/tb2rpGWBayRdDHwQWB2YYnu+pOeWmrjX9lRJHwP2Bj5c105YQIIgCIJgFHlmZ5sNi0UQjCITJc0BZgH/AI4FtgL2y+tnABOA1YAtgaNsz4eFNTIKfpn/zyYpHzS00w9Ju0uaJWnWPffeM6QHFwRBEATBs4uwWATB6PGo7SnlFUplsrez/eeO9U3tPJ7/L6Dvnq5spxPbRwNHA0ybNt29dz0IgiAIgrZEgbwgCEaSC0gxEwKQ9Mq8/iJgj+w2RYcrVJt2giAIgiAIhoVQLIJgbPFlYDwwV9JNeRngxyR3qbmSrgfeN8h2giAIgiAYJZ7pdSzCFSoIRgnbA0qb2n4U2KNi/Xzgs/lVXr966f0sYIumdoIgCIIgCIaLUCyCIAiCIAiCYAR4ZkdYhGIRBMEIMH/BU63kFx8XXppPB8bS99q27cefXNBK/jnjx7WSD4Lxiw/vc8zuPd9GlwQgi8x9lxzUSn65bY/ove1f79VdKBgzhGIRBEEQBEEQBCPBM9xkEdOCQTDESFogaU6ukv2bXKCuSf5gSXvn91+StGUX+W0k7TeUfQ6CIAiCIFhUwmIRBEPPwvoUkn4CfBz4ai8b2j6wB5mzgLMWqYdBEARBEIwoIupYBEGwaFwFrAIgaQ1J50uaLelySet0Cks6QdL2+f1bJP0py39P0tl5/c6SjszvV5f0W0lzJV0iabVSO9+TdKWkW4o2gyAIgiAIhotQLIJgmJA0DngDfdaFo4G9bE8D9gZ+0LDtBOAoYOssv0KN6BHAT2yvD5wMfK/02UrAZsDbgEMX4VCCIAiCIAi6Eq5QQTD0TJQ0h2Sp+CNwkaRJwKbA6aXsHM9paGMd4Bbbt+blU4HdK+Q2Ad6V358EfKP02Zm2nwJulrRi1U4k7V60u+pqq3U7riAIgiAIBssoF68bCcJiEQRDTxFj8SKSS+XHSffa/banlF4vG+Z+PF56X/kos3207em2p6+wfJ1RJAiCIAiCoDuhWATBMGH7EeCTwP8BjwC3Sno3gBIbNGz+Z+AlklbPyzvUyF0JvDe/3wm4fBG7HQRBEATBMKEReo0WoVgEwTBi+zpgLrAjaeC/m6TrgZuAbRu2exT4GHC+pNnAPOCBCtG9gF0kzQU+AHxqaI8gCIIgCIKgNyLGIgiGGNuTOpbfXlp8c4X8waX3O5c++p3tdZSCMr4PzMoyJwAn5Pe3A6+vaHPnjuVJnTJBEARBEIwwz/AYi1AsgmDs
8hFJHwKWAK4jZYl6WrL4uKe3cfS/Dz3Rs+zzJi0xjD0ZWzzw6PxW8sN5btp8RwB/+c+8VvKbrPm8VvLPJsbS/dH2OhjO/tz/yJOt5Nv2RS2igMfSeQG479d79Sw71voeNBOKRRCMUWx/B/jOaPcjCIIgCIKhQFEgLwiCIAiCIAiCoBuhWARBSyQdIOmmXO16jqRXDWHbn1uEbRdW5A6CIAiCYOwhjcxrtAjFIghaIGkTUiXrqbna9ZbAHUO4i0ErFkEQBEEQBKNJKBZB0I6VgHttPw5g+17bd0q6TdI3JN0g6RpJawJIWkHSGZJm5ter8/pJko7P8nMlbSfpUHLVbkknZ7kzJc3OFpKFlbclvVnStZKul3RJZyfr9hsEQRAEwegwUjUsRjOKI4K3g6AdFwIHSvoLcDFwmu1L82cP2H6FpA8C3yVZNg4HvmP795JWAy4AXgZ8oZAHkLSc7TMkfSJX7S7Y1fb/JE0EZko6gzQhcAywue1bJT23op91++1HVlZ2B1h1tdUW7cwEQRAEQfCsJhSLIGiB7YckTQNeA7wOOE3SfvnjU0v/i2xOWwLrltICLiNpUl5fVMzG9n01u/ykpHfm96sCawErAJfZvjVv+7+K7Sr3a/uhjuM5GjgaYNq06W469iAIgiAIFpFndlKoUCyCoC22FwAzgBmSbgA+VHxUFsv/FwM2tv1YuY1e8o9L2oKkIGxi+xFJM4AJPXazcr9BEARBEATDRcRYBEELJK0taa3SqinA7fn9DqX/V+X3FwILKwFJKtycLgI+Xlq/XH77pKTx+f1k4L6sVKwDbJzXXw1sLunFedsqV6i6/QZBEARBMEpohP5Gi1AsgqAdk4CfSLpZ0lxgXeDg/Nlyed2ngM/kdZ8EpucA7ZuBPfP6r2T5GyVdT3KrguSWNDcHb58PLC7pj8ChJIUC2/eQ4iJ+mbc9raKfdfsNgiAIgiAYFsIVKghaYHs2sGnn+uza9E3b+3bI30ufJaO8/iH6XKjK6/cFym1sXdOP84DzOtadAJzQtN8gCIIgCILhIhSLIAiGnceeXNBKfsL4ccPUk8Gx1HPGVn/GCs+btMSwtt/mumnbl5essFTb7gQ1DPd10IboSzVP52dY2/P4iv3P6y6UueNfD7TtziIzmsXrRoJQLIJgCLC9+mj3IQiCIAiCYDSJGIvgWY+kA3IBurm5ON2rRqEPO0taubT8Y0nr5vef65C9cqT7FwRBEATBovNML5AXikXwrEbSJqRCdlNtr09K73rHMO1LkuruuZ2BhYqF7Q/bvjkv9lMsbA+I8QiCIAiCIBhtQrEInu2sBNxr+3FIQc+275R0m6TlASRNzzUkkHSwpJMkXSXpr5I+UjQkaR9JM7Pl44t53eqS/izpROBGYFVJJ+RsUDdI+oyk7YHpwMnZYjJR0oy830OBiXn9ybnNh/J/Sfpmqa0d8vot8va/kPQnSSerl8IZQRAEQRAMH0oxFiPxGi0ixiJ4tnMhcKCkvwAXA6fZvrTLNuuTakosBVwn6RxgPVJV7I1IVsizJG0O/COv/5Dtq3PV7lVsrwcgaVnb90v6BLC37Vl5PQC295P0CdtVdSjeRaqjsQGwPDBT0mX5s1cCLwfuBK4AXg38vuW5CYIgCIIg6JmwWATPanLa12mkuhD3AKdJ2rnLZr+2/WhO6fo7kjKxVX5dB1wLrENSKABut311fn8L8BJJR0h6M/DgInR/M+BU2wts3wVcCmyYP7vG9j9tPwXMAVavakDS7pJmSZp1z733LEJXgiAIgiDozjM7yiIsFsGzHtsLgBnADEk3kOpLzKdP8Z7QuUnFsoBDbB9V/kDS6sDDpX3dJ2kD4E2konXvAXYdiuPo4PHS+wXU3Ou2jyYV5WPatOmdxxUEQRAEQdAzYbEIntVIWlvSWqVVU4DbgdtIlgyA7To221bSBEnPA7YAZgIXALtKmpTbXUXS8yv2tzywmO0zgM8DU/NH84Cla7r5pKTxFesvB3aQ
NE7SCsDmwDVNxxsEQRAEweggIsYiCJ7pTAKOkLQsyUrxN5Jb1MuAYyV9mWTNKDOX5AK1PPBl23cCd0p6GXBVjo94CHg/yVpQZhXg+FJ2qP3z/xOAH0l6FNikY5ujgbmSrrW9U2n9r7Ls9SSryf+z/R9J67Q7BUEQBEEQBItOKBbBsxrbs4Gq9K2XAy+t2Wyu7Q9WtHU4cHiF/Holmevps1KUtz0DOKO0aovSZ/sC+5aWJ+X/BvbJr3JbMygpQ7Y/UXMcQRAEQRCMIM/0FI3hChUEQRAEQRAEwSITFosgaIHtg0e7D8PFtdfOvnfieN1e8dHywL0tmmojP5xtD7f8WOpLW/mx1Je28mOpL23lx1Jf2sqPpb60lY++DI38WOpLW/k62Re12N+Q8EyvKhWKRRAEANheoWq9pFm2p/faThv54Wx7uOXHUl/ayo+lvrSVH0t9aSs/lvrSVn4s9aWtfPRlaOTHUl/ayrdtOxg8oVgEQRAEQRAEwQigZ3iURcRYBEEQBEEQBEGwyIRiEQRBN44eRvnhbHu45cdSX9rKj6W+tJUfS31pKz+W+tJWfiz1pa189GVo5MdSX9rKt207GCRKGSuDIAiCIAiCIBguNnjlNF9w6dUjsq+VJi8xezTiSsJiEQRBEARBEATBIhPB20EQBEEQBEEwAjyzQ7fDYhEEQdAKSUuOdh/GIpI2k7RLfr+CpBd3kX+RpC3z+4mSlh6Jfi4qSrxf0oF5eTVJG41if56W5/HpTtvrPQieLYRiEQRBPyStKOlYSefl5XUl7dZlm9aDm14G6JKWlPQFScfk5bUkva1B/lOSlsmDv2MlXStpq2776QVJm0q6GfhTXt5A0g+Gou3c3hqSnpPfbyHpk5KW7bJN28H8RElrt+hTT/KSDgL2BfbPq8YDP22Q/wjwC+CovOqFwJkN8m2vg+Ec/P8A2ATYMS/PA75f048Dq15Njbc51rbnsWL75SSt36t8D+29VNIlkm7My+tL+vxQtT+I/vR8f0h6d/HckvR5Sb+UNLVGtu31Pph7u+292niskh7s8pon6S+DaXtR+96WXn47SrKbSnqfpA8Wr+HqV2/9GbnXaBGKRRAEnZwAXACsnJf/Any6TriXwY2k8aX3bQboxwOPkwZyAP8CvtLQ911tPwhsBSwHfAA4tEG+jTLyHeBNwH8BbF8PbN6l7RUkfU7S0ZKOK1414mcACyStScpgsipwSkPbbQc3bwfmAOfn5SmSzhoi+XcC2wAPA9i+E2hSLj8OvBp4MMv/FXh+g3zb66DnwT+0VqZfZfvjwGO57/cBS9TIPlx6LQC2BlZv6De0O9a25xFJM/L1/lzgWuAYSd+ukW07yXAM6Xp8MvdnLvDehr60UkTayLe9P4Av2J4naTNgS+BY4Ic1sm2v97b3dtt7tZdj/bvtZRpeSxfHM4i2F6Xv75L0V0kPlBScB2tkW03uSDoJOAzYDNgwv6JI3jATikUQBJ0sb/vnwFMAtueTBkV19DK42T3/YEO7Afoatr9B30DlEZpdVIvP3gKcZPumLvLQQhmxfUfHqqbzAvBrYDJwMXBO6VXFU/lcvxM4wvY+wEoNbbcd3BwMbATcn+XnAE0zj23kn3BKMWgASUs1tAvwuO0nigVJixfb1tD2Omgz+Id2yvSTksbRd6wrkO+VTmx/q/T6KrAF8JKGfkC7Y217HgEm5+v9XcCJtl9FGkhXcQItJhmAJW1f07FufoN8K0WkpXzb+6O4l98KHG37HOqvmbbXe9t7+2Da3au9HOt2XfpYJzPcz5lvANvYnlwoOLaXqZFtO7kzHXi17Y/Z3iu/PtkgPyJohP5Gi1AsgiDo5GFJz6PvR3Nj4IEG+V4GNz8C3lMstBigPyFpYqkva5Bmc+uYLelCkmJxQXZtqBz0lehVGblD0qaAJY2XtDfwxy5tL2l7X9s/t31G8aqRfVLSjsCHgLPzuvE1stB+cPOk7c7vsWkQ2kb+55KOApbNFqyLSYPA
Oi6V9DlgoqQ3AqcDv2mQb3sd9Dz4z7RRpr8H/Ap4vqSvAr8HvtbQdpklSRa9Jtoca9vzCLC4pJVI9+PZXWTbTjLcm/tb9H174N8N8m0VkTbybe+Pf+VreAfgXCXXpboxUtvrve293fZe7Xqstm8pPpO0WH7/UknbFBblQqZt24vY97tsd3uO9jXUbnLnRuAFvbYdDA2RFSoIgk4+C5wFrCHpCmAFYPsG+c7BzcfoGNzYXgAUM0X9BujAp6gfoB9EMqmvKulkkmVk5ypBSQIOzP29xfYjWUHapcvxFsrIi4H9G5SRPYHDgVVI7ikXkqw1TZwt6S22z+0iR+7nnsBXbd+q5Md8UoN85+BmV5oHNzdJeh8wTtJapO/jyqGQt31Y/u4fBNYGDrR9UUPb+wG7ATcAewDnAj9ukO/5Osh0Dv63B5p8/XtWpm2fLGk28AaSAvqOuoGRpBvoG1SNI12bX2roB7Q71rbnkbz/C4ArbM+U9BLgrzWybScZPk5y9VlH0r+AW4H3N8i3VUTayLe9P94DvBk4zPb9Wfnap0pwENd723u77b3a5lgvA14jaTnSM2wmSZnaaQjaHkzfZ0k6jeQ+u1CBtv3LCtk2vx0AywM3S7qmo+1tGrYZfp7haaGiQF4QBAPIVoe1SY/AP9t+skF2MdLgZqssfwHwY9c8XCQtTxqgb5nlLwQ+aft/Fe1uD1wCbJxlr7Z9b0NfbrD9il6Ps7SfKSRl5P48kFolu1ksEpLmAUsBT5DdNwDXmfrzTPVqtv/cY/tvpHTemwY3SgGPB9D/e/qy7cd6kCfLf6VKPg+U/l18lo9jRdu39XIcTQzmOsjbrUPf4P+SpllRpSDdI4D1SLOcKwDbl68BpZiEWjqv37zNi0qL80mzs00z8sV2z6PFsQ4XvZyXmu2WAhazPa+L3EtIisimwH1kRaTuuhmEfM/3R5bfDFjL9vHZyjXJ9q1N23QjW85OtF03cK/aptW9mrfp6VglXWt7qqS9gIm2vyFpju0pi9r2YPou6fiK1ba9a4Vs1W/Hp2z/t6bt11att31pXf+HmylTp/miy/4wIvt6/tLjR6VAXigWQRD0QzVZM2yfOETtv9r2Fd3W5fWz2jwYJf0EONL2zBbbXGL7DXXrJP2//ON7BBUm/aHy2VUKejwMWML2iyVNAb5UN7uWB2+P2V6glIFlbeC8JiVwuJA0C9i0cImTtARpRnzDGvlbqT6XlfEHvV4Hkpax/WCdElAz+B9HmlU9ggZlutTn8nxjseyGvpcHq8sDS3cbrEpaBXgRJa8C25dVyJUtIgUPALNISuCAAZekl5KCkle0vZ5SVqhtbH+lQ66n89KxzbLAB0kB6uW+N94jvSoig5Xvsc2DSD75a9t+qaSVgdNtv7ok83vbm+UJg/J5L66BugmD3wOvL7uMjhaSriNZlb8D7Gb7psFMyDxdkLQiKWgb4Brbd49mf6ZMneaLR0ixWGGUFItwhQqCoJPyYHACadb3WqCfYiHp57bfUzO4wXZdGssjgM40jlXrAC5WimU4jVLGkqoBYuZVwPsl3Zblix/8AX2RNIHk8758dgsoBozLkNydCoqZ7lk1+2xE0jb0BRjOsF3n134wKehxBqnTc/IMbR1ll4bzc/8GuDRI+q7tT0v6DdXfU53ichHwbtv35+XlgJ/ZflOF+OLlQZPtJ7JyUUf5x24C8G6gySLQ63VwCvA2YDalQX/p/4DzmRWzHW1/B7iprgO2W9cpKA9WSdmeliBl1Hl1wzZfJ32PN9HnkmfS993JeSQf8yLD0HtJ1/R/SIHXb6/Y5hiSi89RkAKgJZ1CR+apXs9LB+cCV5Ncs2pjWiS93/ZPJX22Y32x728vinz+rDz4X4IU0/Bw3eCfFKT8StKzDtt3qiNttu3N8v+2tUJuAa5Qyo5Uvn47j7PVvTpIRefTpAD4X2Wl4iXA7+o63vY8ZsV1bwYql6+vkX8h6flf3BOXk6wQ/yzJVE7qlNquVFwl
vQf4JumZKuAISfvY/kVdW8GiE4pFEAT9sL1XeTnPQv6sQvRT+X9tPYGOdjYhuTCs0DFAWIbkf17FDvl/OZahcoCYqRr01rEH6Ud2ZdJAtFAsHgSOXLgz+zf5/0+Kddk9Z5JTdp1aJB1KUtROzqs+la0z+1eIP2n7AfVPQN4UcCynOJLdgB8WLg0VcoUv92FNfa1g+UKpgJRZSVJdKtN7JG1j+ywASdsCte47FTPp31WKW6ir8dDTdWD7bfl/WyXgCklHMlBxubZ4r5qaBlWyJboOVit4B2nWvCk4vWBL2+V+3VBydamLbVjS9jUd11mde1bX89LBBNufrfmsTBEA3OsAva18v8G/0sFuS3Ivq+MJ25bUNUi5xiI2r8Ga8/f8WozmY2h1rw5G0cluQJdml6UiYLvWojSI83g6KVnHj+meNQ+Swn0KaXIBUkzO8cAbSzKDmtQhuWRtWFgpsnvbxaT06KPGaNaYGAlCsQiCoBsPU5Eu0Pa/8//bIbmh0PxMWQKYlGXKP4QPUhMc3naAaPv2Kj/pGtnD86Dpc7a/3K3tPKu7J+nHciawjKTDbX+zYbO3AFNsP5Xb+AlwHX054cu0DXpUVtZ2IsW4QIWCZnt2/t/Wr/gpSavZ/kfe2YuonzXcEzg5n08Bd5BcYuo6Xh4ML0aa1a+9dgZpLejJnShT+JeXA6sNlGdZv9Wwu07Zgp4HqyVuIc0K96JYjJO0kXOmJEkb0ncN1CkLbQKgezkvZU5SCvA9m/7Bsv0sS7YLa8kXa9rpR1v5iu0NnJktSPvViLUJUr6WVIviPtL1vizwH0l3AR8p7rnS/r8IKf7AKX1wXT+L7WYBj5aeG+OA5zQdY5ZZkf7X+z8q5DYh1eiYBKwmaQNgD9sfa2o/t9fLeZxvu67+RxUr2C7HWZwgqV9K4/KkTksW63B9+i+RDXXYCcUiCIJ+dJjhFwPWBX7eIL8H8EVSzYBiu6rZ5GKm7IRCGemhL63iPSpcT4piTpWuJ9nd411AV8UCWNfJf38nkgvKfiRLR5NiAWnQUQysJjfI7UWaYXucNIN3Ac1F4Nq6NLyNdJzFYLvRLxz4HPB7SZdm2dcAu1cJ2v47sLGkSXn5oYZ+Q/9B+nzgNkrpiEt9fr3t3+bvqGq/VZljyu5EN9M3a1rnToTt13Xpb08yFXQOVneje9amR4A5ki6h/+C8alb5w8BxxXknFQL83Vgu9gAAIABJREFUcFZgDqlpv+fMTYM45idI98MBNDwLCiR9r2L1A8As27+ukJ9AOocvJ7nQFf2sCvQtXzOF8lob/Ox2mZ4uAn5h+4K8r61INSCOJxVnfFVHX9oO5i8hBSgX99FEUqDyplXCSoHYBwF30d99rsod9bsky+5Z+bivl1RbC6LX81iy4vxG0sdIWdlqlcsS/83WtVPz8o7kOhUV+2jjnglwvqQLSm3vQHLXG0VGt8bESBCKRRAEnZTN8POB28v+rhXsDazn7ll6vmv708CRxQxumU7/4UxP8R4lBuN6comk7YBf5hm5OsYrpTh8BylA/Mmq4+jgEOA6Sb8jDc43p2KmL882npMHcgd0aRPoU9Ty9osB99b5Gme+SyqKdkOX4yzam0yKeyncHj5d9x0r5fzfjuxXXfJ9r0yt2mLA+lrgt1THChioVCzo0Z1INb77pX5W+e4vSUrJvJrt3bN1aW1XxM50DFZfCnze9sVNfSIN+GorFXe0PxN4haTJebmcCrZyMiC7vmypHgKgJVW6ptV9r8D/AWt2exaUmACsQ3KfgXQN3QpsIOl1+XlR5iRS1eU3kawoO1GfbrR8zRTK67ZNncmKRGPmqMzGtj9S2u5CSYfZ3iPfC520GsyTXMoWKue2Hypcl2r4FOkarByQd2L7jg5XuCaXpV7PYzmuCfqn6m1yX92VFGPxnSx3JfUpwldo4Z6J7X3ys72YWDra9q/q5IOhIRSLIAj6MQiXmb+TZlm70drX373HexQMxvVkD9JAcb6kx6if
yT+K9KN6PXBZdg1qjLGwfaqkGfQpSPva/k+F3AJJT0ma7IHFpSoZhGvWHcCN3ZSK3J+nlLJh/ZzuRdQgVRh/gDS46OrCUzeQL+3/2/n/Qfl/t1oknfTqTtTad580Kz2bvtnjf5EGxgvPk/oHvJZHcHvma+zvwAG2L+lsvI3bh1LGm0OA59neVtJ6pKrjxzZsc2DHcrHfKmXh4dL7CaR4qqa6AX+jt2dBwfqkysgLcl9+SAre3YwUAN7JmrbfLWlb2z/J98DlVQ33es1oYODzwo+ot+j9W9K+9D2LdgDuyhMEdVXY2wzmH5Y01TmWRdI04NEG+Ttori/ST1YtakH0eh4H466Yt7udVNm7Fxa0cM8s2j8DqCtKGgwDoVgEQdCPQfzQ7g9cKekPNLhuePC+/mUq4z1KtC3m1HPgo+3vkQqvFdwuqXHmXdKrgTm2z8rm/v+XB/9VrmAPkYJvL6J/oGydFaKta9b/I1UUvpT+39OAWflMm4xcL7T95pp2qphOUraKmfm3A9dQX6gNSW9loAvMlzpkiuwxPbkTeXC++2vY3kGpkjJOAfT9RoxN11QefK5HCuhfr7R+MFnWTgB+QkofCmk2/1SS200dPSsLtvvFlUg6jOSi19T2nGyh6+bGBbAcyT2oGBQvBTw3K9pVSmERHH1/VqL+A1TOWNe4WS2k6FOv938H7yO5Hp1J+q6uyOvGUeHSR/vCbp8GTpd0J+m5+wL6EhgspKSg3wLMkHQO3e/tVoU+ez2PJfmPAyd3uCvtaPsHHXKDSeN9AD24Z2qQaYFHAhHB20EQPPv4LimY8yTSc3AnYCXbdRl7jiK5qzSmmCxQixoGqo73OL1TrtRG24q4xX6WA9ai/6D1sg6ZFYGvASvb3lrSukDhO13HD0luHRuQrCLHkty4qgo3/ZJ6154q2rpmfZWkvEwgBdJ3o01GrislvcJ21SxzFS8EphZuOJIOJrmCVfr6S/oRKY3q60gxCtuTFJFOiuwxs+nRnSi33zXlZYknlAoAFlaxNegt0BpI1ing+jygKtNzljVJX7b9BZJryM+U4pywPV9SYyaeQSgLZZYkfXd1nJlfvfINkiIygz5Xwa9lS2OVy9jR+V79Aun7nUR9JrEJpOfFaXn53aSYm6vKQmpZ+DArhoe7vuDd3yrWtRrMO1VEX4f0DIP6+iGFUvSP/FqCLvd2dlPruVgfPZ7HEh+x/f3S/u7Lkzw/6JBrncbb9vlKiR8a3TM9+LTAwRAQBfKCIOiHpOttb9BtXemz62y/skX7zystLqxhUKW4qH/l1F7iPVoj6cOkQd0LgTmkH62rbL8+Wxkutv0fSeeR3GAOsL2BUnXy69xQWEp9qT8PBP5l+9hiXY38RGBV23/pod+fBPYluWa9FVgN+Knt19TI32h7varPFhVJNwNrkvzjH4f6+iFZ/s/A+s4xENkvfa7ttWvk59pev/R/EqkYYOWx5m2WIPnvmzQwqy1Olq1Ep9Dnrvd+YCfbb6yQfSPwedJg60KSMrKz7Rl17fdKHrRe7IYYlKxE/MP2eXlAvj3w83y9bgx83XZlxeGa9pYDZtpes+KzsvVkHKny9pdsH1kh27XvNftfiVS/hdyPO9ts39Du1cBmzpXOsxJ+ue2NO+SqCh8WuGbCY9gL3mWLzLr0n+xoLFKqlJnPbo6bOZ7qiZ0BAfBZvqfzWJK/gXRvF4r3ONK9/fKmvvdKL5NAJdmTbH+g27qR5JVTp/u3vx+ZAnnPXWrxKJAXBMGY4OHsXvMz0g/QjvR3n+jkPEm7A7+hhywgblfD4C229y2vkPT1inWLYvr+FMkt52rbr8szhV/Ln10CfJvk5rC87Z9L2j8fR9fZYWBeln8/sLlSUPT4KkGlQnqHkdw9Xp5n5g52TQG7QbhmnStpK9sXdulz0Z+eg5SBrXtps8SJwDWSikDKd5Dceuoo/MsfUaqI/F9gpYa+v4VkSfs76Rp4saQ9bJ9Xs0nXlJe53cVI7jvvIimg
Ilk2eg1WbsS9xdr81HZxP36WNHP/UklXAstTk7q5dAyVykKNeNl6Mh+4qxhgDrLvVWxIcmmBZPGsVSzUkSSgtO+q/i9HqpFTPIcm5XWd/R5MbECvBe8G4+5TZLfbgqRYnEu6v35PTdIKSdNJkx5L5+UHgF3dkfY2U75/J5ASXjQpcz2dxxLnA6cpuaRCimE7v05Y1cUAi+rxR9l+rCRbOQlEffrjfspMngya1tD3YAgIxSIIgk7eRzLbH05//+E6dsz/y7UZmlJMtqlh8EbSrHyZrTvXLaLp+zHbj0lC0nNs/0nS2rm9f0v6aJZ7OFtbipm4jekeMLkD6dztlq0eq9ERAyHpeVnZOpg0c/urvO9r1Vx5uzLugPpB4keBvZV815+ku9LVNUi5wC3qh2T5r2YLUDGg3MX2dXXywNlKgfvfJGX8Ms1pW78NvM7232Chu9I5pFiUKnpKeen+Qe3nNOx/UWiMtSkpFcU1sjnJZUbUu8yU6VlZIN2X/7T9uKQtgO0knehSZp42fe9EAwtIflLSJrY/V9N+myQBhzIwI9vBFX1YJ9/zlVZEVxcD7LXgXWt3n8z2wAYki+guSm6YP22QPw74mO3LAfK9eDwV6WadgpkXIulUktJSR0/nscS+JGWieG5eRPO9egtJuS2nhJ1HyqJ2DFC2LjRNApWPaX9SuuyJkooEGyKlQz66oS8jwjM9xiJcoYIgGFHyD1RBkb7wMNt/Lsl8lBSQugb9fZaXBq6s828ejOk7z5rvQgqYfD2p6NV422/pkJtK8sNfD7iR9GO4ve25jQfcQFY0DrG9k6RrbG8k6XeFO0nh+lOzbWXcge3dquQH0bdZtqer5OqmGpc4leqH2H5ptiqcbvvVHXLLOAWcV/q111m5Otp4DikdZ61SJ2mm7Q1LyyKdmw1r5F9E+m43oS/l5V6276iQPZRUVbyXoPbWSPpQ1XrXZItSCgpenf4z+LUuM3XnvrTt/0qyc0jf6+qkmfNfAy/vvDe69N11/ZE0l/4FJMeRBtN113wrdz5JL6CvpsQfXJGRTdLR2SJXVQPGtutmw3tG0rttn95tXemz4lkwm3R/zwP+aHudGvkB7qhqcLnskFubFN80wBWuJNP1PHbITyRZOv/cJJdlZ3bel8U6STeVXahK6+eQsp893inT0c4htquKkY4ar5w63b+7YmRcoZZbMlyhgiAYRRbBbD+eNDtV5GWfQTJhV86cujcf7FNIs8uH0L/uw7wuA7jWpm/b78xvD86Di8lUmO7z7PBraTE7nK0aRwAvIwVVjgMesl0UynsVqcI2pJnenUh1IF5KKpjXVHl7U/fFHXxR0reomJEf5IwstAtS7rV+yCmkGfMi5/3CbtJs5RpHiiNZnfy7Jakpo9UsSeeSajmYFMczU7nYlwcW1nthp8uZUkavAYoF7YLaW1OnQFQh6SSS8j2H/oUAm3zxq6pGFxWaO4/jqezy9y7gCNtHSKq1LHX2XdKqwHu7HEavBSShhyQBFdd78R2uLGnlzuvd9u75f8+xIfk5UfWMrFNC9mdg0omqdQWzsoXuGNK98hD1wdKQCo8eRZr1N+kanVGcg/Ixq89dtLjn/sNAq3Dr8yhpou1HlVw6v0l63r1Y0hRSXE5dStlJ6p9CdjX6rJ2dMSz/zOflTOAiSfcBAzLsFX0nZdYa8NxreOaNCFEgLwiCZwuDNdv/kBQ3UGT9+EBe9+EqYfVQwyDPRj8g6XDgf+7LHrSMpFfZ7jflU2H6Lp7cbU3fa9uulK9SoCTVKlCZI0kDq9NJM78fJJn4i2MtDyyKytsTSK4hF9BcEbzXuIPPklIyfqviM1Pvn3wQScFaVdLJ5CDlGtme6ofYflv+39av/Tekar89ZR4jncO76Mu+dQ+pevHbobKw3hGkYoDd1g06X3+vKMWyHMLAwN0qxWU6Ke1wG9eDi0jV2s/N+9saeIftPSpkn1RKq/tB+gqlVcYIlfq/AkmR2xFYmezaV0NPBSRL
bAbsrBRwXZckYLDXe5uA6b1L7yeQ4j4GuJPlc/sWYBX1T9u6TJV8aZ9F+uAfSTofWKaLZbSwIh7Usf6VdByze3cX7fk8Krlsfp6U3vsgkkvnjLy/OZKa7pn/I6WQXRgPBXwsP0P6Kao1k0BV7o2DvgaCRSdcoYIg6McgzPZts0idQkMNA5dqCuTZ0anFwEkpeHZWnYl/UU3fTe4Dkn5MGlQVP3YfABbYrlSg8jaFO9FCl6Yqt4W8vu15/wJp8PsG4PvkuAOnNKRDglJMSRGkfLXrK2/vTcrU8kbSYHFX4BTbnSlVC/lLbL+h27rSZ7UuYYuCpE1IMSSfJlX+LVgGeGeN29cHq9pqcj9q2affkwZn3yHdG7uQKmRXZU07Hfik7X+3aP8Gd2Qyq1qX169LSpV6lVOxxxcD77H99Q65pUkB7e8jKc6/BHaw3ZSatth2JfoKSF7T5Gaj5LI2AFfXhWmFagKmbTcGw5e2v8b2Rh3rNgCmkOKeyt/fPOB3tu9raG99Brq4tUlHXdduo3tUhSVigksB1FXrJH0M+K/t0yRdbXtj9XehbLx/ldwbCzevP3furyTXytW1l76PNK+cNt2XXlGVKXvomTxxXLhCBUEwJmhrtl8gaQ3bf4eFs1dN2ZLa1DBQeTbWKXi26bl1gFIQ7ottfzm7Yqxku9cneZONesOOgeZvJV3fpb1HlNKezpH0DVJ9kMVqZFudd9uFNeMMSWfTJe4A2vvjk/Luj8vym2f3owGDG/dYP0TSBFJcyPJKaSOL871M3lcd56ldRqsJwG4MLKjXmVJzCZLbxeL0D8J9kPrsSmV/8Akkxe5amt2P2jDR9iWSlAfMB6s+a9rywM2SrqF/RramSsZ3Svo8fcHAO1GTFcj2zUoVplfLy7cCX68QvZs0OfB50mDckt5ZIQdUDm6LFNKVbjal/tyu5Ba3Il3GL5LeDZxve14+3qnAl12fJKDngGn1j1NZjORuOcCNy/b1pJolp+T+9hp3cBwp8Pom+ix0VZa2Qn4ySRktrKmXktyPqp4HPyCdi7mk+299kpX6Mapn869koOWu3zrbP8jKBcBNkt4HjMvWt09S4dIp6fW2f6vsnlhijbrnDANdXcfR7Orate/B0BOKRRAEwODN9sA+wO8k3UL6oXoRaca6jhXp7zv7RF5XxS1K9Rp+mJc/RsoiUu73ZqQZ1QWkmfunSD+OXyb5Jn+f/oPBJt7e8FlbBQqSVWMc8AngMyTf9u06+j/Y8z5AUcg/yHWBsq388dsObrIi0a0Y4R4k68DKJN/xQrF4kOQ2VsfVwK+yxaqXjFYnkapQv4k0W7wTFZWOnarAXyrphGLWO+9jku0HO+XzNnuVl5V8vn/W0Pe2PJ778FdJnyBl46rLsHXwINrfkTQILVyULqMvs1s/JL2dlAK5m7/8/iSXvx8Ap0o6jWaqXFQKal1VJO2V+34X/a/JqtnwL9g+PT8ftiT5/f+IviDkTh7NExfzlepB3E26X6soYoREukdvJSmy5b6W0+6+md7OY8HGttet+ayK40gJJYqq3x8gZYXqHLRDUiI/4hynouT+dXCnZUYpYHsVknvpK+k/CbBkZ6Puq6xduHQ+ToqpugD4SkU/XksqrFr1zO33nFHLLE9t+z6SiObZq2cC4QoVBAEweLN9NmNDqUosgHPxswr5A0g/gOUaBqfZPqRC9vmkWg2vJ/3YXEKqtnp3SWZTUoGy3dVXkK5rJqPS9icBnygGAdnd4rgKV503kH6sywrULrarssn0zCKc90pFwfVB9n+khT++pJu7DW40sG5IP+oG/pL2qnOTqpG/FdgWuKGX/hffv/oK6nUr6nUKyeVnATCTNAA53PY3q+Q7th0P3Oia4n5tkbQhSQlalqQcTwa+YfvqoWi/ZV9mk+69GaX7qTYzU1a230tSVNYiKzDuoeBjj/35Gykb0IBUwBWyxTVwCOm6OUUNxTwl/YA0eH0vye//IWCO7V065BYDNrF9RZf9
706KD/tFzXmsdD/Lnx0LfMv2zd2OM8vPsT2l27q8fkAWpZp1HyLFVE2nf9zdPOCETotCthLuSSqUeQNwrOvTGA8K9ejq2tH3mfSN5Sv7PpJMHUFXqGXCFSoIgtGkbLZ3DkjO7iqrNvkCk6wFhWmdvN211Jib3aKGQVYgGrPK2L5S0iN58clsHi9iMlage7Dv74E/KAWVr0KywPxfWSC38wApKPH5efWfG5SnchGyqj6vX3pfnPdfAQ9ny0th5n9OTRPQPnD3RuAFJHesXrhK0rpNgxvnQFBJX87tnkT6Ed+JhgJ2wH8kLd3hpvKVOhcYUkaaG1scaxFQf3+ekf0Pfd9bFes6pcHdiRQMuh9pVnqAYqH+Bb0WI/nk17kJtsb2zPz2IVJ8RS2qzjr2cJVCJ+m7tj+t6oJkde5TT9p+QP0T79feT7ZvIdUV+Fo+7zuS4hWaUpm2qTB9B91rxxT8SylT0huBr+cJkDo3xJ4DprNV40hSYHQtto9WioOC6vPYdC2fSLr//kMPleyBRyVtZvv3AEoZzR6tkZ2rFC9WdoWrOs6fAD+RtJ07al/U8BPSfXc5KT7lZSTrZCXqIYlHxeprypagbC3cwvaZi9j3keUZbrIIxSIIgk4uUkoZuDhpcHW3pCttf6Ys1NbcrP41DG7Lr+Kz57oijaxS2tUfAivaXk8poHEb2/1M67bn5LffI1lCni/pqyS/6c83HaztoyTdBPyOVJ/glS4FkCpVe/0aqSDWi4HdbZ9V2Vgfb+vyeRUXklw2HsrLE/O6TWvke1IUSgPJpWnnj99mcLNNh1Xoh0rxJ1VxAVDtpvJD6t1UbiFl4Tqvo+916WaPzkrx50lJAiYBTUHt47Pl4R3AkbafVM5wVcFhpffzgdtt/7NGtmeUqjjXUvM9NWYd6+Ck/P+wms+r6Mlfvqa/N5JcYg6ok1HLCtP0XQfn0P06eA/ZBcn2/UpB4vtU9KHW317S1Bpl9xJJ2wG/bFJ23RcH1fY8HktyZ+o1C9pHSQPpyaT79H/UZ3DbJct/Ki9fRp+r6UIkvd/2T4HVq5SAinO+bmGByRaXbtPygylmepDthVnG8vd6ECn9bBUvVHJrm0dK3TsV2M89xmoFgyMUiyAIOpmcFYAPAyfaPkipkFUnbyL9eL2QVOm4YB7JpaCTwdQwOIY0GDgKwPbc7LZS5bOL7ZOz28EbcrvvsD3At76MpA+QBp0fJPlqnytpl2xJgDTr9nLb92RXj5Ppy2hViSuy1EhanpQ5pW4gMsF2oVRg+yFJVQpaW0XhLFIMy+Ud619Ds1LSZnDzcJ7t/1nu246UisdVULhuvRU42vY5kiq/08yt+bVEftWSXVUezFa2y+itvsRRJEX3euAyJXe4yhgL4C22++X9l/T1znWDYBPSjPypwB/ocV7T9t8kjcuWruOVMqkNcBexPTu/nWL78PJnkj5FCvjtpOwvfyrdUyC3pW2F6X/kV9frgBTYPgtAqTYCpLibTop4jwkk5ex6+gc1b1KxzR6klKbzJT1G95iftufxnh4mLxaSJ1Y2yINoXBMflD97jJRx7Dt1MpkiZXRdfE8nC9NuO9U+aRR2KftfC6osTk3j2F1tHy7pTcDzSM+zk0gTNqPGM72ORcRYBEHQj+zGsxXJtH2A7ZlqrgA9bOZm9VVaLcdMVPoOl7ZZjhR0Wc58VFsQSdKZJCvE3Xl5I9Jgd0pe7peCtnO5ps2NgUNJM4dfJv2YLU/6Yfyg7QEF+CRdQar2fG1enkaaPd+kQ+4jNCgKto/tkD8b2N8dRcUkvQL4mu3KgHVJV3Xuu+F4VwcOJ9W6MHAFKRbmthr5s0lByW8kzSI+Sko1WhsL0wblNL+L2MbirvARr/r+m+6PFvsbRzofO5IGtecAp9q+qWGby0gWnx+T3L3+TYo3aoopqup/bezBcKKWFaZL202CpHw3yBTuiCIpDS8muS/WVWn+JWlGvDGoeSRQivdYllS/pTxpUJcV6lOk+K/a
mXlJP7f9HtW4aQ7B9buAvskEkSyuj9BF6erVKp1ljwPuJyXkgFSk8rm2d65pu4ixOpwU3/Kr0brWC6ZOm+7LrpzZXXAIWHrCYhFjEQTBmOBLpBm132el4iXkGhNV2D5D0lsZmNrzS1XyalfD4F6lis9FzMT2NMyyK/n670xyWyp+PBsLItl+R8fyNVm5KHih+mdr6rfs6mDpI0lWm8mkzCdb275a0jqkGcsBigXJMnK6pDtJP8YvoK/Kc5ltqVYU/kdy2Tq2Q37FTtnc7xuyQlDHddk61HVwkxWIbRva6qQnN5UCSdNJM74vor/CWDcYuliptsZplCwnne52hbtHlatH5tsl2Y+SspK9pMOCtzRJkVokssXhfOB8pXiAHUluP1+0XZcxq2vWsVL/dyTVmXhxh9vV0vRVvi5kB+OW1bm/OjeiMq0qTOfB/knAc/PyvSRFfYDy5YG1OqaSvr861i7fJ7ZvlPSyjjZaVbFfhPM4kXTPbVUWpyYjG73NzBeuTz25aXY88wbQ+dyzPa6XditoY5Xei2RdPo10Pi4iKRd1zJZ0IUmp3F+p3kovrmXBIhCKRRAE/XAqyHZ6afkWagYrAJJ+RIqpeB1p5nR7KvxrNbgaBh8npRNcR9K/SO4wOzV0/z3AGrafaJDp7NdxNR8VKXM7B7yzOwUrWLyYLZT0JeeMPnlQUrlBVuLWoZRdy9VVvdsqCss29HNil896GtxIOp7qWdDKtMO2H5F0N6mS8l9JsQq1yivJ/Wwfevc5LxSy8qCjyt2ucPfoxd/7FFJg9yH0rw49r1NhGSxZoXgrSalYnb6YoUpKLnePAt1cS64kKeXL0z/d6zwGBu8Oyi2rgx/TUC9A6UY4xPb99F5h+mjgs86Z2CRtQRqY1sUhLcT2tZLqYnigt6DmthWdW5/HbLn6r+29u8mWN8v/30JyX71JHQ8a9xVR/FiVKx/Q6crXy3NuKFgyT+aU11Vmk7L9MLCfpKXy+1ry8R8IrADckp85z6NLQoSRoIuX2NOeUCyCIOhH20EisGk2N8+1/UVJ3yINwDoZTA0D295S0lKk6sPzlKr/1nEjaSB9d4NMJ+eU3k8A3kmpYJhThpG2lAe/ndlZmvxP16YvQ85UVdelaKsozJL0EdvHlFcqxdDUDh7ckWazC2eX3g84h50oBVxOJx3v8aSK5j8luVJV0dbnvOkaKcsVs6Rd/b2dMtE8QK75oJQKeQIwSdIk2//otX9VSDoRWI8UxPxFp+DnOtmmwXelJScrIbdTHTPQyQvoc8t6Hz24ZVV1s0sfLelc4BV5+bYe2lzKpfTOtmfkZ8PAnfe3Qi1GUnJqr0l6CGq2vXv+/7oe+gqDOI+2FyhldWpDm5n5NzJQidi6c13nc68X97NB0rNVWim1+I9JcR+rKaXq3sN9Gb0WUlxfZcuVU5rirqmKg0UjYiyCIOjH/2/vvsNkq6r0j3/fCwiXKCoYUDKIiETJBkTRMYGYkGBkxJzjGAbEMeHMqAPqqCiCCURAQRAQkCAKwsVLUvmJoAOYQJQo+f39sU/drqpbVV3VXd2nuvr9PM99us+pc6rXra7uPvvstddSqXbSsOQisUvKD5IutL29pAsoDZn+Blxpu2OJSQ3Qw6BLPvgi2x27rVYpMz+gDDD67UTc/hwLKGlgk94F7fEcjXzj5lxjqu0VbC/X4ZyOFXLac7wlfQc4q8tAYTfbe7Xtfzjlrvc9TAwknkhZ/LqnmypgtZ23HiX1YF1a04/6SYPp+RpKWkwp13mJJ9bO9FrH83TKxdmZ9Jdz3qkx2C2UfgbNPVAGSveoznk+JUXqUZQB7DqUdQEdc/f7JekBJtK2lipu0JyjXr1+psyinETb4NUdigc0ndt3idrq+EZa1qcpA55ejQybz3uB28qAdjjmSMo6or6SzlVKMl/CRIWr/YBtbC/V5bv6eWq4j7I4/ziXxcvdnn8h/XfHHqiL/SCvo6QvUmZxj6U1la/b+30BpRfONVVq
4UOBtZpnf5pT+Sipog2rAOfb3q/Lczennwm4kS7pZ1Ohkmr7Zcqs09+pZqU7vYclXUiZET/R/fVVGej9NRu23uaJ/unPZyeclZbPGouIGAFuW4hdXcj+tMcpP6zypD9N+aNvyl2lbs9/6GR/lKuUoMc43fsQAAAgAElEQVQDq7VdJK5K0zqODo4EPkX/KTOdbETvngeTmmK+cb8Vct5O6UK9Lx0GCh1i+Quwk6SnUe6IA5xs+6xJ4vk+Zb3GSQz+Wk72Gt5T3VFs3KXseNe5yauBTSgzG5N2Aad0Qd6RUkIYyoBtEWV9wcG2GxemzTM2H6E0dJvMfwA7AGe4NGB7GuUCd1psd+2x0OHYLaufkb0pg4tfVR9P9+RNyfoqUTtoWlaHGHsOKirbA/tK+gMTA3F3G2BS0hM/wsT3/TwmUhbbv/6ks1Bq7YmwO+V32KTdsTVAF/spvo4rUG7QNKdW9ep6/4CkRwP7VClF59g+qe2wqabyTTn9rB8uqbZLZqUpN2FeRpld63T8dW1pU/d3Oq6yPbCfpN/T3/srhiADi4iYTM+LRE/Uaj9OpdrPCo0/1p30+Uf5sZRFhg8GmqsW3Qa8tkesd9rueRe6QzyN7tGNsrd/ZulUgdnwz+oC4T6VspF/pSzGbTHVgUJ1YTBIl/C7+n0ttXQH7slew++qNC97sEqVq9dQLla62daDdbZeFnhc9Vo1Zm2OolxonEt1x7s53UPS2/tMe7vX9t8kLZC0wPZPJH12gNiGwvZvKAOhAyXtRfn/fYoOTf06nNuzRO0gaVnT9KxBDnYpIdxx5rRBgy2Y3kvSzba/R3kttwPOro5b3CPtsq/mlFN9HQdMQ0TSJ4FtKWuRAN4qaUfbS8p+TyOVr+/0swFjXpWyBmotyizzGdX2uyhrW77V4bTrqptSVuk78zZKl/puBnp/zZqssYiI+WQKF4lLpQV0WRvQMOkfZds/AH5Q/XHsWiWmg/MkfYLSu6E5ZaZrdRpX3aMH0SlFawgGqpAzhYHCoD5XpZOcTo/XUuX24eN7XJgsxfZ/StqNsr7mscC/2/5xj1N+pkm6gLd5TGNQUflrte9mSZ0WxEPvtS/N/lHlm58LfEtlEXrPhaQzQdJalDu7e1JSSN5BfzMKd0p6ELBY0iGUfPb22ZL9KP+nt1EuUpd8WXqUDh2U7T+oNEncyPYRKh3ul+qbMOBgoe8F0556d+x+u9hP6XVUKXSxP0tX2uu2zu05lP4kD1TnHwn8kg79hLql8lVfq5NrqteoOf3smi7HDuIblPftzyk3iz5IeV329ETD03avp5S1XouyXuY0elSF6vf9FcOVgUVELDGVi8RB0gIqk/5RlvRe24dQpvb3bn+8U+57pVGffIfmw+lRbrb6eqtTZmaa/4if2+uUXs83qOp1H7RCzkx7AqVs5a60ph+1vJZVStPJ1fF9s/3jKme6MRjt2H29sgPlQvhaJu8CDqVM6w+ZqG724mrfSpQ6+NOxB3AX5UJ+X0pJ4Y6llWeKpHMoufHfpaSJNRakPmiS1xH6KFE7SFpWh9iWupCzfW3T45s17tyr/0X8g1RXGmjBtAfojq0Bm1NO43X8BqWZ37Mo76196X1nHsrsbuP7vlqP4wZN5WtOPzM90s8GtL4nOnUfTvl7sHavNTC2b6J3VcAWA7y/ZtUoNciT9C+UwdoywOG2P9n2+PKUv+XbUH7P7OVJCi1kYBERS0zxIrGvtIAmD2PpP8q23dwHofFH9OIB4hikWssSKoue30bpIL6Y8kf35/QejJzc47GBVa/7oBVyZtpLKH/8+ynde4mkbd3/ItzXUS5W7qIMWnp1X4fS82IQb6IUEnhStX0kZeGuKWWRG3E0z86tKKnRsbjrHWW3lrmcSsWwYViHEvfrKOVPGyZ7HQctUTuQPi/k1lbpH/J+ymzLVpS1Wdj+o0pFo3Z9DxY8tX4g0F937P/scf4wbWj7JZL2sH2kSl+H9oaYzT5B6TvzE8p74Cm0
rqNo1lcqXzVr8npgQ8qatXe5c/nrqWru1H2/pOt7DSqqmNanXATvQHmf/xx4R7VOo5N+31/zkkpp489TfrauBy6SdGLbzPD+wN9tbyjpZZR0y079lZbIwCIi2g10kUj/aQENBzV9LkrH6Jc1H+Bq4WGfOe/T9TZKfvIFtp+msij2471OsP2hGYhj0Nd9pg1SunfQRbjvBjar7kBOqnEx3JQXPtnxlvRTSiUsU7p6dyqhPJU0uBdS/riuSfl/DjU9qB+2153quZKeR7lgbjQbHGb8k17I2T5FpWoa9LmIf9DBgqawYNr2nZSBxQd7HHYDpY9MS0PEapam399//WhcdP9DpSrTn+m9zu07ks6m/B4DeJ+7VHuj/1S+I6s4zqNUqHscpXDEsGzRNpBfWG33ej9+m3Ih3ChS8TLKILBbf5JBi0TMOMEo9bHYDri6MTCTdDRlRrZ5YLEHE3+zvwccJkm9biRmYBER7Qa9SOxnBmIJ2+dI2opy5/EllPKC/9vpWEkbUy5C16W1glTP1KYB3WX7LklIWt6lid0gC4WHZdDXfaY9GPiNpIuYvHTvoIskf8dECd5JqVTs+S/6zAuX9FLKIuazKa/joZLe47JId7oOAZ5ve7LUlFH1WcpszuUDzDL2q9+BwmnVp30v4u93sKABF0wPuH7jszQtcm9yS/XY8zs8NhVfrtIzP0xZL7YypdlbCy3dAfz66uOjJD3KndeW9ZvKt2lTqtJX6dD0dDo8tcp5K3qiohvANyW1NzBtNmiRiPlmLUqKYcP1LD1IW3KM7fsk3ULp7t71plAGFhHRbtCLxIOaPu84AwFLBgl7V/9uAo4BNEn60rGUQcfh9C4r2Pgay9u+e7J9ba5XWTT9feDHkv5Ol1KHM2zUKpj0U3oVWLJIcmtK6pEpdfG7LpinXJz9rFpj0Txo6bZ25qMMlhf+QUolqb8CVLn+Z1DuuE3XX+bwoALKRcIVMzCogAEv5NznIv4BBwuDLpgeZP3GoF3vp8R2o1z3OfRIa6NzB/AlT0OHdM5GKp9KVab2krTNmlOV7tNo3Gb/kaT3A0dT/n97AadIegiA29YWtb2/NmbyIhEz7pJLFp22cDk9bJa+3AqSmtOJv2z7yzP9RdMgLyKmrcMMxPFua4Kn0gDsPGB/21dX+66x3fUPp3o0w+tyfKeGen1XcJL0VModvFN7rS2o7iY+xjOwuLo93cfT7Og8zVgezkR6xS/c1Fyu7bh/p3zvG3X2XwAca/s/uhz/C0pvlJZ+I91S3yRdbPuJki4FtnIpy3up7S26HH+5mzruqjQQu7R531RJ+hwl9e/79NGsb9RI2pYyUDuH1vj/e5rPK8o6pU2AZ1Iu0E8bxoWcBmgeOIXnXoaJ9Rub02P9hqTf2t6oy/Nc7S5NQacQ0/KUBfXr0jpTO+0iAd3WN7X/HtZEk0+qYxqNPmc99a8ppkYRgMZ7oHm0s9T/oTrnEZSUHwMX9UgRm3ck7QgcZPtZ1fa/Adj+RNMxp1XH/FzSspS0vDWSChURQzeFGYgXUmYyfqJS9ehoutwdbNyBAk6S9EZKykPzRdDNbcc/gjJlu7Aa5DSed1VgxT7+L+2VbNaiDJCajzkb2J3ye3MR8FdJ59t+52TP349B031m2oDpRPsCW7hafKlSV38xpQJNJ8sN+LoNWuL11OoP4neq7b0ozcGGYVXKBdYzm/b1atY3KyQd0OfdyI9RShmvQGkGNxRVCtQp1eCtr8FEv+tVPI0qVZMZcP3GxZJe685d7xcxPD+gpFctoun3XjtJu9o+S507zXcb7Pa1vmmKqUozohoMX2d7vWr7lZSB1+8pF70dq6BV35d/B85i4nfYwba/NiuBj76LgI1U+rXcQPn7vE/bMScCr6QslH8xcNZks52ZsYiIKZnKDER1zEqUPN+9KVP1RwEn2D696ZhrmWha167T3bVXAq+iVKRpnvq9FTiy191kNVWysb2xpEdR7rbv3HbcL6s0nH+lzFYc
KOmyYa2BqO7G70pbuo/t/Yfx/FOMZ7f2dKJOswQq1Wj2dCmXS5Vadny3tTCSPk65KDiJHgPGpuNXolQxWsBEXvi3bP+t0/HVOc1Voc6z3XfX6Lmo35k5SVfY3myy46YYw5HAYe6/OtjVjMB6lQ7rN04Evmb7hrbjHk65yXEPHbreD+tueL/fo2rwc6CkIzo8bHfoe1Hd1Hmhy2L1OUHSJcAzXPrQPIVyU+otwJaURpgv7nLeVcBOjd8Tkh4K/MyDNdsca5KeQ1kftAzlPf8xSQcDF9s+UaU62DcoRRluBl7m7lW4ynNmYBER7SStQ7mDf4akhcCytm9rO+YFlDscO1Pu+B1NqYPdrVttp6+zOiWFZi/bTx9C3C+yfdyA5yymqmRje6tq31IDBkmXU+5SHwl80PZFQx5YDJTuM9MGSSeS9H1KytSPKQPC3SiLPa+HpddONKU0NOuWyrAMZUAzUCnhtvfwisAy7e/hcdIY+PZx3CGU1/P0yY6dQgy/oZQn7asAQTXjV29Pgdb1G0e7j+7Yau16f6Un6Xo/hZi+DBzqDus5hvDcW1FKAfe7vql2zb8HJX0euNH2QdX2YttbdjnvZ8AurtJaVRpDnm17p9mJfH5KKlREtFBZdHkA8BBK47tHUxZQt1z42/4+8P2mGYi3A2tK+iJtMxDd2P478OXqX6dY3kS5M924E746sLftL3R5yvNVKpg8yvazJW0K7Gj7qz3C6Lck4cGU2vbnV4OK9YHfTvZ/HMBIdHRu0imd6JQux55Aa4Wes3s98SCDT5ca9w9IWs32Ld2Ok/Ro29dXn7e/h9eiw3t4zPRbkegNwLsl3UO58z6UnHlJorzmgxQ+uFjSMdS7XmXg7tieoa731c0LU67NXi3pGvpoCKnB1mR8iZIa1LK+acQtI2lZ2/dRfoabe7f0uo69GrhQ0g8or+sewGWS3gnTX1cUnWXGIiJaVHfwtwMubLqD33L3use5w56BWOpuVK87s5J+RLkb90HbW6gsNvtlr9glvZvSdXs3SqOp1wDfdtvi85kiaUPg4ZQ1Cc3pPusAJ9seZu523/HYPr8tnegflEHe7yY5f9KF7ZKWo1zgPqXadTbwJXdpwFVdGGxFmRFZMthqvsuq0qF9Ddv/M533cEzdoK/xICk880E1y9aVJ5obtp93KhNrMu5vOn6pqlH9zmyNEkkfBJ5DWcu3NrB1dTNoQ0qqa8dZryrNtSvbQ20QGUUGFhHRQtKFtrdvWlOwLCVNaNb7KVR38DZ39YuqSou5zHa3/gUX2d62+Y9nr6nypvN2Y5JKNiqL1b9IuejeTNLmwO7uUvlogP/jD4F/a097kPQE4OO2h1Ubf8biUYeF7ZSZnY4LtCUdTunK3KgC9XLgftv/2uX4VzZtLqkI47YqUpJeY/trkn5he7uZeA9XefYfZ7BZsZFRzSzsC6xn+6OSHgM80va0+xQMusYiWmnpbtdfre7ST3Ze3+tmNOD6plEhaQfgkcDpniiZuzGwsnuXto5ZllSoiGh3jqQPUCos7Qa8kd71zmfSacAxKrXxAV5HWc/RzR3VAr3GQGQHyp28nqqBxI8lPQzotiD4K8B7KKkE2L5M0rfpXvmoX7NSG38AU4lnNdu3qixsP8rVwvYeX2PbtrUjZ1VrS1pI2gN4tO3PV9u/ANagfH/f1yHGRrWXs2fwPfx1qlmxavv/USqijezAQqXq2c9dKiB9gZICsyul7OztlG7G23Z/hr5tD+wn6ff0WGMh6b22D5F0KK3lY4HRzvdvJ+l5tn84pKdr73a9KSVFazI/k/SEPtdk7F19bG70Z3r3y6id7Qs67Pt/vc5RKSrR6f01zAar0SYDi4ho935gf8ods9dR8uoP73nGzHlPFcMbqu0fTxLLOykVXTaQdD7lIrRbxZAdgE9SKl18lFL54mHAAkmvsN0+gFnR9i/U2ihq0ruJfXhwj8cWDuH5BzWVeJaV9EjgpUxccPdyv6QNGmlV1XqVTg0Q
30trs8UHAdtQOhEfQWmg2Enze/gASkrZsN7DD7P9XU3UfL9Ppeb/rFNZlP4uYG3br5W0EaW6WfuF7gOU2bYDgO1tby3pl1DWOVWLWoeh3yaPjSpQF/c8am44GBjWwGKgbtdTWZMxyPqmMfDups9XoKxDGcbv7OghA4uIaGH7Acrd+a4dc2dDlfZ0pe1NKAtvJ2X7EpUmd4+l/HG9qlvePnAY8AFK6dKzgGfbvkDSJpQFy+0Di5skbcDEbMiLgT8N+N/qZLZq489kPI2F7T/tc2H7eyj9TK6hfJ/WAV7d4bgH2b6uafunVcrGzeqwyL5thuMr1SLuNYBtJP3DnXtwDGpKs2Iz5AjK92THavsGymCr5ULX9s8kNcqL3lv9bDXiX4MhLeJ16cDe3hNm5Q7HnVR97NgQcY4ZZkvqQbtdP2/QL1ANRt9JGYwe0GMwOud1WJ92fjXrGTMoaywiAgBJ37X90qa7YC1qWmPxA+AtnoHu081rLyT92vbjmh5baoFjdbH8ZWAn4O+UBnr7dltQOUAcs1Ibf9TiUalk06gnf5XtpRqBqUc3Y0m/s71B277zKXXWr6u2F1NSflYGjvBwCgpsDRxKKTd6BdWsWK/F6jNFEyWKm9cU9SxRLGlfSoWvrSmpNy8GPmS72+zPIPH02xPmxF7PY3v36cYyWyRtN4z1KdVzDdTtut81GZKeRymzertKFa5FwCuqtWIrUno79FyHNhdpotEqlKIYTwQ+5/SxmFGZsYiIhkYu78B3wWbQ6sCV1V2mxh9c295jCM/dfJf2n22PtQysqju8b7T9jOpO+QIPqSeC7b8AO6m1Nv7JHnJt/JmIZ9BceXXpEAxsKKlTmdELu8yevI7OaSIDzXAMqnofPLX618+s2Ey7R6XPTGP2YQN6dGoGsP0tSYsoZTsFvMDDa1C3J1VPmOpr/VHSKh2O2xG4jjIzeCHDves/q4Y1qKiea9Bu1/2uybiGMuu7H7CB7b1Uqqhh+071MTUyRy1i4vfSfZRF67U0HJ1PMrCICABs/6n6+AcASatS/++IDzd9LuDJtObcT8cWkm6tnndh9Xnj66zQfKBLL4UnVZ/PSG8Jz1Bt/KnqM55Bc+UbFaXWpMz8nEl5vZ8G/AxoH1i8g9IrZR+qi1XKGovlgRd0eP7Vmzdsv7lpc40+Y+yqeh/sbfszwJXTfb4hOJCSsvcYSd+iNKt8VR/n/ZbSlX5ZAElrD2lWsN+eMI+glHfeG9gHOBn4ju1ReE3nkr7WZNj+VWNNEFMYjM41krYFrmusJ1GpKvciysDiVzWGNi8kFSoiWlR3gz8C3MXE3R67Q1fkWYpnK8rFx0so6UfHu0uPCUk7A4tt3yFpP0q6x+emm65UPfcXKY3WjqW1l8JsNvMaOdVd/E/ZfvekB0+cczrwysZgtlr4/XXbHRf/StoVaJQY7trpuLq4PrvLDMcutvfudN4gJH2GUir3GFrfB7WUvKzWe+xAGaBdYPumSY5/C2VA8hfKgvmezdcGjGXgnjBVStzewKeBj9g+bLpxzBeSLrG9dbftLufsBnyIMrtxOtVg1PbZMxnrbJJ0CfAM2zdLegpwNPAWYEvgcbY7FvSI4cjAIiJaSPotpS5/zwuUGY5hY8rFxt6UpkjHAO+23bOBlEqJ0y2AzSllQQ8HXmr7qUOIKc28upD0c9s7Tn7kkuPb17QsoAwYHtfjtH6ed00mujgvNcNRpXlNi0oJy3Z2DSUsq/Ue7W4B/tAp174652pKZahuZZWnG9OkPWGq45YHnkv5GV+XUs3ta7ZvmIm4hqlKHdoXWN/2wZLWBh4xzLSoPuMYaE1G03kDDUbnmuZ1RpI+D9xo+6Bqe9K+RjE9dac5RMTo+R3lj1OdfkPJG36e7asBJL2jj/Puq1Ix9qA06vqqpGHl1B5u+/zmHdUMScDiakFuv7M5Z0o6jZJjD2Ux8RnTDcL2XynrQ5pnOIa6XsX204b1XEPwBcqs3GWUi8TNKClaq0l6
g+3TO5xzHTNYxaoaSHQcTDRIOooS6ymUWYorZiqeGdLcC+Rg4DbgOIbTC6RvU1iT0bACpQDFssCm1fqmc4cXWe2WkbRsNbh+OqXMckOue2dYZiwiokWVenQEZVFlc2fWWWtaJekFlLUUO1NyyI+mXNj3rMEu6Zzq+FcDT6F0gL60kYc8zZiWSjPoJ/VgPpjKbI6kPSnfI4BzbZ8wI8ENiaT9bH9TUsdu4rb/u4aYjgc+3FiboNIF/GBK/4/jm+/MNsX9eMrC85Np/fmedvySbmMiffJBlJSxOzpUM3qApmIMzQ/R4077qGj83A9SjWtUSPoUZSB/JRMFLOw5VIlrMpI+CDyHMtu9NrB1dcNpQ+BIt1Upi+HKyC0i2n2J0tfhcoZU335Qtr9PWbS7ErAH8HZgzWqdwwld7sRC+YO5D7C/7T9XKQqfnk4sknakLDReo+2iclVgqncMx4rtTj0oJjvnBEpZ27misRC5U5WjumzcvOC5WqS7ie1rOhT6acT9f9W/B1X/oENFr6mwveS1qdKF9qCk3LQft2AYX69GM9YLZBa8gFIOeKwWbDez/TFJZwKPBE73xB30BZS1FjGDMmMRES3UoYfDKJC0OmUB917u0o9A0qdsv2+yfQN+3acCu1DqxTc36rsNOMl2r0Zw80I1Y9Gp3Oy8X38yk1R6EtxMmdGDMrB+GPBySqndpVJzJL3EbT0rOu0bYowj+ftkOjSDvUBmmqQfAS+xfXvdscR4ysAiIlpI+jilLN9JtKZK3FxXTP3qkq502ZAq3rzX9iFt+2bsgmwukfSips0VKP0M/jib6XOzRdKjKQ3yGukU5wFvs319DbEsBN4IPKnadT4l//8uYMVOF48zmdKn1j4ljYZkTx1kYf9cIWkTJnqBnOnh9QKZEZroNbMWpcDFmdSU6hrjLQOLiGgh6doOu+2ays32Q9IbKBdYGwBXNz20CnC+7f2G8DWyxqJPVZWnn9reqccxC4G1bV81e5FNn6QfA98GvlHt2o/SgX23+qKanKRnU/LOX0qpstawKqUfwnZD+BrNa20aDcm+Ui2qHytVKtTDaUop93B6gcyIqpdDN7Z91KwFE2MtA4uImPMkrUZpjvYJ4P1ND9023ZmW2bggGzeSHkupxrRhl8efD/wnpVP2epK2BA6eCwtIO5WrrKuEpaSNKO/5TWlq6tjpJoCkLSh1/A8G/r3poduAn9j++8xGOz5mshfITJP0Ntufm2xfxFRl8XZEtJC0HPAGJir2nA18yfa9tQU1Cdu3SLod2MpDaIbX5o+UztK7A4ua9t9G6Qw97zVVA1L18c9Ar3UtBwHbUd5b2F4sqWfFrxHyN5Xmi41SuXsDM9ITog9HUC5wP0PpXv5qSgrSUmxfClwq6dvD/lmW9HhgA9snVtufAVarHj7MNTUPnEFvoyyAruv7Ph2vBNoHEa/qsC9iSjKwiIh2X6SUifxCtf3yat+/1hZRH2zfL+kqSWsPMyWh+YKM8jtzzqXvzLTmakB9urcaDLY8zRBDmkmvoayx+Awl5p9RLszqsND2mZJUDagPkrSI1hmJdutK6muWYwCfpMycNDwL+DCwYhXLC6bx3KNoRnuBzARJe1Mq5q1X9ZxpWIVSACBiKDKwiIh227bVYz9L0qW1RTOY1YErJf2C1kZtw0ix+Req9B3KH+c5k74zU9S583PD3cD/2b6tw2NXStqH0shqI+CtlAv0ueDR7d/zqlHidTXEcne1nuW3kt4M3ACsPMk5fc9yDOCRtpu/f7faPg5A0uum+dyj6BrgbElD7wUyg34G/IlSNey/mvbfRmmwGDEUWWMRES0kXUIpR/i7ant94HtzYZFyVRp2KbbPGcJzL6J02j27qSnW5cNovjdXSfpJj4eXpTSn+nyHalorAh8EnklJnzoN+Kjtu2Yq1mEZpUX8krYFfg08GPgoJf3oENsX9Dhnke1tmt+7jX3TiOMq24/t8tj/s73xVJ97FEk6sNN+2x+Z7Vgi
Rk1mLCKi3XuAn0i6hnLRtw7lrubIax9ASHoSJQd+2gML5nb6zoyw/bRej0taHvgl0DKwsH0nZWDxwZmLbrhGsVGi7YuqT2+n/5/RqcxyTOaPkra3fWHzTkk7UNYojZW5PICovieHAo+jzL4uQ4fu6BFTlYFFRLSocrY3Ahp3IK+aS11aJW1FySV+CXAtcNyQnnoup+/Uwvbdkl7e2Jb0Wdtvl3QSnRvqjXJa2YMoF+DL0tp9+1ZKg7RZJ2ljyo2AdWgte7prj9PeRln78FbKLMeulAW90/E+4BhJXwcaC7W3qZ53r2k+98iY4+/fhsOAlwHHUvqMvAIYqxmlqFdSoSJiKZJ2Atal9WJlZOucVxdYe1f/bqKUhX237XWG+DXmbPrOqJC0je1FM5myNtMkrdOoPFbd+V/Z9q01xXIppRv8IkrZUwBsL+p60szFsibwZuDx1a4rKWlwf5ntWGbKmLx/L7b9xObGoePYHT3qk4FFRLSQ9A1Ko7nFTFyseJQ7s0p6gNIBeX/bV1f7rhnlpn4xN1XVwV5P+dm4iJIK9Tnbn64hlr7XRrRVAlrKHLnbPhIkrQT80/YD1fYywPJVit9Ik3Qu8AzgcEpZ6D8Br2or2BExZRlYREQLSb+mNH6bM78cJL2AMr2/M3AqcDRwuO1p90bolvbQkAuyVpIOsn3QJMc8j5KG00jhaTQYG/k870YzPEn7AltTGjIums3maJIeUn36VuCvwAm0VidaqnyopBsplau+A1xIec1pOmfk77aPCkkXAM+wfXu1vTJweq9O86NC0jqU98xylD48qwFfaNyQiZiuDCwiooWkY4G32v5T3bEMqrqTuAclJWpX4CjgBNunT+M5O6Y9NOSCrFU/FZIkXQ28ELh8Lg1gASRdSelg/W1K87dzJF06m3d8JV3LREPCdu7SeXsZYDfKz8bmwMnAd2xfOZOxjqNR6r4eMWqyeDsi2j0M+FXVC6L5LujI35m3fQflgu/bklanLOB+HzDlgUWngYOkrcewm/CwdLrYbXcdcMVcG1RUvgT8HrgUOLe6AzyrayymMhNn+37KbN6pVbWuvSm9GD5i+7Bhx9Y6m0IAAB7TSURBVCjpEbb/POznHRF3NP8OkLQN8M+aY+rLXJ4tjLkhMxYR0WIuL0ycLXX1LZgLJC1o5J73OGZbysXNOcydBmNdSVrW9n01fN03Ad+y/Y9qe3Vgb9tf6HL88sBzKYOKdYETga/ZvmEGYhvbn5Hq/Xs0pZSugEcAe9WxaH5Qc3m2MOaGDCwiYimSHg5sW23+wvZf64xn1KSKyvRIOp3Se+FyYMkgZJT7A0jaz/Y323pYLFHHoKhLSk7H96ako4DNgFOAo21fMcOxjfXPiKTlaC3JfW+d8fSramr59MkG/xFTlVSoiGgh6aXAp4GzKXfjDpX0HtvfqzWw0TKyF8BzxKNsb1Z3EANaqfq4Ss+jZtcyktS481yto3hQl2P3A+6g9LF4a1Ojx5lKhfnKkJ9vZFSlp98JrGP7tZI2kvRY2z+sO7Y+vBc4RdJYzBbG6MmMRUS0qGrj79aYpZC0BnDGfC9HKGlnYLHtOyTtR6kI9LlGT4P5TNJ6tq+dbF/TY4dQ3lNTXvsSIOnTlFz5L1W7XgdcZ/td9UU1/iQdQ+kd8grbm1UDjZ/NhcXbc3G2MOaWDCwiooWky20/oWl7AXBp8775SNJlwBaUijpHAF8FXmq7Z9Wo+aBTPn2vHguSbqPMANwDNFJIRnoBqaT/6fV4HX1eqp/N1wFPr3b9mFJm+f7uZ8V0NTWZW5LuNduVwaZK0hVzcLYw5pCkQkVEu1MlnUapdw+wF/CjGuMZFffZtqQ9KB2Fvypp/7qDqpOkTSidlleT9MKmh1YFVuh2nu1RSifqV/PC3I8AB9YVSIPtByR9HTjL9lV1xzOP3CNpIVV/G0kb0JRWNOJOkfTMzBbGTMmMRUQspbpIfFK1eZ7tE+qMZxRUOcmnAq8BnkxpMjWvZ3Kq
QdYLgN0pFYYabqMsEP5Zj3N3B55SbZ49R/LTgXoXJktazfYt1ee7U9ZDPcj2epK2BA6uuzR01U/mrnGdOZG0G/AhYFNKKeudKd2rz64zrn40zRbeDTQqmY30bGHMLRlYREQLSesBf7J9V7W9EHi47d/XGljNJD0C2Ae4yPZ5ktYGdrF9VM2h1U7SjrZ/PsDxn6RUHftWtWtv4GLb/zYT8Q1bnaVUJR0A3Gz7e5IWURpBnt2UknP5bA92q5SslwH7Ur6vdwPLAzdRGvF9adw6O0t6KLADZfH7BbZvqjmkiJGQgUVEtJB0MbCT7Xuq7QcB59vetveZ469qhraR7TOqBZvL2L6t7rjqIum9tg+RdChVWkizbusOqvUqWzZKXlbVjH5pe/MZDXhI6u7RIOnDtj8q6QLbO7Tl+l82269jNZt3BvADSuPDxvf1IcDTKAPyE2x/czbjGjZJPb/nc61ppqSDbB9UdxwxXrLGIiLaLdsYVADYvqcaXMxrkl4LHAA8BNgAWAv4XyYWzs5Hv64+XjyFcx8M3Fx9vtpwwpk5VQpJY/C0oqRGt+1Z71xs+6PVp1dK2odSdnYj4K1A1/SzGfSMTn0cbN8MHAccV/V9mOv+q/q4AvBESvd1UQo6XAzsWFNcU7U7cFDdQcR4ycAiItrdKGl32yfCkjz6TPPDm4DtgAsBbP9W0pr1hlQv2ydVH48EkLRytX37JKd+Avhl1axLlLUW75/BUKdtRBecvwX4ICX16NvAacB/zHYQjUGFpG/YfnnzY419c6WBXC+2nwYg6Xhga9uXV9ubMTcv0DX5IRGDSSpURLSoKpx8C3hUtet6Sr32scqRHpSkC21v30g7kbQscMlcSd+ZSdWF1TcoszkCbqS8Z67scc4jae3u/ucZD3SMVOljZzQudkdBe4pYFePltjetMayhk3Sl7cdPtm/UNTdXjBiWzFhERAvbvwN2GODu83xxjqQPAAurqjBvBE6qOaZR8WXgnbZ/AiBpF0rn5Z16nLOAMhO2LLCxpI1tnzvTgY4L2/dLeqC5SlRdJP0b0PjZaE4Ru4fy3hg3l0k6HGisGdkXuKzGePomaTXK7MqTq+1zKJXEan0PxfjIjEVEACDps7bfXn3+Ntufa3rs67ZfVVtwI6CqfLM/8EzKRdNplGZk8/6XaKfmYL0ahkn6FKU/ypVMdP913WVS5xpJPwC2ojTGu6Oxv8ZmfYfbfs1sf+3ZJmkF4A1MlEs+F/hio5LeKJN0HHAFcGS16+XAFrZf2P2siP5lYBERQGsaQ4eUhlqr4MRok3QCcAklHQpgP2Ab23t2Of4qYHPbc6Wp2EiS9MpO+xtrXmZbHaVu61KV4V57rjUmlLTY9paT7YuYqqRCRUSDunw+r0m6nA6lVBuyxgIoTQM/AhxPea3Oq/Z1cw2wHHOnW/FIqmsA0cMlkra1fVHdgQxbt8aEwMg0JuzTPyU9yfZPASTtDPyz5phijGRgERENCyStTsl9b3zeGGAsU19YtXte3QGMsmqB7vEDLiK+E1gs6UyaBhd1pPDMZVWJ2U9QOkCv0Nhve/2aQtoe2FfSHyipWY1SvOMw+N5L0s22vwccSKkQdzaA7cVVY9G54A3AkdVaC4C/A6+qL5wYNxlYRETDasAiJgYTzc2e5m3OpO0/1B3DKJviIuITq38xPUdQLnI/Q2lE92rKjYG6PKvGrz2jbH9Z0oerzXtt3yK1TOzOid+RthcDW0hatdq+dZJTIgaSNRYRET20NUZrXEmYGhqjjaqpLCKumi5uXG1eNQ59DmabpEW2t2le29DYNwKxrQTsCext+7l1xzNMkr4KnEnpvfIiSmPC5Wy/vtbA+iDp48Ahtv9Rba8OvMv2h+qNLMZFZiwiInoY0cZoo+b46l9fqnK0RwK/pwzQHiPplSk3O7C7q2pMv5X0ZuAGYOW6gqkGi88F9qHMXhxH6U4/bpobE36HUiHuoz3PGB3Ptv2Bxobtv0t6DpCBRQxFZiwi
Ivok6UnARraPkPQwYBXb19Yd1yiQtAaA7Rv7OHYRsE+joo6kjYHvjMKd9rlE0rbAr4EHUy5sV6Pcjb5gluN4JrA3pRTzT4BjgENtrzubccTkJF0GbNuoyFZVt7p4rjX3i9GVgUVERB8kHQg8EXis7Y0lPQo41vbONYdWG5Uk8wOBN1Ny+wXcR7moPLjHeZe1L+jttC/mBkkPUCqBvaox0JZ0TY2LyGeEpJ7rguZCVShJ7wOeT1mfA2Vdzom2D6kvqhgnSYWKiK4kHWB7HDvnTsWelHUElwDY/qOk+Z4m9Q5gZ8od0MYF5frAFyW9w/Znupx3cYfOxRfPeLRjYgQvcLcGXgacIeka4GjGs5LcjsB1lPSnC5mDZbltf6qatXh6teujtk+rM6YYL5mxiIiu0hhvgqRf2N6u8ZpUi1N/Pp/vskv6JbCb7Zva9q8BnG57qy7nLQ+8CXhStes84AtpmNcfSTfS4wLX9jl1xAUgaSdKWtSLgEuBE8bl5kRVWnk3yv9vc+BkSgrflbUGFjFCMrCIiK4k/bLbxeF8I+ndwEaUC4tPUBrAfdv2obUGViNJV9jebAqPrQTcZfv+ansZYHnbd85ctONjLlzgVovKnwG8zHavZolzUjU43pvSKO8jtg+rOaSeJP3U9pPaqtxBqtvFkGVgERFdSXq07evrjmNUSNqNskBVwGm2f1xzSLXqNaM1yWMXAM+wfXu1vTJlhmOnmYt2PI3CBa6kdW3/vsfjAtYah98l1ev9XMprvi6lH8vXbN9QZ1wRoyIDi4iIAVUVof7mef4LVNL9NPWtaH4IWMH2cl3OW2x7y8n2RXejdIEr6VjK4v0fUJps3kjpBL4hsAtl5uLAuT4Ql3QUsBlwCnC07StqDmlgkjYArrd9d1X2eXPgqEZfi4jpysAiIqIHSTsAnwRuppT0/AbwMMqF1Ctsn1pjeHOSpPOBt9i+pNreBjjM9o71RjY3jOIFrqRNKYvwdwYeCdxJKYV7CvA923fVGN5QVNWvGgPpOZlOJGkxpbrdupTvzQ+Ax9t+Tp1xxfjIwCIiogdJFwMfoPQI+DKlwdQFkjah5LVnDcqAqv4LRwN/pFyUPYKSi5/KUH0YhwvcqEdT8Yn3UNY5HZq1dDFMKTcbES0krQi8C1jb9mslbUTp3fDDmkOry7K2TweQdHCj+Zjt35TU8RiU7Yuqgdljq11X2b63zpjmEtsL6o4h5qx7Je0NvJLSzwKgY8pixFTkl1NEtDsCuJtSsx3gBuA/6gundg80ff7Ptscy5TsASe9t2nyB7Suqf/dK+nhtgUXMH6+m/G7/mO1rJa1HSe+MGIqkQkVEC0kX235i8/S4pEttb1F3bHVoWqAsYCEldxwmWaAcS2uuFNVeNSo9UyJmnqTnAyfbfmDSgyOmIDMWEdHuHkkLqe7GV1VE5m3jMtvL2F7V9iq2l60+b2xnUDEYdfm803bMMSr2k/Tv1fbakrarO65osRfwW0mHVOmIEUOVgUVEtDsQOBV4jKRvAWcC7+19SkRf3OXzTtsx93yBkmazd7V9G/D5+sKJdrb3A7YCfgd8XdLPJR0gaZWaQ4sxkVSoiFii6pb7YspgYgfKXeQLbN9Ua2AxFpJWNt6aKg4ljXLESXoo8HLg7ZSywBsC/2P70FoDizkvVaEiYgnbD0h6r+3vAifXHU+MF9vL1B1DzKh7JS3DRBrlGrQWP4iaSdqdsoB7Q+AoYDvbf62qAf4KyMAipiUDi4hod4akdwPH0NRV2fbN9YUUEXPA/wAnAGtK+hhl9vND9YYUbV4EfMb2uc07bd8paf+aYooxklSoiGgh6doOu217/VkPJiLmhCqNcgdKh/qnU9LbzrT961oDi4hZlYFFRERETFs6OI8+SS8EPgWsSRn8pVt7DFUGFhHRQtIrOu23fdRsxxIRc4ek/wR+DhzvXFyMJElXA8/PTFLMlAwsIqKFpObFeytQ0housf3imkKKiDlA0m3ASsB9wF3kbvjI
kXS+7Z3rjiPGVwYWEdGTpAcDR9v+l7pjiYiIwVUpUABPBR4BfJ+mxqe2j68jrhg/qQoVEZO5A1iv7iAiYjRJ2sT2byRt3elx25fMdkyxlOc3fX4n8MymbQMZWMRQZMYiIlpIOomJLsgLgE2BY22/r76oImJUSfqy7QMk/aTDw7a966wHFRG1yMAiIlpIemrT5n3AH2xfX1c8ERExHJLWBz5HKQ1symL7t9vuVGY8YmBJhYqIds9pn52Q9KnMWETEZCRtRpnlXKGxLxXlRsq3gc8De1bbLwOOBravLaIYK5mxiIgWki6xvXXbvstsb15XTBEx+iQdCOxCGVicAjwb+Gkqyo2OTr/LJV1qe4u6YorxkhmLiABA0huANwLrS7qs6aFVgPPriSoi5pAXA1sAv7T9akkPB75Zc0zR6keS3k+ZpTCwF3CKpIcA2L65zuBi7suMRUQAIGk1YHXgE8D7mx66LX9sImIykn5heztJi4CnAbcBv7a9Sc2hRUVSr7UUtr3+rAUTYykzFhHRsAxwK/Cm9gckPSSDi4iYxMVV35uvAIuA2ymLg2NE2E7p8JhRmbGICGDJnazGLwS1PZw7WRHRN0nrAqvavmySQyNijGRgEREREdMm6UzbT59sX0SMr6RCRUQLSU/ptN/2ubMdS0SMPkkrACsCD5O0OhMznqsCa9UWWETMugwsIqLde5o+XwHYjpIvne65EdHJ64C3A48CLmnafytwWC0RRUeSdgYW275D0n7A1sDnbP+h5tBiTCQVKiJ6kvQY4LO2X1R3LBExuiS9xfahdccR3VWlxLcANge+DhwOvNT2U+uMK8ZHZiwiYjLXA4+rO4iIGE2SdrV9FnCDpBe2P277+BrCis7us21JewCH2f6qpP3rDirGRwYWEdFC0qFMVIdaAGxJa3pDRESzpwJnAc/v8JiBDCxGx22S/g14OfBkSQuA5WqOKcZIUqEiooWkVzZt3gf83nY6b0dEzHGSHgHsA1xk+zxJawO72D6q5tBiTGRgEREtJK0E3GX7/mp7GWB523fWG1lEjCJJ7+z1uO3/nq1YYnKS1gE2sn2GpBWBZWzfVndcMR4W1B1ARIycM4GFTdsLgTNqiiUiRt8qk/yLESHptcD3gC9Vu9YCvl9fRDFussYiItqtYPv2xobt26u7WhERS7H9kbpjiL69iVJC/EIA27+VtGa9IcU4ycAiItrdIWlr25cASNoG+GfNMUXEiJL0XtuHtBV+WML2W2sIKzq72/Y9UulhKGlZOnzPIqYqA4uIaPd24FhJf6R00H0EsFe9IUXECPt19fHiWqOIfpwj6QPAQkm7AW8ETqo5phgjWbwdEUuRtBzw2GrzKtv31hlPRERMX1Vedn/gmZQbR6cBhzsXgzEkGVhEBDDR5KpTgytIk6uI6EzSib0et737bMUSvaXqX8y0pEJFREOaXEXEVOwIXAd8h7IoWPWGEz2cCTwDaBToWAicDuxUW0QxVjJjERFLVNPkL7b93bpjiYi5obrrvRuwN7A5cDLwHdtX1hpYLEXSYttbTrYvYqrSxyIilrD9APDeuuOIiLnD9v22T7X9SmAH4GrgbElvrjm0WNodkrZubKTqXwxbZiwiooWkTwI3AccAdzT22765tqAiYqRJWh54LmXWYl3gROBrtm+oM65oJWlb4Gigpeqf7UW1BhZjIwOLiGgh6doOu217/VkPJiJGnqSjgM2AU4CjbV9Rc0jRQ6r+xUzKwCIiIiKmTNIDTMxuNl9UiHJTYtXZjyq6kbQTZVZpSQEf20fVFlCMlVSFiogWXcrN3gJcbvuvsx1PRIw221mvOUdI+gawAbAYuL/abSADixiKzFhERAtJJ1PKR/6k2rULsAhYDzjY9jdqCi0iIqZB0q+BTdMQL2ZKZiwiot2ywONs/wVA0sMpd7O2B84FMrCIiJibrqAs2P5T3YHEeMrAIiLaPaYxqKj8tdp3s6Qs8ouImLseBvxK0i+Auxs70x09hiUD
i4hod7akHwLHVtsvBs6RtBLwj/rCioiIaTqo7gBivGWNRUS0kCTghcCTql3n2/5ejSFFRMSQSFoH2Mj2GZJWBJaxfVvdccV4yMAiInqS9GTgZbbfVHcsERExdZJeCxwAPMT2BpI2Av7X9tNrDi3GRErERcRSJG0l6RBJvwcOBn5Tc0gRETF9bwJ2Bm4FsP1bYM1aI4qxkjUWEQGApI2Bvat/NwHHUGY1n1ZrYBERMSx3276nZLyCpGVpbWoYMS0ZWEREw2+A84Dn2b4aQNI76g0pIiKG6BxJHwAWStoNeCNwUs0xxRhJKlRENLyQUtv8J5K+IunpgGqOKSIihuf9wI3A5cDrgFOAD9UaUYyVLN6OiBZVWdk9KClRu1Ka451g+/RaA4uIiIiRloFFRHQlaXXgJcBeqRoSETE3Sfqu7ZdKupwOaypsb15DWDGGMrCIiIiIGGOSHmn7T1UPi6XY/sNsxxTjKYu3IyIiIsaY7T9Vny4A/mT7LgBJC4GH1xZYjJ0s3o6IiIiYH44FHmjavr/aFzEUGVhEREREzA/L2r6nsVF9/qAa44kxk4FFRERExPxwo6TdGxuS9qA0RI0YiizejoiIiJgHJG0AfAt4FKVP0XXAKxpNUSOmKwOLiIiIiHlE0soAtm+vO5YYL0mFioiIiBhjkp7fVmr2ncD5kk6UtF5dccX4ycAiIiIiYrx9DLgRQNLzgP2A1wAnAv9bY1wxZjKwiIiIiBhvtn1n9fkLga/aXmT7cGCNGuOKMZOBRURERMR4k6SVJS0Ang6c2fTYCjXFFGMonbcjIiIixttngcXArcCvbV8MIGkr4E+9TowYRKpCRURERIw5SWsBawKX2n6g2vdIYDnb/1drcDE2MrCIiIiIGGOSHmH7z9M9JmIyWWMRERERMd5OGdIxET1lxiIiIiJijEm6H7ij1yHArbbXmqWQYkxlYBEREREREdOWVKiIiIiIiJi2DCwiIiIiImLaMrCIiIiIiIhpy8AiIiIiYp6RdEDdMcT4ycAiIiIiYv55fd0BxPjJwCIiIiJi/lHdAcT4SbnZiIiIiHlG0qNtX193HDFeMrCIiIiIiIhpSypURERERERMWwYWERERERExbRlYRERERMwDklaU9GFJX6m2N5L0vLrjivGRgUVERETE/HAEcDewY7V9A/Af9YUT4yYDi4iIiIj5YQPbhwD3Ati+k5SdjSHKwCIiIiJifrhH0kLAAJI2oMxgRAzFsnUHEBERERGz4kDgVOAxkr4F7Ay8qtaIYqykj0VERETEPCHpocAOlBSoC2zfVHNIMUYysIiIiIiYByQ9pdN+2+fOdiwxnjKwiIiIiJgHJJ3UtLkCsB2wyPauNYUUYyZrLCIiIiLmAdvPb96W9BjgszWFE2MoVaEiIiIi5qfrgcfVHUSMj8xYRERERMwDkg6lKjVLubm8JXBJfRHFuMkai4iIiIh5QNIrmzbvA35v+/y64onxk4FFRERERERMW1KhIiIiIuYBSZczkQrV8hBg25vPckgxZjKwiIiIiJgfflR9/Eb1cd/q4xdriCXGUFKhIiIiIuYBSb+0vVXbvktsb11XTDFeUm42IiIiYn6QpJ2bNnYi14IxREmFioiIiJgf9ge+Jmk1yrqKvwOvqTekGCdJhYqIiIiYR6qBBbZvqTuWGC8ZWERERESMMUn72f6mpHd2etz2f892TDGekgoVERERMd5Wqj6uUmsUMfYyYxEREREREdOWSgARERER84CkQyStKmk5SWdKulHSfnXHFeMjA4uIiIiI+eGZtm8Fngf8HtgQeE+tEcVYycAiIiIiYn5orK19LnBsqkLFsGXxdkRERMT88ENJvwH+CbxB0hrAXTXHFGMki7cjIiIi5glJDwFusX2/pJWAVWz/ue64YjwkFSoiIiJiHpC0CHgZsCqA7TsyqIhhysAiIiIiYn7YC1gLuEjS0ZKeJUl1BxXjI6lQEREREfOIpAWUylBfBO4HjgA+Z/vmWgOL
OS8zFhERERHzhKTNgf8CPg0cB7wEuBU4q864YjykKlRERETEPFCtsfgH8FXg/bbvrh66UNLO9UUW4yKpUBERERHzgKT1bV9TdxwxvjKwiIiIiJgHJC0PvAhYl6asFdsH1xVTjJekQkVERETMDz8AbgEWAXdPcmzEwDJjERERETEPSLrC9mZ1xxHjK1WhIiIiIuaHn0l6Qt1BxPjKjEVERETEGJN0OWBKCvxGwDWUVCgBtr15jeHFGMnAIiIiImKMSVqn1+O2/zBbscR4y8AiIiIiYoxJWgF4PbAhcDnwVdv31RtVjKMMLCIiIiLGmKRjgHuB84BnA3+w/bZ6o4pxlIFFRERExBiTdLntJ1SfLwv8wvbWNYcVYyhVoSIiIiLG272NT5ICFTMpMxYRERERY0zS/cAdjU1gIXAnE1WhVq0rthgvGVhERERERMS0JRUqIiIiIiKmLQOLiIiIiIiYtgwsIiIiIiJi2jKwiIiIeUXS/ZIWS7pC0rGSVpzGc31d0ourzw+XtGmPY3eRtNMUvsbvJT2s3/1tx9w+4Nc6SNK7B40xIgIysIiIiPnnn7a3tL0ZcA+lI/ESVZ3/gdn+V9u/6nHILsDAA4uIiLkiA4uIiJjPzgM2rGYTzpN0IvArSctI+rSkiyRdJul1ACoOk3SVpDOANRtPJOlsSU+sPv8XSZdIulTSmZLWpQxg3lHNljxZ0hqSjqu+xkWSdq7Ofaik0yVdKelwSknQniR9X9Ki6pwD2h77TLX/TElrVPs2kHRqdc55kjYZxosZEfPblO7KREREzHXVzMSzgVOrXVsDm9m+tro4v8X2tpKWB86XdDqwFfBYYFPg4cCvgK+1Pe8awFeAp1TP9RDbN0v6X+B22/9ZHfdt4DO2fyppbeA04HHAgcBPbR8s6bnA/n38d15TfY2FwEWSjrP9N2Al4GLb75D079Vzvxn4MvB627+VtD3wBWDXKbyMERFLZGARERHzzUJJi6vPzwO+SklR+oXta6v9zwQ2b6yfAFYDNgKeAnzH9v3AHyWd1eH5dwDObTyX7Zu7xPEMYFNpyYTEqpJWrr7GC6tzT5b09z7+T2+VtGf1+WOqWP8GPAAcU+3/JnB89TV2Ao5t+trL9/E1IiJ6ysAiIiLmm3/a3rJ5R3WBfUfzLuAttk9rO+45Q4xjAbCD7bs6xNI3SbtQBik72r5T0tnACl0Od/V1/9H+GkRETFfWWERERCztNOANkpYDkLSxpJWAc4G9qjUYjwSe1uHcC4CnSFqvOvch1f7bgFWajjsdeEtjQ1LjQv9cYJ9q37OB1SeJdTXg79WgYhPKjEnDAqAx67IPJcXqVuBaSS+pvoYkbTHJ14iImFQGFhEREUs7nLJ+4hJJVwBfoszynwD8tnrsKODn7SfavhE4gJJ2dCkTqUgnAXs2Fm8DbwWeWC0O/xUT1ak+QhmYXElJifq/SWI9FVhW0q+BT1IGNg13ANtV/4ddgYOr/fsC+1fxXQns0cdrEhHRk2zXHUNERERERMxxmbGIiIiIiIhpy8AiIiIiIiKmLQOLiIiIiIiYtgwsIiIiIiJi2jKwiIiIiIiIacvAIiIiIiIipi0Di4iIiIiImLYMLCIiIiIiYtr+P+t+BoaULmlxAAAAAElFTkSuQmCC\n", + "text/plain": [ + "<Figure size 720x720 with 2 Axes>" + ] + }, + "metadata": { + "needs_background": "light" + } + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "5KnaRf855lsv" + }, + "source": [ + "# ajouter le code pour faire la prediction avec les modèles BERT\n", + "\n", + "\n" + ], + "execution_count": null, + "outputs": 
[] + }, + { + "cell_type": "code", + "metadata": { + "id": "llGjT-xsUvR4" + }, + "source": [ + "" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "3dGPXQSLUvUn" + }, + "source": [ + "" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "NQyuDQw_JOwB" + }, + "source": [ + "y_pred = clf.predict(vec_data)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "zgNKwbp_eYos" + }, + "source": [ + "" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "dZGxg_OreYrO" + }, + "source": [ + "df_test = df.copy()" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "g8FfxZ7bKwCe" + }, + "source": [ + "df_test['classification'] = y_pred" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "hRcYKfdIK0Tm", + "outputId": "db988435-9716-4cf5-a754-04bc5356369f" + }, + "source": [ + "df_test.shape" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "(61738, 14)" + ] + }, + "metadata": {}, + "execution_count": 29 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "nlV3yXcCMb8v" + }, + "source": [ + "df_test.head()" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "GuotNONXMXgt", + "outputId": "5fb34593-c97d-4401-a617-b25aa8f7e49c" + }, + "source": [ + "df_test.loc[(df_test['ensemble_domaine_enccre'] != df_test['classification'])].shape" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "(8597, 14)" + ] + }, + "metadata": {}, + "execution_count": 30 + } + ] + }, + 
{ + "cell_type": "code", + "metadata": { + "id": "raw7PJrtMsDx" + }, + "source": [ + "\n", + "\n", + "df_test['class_is_true'] = df_test['ensemble_domaine_enccre'] == df_test['classification']" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 310 + }, + "id": "qDD13-3dOSgK", + "outputId": "a309b603-8179-48ff-ad55-f3599f0dc699" + }, + "source": [ + "df_test.head()" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>volume</th>\n", + " <th>numero</th>\n", + " <th>head</th>\n", + " <th>normClass</th>\n", + " <th>classEDdA</th>\n", + " <th>author</th>\n", + " <th>id_enccre</th>\n", + " <th>domaine_enccre</th>\n", + " <th>ensemble_domaine_enccre</th>\n", + " <th>content</th>\n", + " <th>contentWithoutClass</th>\n", + " <th>firstParagraph</th>\n", + " <th>nb_word</th>\n", + " <th>classification</th>\n", + " <th>class_is_true</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>1</td>\n", + " <td>5</td>\n", + " <td>A, a & a</td>\n", + " <td>Grammaire</td>\n", + " <td>ordre Encyclopéd. Entend. Science de l'homme, ...</td>\n", + " <td>Dumarsais5</td>\n", + " <td>v1-1-0</td>\n", + " <td>grammaire</td>\n", + " <td>Grammaire</td>\n", + " <td>A, a & a s.m. (ordre Encyclopéd.\\nEntend. Scie...</td>\n", + " <td>A, a & a s.m. (ordre Encyclopéd.\\nEntend. Scie...</td>\n", + " <td>A, a & a s.m. (ordre Encyclopéd.\\nEntend. 
Scie...</td>\n", + " <td>711</td>\n", + " <td>Grammaire</td>\n", + " <td>True</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>1</td>\n", + " <td>6</td>\n", + " <td>A</td>\n", + " <td>unclassified</td>\n", + " <td>unclassified</td>\n", + " <td>Dumarsais5</td>\n", + " <td>v1-1-1</td>\n", + " <td>grammaire</td>\n", + " <td>Grammaire</td>\n", + " <td>A, mot, est 1. la troisieme personne du présen...</td>\n", + " <td>A, mot, est 1. la troisieme personne du présen...</td>\n", + " <td>A, mot, est 1. la troisieme personne du présen...</td>\n", + " <td>238</td>\n", + " <td>Grammaire</td>\n", + " <td>True</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5</th>\n", + " <td>1</td>\n", + " <td>7</td>\n", + " <td>A</td>\n", + " <td>unclassified</td>\n", + " <td>unclassified</td>\n", + " <td>Dumarsais</td>\n", + " <td>v1-1-2</td>\n", + " <td>grammaire</td>\n", + " <td>Grammaire</td>\n", + " <td>A, préposition vient du latin à , à dextris, à ...</td>\n", + " <td>A, préposition vient du latin à , à dextris, à ...</td>\n", + " <td>A, préposition vient du latin à , à dextris, à ...</td>\n", + " <td>1980</td>\n", + " <td>Grammaire</td>\n", + " <td>True</td>\n", + " </tr>\n", + " <tr>\n", + " <th>8</th>\n", + " <td>1</td>\n", + " <td>10</td>\n", + " <td>A, numismatique ou monétaire</td>\n", + " <td>unclassified</td>\n", + " <td>unclassified</td>\n", + " <td>Mallet</td>\n", + " <td>v1-1-5</td>\n", + " <td>numismatique</td>\n", + " <td>Médailles</td>\n", + " <td>A, numismatique ou monétaire, sur le revers de...</td>\n", + " <td>A, numismatique ou monétaire, sur le revers de...</td>\n", + " <td>A, numismatique ou monétaire, sur le revers de...</td>\n", + " <td>112</td>\n", + " <td>Médailles</td>\n", + " <td>True</td>\n", + " </tr>\n", + " <tr>\n", + " <th>9</th>\n", + " <td>1</td>\n", + " <td>11</td>\n", + " <td>A, lapidaire</td>\n", + " <td>unclassified</td>\n", + " <td>unclassified</td>\n", + " <td>Mallet</td>\n", + " <td>v1-1-6</td>\n", + " <td>inscriptions</td>\n", + " 
<td>Histoire</td>\n", + " <td>A, lapidaire, dans les anciennes inscriptions ...</td>\n", + " <td>A, lapidaire, dans les anciennes inscriptions ...</td>\n", + " <td>A, lapidaire, dans les anciennes inscriptions ...</td>\n", + " <td>80</td>\n", + " <td>Histoire</td>\n", + " <td>True</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " volume numero ... classification class_is_true\n", + "3 1 5 ... Grammaire True\n", + "4 1 6 ... Grammaire True\n", + "5 1 7 ... Grammaire True\n", + "8 1 10 ... Médailles True\n", + "9 1 11 ... Histoire True\n", + "\n", + "[5 rows x 15 columns]" + ] + }, + "metadata": {}, + "execution_count": 32 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "qsAd_w_iO9LZ" + }, + "source": [ + "df_test.to_csv('result_classification_sgdtfidf_21.11.24.csv', index=False)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "H4XfLD3EaaTe", + "outputId": "50c60efb-6670-4bd2-8c2d-f7c309fb0932" + }, + "source": [ + "df_test.loc[(df_test['ensemble_domaine_enccre'] == 'Géographie') & (df_test['class_is_true'] == False )].shape" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "(95, 15)" + ] + }, + "metadata": {}, + "execution_count": 32 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "J3Nbs6zMCnWh" + }, + "source": [ + "" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "s6xTROC7CnZA" + }, + "source": [ + "## test de sortie des scores (proba) pour chaque classe" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "1TyETcoyCnbU" + }, + "source": [ + "y_pred_proba = clf.predict_proba(vec_data)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + 
"base_uri": "https://localhost:8080/" + }, + "id": "2W4i8nrLC61s", + "outputId": "86373732-4a06-487f-db1b-0a2e867974fa" + }, + "source": [ + "clf.classes_" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "array(['Agriculture - Economie rustique', 'Anatomie', 'Antiquité',\n", + " 'Architecture', 'Arts et métiers', 'Beaux-arts',\n", + " 'Belles-lettres - Poésie', 'Blason', 'Caractères', 'Chasse',\n", + " 'Chimie', 'Commerce', 'Droit - Jurisprudence',\n", + " 'Economie domestique', 'Grammaire', 'Géographie', 'Histoire',\n", + " 'Histoire naturelle', 'Jeu', 'Marine', 'Maréchage - Manège',\n", + " 'Mathématiques', 'Mesure', 'Militaire (Art) - Guerre - Arme',\n", + " 'Minéralogie', 'Monnaie', 'Musique', 'Médailles',\n", + " 'Médecine - Chirurgie', 'Métiers', 'Pharmacie', 'Philosophie',\n", + " 'Physique - [Sciences physico-mathématiques]', 'Politique',\n", + " 'Pêche', 'Religion', 'Spectacle', 'Superstition'], dtype='<U43')" + ] + }, + "metadata": {}, + "execution_count": 47 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 73 + }, + "id": "tiecHJyTC66o", + "outputId": "bf846387-9964-418d-d122-9bc032c60266" + }, + "source": [ + "data_eval[0]" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "string" + }, + "text/plain": [ + "\"\\nLes pins ont encore le mérite de l'agrément ; ils\\nconservent pendant toute l'année leurs feuilles, qui\\ndans la plûpart des especes sont d'une très-belle verdure.\\nCes arbres sont d'une belle stature, & d'un accroissement \\nrégulier ; ils ne sont sujets ni aux insectes,\\n\\n\\nni à aucune maladie ; enfin plusieurs de ces pins sont\\nde la plus belle apparence au printems, par la couleur \\nvive des chatons dont ils sont chargés. 
Voyez sur\\nla culture du pin, le dictionnaire des Jardiniers de\\nM. Miller, & pour tous égards, le traité des arbres\\nde M. Duhamel, qui est entré dans des détails intéressans \\nsur cet arbre.\\n\"" + ] + }, + "metadata": {}, + "execution_count": 44 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "cFkSivM2Cndt", + "outputId": "8fda16d7-04cc-4609-8fa6-7995a4ffd01c" + }, + "source": [ + "" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "array([0.38404935, 0. , 0. , 0. , 0. ,\n", + " 0.01376867, 0.10553505, 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0.00485592, 0.47335577, 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0.01843524, 0. ,\n", + " 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. ])" + ] + }, + "metadata": {}, + "execution_count": 42 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 36 + }, + "id": "3dG5qbPoCngN", + "outputId": "0ad887fe-dd94-4d4d-856a-b45b8091d650" + }, + "source": [ + "clf.classes_[np.argmax(y_pred_proba[0], axis=0)]" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "string" + }, + "text/plain": [ + "'Histoire naturelle'" + ] + }, + "metadata": {}, + "execution_count": 49 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "qsrY1g6mCniF" + }, + "source": [ + "" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "gFywr71BCnkt" + }, + "source": [ + "" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 645 + }, + "id": "_Gews6OdbN3d", + "outputId": "03b7bb01-51be-4d35-f090-84f02b697366" + }, + "source": 
[ + "df_test.loc[(df_test['ensemble_domaine_enccre'] == 'Géographie') & (df_test['class_is_true'] == False )].groupby(by=[\"classification\"]).size().reset_index(name='counts').sort_values(by='counts', ascending=False)" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>classification</th>\n", + " <th>counts</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>8</th>\n", + " <td>Histoire</td>\n", + " <td>19</td>\n", + " </tr>\n", + " <tr>\n", + " <th>9</th>\n", + " <td>Histoire naturelle</td>\n", + " <td>11</td>\n", + " </tr>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>Antiquité</td>\n", + " <td>10</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>Belles-lettres - Poésie</td>\n", + " <td>9</td>\n", + " </tr>\n", + " <tr>\n", + " <th>18</th>\n", + " <td>Religion</td>\n", + " <td>9</td>\n", + " </tr>\n", + " <tr>\n", + " <th>17</th>\n", + " <td>Physique - [Sciences physico-mathématiques]</td>\n", + " <td>8</td>\n", + " </tr>\n", + " <tr>\n", + " <th>6</th>\n", + " <td>Droit - Jurisprudence</td>\n", + " <td>5</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5</th>\n", + " <td>Commerce</td>\n", + " <td>4</td>\n", + " </tr>\n", + " <tr>\n", + " <th>7</th>\n", + " <td>Grammaire</td>\n", + " <td>4</td>\n", + " </tr>\n", + " <tr>\n", + " <th>16</th>\n", + " <td>Philosophie</td>\n", + " <td>3</td>\n", + " </tr>\n", + " <tr>\n", + " <th>10</th>\n", + " <td>Marine</td>\n", + " <td>2</td>\n", + " </tr>\n", + " <tr>\n", + " <th>11</th>\n", + " 
<td>Mathématiques</td>\n", + " <td>2</td>\n", + " </tr>\n", + " <tr>\n", + " <th>14</th>\n", + " <td>Médailles</td>\n", + " <td>2</td>\n", + " </tr>\n", + " <tr>\n", + " <th>15</th>\n", + " <td>Médecine - Chirurgie</td>\n", + " <td>2</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>Chimie</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>Beaux-arts</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>Architecture</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>12</th>\n", + " <td>Militaire (Art) - Guerre - Arme</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>13</th>\n", + " <td>Musique</td>\n", + " <td>1</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " classification counts\n", + "8 Histoire 19\n", + "9 Histoire naturelle 11\n", + "0 Antiquité 10\n", + "3 Belles-lettres - Poésie 9\n", + "18 Religion 9\n", + "17 Physique - [Sciences physico-mathématiques] 8\n", + "6 Droit - Jurisprudence 5\n", + "5 Commerce 4\n", + "7 Grammaire 4\n", + "16 Philosophie 3\n", + "10 Marine 2\n", + "11 Mathématiques 2\n", + "14 Médailles 2\n", + "15 Médecine - Chirurgie 2\n", + "4 Chimie 1\n", + "2 Beaux-arts 1\n", + "1 Architecture 1\n", + "12 Militaire (Art) - Guerre - Arme 1\n", + "13 Musique 1" + ] + }, + "metadata": {}, + "execution_count": 39 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "IF_N5qRqdsmj" + }, + "source": [ + "" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "C_OcQ-uudso3" + }, + "source": [ + "" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "dgFIEa0Pdsre" + }, + "source": [ + "" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "tHX62GU4dsue" + }, + "source": [ + "" + ], + "execution_count": null, + "outputs": [] + } + ] +} \ No 
newline at end of file -- GitLab