diff --git a/notebooks/Predict.ipynb b/notebooks/Predict.ipynb
index 1ea2070ad60244e6583b5d8c2da8849c4d135d62..a3dcfe00df981474d9c6b31e59b89d9f9e92f3bf 100644
--- a/notebooks/Predict.ipynb
+++ b/notebooks/Predict.ipynb
@@ -98,7 +98,7 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 40,
+      "execution_count": 9,
       "metadata": {
         "colab": {
           "base_uri": "https://localhost:8080/"
@@ -146,7 +146,7 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 51,
+      "execution_count": 1,
       "metadata": {
         "id": "SkErnwgMMbRj"
       },
@@ -190,7 +190,7 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 42,
+      "execution_count": 2,
       "metadata": {
         "id": "M2awiee1r0zV"
       },
@@ -199,12 +199,12 @@
         "#drive_path = \"drive/MyDrive/Classification-EDdA/\"\n",
         "drive_path = \"../\"\n",
         "#path = \"/Users/lmoncla/git/gitlab.liris/GEODE/EDdA/output/\"\n",
-        "path = \"/Users/lmoncla/Nextcloud-LIRIS/GEODE/GEODE - Partage consortium/Corpus/LGE/\"\n",
-        "\n",
+        "path = \"/Users/lmoncla/Nextcloud-LIRIS/GEODE/GEODE - Partage consortium/Corpus/EDdA/\"\n",
+        "#path = \"/Users/lmoncla/Nextcloud-LIRIS/GEODE/GEODE - Partage consortium/Corpus/LGE/\"\n",
         "\n",
         "#filepath = \"Parallel_datatset_articles_230215.tsv\"\n",
-        "#filepath = \"EDdA_dataset_articles.tsv\"\n",
-        "filepath = 'LGE_dataset_articles_230314.tsv'\n",
+        "filepath = \"EDdA_dataset_articles_221208.tsv\"\n",
+        "#filepath = 'LGE_dataset_articles_230314.tsv'\n",
         "\n",
         "corpus = 'lge'\n",
         "#corpus = ''"
@@ -212,7 +212,7 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 43,
+      "execution_count": 3,
       "metadata": {
         "colab": {
           "base_uri": "https://localhost:8080/",
@@ -243,93 +243,138 @@
               "  <thead>\n",
               "    <tr style=\"text-align: right;\">\n",
               "      <th></th>\n",
-              "      <th>uid</th>\n",
-              "      <th>lge-volume</th>\n",
-              "      <th>lge-numero</th>\n",
-              "      <th>lge-head</th>\n",
-              "      <th>lge-page</th>\n",
-              "      <th>lge-id</th>\n",
-              "      <th>lge-content</th>\n",
-              "      <th>lge-nbWords</th>\n",
+              "      <th>volume</th>\n",
+              "      <th>numero</th>\n",
+              "      <th>head</th>\n",
+              "      <th>author</th>\n",
+              "      <th>edda_class</th>\n",
+              "      <th>enccre_id</th>\n",
+              "      <th>enccre_class</th>\n",
+              "      <th>content</th>\n",
+              "      <th>content_without_designant</th>\n",
+              "      <th>first_paragraph</th>\n",
+              "      <th>nb_words</th>\n",
+              "      <th>super_domain</th>\n",
               "    </tr>\n",
               "  </thead>\n",
               "  <tbody>\n",
               "    <tr>\n",
               "      <th>0</th>\n",
-              "      <td>lge_1_a-0</td>\n",
               "      <td>1</td>\n",
               "      <td>1</td>\n",
-              "      <td>A</td>\n",
-              "      <td>0</td>\n",
-              "      <td>a-0</td>\n",
-              "      <td>A(Ling.). Son vocal et première lettre de notr...</td>\n",
-              "      <td>1761.0</td>\n",
+              "      <td>Title Page</td>\n",
+              "      <td>unsigned</td>\n",
+              "      <td>unclassified</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>\\n\\nENCYCLOPÉDIE,\\nDICTIONNAIRE RAISONNÉ\\nDES ...</td>\n",
+              "      <td>\\n\\nENCYCLOPÉDIE,\\nDICTIONNAIRE RAISONNÉ\\nDES ...</td>\n",
+              "      <td>\\n\\nENCYCLOPÉDIE,\\nDICTIONNAIRE RAISONNÉ\\nDES ...</td>\n",
+              "      <td>151</td>\n",
+              "      <td>unclassified</td>\n",
               "    </tr>\n",
               "    <tr>\n",
               "      <th>1</th>\n",
-              "      <td>lge_1_a-1</td>\n",
               "      <td>1</td>\n",
               "      <td>2</td>\n",
-              "      <td>A</td>\n",
-              "      <td>1</td>\n",
-              "      <td>a-1</td>\n",
-              "      <td>A(Paléogr.). C’est à l’alphabet phénicien, on ...</td>\n",
-              "      <td>839.0</td>\n",
+              "      <td>A MONSEIGNEUR LE COMTE D'ARGENSON</td>\n",
+              "      <td>Diderot &amp; d'Alembert</td>\n",
+              "      <td>unclassified</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>\\n\\nA MONSEIGNEUR\\nLE COMTE D'ARGENSON,\\nMINIS...</td>\n",
+              "      <td>\\n\\nA MONSEIGNEUR\\nLE COMTE D'ARGENSON,\\nMINIS...</td>\n",
+              "      <td>\\n\\nA MONSEIGNEUR\\nLE COMTE D'ARGENSON,\\nMINIS...</td>\n",
+              "      <td>208</td>\n",
+              "      <td>unclassified</td>\n",
               "    </tr>\n",
               "    <tr>\n",
               "      <th>2</th>\n",
-              "      <td>lge_1_a-2</td>\n",
               "      <td>1</td>\n",
               "      <td>3</td>\n",
-              "      <td>A</td>\n",
-              "      <td>4</td>\n",
-              "      <td>a-2</td>\n",
-              "      <td>A(Log.). Cette voyelle désigne les proposition...</td>\n",
-              "      <td>56.0</td>\n",
+              "      <td>DISCOURS PRÉLIMINAIRE DES EDITEURS</td>\n",
+              "      <td>d'Alembert</td>\n",
+              "      <td>unclassified</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>\\n\\nDISCOURS PRÉLIMINAIRE\\nDES EDITEURS.\\n\\n\\n...</td>\n",
+              "      <td>\\n\\nDISCOURS PRÉLIMINAIRE\\nDES EDITEURS.\\n\\n\\n...</td>\n",
+              "      <td>\\n\\nDISCOURS PRÉLIMINAIRE\\nDES EDITEURS.\\n\\n</td>\n",
+              "      <td>44669</td>\n",
+              "      <td>unclassified</td>\n",
               "    </tr>\n",
               "    <tr>\n",
               "      <th>3</th>\n",
-              "      <td>lge_1_a-3</td>\n",
               "      <td>1</td>\n",
-              "      <td>4</td>\n",
-              "      <td>A</td>\n",
-              "      <td>4</td>\n",
-              "      <td>a-3</td>\n",
-              "      <td>A(Mus.). La lettre a est employée par les musi...</td>\n",
-              "      <td>267.0</td>\n",
+              "      <td>5</td>\n",
+              "      <td>A, a &amp; a</td>\n",
+              "      <td>Dumarsais5</td>\n",
+              "      <td>Grammaire</td>\n",
+              "      <td>v1-1-0</td>\n",
+              "      <td>Grammaire</td>\n",
+              "      <td>\\nA, a &amp; a s.m. (ordre Encyclopéd.\\nEntend. Sc...</td>\n",
+              "      <td>\\nA, a &amp; a s.m. (ordre Encyclopéd.\\nEntend. Sc...</td>\n",
+              "      <td>\\nA, a &amp; a s.m. (ordre Encyclopéd.\\nEntend. Sc...</td>\n",
+              "      <td>711</td>\n",
+              "      <td>Philosophie</td>\n",
               "    </tr>\n",
               "    <tr>\n",
               "      <th>4</th>\n",
-              "      <td>lge_1_a-4</td>\n",
               "      <td>1</td>\n",
-              "      <td>5</td>\n",
+              "      <td>6</td>\n",
               "      <td>A</td>\n",
-              "      <td>4</td>\n",
-              "      <td>a-4</td>\n",
-              "      <td>A(Numis.). Dans la numismatique grecque, la le...</td>\n",
-              "      <td>67.0</td>\n",
+              "      <td>Dumarsais5</td>\n",
+              "      <td>unclassified</td>\n",
+              "      <td>v1-1-1</td>\n",
+              "      <td>Grammaire</td>\n",
+              "      <td>\\nA, mot, est 1. la troisieme personne du prés...</td>\n",
+              "      <td>\\nA, mot, est 1. la troisieme personne du prés...</td>\n",
+              "      <td>\\nA, mot, est 1. la troisieme personne du prés...</td>\n",
+              "      <td>238</td>\n",
+              "      <td>unclassified</td>\n",
               "    </tr>\n",
               "  </tbody>\n",
               "</table>\n",
               "</div>"
             ],
             "text/plain": [
-              "         uid  lge-volume  lge-numero lge-head  lge-page lge-id  \\\n",
-              "0  lge_1_a-0           1           1        A         0    a-0   \n",
-              "1  lge_1_a-1           1           2        A         1    a-1   \n",
-              "2  lge_1_a-2           1           3        A         4    a-2   \n",
-              "3  lge_1_a-3           1           4        A         4    a-3   \n",
-              "4  lge_1_a-4           1           5        A         4    a-4   \n",
+              "   volume  numero                                head                author  \\\n",
+              "0       1       1                          Title Page              unsigned   \n",
+              "1       1       2   A MONSEIGNEUR LE COMTE D'ARGENSON  Diderot & d'Alembert   \n",
+              "2       1       3  DISCOURS PRÉLIMINAIRE DES EDITEURS            d'Alembert   \n",
+              "3       1       5                            A, a & a            Dumarsais5   \n",
+              "4       1       6                                   A            Dumarsais5   \n",
+              "\n",
+              "     edda_class enccre_id enccre_class  \\\n",
+              "0  unclassified       NaN          NaN   \n",
+              "1  unclassified       NaN          NaN   \n",
+              "2  unclassified       NaN          NaN   \n",
+              "3     Grammaire    v1-1-0    Grammaire   \n",
+              "4  unclassified    v1-1-1    Grammaire   \n",
+              "\n",
+              "                                             content  \\\n",
+              "0  \\n\\nENCYCLOPÉDIE,\\nDICTIONNAIRE RAISONNÉ\\nDES ...   \n",
+              "1  \\n\\nA MONSEIGNEUR\\nLE COMTE D'ARGENSON,\\nMINIS...   \n",
+              "2  \\n\\nDISCOURS PRÉLIMINAIRE\\nDES EDITEURS.\\n\\n\\n...   \n",
+              "3  \\nA, a & a s.m. (ordre Encyclopéd.\\nEntend. Sc...   \n",
+              "4  \\nA, mot, est 1. la troisieme personne du prés...   \n",
               "\n",
-              "                                         lge-content  lge-nbWords  \n",
-              "0  A(Ling.). Son vocal et première lettre de notr...       1761.0  \n",
-              "1  A(Paléogr.). C’est à l’alphabet phénicien, on ...        839.0  \n",
-              "2  A(Log.). Cette voyelle désigne les proposition...         56.0  \n",
-              "3  A(Mus.). La lettre a est employée par les musi...        267.0  \n",
-              "4  A(Numis.). Dans la numismatique grecque, la le...         67.0  "
+              "                           content_without_designant  \\\n",
+              "0  \\n\\nENCYCLOPÉDIE,\\nDICTIONNAIRE RAISONNÉ\\nDES ...   \n",
+              "1  \\n\\nA MONSEIGNEUR\\nLE COMTE D'ARGENSON,\\nMINIS...   \n",
+              "2  \\n\\nDISCOURS PRÉLIMINAIRE\\nDES EDITEURS.\\n\\n\\n...   \n",
+              "3  \\nA, a & a s.m. (ordre Encyclopéd.\\nEntend. Sc...   \n",
+              "4  \\nA, mot, est 1. la troisieme personne du prés...   \n",
+              "\n",
+              "                                     first_paragraph  nb_words  super_domain  \n",
+              "0  \\n\\nENCYCLOPÉDIE,\\nDICTIONNAIRE RAISONNÉ\\nDES ...       151  unclassified  \n",
+              "1  \\n\\nA MONSEIGNEUR\\nLE COMTE D'ARGENSON,\\nMINIS...       208  unclassified  \n",
+              "2       \\n\\nDISCOURS PRÉLIMINAIRE\\nDES EDITEURS.\\n\\n     44669  unclassified  \n",
+              "3  \\nA, a & a s.m. (ordre Encyclopéd.\\nEntend. Sc...       711   Philosophie  \n",
+              "4  \\nA, mot, est 1. la troisieme personne du prés...       238  unclassified  "
             ]
           },
-          "execution_count": 43,
+          "execution_count": 3,
           "metadata": {},
           "output_type": "execute_result"
         }
@@ -341,13 +386,14 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 44,
+      "execution_count": 4,
       "metadata": {
         "id": "Ndw4UtgWt_MJ"
       },
       "outputs": [],
       "source": [
-        "dataset = df[corpus+'-content'].values"
+        "dataset = df['content'].values\n",
+        "#dataset = df[corpus+'-content'].values"
       ]
     },
     {
@@ -363,7 +409,7 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 45,
+      "execution_count": 5,
       "metadata": {
         "id": "0qDZ86qTr0zX"
       },
@@ -378,7 +424,7 @@
     },
     {
       "cell_type": "code",
-      "execution_count": null,
+      "execution_count": 6,
       "metadata": {
         "id": "KEljGX0br0zX"
       },
@@ -488,7 +534,7 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 46,
+      "execution_count": 7,
       "metadata": {
         "colab": {
           "base_uri": "https://localhost:8080/",
@@ -575,7 +621,7 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 47,
+      "execution_count": 10,
       "metadata": {
         "id": "CN8EZst-r0zZ"
       },
@@ -608,7 +654,7 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 48,
+      "execution_count": 11,
       "metadata": {},
       "outputs": [
         {
@@ -635,14 +681,14 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 52,
+      "execution_count": 12,
       "metadata": {},
       "outputs": [
         {
           "name": "stderr",
           "output_type": "stream",
           "text": [
-            "134820it [1:07:31, 33.27it/s]\n"
+            "74190it [41:03, 30.12it/s]\n"
           ]
         }
       ],
@@ -659,7 +705,7 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 53,
+      "execution_count": 13,
       "metadata": {
         "id": "fo6k4li1r0za"
       },
@@ -685,7 +731,7 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 54,
+      "execution_count": 14,
       "metadata": {
         "id": "UU7qg7zVr0zb"
       },
@@ -698,7 +744,7 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 55,
+      "execution_count": 15,
       "metadata": {},
       "outputs": [],
       "source": [
@@ -708,7 +754,7 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 56,
+      "execution_count": 16,
       "metadata": {
         "id": "w4eHpBztr0zb"
       },
@@ -724,7 +770,7 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 57,
+      "execution_count": 17,
       "metadata": {
         "colab": {
           "base_uri": "https://localhost:8080/",
@@ -755,14 +801,18 @@
               "  <thead>\n",
               "    <tr style=\"text-align: right;\">\n",
               "      <th></th>\n",
-              "      <th>uid</th>\n",
-              "      <th>lge-volume</th>\n",
-              "      <th>lge-numero</th>\n",
-              "      <th>lge-head</th>\n",
-              "      <th>lge-page</th>\n",
-              "      <th>lge-id</th>\n",
-              "      <th>lge-content</th>\n",
-              "      <th>lge-nbWords</th>\n",
+              "      <th>volume</th>\n",
+              "      <th>numero</th>\n",
+              "      <th>head</th>\n",
+              "      <th>author</th>\n",
+              "      <th>edda_class</th>\n",
+              "      <th>enccre_id</th>\n",
+              "      <th>enccre_class</th>\n",
+              "      <th>content</th>\n",
+              "      <th>content_without_designant</th>\n",
+              "      <th>first_paragraph</th>\n",
+              "      <th>nb_words</th>\n",
+              "      <th>super_domain</th>\n",
               "      <th>lge-superdomainPred1</th>\n",
               "      <th>lge-superdomainProba1</th>\n",
               "      <th>lge-superdomainPred2</th>\n",
@@ -774,229 +824,305 @@
               "  <tbody>\n",
               "    <tr>\n",
               "      <th>0</th>\n",
-              "      <td>lge_1_a-0</td>\n",
               "      <td>1</td>\n",
               "      <td>1</td>\n",
-              "      <td>A</td>\n",
-              "      <td>0</td>\n",
-              "      <td>a-0</td>\n",
-              "      <td>A(Ling.). Son vocal et première lettre de notr...</td>\n",
-              "      <td>1761.0</td>\n",
+              "      <td>Title Page</td>\n",
+              "      <td>unsigned</td>\n",
+              "      <td>unclassified</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>\\n\\nENCYCLOPÉDIE,\\nDICTIONNAIRE RAISONNÉ\\nDES ...</td>\n",
+              "      <td>\\n\\nENCYCLOPÉDIE,\\nDICTIONNAIRE RAISONNÉ\\nDES ...</td>\n",
+              "      <td>\\n\\nENCYCLOPÉDIE,\\nDICTIONNAIRE RAISONNÉ\\nDES ...</td>\n",
+              "      <td>151</td>\n",
+              "      <td>unclassified</td>\n",
               "      <td>Philosophie</td>\n",
-              "      <td>0.937586</td>\n",
+              "      <td>0.986489</td>\n",
               "      <td>Belles-lettres</td>\n",
-              "      <td>0.021192</td>\n",
-              "      <td>Histoire</td>\n",
-              "      <td>0.012657</td>\n",
+              "      <td>0.002821</td>\n",
+              "      <td>Politique</td>\n",
+              "      <td>0.001780</td>\n",
               "    </tr>\n",
               "    <tr>\n",
               "      <th>1</th>\n",
-              "      <td>lge_1_a-1</td>\n",
               "      <td>1</td>\n",
               "      <td>2</td>\n",
-              "      <td>A</td>\n",
-              "      <td>1</td>\n",
-              "      <td>a-1</td>\n",
-              "      <td>A(Paléogr.). C’est à l’alphabet phénicien, on ...</td>\n",
-              "      <td>839.0</td>\n",
-              "      <td>Géographie</td>\n",
-              "      <td>0.992606</td>\n",
+              "      <td>A MONSEIGNEUR LE COMTE D'ARGENSON</td>\n",
+              "      <td>Diderot &amp; d'Alembert</td>\n",
+              "      <td>unclassified</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>\\n\\nA MONSEIGNEUR\\nLE COMTE D'ARGENSON,\\nMINIS...</td>\n",
+              "      <td>\\n\\nA MONSEIGNEUR\\nLE COMTE D'ARGENSON,\\nMINIS...</td>\n",
+              "      <td>\\n\\nA MONSEIGNEUR\\nLE COMTE D'ARGENSON,\\nMINIS...</td>\n",
+              "      <td>208</td>\n",
+              "      <td>unclassified</td>\n",
+              "      <td>Philosophie</td>\n",
+              "      <td>0.943809</td>\n",
               "      <td>Histoire</td>\n",
-              "      <td>0.002934</td>\n",
-              "      <td>Histoire naturelle</td>\n",
-              "      <td>0.001019</td>\n",
+              "      <td>0.014932</td>\n",
+              "      <td>Politique</td>\n",
+              "      <td>0.014871</td>\n",
               "    </tr>\n",
               "    <tr>\n",
               "      <th>2</th>\n",
-              "      <td>lge_1_a-2</td>\n",
               "      <td>1</td>\n",
               "      <td>3</td>\n",
-              "      <td>A</td>\n",
-              "      <td>4</td>\n",
-              "      <td>a-2</td>\n",
-              "      <td>A(Log.). Cette voyelle désigne les proposition...</td>\n",
-              "      <td>56.0</td>\n",
-              "      <td>Philosophie</td>\n",
-              "      <td>0.982367</td>\n",
+              "      <td>DISCOURS PRÉLIMINAIRE DES EDITEURS</td>\n",
+              "      <td>d'Alembert</td>\n",
+              "      <td>unclassified</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>\\n\\nDISCOURS PRÉLIMINAIRE\\nDES EDITEURS.\\n\\n\\n...</td>\n",
+              "      <td>\\n\\nDISCOURS PRÉLIMINAIRE\\nDES EDITEURS.\\n\\n\\n...</td>\n",
+              "      <td>\\n\\nDISCOURS PRÉLIMINAIRE\\nDES EDITEURS.\\n\\n</td>\n",
+              "      <td>44669</td>\n",
+              "      <td>unclassified</td>\n",
               "      <td>Belles-lettres</td>\n",
-              "      <td>0.004124</td>\n",
+              "      <td>0.926219</td>\n",
+              "      <td>Histoire</td>\n",
+              "      <td>0.019612</td>\n",
               "      <td>Beaux-arts</td>\n",
-              "      <td>0.002203</td>\n",
+              "      <td>0.011769</td>\n",
               "    </tr>\n",
               "    <tr>\n",
               "      <th>3</th>\n",
-              "      <td>lge_1_a-3</td>\n",
               "      <td>1</td>\n",
-              "      <td>4</td>\n",
-              "      <td>A</td>\n",
-              "      <td>4</td>\n",
-              "      <td>a-3</td>\n",
-              "      <td>A(Mus.). La lettre a est employée par les musi...</td>\n",
-              "      <td>267.0</td>\n",
-              "      <td>Musique</td>\n",
-              "      <td>0.905895</td>\n",
+              "      <td>5</td>\n",
+              "      <td>A, a &amp; a</td>\n",
+              "      <td>Dumarsais5</td>\n",
+              "      <td>Grammaire</td>\n",
+              "      <td>v1-1-0</td>\n",
+              "      <td>Grammaire</td>\n",
+              "      <td>\\nA, a &amp; a s.m. (ordre Encyclopéd.\\nEntend. Sc...</td>\n",
+              "      <td>\\nA, a &amp; a s.m. (ordre Encyclopéd.\\nEntend. Sc...</td>\n",
+              "      <td>\\nA, a &amp; a s.m. (ordre Encyclopéd.\\nEntend. Sc...</td>\n",
+              "      <td>711</td>\n",
+              "      <td>Philosophie</td>\n",
+              "      <td>Philosophie</td>\n",
+              "      <td>0.978732</td>\n",
+              "      <td>Politique</td>\n",
+              "      <td>0.004091</td>\n",
               "      <td>Belles-lettres</td>\n",
-              "      <td>0.029459</td>\n",
-              "      <td>Histoire</td>\n",
-              "      <td>0.014980</td>\n",
+              "      <td>0.002425</td>\n",
               "    </tr>\n",
               "    <tr>\n",
               "      <th>4</th>\n",
-              "      <td>lge_1_a-4</td>\n",
               "      <td>1</td>\n",
-              "      <td>5</td>\n",
+              "      <td>6</td>\n",
               "      <td>A</td>\n",
-              "      <td>4</td>\n",
-              "      <td>a-4</td>\n",
-              "      <td>A(Numis.). Dans la numismatique grecque, la le...</td>\n",
-              "      <td>67.0</td>\n",
-              "      <td>Histoire</td>\n",
-              "      <td>0.986111</td>\n",
+              "      <td>Dumarsais5</td>\n",
+              "      <td>unclassified</td>\n",
+              "      <td>v1-1-1</td>\n",
+              "      <td>Grammaire</td>\n",
+              "      <td>\\nA, mot, est 1. la troisieme personne du prés...</td>\n",
+              "      <td>\\nA, mot, est 1. la troisieme personne du prés...</td>\n",
+              "      <td>\\nA, mot, est 1. la troisieme personne du prés...</td>\n",
+              "      <td>238</td>\n",
+              "      <td>unclassified</td>\n",
+              "      <td>Philosophie</td>\n",
+              "      <td>0.988337</td>\n",
               "      <td>Belles-lettres</td>\n",
-              "      <td>0.003949</td>\n",
-              "      <td>Géographie</td>\n",
-              "      <td>0.001527</td>\n",
+              "      <td>0.003174</td>\n",
+              "      <td>Beaux-arts</td>\n",
+              "      <td>0.001221</td>\n",
               "    </tr>\n",
               "    <tr>\n",
               "      <th>5</th>\n",
-              "      <td>lge_1_aa-0</td>\n",
               "      <td>1</td>\n",
-              "      <td>6</td>\n",
-              "      <td>AA</td>\n",
-              "      <td>4</td>\n",
-              "      <td>aa-0</td>\n",
-              "      <td>AA. Ces deux lettres désignent l’atelier monét...</td>\n",
-              "      <td>14.0</td>\n",
-              "      <td>Commerce</td>\n",
-              "      <td>0.986866</td>\n",
-              "      <td>Droit Jurisprudence</td>\n",
-              "      <td>0.002140</td>\n",
-              "      <td>Politique</td>\n",
-              "      <td>0.001812</td>\n",
+              "      <td>7</td>\n",
+              "      <td>A</td>\n",
+              "      <td>Dumarsais</td>\n",
+              "      <td>unclassified</td>\n",
+              "      <td>v1-1-2</td>\n",
+              "      <td>Grammaire</td>\n",
+              "      <td>\\nA, préposition vient du latin à, à dextris, ...</td>\n",
+              "      <td>\\nA, préposition vient du latin à, à dextris, ...</td>\n",
+              "      <td>\\nA, préposition vient du latin à, à dextris, ...</td>\n",
+              "      <td>1980</td>\n",
+              "      <td>unclassified</td>\n",
+              "      <td>Philosophie</td>\n",
+              "      <td>0.988102</td>\n",
+              "      <td>Belles-lettres</td>\n",
+              "      <td>0.002661</td>\n",
+              "      <td>Beaux-arts</td>\n",
+              "      <td>0.001391</td>\n",
               "    </tr>\n",
               "    <tr>\n",
               "      <th>6</th>\n",
-              "      <td>lge_1_aa-1</td>\n",
               "      <td>1</td>\n",
-              "      <td>7</td>\n",
-              "      <td>AA</td>\n",
-              "      <td>4</td>\n",
-              "      <td>aa-1</td>\n",
-              "      <td>AA. Nom de plusieurs cours d’eau de l’Europe o...</td>\n",
-              "      <td>75.0</td>\n",
-              "      <td>Géographie</td>\n",
-              "      <td>0.954104</td>\n",
+              "      <td>8</td>\n",
+              "      <td>A</td>\n",
+              "      <td>Mallet</td>\n",
+              "      <td>unclassified</td>\n",
+              "      <td>v1-1-3</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>\\nA, étoit une lettre numérale parmi les Ancie...</td>\n",
+              "      <td>\\nA, étoit une lettre numérale parmi les Ancie...</td>\n",
+              "      <td>\\nA, étoit une lettre numérale parmi les Ancie...</td>\n",
+              "      <td>200</td>\n",
+              "      <td>unclassified</td>\n",
               "      <td>Histoire</td>\n",
-              "      <td>0.025117</td>\n",
-              "      <td>Histoire naturelle</td>\n",
-              "      <td>0.008872</td>\n",
+              "      <td>0.631214</td>\n",
+              "      <td>Belles-lettres</td>\n",
+              "      <td>0.320553</td>\n",
+              "      <td>Physique</td>\n",
+              "      <td>0.007173</td>\n",
               "    </tr>\n",
               "    <tr>\n",
               "      <th>7</th>\n",
-              "      <td>lge_1_aa-2</td>\n",
               "      <td>1</td>\n",
-              "      <td>8</td>\n",
-              "      <td>AA</td>\n",
-              "      <td>5</td>\n",
-              "      <td>aa-2</td>\n",
-              "      <td>AA. Rivière de France, prend sa source aux Tro...</td>\n",
-              "      <td>165.0</td>\n",
-              "      <td>Géographie</td>\n",
-              "      <td>0.998200</td>\n",
+              "      <td>9</td>\n",
+              "      <td>A, lettre symbolique</td>\n",
+              "      <td>Mallet</td>\n",
+              "      <td>unclassified</td>\n",
+              "      <td>v1-1-4</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>\\nA, lettre symbolique, étoit un hiéroglyphe c...</td>\n",
+              "      <td>\\nA, lettre symbolique, étoit un hiéroglyphe c...</td>\n",
+              "      <td>\\nA, lettre symbolique, étoit un hiéroglyphe c...</td>\n",
+              "      <td>82</td>\n",
+              "      <td>unclassified</td>\n",
               "      <td>Histoire</td>\n",
-              "      <td>0.000280</td>\n",
-              "      <td>Histoire naturelle</td>\n",
-              "      <td>0.000190</td>\n",
+              "      <td>0.979700</td>\n",
+              "      <td>Belles-lettres</td>\n",
+              "      <td>0.012630</td>\n",
+              "      <td>Religion</td>\n",
+              "      <td>0.001750</td>\n",
               "    </tr>\n",
               "    <tr>\n",
               "      <th>8</th>\n",
-              "      <td>lge_1_aa-3</td>\n",
               "      <td>1</td>\n",
-              "      <td>9</td>\n",
-              "      <td>AA</td>\n",
-              "      <td>5</td>\n",
-              "      <td>aa-3</td>\n",
-              "      <td>AA. Rivière de Hollande, affluent de la Dommel...</td>\n",
-              "      <td>17.0</td>\n",
-              "      <td>Géographie</td>\n",
-              "      <td>0.995858</td>\n",
-              "      <td>Histoire naturelle</td>\n",
-              "      <td>0.001078</td>\n",
+              "      <td>10</td>\n",
+              "      <td>A, numismatique ou monétaire</td>\n",
+              "      <td>Mallet</td>\n",
+              "      <td>unclassified</td>\n",
+              "      <td>v1-1-5</td>\n",
+              "      <td>Médailles</td>\n",
+              "      <td>\\nA, numismatique ou monétaire, sur le revers ...</td>\n",
+              "      <td>\\nA, numismatique ou monétaire, sur le revers ...</td>\n",
+              "      <td>\\nA, numismatique ou monétaire, sur le revers ...</td>\n",
+              "      <td>112</td>\n",
+              "      <td>unclassified</td>\n",
               "      <td>Histoire</td>\n",
-              "      <td>0.000548</td>\n",
+              "      <td>0.947388</td>\n",
+              "      <td>Commerce</td>\n",
+              "      <td>0.027528</td>\n",
+              "      <td>Belles-lettres</td>\n",
+              "      <td>0.010894</td>\n",
               "    </tr>\n",
               "    <tr>\n",
               "      <th>9</th>\n",
-              "      <td>lge_1_aa-4</td>\n",
               "      <td>1</td>\n",
-              "      <td>10</td>\n",
-              "      <td>AA</td>\n",
-              "      <td>5</td>\n",
-              "      <td>aa-4</td>\n",
-              "      <td>AA. Nom de deux fleuves de la Russie. Le premi...</td>\n",
-              "      <td>71.0</td>\n",
-              "      <td>Géographie</td>\n",
-              "      <td>0.997916</td>\n",
+              "      <td>11</td>\n",
+              "      <td>A, lapidaire</td>\n",
+              "      <td>Mallet</td>\n",
+              "      <td>unclassified</td>\n",
+              "      <td>v1-1-6</td>\n",
               "      <td>Histoire</td>\n",
-              "      <td>0.000561</td>\n",
-              "      <td>Militaire</td>\n",
-              "      <td>0.000186</td>\n",
+              "      <td>\\nA, lapidaire, dans les anciennes inscription...</td>\n",
+              "      <td>\\nA, lapidaire, dans les anciennes inscription...</td>\n",
+              "      <td>\\nA, lapidaire, dans les anciennes inscription...</td>\n",
+              "      <td>80</td>\n",
+              "      <td>unclassified</td>\n",
+              "      <td>Histoire</td>\n",
+              "      <td>0.738804</td>\n",
+              "      <td>Belles-lettres</td>\n",
+              "      <td>0.193938</td>\n",
+              "      <td>Beaux-arts</td>\n",
+              "      <td>0.019706</td>\n",
               "    </tr>\n",
               "  </tbody>\n",
               "</table>\n",
               "</div>"
             ],
             "text/plain": [
-              "          uid  lge-volume  lge-numero lge-head  lge-page lge-id  \\\n",
-              "0   lge_1_a-0           1           1        A         0    a-0   \n",
-              "1   lge_1_a-1           1           2        A         1    a-1   \n",
-              "2   lge_1_a-2           1           3        A         4    a-2   \n",
-              "3   lge_1_a-3           1           4        A         4    a-3   \n",
-              "4   lge_1_a-4           1           5        A         4    a-4   \n",
-              "5  lge_1_aa-0           1           6       AA         4   aa-0   \n",
-              "6  lge_1_aa-1           1           7       AA         4   aa-1   \n",
-              "7  lge_1_aa-2           1           8       AA         5   aa-2   \n",
-              "8  lge_1_aa-3           1           9       AA         5   aa-3   \n",
-              "9  lge_1_aa-4           1          10       AA         5   aa-4   \n",
+              "   volume  numero                                head                author  \\\n",
+              "0       1       1                          Title Page              unsigned   \n",
+              "1       1       2   A MONSEIGNEUR LE COMTE D'ARGENSON  Diderot & d'Alembert   \n",
+              "2       1       3  DISCOURS PRÉLIMINAIRE DES EDITEURS            d'Alembert   \n",
+              "3       1       5                            A, a & a            Dumarsais5   \n",
+              "4       1       6                                   A            Dumarsais5   \n",
+              "5       1       7                                   A             Dumarsais   \n",
+              "6       1       8                                   A                Mallet   \n",
+              "7       1       9                A, lettre symbolique                Mallet   \n",
+              "8       1      10        A, numismatique ou monétaire                Mallet   \n",
+              "9       1      11                        A, lapidaire                Mallet   \n",
               "\n",
-              "                                         lge-content  lge-nbWords  \\\n",
-              "0  A(Ling.). Son vocal et première lettre de notr...       1761.0   \n",
-              "1  A(Paléogr.). C’est à l’alphabet phénicien, on ...        839.0   \n",
-              "2  A(Log.). Cette voyelle désigne les proposition...         56.0   \n",
-              "3  A(Mus.). La lettre a est employée par les musi...        267.0   \n",
-              "4  A(Numis.). Dans la numismatique grecque, la le...         67.0   \n",
-              "5  AA. Ces deux lettres désignent l’atelier monét...         14.0   \n",
-              "6  AA. Nom de plusieurs cours d’eau de l’Europe o...         75.0   \n",
-              "7  AA. Rivière de France, prend sa source aux Tro...        165.0   \n",
-              "8  AA. Rivière de Hollande, affluent de la Dommel...         17.0   \n",
-              "9  AA. Nom de deux fleuves de la Russie. Le premi...         71.0   \n",
+              "     edda_class enccre_id enccre_class  \\\n",
+              "0  unclassified       NaN          NaN   \n",
+              "1  unclassified       NaN          NaN   \n",
+              "2  unclassified       NaN          NaN   \n",
+              "3     Grammaire    v1-1-0    Grammaire   \n",
+              "4  unclassified    v1-1-1    Grammaire   \n",
+              "5  unclassified    v1-1-2    Grammaire   \n",
+              "6  unclassified    v1-1-3          NaN   \n",
+              "7  unclassified    v1-1-4          NaN   \n",
+              "8  unclassified    v1-1-5    Médailles   \n",
+              "9  unclassified    v1-1-6     Histoire   \n",
+              "\n",
+              "                                             content  \\\n",
+              "0  \\n\\nENCYCLOPÉDIE,\\nDICTIONNAIRE RAISONNÉ\\nDES ...   \n",
+              "1  \\n\\nA MONSEIGNEUR\\nLE COMTE D'ARGENSON,\\nMINIS...   \n",
+              "2  \\n\\nDISCOURS PRÉLIMINAIRE\\nDES EDITEURS.\\n\\n\\n...   \n",
+              "3  \\nA, a & a s.m. (ordre Encyclopéd.\\nEntend. Sc...   \n",
+              "4  \\nA, mot, est 1. la troisieme personne du prés...   \n",
+              "5  \\nA, préposition vient du latin à, à dextris, ...   \n",
+              "6  \\nA, étoit une lettre numérale parmi les Ancie...   \n",
+              "7  \\nA, lettre symbolique, étoit un hiéroglyphe c...   \n",
+              "8  \\nA, numismatique ou monétaire, sur le revers ...   \n",
+              "9  \\nA, lapidaire, dans les anciennes inscription...   \n",
+              "\n",
+              "                           content_without_designant  \\\n",
+              "0  \\n\\nENCYCLOPÉDIE,\\nDICTIONNAIRE RAISONNÉ\\nDES ...   \n",
+              "1  \\n\\nA MONSEIGNEUR\\nLE COMTE D'ARGENSON,\\nMINIS...   \n",
+              "2  \\n\\nDISCOURS PRÉLIMINAIRE\\nDES EDITEURS.\\n\\n\\n...   \n",
+              "3  \\nA, a & a s.m. (ordre Encyclopéd.\\nEntend. Sc...   \n",
+              "4  \\nA, mot, est 1. la troisieme personne du prés...   \n",
+              "5  \\nA, préposition vient du latin à, à dextris, ...   \n",
+              "6  \\nA, étoit une lettre numérale parmi les Ancie...   \n",
+              "7  \\nA, lettre symbolique, étoit un hiéroglyphe c...   \n",
+              "8  \\nA, numismatique ou monétaire, sur le revers ...   \n",
+              "9  \\nA, lapidaire, dans les anciennes inscription...   \n",
+              "\n",
+              "                                     first_paragraph  nb_words  super_domain  \\\n",
+              "0  \\n\\nENCYCLOPÉDIE,\\nDICTIONNAIRE RAISONNÉ\\nDES ...       151  unclassified   \n",
+              "1  \\n\\nA MONSEIGNEUR\\nLE COMTE D'ARGENSON,\\nMINIS...       208  unclassified   \n",
+              "2       \\n\\nDISCOURS PRÉLIMINAIRE\\nDES EDITEURS.\\n\\n     44669  unclassified   \n",
+              "3  \\nA, a & a s.m. (ordre Encyclopéd.\\nEntend. Sc...       711   Philosophie   \n",
+              "4  \\nA, mot, est 1. la troisieme personne du prés...       238  unclassified   \n",
+              "5  \\nA, préposition vient du latin à, à dextris, ...      1980  unclassified   \n",
+              "6  \\nA, étoit une lettre numérale parmi les Ancie...       200  unclassified   \n",
+              "7  \\nA, lettre symbolique, étoit un hiéroglyphe c...        82  unclassified   \n",
+              "8  \\nA, numismatique ou monétaire, sur le revers ...       112  unclassified   \n",
+              "9  \\nA, lapidaire, dans les anciennes inscription...        80  unclassified   \n",
               "\n",
               "  lge-superdomainPred1  lge-superdomainProba1 lge-superdomainPred2  \\\n",
-              "0          Philosophie               0.937586       Belles-lettres   \n",
-              "1           Géographie               0.992606             Histoire   \n",
-              "2          Philosophie               0.982367       Belles-lettres   \n",
-              "3              Musique               0.905895       Belles-lettres   \n",
-              "4             Histoire               0.986111       Belles-lettres   \n",
-              "5             Commerce               0.986866  Droit Jurisprudence   \n",
-              "6           Géographie               0.954104             Histoire   \n",
-              "7           Géographie               0.998200             Histoire   \n",
-              "8           Géographie               0.995858   Histoire naturelle   \n",
-              "9           Géographie               0.997916             Histoire   \n",
+              "0          Philosophie               0.986489       Belles-lettres   \n",
+              "1          Philosophie               0.943809             Histoire   \n",
+              "2       Belles-lettres               0.926219             Histoire   \n",
+              "3          Philosophie               0.978732            Politique   \n",
+              "4          Philosophie               0.988337       Belles-lettres   \n",
+              "5          Philosophie               0.988102       Belles-lettres   \n",
+              "6             Histoire               0.631214       Belles-lettres   \n",
+              "7             Histoire               0.979700       Belles-lettres   \n",
+              "8             Histoire               0.947388             Commerce   \n",
+              "9             Histoire               0.738804       Belles-lettres   \n",
               "\n",
               "   lge-superdomainProba2 lge-superdomainPred3  lge-superdomainProba3  \n",
-              "0               0.021192             Histoire               0.012657  \n",
-              "1               0.002934   Histoire naturelle               0.001019  \n",
-              "2               0.004124           Beaux-arts               0.002203  \n",
-              "3               0.029459             Histoire               0.014980  \n",
-              "4               0.003949           Géographie               0.001527  \n",
-              "5               0.002140            Politique               0.001812  \n",
-              "6               0.025117   Histoire naturelle               0.008872  \n",
-              "7               0.000280   Histoire naturelle               0.000190  \n",
-              "8               0.001078             Histoire               0.000548  \n",
-              "9               0.000561            Militaire               0.000186  "
+              "0               0.002821            Politique               0.001780  \n",
+              "1               0.014932            Politique               0.014871  \n",
+              "2               0.019612           Beaux-arts               0.011769  \n",
+              "3               0.004091       Belles-lettres               0.002425  \n",
+              "4               0.003174           Beaux-arts               0.001221  \n",
+              "5               0.002661           Beaux-arts               0.001391  \n",
+              "6               0.320553             Physique               0.007173  \n",
+              "7               0.012630             Religion               0.001750  \n",
+              "8               0.027528       Belles-lettres               0.010894  \n",
+              "9               0.193938           Beaux-arts               0.019706  "
             ]
           },
-          "execution_count": 57,
+          "execution_count": 17,
           "metadata": {},
           "output_type": "execute_result"
         }
@@ -1007,22 +1133,40 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 58,
+      "execution_count": 18,
       "metadata": {
         "id": "J9rObbvVr0zc"
       },
       "outputs": [],
       "source": [
-        "#df.to_csv(drive_path + \"predictions/EDdA_dataset_articles_superdomainBERT_230313.tsv\", sep=\"\\t\")\n",
-        "df.to_csv(drive_path + \"predictions/LGE_dataset_articles_superdomainBERT_230321.tsv\", sep=\"\\t\", index=False)"
+        "df.to_csv(drive_path + \"predictions/EDdA_dataset_articles_superdomainBERT_230327.tsv\", index=False, sep=\"\\t\")  # tab-separated output, row index not written\n",
+        "#df.to_csv(drive_path + \"predictions/LGE_dataset_articles_superdomainBERT_230321.tsv\", sep=\"\\t\", index=False)"
       ]
     },
     {
       "cell_type": "code",
-      "execution_count": 59,
+      "execution_count": null,
       "metadata": {
         "id": "7TD1mbKj_fXH"
       },
+      "outputs": [],
+      "source": [
+        "df.loc[df[corpus + '-superdomainPred1'] == 'Géographie']  # filter on the predicted label (Pred1); Proba1 holds the float score and never equals a label"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "df.shape"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 21,
+      "metadata": {},
       "outputs": [
         {
           "data": {
@@ -1045,14 +1189,18 @@
               "  <thead>\n",
               "    <tr style=\"text-align: right;\">\n",
               "      <th></th>\n",
-              "      <th>uid</th>\n",
-              "      <th>lge-volume</th>\n",
-              "      <th>lge-numero</th>\n",
-              "      <th>lge-head</th>\n",
-              "      <th>lge-page</th>\n",
-              "      <th>lge-id</th>\n",
-              "      <th>lge-content</th>\n",
-              "      <th>lge-nbWords</th>\n",
+              "      <th>volume</th>\n",
+              "      <th>numero</th>\n",
+              "      <th>head</th>\n",
+              "      <th>author</th>\n",
+              "      <th>edda_class</th>\n",
+              "      <th>enccre_id</th>\n",
+              "      <th>enccre_class</th>\n",
+              "      <th>content</th>\n",
+              "      <th>content_without_designant</th>\n",
+              "      <th>first_paragraph</th>\n",
+              "      <th>nb_words</th>\n",
+              "      <th>super_domain</th>\n",
               "      <th>lge-superdomainPred1</th>\n",
               "      <th>lge-superdomainProba1</th>\n",
               "      <th>lge-superdomainPred2</th>\n",
@@ -1062,43 +1210,61 @@
               "    </tr>\n",
               "  </thead>\n",
               "  <tbody>\n",
+              "    <tr>\n",
+              "      <th>73362</th>\n",
+              "      <td>17</td>\n",
+              "      <td>2381</td>\n",
+              "      <td>WOLSTROPE</td>\n",
+              "      <td>Jaucourt</td>\n",
+              "      <td>Géographie moderne</td>\n",
+              "      <td>v17-1454-0</td>\n",
+              "      <td>Géographie</td>\n",
+              "      <td>\\nWOLSTROPE, (Géog. mod.) bourg d'Angleterre,\\...</td>\n",
+              "      <td>\\nWOLSTROPE,  bourg d'Angleterre,\\ndans le com...</td>\n",
+              "      <td>\\nWOLSTROPE,  bourg d'Angleterre,\\ndans le com...</td>\n",
+              "      <td>5530</td>\n",
+              "      <td>None</td>\n",
+              "      <td>Géographie</td>\n",
+              "      <td>0.998638</td>\n",
+              "      <td>Histoire</td>\n",
+              "      <td>0.00016</td>\n",
+              "      <td>Militaire</td>\n",
+              "      <td>0.000113</td>\n",
+              "    </tr>\n",
               "  </tbody>\n",
               "</table>\n",
               "</div>"
             ],
             "text/plain": [
-              "Empty DataFrame\n",
-              "Columns: [uid, lge-volume, lge-numero, lge-head, lge-page, lge-id, lge-content, lge-nbWords, lge-superdomainPred1, lge-superdomainProba1, lge-superdomainPred2, lge-superdomainProba2, lge-superdomainPred3, lge-superdomainProba3]\n",
-              "Index: []"
-            ]
-          },
-          "execution_count": 59,
-          "metadata": {},
-          "output_type": "execute_result"
-        }
-      ],
-      "source": [
-        "df.loc[(df[corpus+'-superdomainProba1'] == 'Géographie')]"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": 60,
-      "metadata": {},
-      "outputs": [
-        {
-          "data": {
-            "text/plain": [
-              "(134820, 14)"
+              "       volume  numero       head    author          edda_class   enccre_id  \\\n",
+              "73362      17    2381  WOLSTROPE  Jaucourt  Géographie moderne  v17-1454-0   \n",
+              "\n",
+              "      enccre_class                                            content  \\\n",
+              "73362   Géographie  \\nWOLSTROPE, (Géog. mod.) bourg d'Angleterre,\\...   \n",
+              "\n",
+              "                               content_without_designant  \\\n",
+              "73362  \\nWOLSTROPE,  bourg d'Angleterre,\\ndans le com...   \n",
+              "\n",
+              "                                         first_paragraph  nb_words  \\\n",
+              "73362  \\nWOLSTROPE,  bourg d'Angleterre,\\ndans le com...      5530   \n",
+              "\n",
+              "      super_domain lge-superdomainPred1  lge-superdomainProba1  \\\n",
+              "73362         None           Géographie               0.998638   \n",
+              "\n",
+              "      lge-superdomainPred2  lge-superdomainProba2 lge-superdomainPred3  \\\n",
+              "73362             Histoire                0.00016            Militaire   \n",
+              "\n",
+              "       lge-superdomainProba3  \n",
+              "73362               0.000113  "
             ]
           },
-          "execution_count": 60,
+          "execution_count": 21,
           "metadata": {},
           "output_type": "execute_result"
         }
       ],
       "source": [
-        "df.shape"
+        "df.loc[df['head'] == 'WOLSTROPE']  # rows whose 'head' value is exactly 'WOLSTROPE'"
       ]
     },
     {