From 3bba32055665ed0e66292d6d8028f0d2e873efea Mon Sep 17 00:00:00 2001
From: Ludovic Moncla <moncla.ludovic@gmail.com>
Date: Mon, 13 Mar 2023 23:02:10 +0100
Subject: [PATCH] Update Predict.ipynb

---
 notebooks/Predict.ipynb | 2816 +++++++++++----------------------------
 1 file changed, 757 insertions(+), 2059 deletions(-)

diff --git a/notebooks/Predict.ipynb b/notebooks/Predict.ipynb
index 6ccb683..0da102c 100644
--- a/notebooks/Predict.ipynb
+++ b/notebooks/Predict.ipynb
@@ -17,7 +17,7 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 1,
+      "execution_count": null,
       "metadata": {
         "colab": {
           "base_uri": "https://localhost:8080/"
@@ -25,56 +25,7 @@
         "id": "pwmZ5bBvgGNh",
         "outputId": "1a080856-4e47-4e1d-81d1-d38bb58948a5"
       },
-      "outputs": [
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
-            "Collecting transformers==4.10.3\n",
-            "  Downloading transformers-4.10.3-py3-none-any.whl (2.8 MB)\n",
-            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.8/2.8 MB\u001b[0m \u001b[31m46.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-            "\u001b[?25hRequirement already satisfied: requests in /usr/local/lib/python3.8/dist-packages (from transformers==4.10.3) (2.25.1)\n",
-            "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.8/dist-packages (from transformers==4.10.3) (1.21.6)\n",
-            "Collecting sacremoses\n",
-            "  Downloading sacremoses-0.0.53.tar.gz (880 kB)\n",
-            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m880.6/880.6 KB\u001b[0m \u001b[31m45.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-            "\u001b[?25h  Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
-            "Requirement already satisfied: packaging in /usr/local/lib/python3.8/dist-packages (from transformers==4.10.3) (23.0)\n",
-            "Collecting tokenizers<0.11,>=0.10.1\n",
-            "  Downloading tokenizers-0.10.3-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (3.3 MB)\n",
-            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.3/3.3 MB\u001b[0m \u001b[31m57.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-            "\u001b[?25hRequirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.8/dist-packages (from transformers==4.10.3) (4.64.1)\n",
-            "Collecting huggingface-hub>=0.0.12\n",
-            "  Downloading huggingface_hub-0.12.0-py3-none-any.whl (190 kB)\n",
-            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m190.3/190.3 KB\u001b[0m \u001b[31m13.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-            "\u001b[?25hRequirement already satisfied: filelock in /usr/local/lib/python3.8/dist-packages (from transformers==4.10.3) (3.9.0)\n",
-            "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.8/dist-packages (from transformers==4.10.3) (2022.6.2)\n",
-            "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.8/dist-packages (from transformers==4.10.3) (6.0)\n",
-            "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.8/dist-packages (from huggingface-hub>=0.0.12->transformers==4.10.3) (4.4.0)\n",
-            "Requirement already satisfied: chardet<5,>=3.0.2 in /usr/local/lib/python3.8/dist-packages (from requests->transformers==4.10.3) (4.0.0)\n",
-            "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.8/dist-packages (from requests->transformers==4.10.3) (1.24.3)\n",
-            "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.8/dist-packages (from requests->transformers==4.10.3) (2.10)\n",
-            "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.8/dist-packages (from requests->transformers==4.10.3) (2022.12.7)\n",
-            "Requirement already satisfied: six in /usr/local/lib/python3.8/dist-packages (from sacremoses->transformers==4.10.3) (1.15.0)\n",
-            "Requirement already satisfied: click in /usr/local/lib/python3.8/dist-packages (from sacremoses->transformers==4.10.3) (7.1.2)\n",
-            "Requirement already satisfied: joblib in /usr/local/lib/python3.8/dist-packages (from sacremoses->transformers==4.10.3) (1.2.0)\n",
-            "Building wheels for collected packages: sacremoses\n",
-            "  Building wheel for sacremoses (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
-            "  Created wheel for sacremoses: filename=sacremoses-0.0.53-py3-none-any.whl size=895260 sha256=1a6d3101ab60a657a64074bebed597b1987c115de1092b993a013ae317d882f9\n",
-            "  Stored in directory: /root/.cache/pip/wheels/82/ab/9b/c15899bf659ba74f623ac776e861cf2eb8608c1825ddec66a4\n",
-            "Successfully built sacremoses\n",
-            "Installing collected packages: tokenizers, sacremoses, huggingface-hub, transformers\n",
-            "Successfully installed huggingface-hub-0.12.0 sacremoses-0.0.53 tokenizers-0.10.3 transformers-4.10.3\n",
-            "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
-            "Collecting sentencepiece\n",
-            "  Downloading sentencepiece-0.1.97-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)\n",
-            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m33.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-            "\u001b[?25hInstalling collected packages: sentencepiece\n",
-            "Successfully installed sentencepiece-0.1.97\n"
-          ]
-        }
-      ],
+      "outputs": [],
       "source": [
         "!pip install transformers==4.10.3\n",
         "!pip install sentencepiece"
@@ -91,7 +42,7 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 2,
+      "execution_count": null,
       "metadata": {
         "colab": {
           "base_uri": "https://localhost:8080/"
@@ -99,17 +50,7 @@
         "id": "WF0qFN_g3ekz",
         "outputId": "56e76858-932c-42fd-ace0-37bf11c7b4ce"
       },
-      "outputs": [
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "Your runtime has 27.3 gigabytes of available RAM\n",
-            "\n",
-            "You are using a high-RAM runtime!\n"
-          ]
-        }
-      ],
+      "outputs": [],
       "source": [
         "from psutil import virtual_memory\n",
         "ram_gb = virtual_memory().total / 1e9\n",
@@ -132,7 +73,7 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 3,
+      "execution_count": null,
       "metadata": {
         "colab": {
           "base_uri": "https://localhost:8080/"
@@ -140,15 +81,7 @@
         "id": "vL0S-s9Uofvn",
         "outputId": "dbe3e901-da63-48b5-d8c6-b8cbda503fef"
       },
-      "outputs": [
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "Mounted at /content/drive\n"
-          ]
-        }
-      ],
+      "outputs": [],
       "source": [
         "from google.colab import drive\n",
         "drive.mount('/content/drive')"
@@ -165,7 +98,7 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 4,
+      "execution_count": 1,
       "metadata": {
         "colab": {
           "base_uri": "https://localhost:8080/"
@@ -175,11 +108,10 @@
       },
       "outputs": [
         {
-          "output_type": "stream",
           "name": "stdout",
+          "output_type": "stream",
           "text": [
-            "There are 1 GPU(s) available.\n",
-            "We will use the GPU: Tesla T4\n"
+            "We will use the GPU\n"
           ]
         }
       ],
@@ -213,7 +145,7 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 5,
+      "execution_count": 2,
       "metadata": {
         "id": "SkErnwgMMbRj"
       },
@@ -237,19 +169,20 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 6,
+      "execution_count": 13,
       "metadata": {
         "id": "M2awiee1r0zV"
       },
       "outputs": [],
       "source": [
-        "drive_path = \"drive/MyDrive/Classification-EDdA/\"\n",
-        "path = \"./\""
+        "#drive_path = \"drive/MyDrive/Classification-EDdA/\"\n",
+        "drive_path = \"../\"\n",
+        "path = \"/Users/lmoncla/git/gitlab.liris/GEODE/EDdA/output/\""
       ]
     },
     {
       "cell_type": "code",
-      "execution_count": 11,
+      "execution_count": null,
       "metadata": {
         "colab": {
           "base_uri": "https://localhost:8080/"
@@ -257,25 +190,7 @@
         "id": "X1A_J8MGr0zV",
         "outputId": "ca5c966c-00a2-4d74-cd1c-576c18f98d3d"
       },
-      "outputs": [
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "--2023-02-15 07:14:06--  https://geode.liris.cnrs.fr/EDdA-Classification/datasets/Parallel_datatset_articles_230215.tsv\n",
-            "Resolving geode.liris.cnrs.fr (geode.liris.cnrs.fr)... 134.214.142.28\n",
-            "Connecting to geode.liris.cnrs.fr (geode.liris.cnrs.fr)|134.214.142.28|:443... connected.\n",
-            "HTTP request sent, awaiting response... 200 OK\n",
-            "Length: 42343065 (40M) [text/tab-separated-values]\n",
-            "Saving to: ‘Parallel_datatset_articles_230215.tsv’\n",
-            "\n",
-            "Parallel_datatset_a 100%[===================>]  40.38M  74.9MB/s    in 0.5s    \n",
-            "\n",
-            "2023-02-15 07:14:07 (74.9 MB/s) - ‘Parallel_datatset_articles_230215.tsv’ saved [42343065/42343065]\n",
-            "\n"
-          ]
-        }
-      ],
+      "outputs": [],
       "source": [
         "#!wget https://geode.liris.cnrs.fr/files/datasets/EDdA/Classification/LGE_withContent.tsv\n",
         "#!wget https://geode.liris.cnrs.fr/EDdA-Classification/datasets/EDdA_dataset_articles_no_superdomain.tsv\n",
@@ -284,20 +199,21 @@
     },
     {
       "cell_type": "code",
-      "source": [
-        "#filepath = \"data/LGE_withContent.tsv\"\n",
-        "#filepath = \"EDdA_dataset_articles_no_superdomain.tsv\"\n",
-        "filepath = \"Parallel_datatset_articles_230215.tsv\""
-      ],
+      "execution_count": 4,
       "metadata": {
         "id": "eea7F4vato1x"
       },
-      "execution_count": 12,
-      "outputs": []
+      "outputs": [],
+      "source": [
+        "#filepath = \"data/LGE_withContent.tsv\"\n",
+        "#filepath = \"EDdA_dataset_articles_no_superdomain.tsv\"\n",
+        "#filepath = \"Parallel_datatset_articles_230215.tsv\"\n",
+        "filepath = \"EDdA_dataset_articles.tsv\""
+      ]
     },
     {
       "cell_type": "code",
-      "execution_count": 13,
+      "execution_count": 5,
       "metadata": {
         "colab": {
           "base_uri": "https://localhost:8080/",
@@ -308,49 +224,9 @@
       },
       "outputs": [
         {
-          "output_type": "execute_result",
           "data": {
-            "text/plain": [
-              "             idLGE  tomeLGE  rankLGE  \\\n",
-              "0            aam-0        1       63   \n",
-              "1          abaco-0        1       92   \n",
-              "2         abacot-0        1       96   \n",
-              "3        abaddon-0        1      104   \n",
-              "4  abandonnement-0        1      138   \n",
-              "\n",
-              "                                          contentLGE  volumeEDdA  numeroEDdA  \\\n",
-              "0  AAM. Mesure de capacité pour les liquides en u...           1          31   \n",
-              "1  ABACO, architecte italien du xvi siècle (V. La...           1          42   \n",
-              "2  ABACOT. Double couronne que portaient autrefoi...           1          44   \n",
-              "3  ABADDONou APOLYON le Destructeur. « Elles\\nava...           1          46   \n",
-              "4  ABANDONNEMENT. I. Droit civil. — Ce mot est un...           1          75   \n",
-              "\n",
-              "        headEDdA  authorEDdA normclassEDdA  \\\n",
-              "0            AAM     Diderot  unclassified   \n",
-              "1          ABACO  d'Alembert  unclassified   \n",
-              "2         ABACOT     Diderot  unclassified   \n",
-              "3        ABADDON     Diderot  unclassified   \n",
-              "4  ABANDONNEMENT   Toussaint         Droit   \n",
-              "\n",
-              "                                         contentEDdA  nbWordsEDdA  \\\n",
-              "0  \\n* AAM, s. mesure des Liquides, en usage à Am...           18   \n",
-              "1  \\nABACO, s. m. Quelques anciens Auteurs se ser...           26   \n",
-              "2  \\n* ABACOT, s. m. nom de l'ancienne parure dè\\...           22   \n",
-              "3  \\n* ABADDON, s. m. vient d'abad, perdre. C'est...           25   \n",
-              "4  \\nABANDONNEMENT, s. m. en Droit, est le délais...           77   \n",
-              "\n",
-              "       superdomainEDdA  \n",
-              "0         Unclassified  \n",
-              "1         Unclassified  \n",
-              "2         Unclassified  \n",
-              "3         Unclassified  \n",
-              "4  Droit Jurisprudence  "
-            ],
             "text/html": [
-              "\n",
-              "  <div id=\"df-be30bfa5-3524-40b4-abed-43faebfa6628\">\n",
-              "    <div class=\"colab-df-container\">\n",
-              "      <div>\n",
+              "<div>\n",
               "<style scoped>\n",
               "    .dataframe tbody tr th:only-of-type {\n",
               "        vertical-align: middle;\n",
@@ -368,179 +244,140 @@
               "  <thead>\n",
               "    <tr style=\"text-align: right;\">\n",
               "      <th></th>\n",
-              "      <th>idLGE</th>\n",
-              "      <th>tomeLGE</th>\n",
-              "      <th>rankLGE</th>\n",
-              "      <th>contentLGE</th>\n",
-              "      <th>volumeEDdA</th>\n",
-              "      <th>numeroEDdA</th>\n",
-              "      <th>headEDdA</th>\n",
-              "      <th>authorEDdA</th>\n",
-              "      <th>normclassEDdA</th>\n",
-              "      <th>contentEDdA</th>\n",
-              "      <th>nbWordsEDdA</th>\n",
-              "      <th>superdomainEDdA</th>\n",
+              "      <th>volume</th>\n",
+              "      <th>numero</th>\n",
+              "      <th>head</th>\n",
+              "      <th>author</th>\n",
+              "      <th>edda_class</th>\n",
+              "      <th>enccre_id</th>\n",
+              "      <th>enccre_class</th>\n",
+              "      <th>content</th>\n",
+              "      <th>content_without_designant</th>\n",
+              "      <th>first_paragraph</th>\n",
+              "      <th>nb_words</th>\n",
+              "      <th>super_domain</th>\n",
               "    </tr>\n",
               "  </thead>\n",
               "  <tbody>\n",
               "    <tr>\n",
               "      <th>0</th>\n",
-              "      <td>aam-0</td>\n",
               "      <td>1</td>\n",
-              "      <td>63</td>\n",
-              "      <td>AAM. Mesure de capacité pour les liquides en u...</td>\n",
               "      <td>1</td>\n",
-              "      <td>31</td>\n",
-              "      <td>AAM</td>\n",
-              "      <td>Diderot</td>\n",
+              "      <td>Title Page</td>\n",
+              "      <td>unsigned</td>\n",
               "      <td>unclassified</td>\n",
-              "      <td>\\n* AAM, s. mesure des Liquides, en usage à Am...</td>\n",
-              "      <td>18</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>\\n\\nENCYCLOPÉDIE,\\nDICTIONNAIRE RAISONNÉ\\nDES ...</td>\n",
+              "      <td>\\n\\nENCYCLOPÉDIE,\\nDICTIONNAIRE RAISONNÉ\\nDES ...</td>\n",
+              "      <td>\\n\\nENCYCLOPÉDIE,\\nDICTIONNAIRE RAISONNÉ\\nDES ...</td>\n",
+              "      <td>151</td>\n",
               "      <td>Unclassified</td>\n",
               "    </tr>\n",
               "    <tr>\n",
               "      <th>1</th>\n",
-              "      <td>abaco-0</td>\n",
               "      <td>1</td>\n",
-              "      <td>92</td>\n",
-              "      <td>ABACO, architecte italien du xvi siècle (V. La...</td>\n",
-              "      <td>1</td>\n",
-              "      <td>42</td>\n",
-              "      <td>ABACO</td>\n",
-              "      <td>d'Alembert</td>\n",
+              "      <td>2</td>\n",
+              "      <td>A MONSEIGNEUR LE COMTE D'ARGENSON</td>\n",
+              "      <td>Diderot &amp; d'Alembert</td>\n",
               "      <td>unclassified</td>\n",
-              "      <td>\\nABACO, s. m. Quelques anciens Auteurs se ser...</td>\n",
-              "      <td>26</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>\\n\\nA MONSEIGNEUR\\nLE COMTE D'ARGENSON,\\nMINIS...</td>\n",
+              "      <td>\\n\\nA MONSEIGNEUR\\nLE COMTE D'ARGENSON,\\nMINIS...</td>\n",
+              "      <td>\\n\\nA MONSEIGNEUR\\nLE COMTE D'ARGENSON,\\nMINIS...</td>\n",
+              "      <td>208</td>\n",
               "      <td>Unclassified</td>\n",
               "    </tr>\n",
               "    <tr>\n",
               "      <th>2</th>\n",
-              "      <td>abacot-0</td>\n",
               "      <td>1</td>\n",
-              "      <td>96</td>\n",
-              "      <td>ABACOT. Double couronne que portaient autrefoi...</td>\n",
-              "      <td>1</td>\n",
-              "      <td>44</td>\n",
-              "      <td>ABACOT</td>\n",
-              "      <td>Diderot</td>\n",
+              "      <td>3</td>\n",
+              "      <td>DISCOURS PRÉLIMINAIRE DES EDITEURS</td>\n",
+              "      <td>d'Alembert</td>\n",
               "      <td>unclassified</td>\n",
-              "      <td>\\n* ABACOT, s. m. nom de l'ancienne parure dè\\...</td>\n",
-              "      <td>22</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>\\n\\nDISCOURS PRÉLIMINAIRE\\nDES EDITEURS.\\n\\n\\n...</td>\n",
+              "      <td>\\n\\nDISCOURS PRÉLIMINAIRE\\nDES EDITEURS.\\n\\n\\n...</td>\n",
+              "      <td>\\n\\nDISCOURS PRÉLIMINAIRE\\nDES EDITEURS.\\n\\n</td>\n",
+              "      <td>44669</td>\n",
               "      <td>Unclassified</td>\n",
               "    </tr>\n",
               "    <tr>\n",
               "      <th>3</th>\n",
-              "      <td>abaddon-0</td>\n",
               "      <td>1</td>\n",
-              "      <td>104</td>\n",
-              "      <td>ABADDONou APOLYON le Destructeur. « Elles\\nava...</td>\n",
-              "      <td>1</td>\n",
-              "      <td>46</td>\n",
-              "      <td>ABADDON</td>\n",
-              "      <td>Diderot</td>\n",
-              "      <td>unclassified</td>\n",
-              "      <td>\\n* ABADDON, s. m. vient d'abad, perdre. C'est...</td>\n",
-              "      <td>25</td>\n",
-              "      <td>Unclassified</td>\n",
+              "      <td>5</td>\n",
+              "      <td>A, a &amp; a</td>\n",
+              "      <td>Dumarsais5</td>\n",
+              "      <td>Grammaire</td>\n",
+              "      <td>v1-1-0</td>\n",
+              "      <td>Grammaire</td>\n",
+              "      <td>\\nA, a &amp; a s.m. (ordre Encyclopéd.\\nEntend. Sc...</td>\n",
+              "      <td>\\nA, a &amp; a s.m. (ordre Encyclopéd.\\nEntend. Sc...</td>\n",
+              "      <td>\\nA, a &amp; a s.m. (ordre Encyclopéd.\\nEntend. Sc...</td>\n",
+              "      <td>711</td>\n",
+              "      <td>Philosophie</td>\n",
               "    </tr>\n",
               "    <tr>\n",
               "      <th>4</th>\n",
-              "      <td>abandonnement-0</td>\n",
-              "      <td>1</td>\n",
-              "      <td>138</td>\n",
-              "      <td>ABANDONNEMENT. I. Droit civil. — Ce mot est un...</td>\n",
               "      <td>1</td>\n",
-              "      <td>75</td>\n",
-              "      <td>ABANDONNEMENT</td>\n",
-              "      <td>Toussaint</td>\n",
-              "      <td>Droit</td>\n",
-              "      <td>\\nABANDONNEMENT, s. m. en Droit, est le délais...</td>\n",
-              "      <td>77</td>\n",
-              "      <td>Droit Jurisprudence</td>\n",
+              "      <td>6</td>\n",
+              "      <td>A</td>\n",
+              "      <td>Dumarsais5</td>\n",
+              "      <td>unclassified</td>\n",
+              "      <td>v1-1-1</td>\n",
+              "      <td>Grammaire</td>\n",
+              "      <td>\\nA, mot, est 1. la troisieme personne du prés...</td>\n",
+              "      <td>\\nA, mot, est 1. la troisieme personne du prés...</td>\n",
+              "      <td>\\nA, mot, est 1. la troisieme personne du prés...</td>\n",
+              "      <td>238</td>\n",
+              "      <td>Unclassified</td>\n",
               "    </tr>\n",
               "  </tbody>\n",
               "</table>\n",
-              "</div>\n",
-              "      <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-be30bfa5-3524-40b4-abed-43faebfa6628')\"\n",
-              "              title=\"Convert this dataframe to an interactive table.\"\n",
-              "              style=\"display:none;\">\n",
-              "        \n",
-              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
-              "       width=\"24px\">\n",
-              "    <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n",
-              "    <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n",
-              "  </svg>\n",
-              "      </button>\n",
-              "      \n",
-              "  <style>\n",
-              "    .colab-df-container {\n",
-              "      display:flex;\n",
-              "      flex-wrap:wrap;\n",
-              "      gap: 12px;\n",
-              "    }\n",
-              "\n",
-              "    .colab-df-convert {\n",
-              "      background-color: #E8F0FE;\n",
-              "      border: none;\n",
-              "      border-radius: 50%;\n",
-              "      cursor: pointer;\n",
-              "      display: none;\n",
-              "      fill: #1967D2;\n",
-              "      height: 32px;\n",
-              "      padding: 0 0 0 0;\n",
-              "      width: 32px;\n",
-              "    }\n",
-              "\n",
-              "    .colab-df-convert:hover {\n",
-              "      background-color: #E2EBFA;\n",
-              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
-              "      fill: #174EA6;\n",
-              "    }\n",
-              "\n",
-              "    [theme=dark] .colab-df-convert {\n",
-              "      background-color: #3B4455;\n",
-              "      fill: #D2E3FC;\n",
-              "    }\n",
+              "</div>"
+            ],
+            "text/plain": [
+              "   volume  numero                                head                author  \\\n",
+              "0       1       1                          Title Page              unsigned   \n",
+              "1       1       2   A MONSEIGNEUR LE COMTE D'ARGENSON  Diderot & d'Alembert   \n",
+              "2       1       3  DISCOURS PRÉLIMINAIRE DES EDITEURS            d'Alembert   \n",
+              "3       1       5                            A, a & a            Dumarsais5   \n",
+              "4       1       6                                   A            Dumarsais5   \n",
               "\n",
-              "    [theme=dark] .colab-df-convert:hover {\n",
-              "      background-color: #434B5C;\n",
-              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
-              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
-              "      fill: #FFFFFF;\n",
-              "    }\n",
-              "  </style>\n",
+              "     edda_class enccre_id enccre_class  \\\n",
+              "0  unclassified       NaN          NaN   \n",
+              "1  unclassified       NaN          NaN   \n",
+              "2  unclassified       NaN          NaN   \n",
+              "3     Grammaire    v1-1-0    Grammaire   \n",
+              "4  unclassified    v1-1-1    Grammaire   \n",
               "\n",
-              "      <script>\n",
-              "        const buttonEl =\n",
-              "          document.querySelector('#df-be30bfa5-3524-40b4-abed-43faebfa6628 button.colab-df-convert');\n",
-              "        buttonEl.style.display =\n",
-              "          google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
+              "                                             content  \\\n",
+              "0  \\n\\nENCYCLOPÉDIE,\\nDICTIONNAIRE RAISONNÉ\\nDES ...   \n",
+              "1  \\n\\nA MONSEIGNEUR\\nLE COMTE D'ARGENSON,\\nMINIS...   \n",
+              "2  \\n\\nDISCOURS PRÉLIMINAIRE\\nDES EDITEURS.\\n\\n\\n...   \n",
+              "3  \\nA, a & a s.m. (ordre Encyclopéd.\\nEntend. Sc...   \n",
+              "4  \\nA, mot, est 1. la troisieme personne du prés...   \n",
               "\n",
-              "        async function convertToInteractive(key) {\n",
-              "          const element = document.querySelector('#df-be30bfa5-3524-40b4-abed-43faebfa6628');\n",
-              "          const dataTable =\n",
-              "            await google.colab.kernel.invokeFunction('convertToInteractive',\n",
-              "                                                     [key], {});\n",
-              "          if (!dataTable) return;\n",
+              "                           content_without_designant  \\\n",
+              "0  \\n\\nENCYCLOPÉDIE,\\nDICTIONNAIRE RAISONNÉ\\nDES ...   \n",
+              "1  \\n\\nA MONSEIGNEUR\\nLE COMTE D'ARGENSON,\\nMINIS...   \n",
+              "2  \\n\\nDISCOURS PRÉLIMINAIRE\\nDES EDITEURS.\\n\\n\\n...   \n",
+              "3  \\nA, a & a s.m. (ordre Encyclopéd.\\nEntend. Sc...   \n",
+              "4  \\nA, mot, est 1. la troisieme personne du prés...   \n",
               "\n",
-              "          const docLinkHtml = 'Like what you see? Visit the ' +\n",
-              "            '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
-              "            + ' to learn more about interactive tables.';\n",
-              "          element.innerHTML = '';\n",
-              "          dataTable['output_type'] = 'display_data';\n",
-              "          await google.colab.output.renderOutput(dataTable, element);\n",
-              "          const docLink = document.createElement('div');\n",
-              "          docLink.innerHTML = docLinkHtml;\n",
-              "          element.appendChild(docLink);\n",
-              "        }\n",
-              "      </script>\n",
-              "    </div>\n",
-              "  </div>\n",
-              "  "
+              "                                     first_paragraph  nb_words  super_domain  \n",
+              "0  \\n\\nENCYCLOPÉDIE,\\nDICTIONNAIRE RAISONNÉ\\nDES ...       151  Unclassified  \n",
+              "1  \\n\\nA MONSEIGNEUR\\nLE COMTE D'ARGENSON,\\nMINIS...       208  Unclassified  \n",
+              "2       \\n\\nDISCOURS PRÉLIMINAIRE\\nDES EDITEURS.\\n\\n     44669  Unclassified  \n",
+              "3  \\nA, a & a s.m. (ordre Encyclopéd.\\nEntend. Sc...       711   Philosophie  \n",
+              "4  \\nA, mot, est 1. la troisieme personne du prés...       238  Unclassified  "
             ]
           },
+          "execution_count": 5,
           "metadata": {},
-          "execution_count": 13
+          "output_type": "execute_result"
         }
       ],
       "source": [
@@ -550,16 +387,16 @@
     },
     {
       "cell_type": "code",
-      "source": [
-        "corpus = 'LGE'\n",
-        "#corpus = 'EDdA'\n",
-        "data = df['content'+corpus].values\n"
-      ],
+      "execution_count": 6,
       "metadata": {
         "id": "Ndw4UtgWt_MJ"
       },
-      "execution_count": 28,
-      "outputs": []
+      "outputs": [],
+      "source": [
+        "#corpus = 'LGE'\n",
+        "corpus = ''\n",
+        "data = df['content'+corpus].values\n"
+      ]
     },
     {
       "cell_type": "markdown",
@@ -574,7 +411,7 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 16,
+      "execution_count": 14,
       "metadata": {
         "id": "0qDZ86qTr0zX"
       },
@@ -589,7 +426,7 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 17,
+      "execution_count": 9,
       "metadata": {
         "id": "KEljGX0br0zX"
       },
@@ -695,7 +532,7 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 18,
+      "execution_count": 10,
       "metadata": {
         "colab": {
           "base_uri": "https://localhost:8080/",
@@ -752,67 +589,11 @@
       },
       "outputs": [
         {
-          "output_type": "stream",
           "name": "stdout",
+          "output_type": "stream",
           "text": [
             "Loading Bert Tokenizer...\n"
           ]
-        },
-        {
-          "output_type": "display_data",
-          "data": {
-            "text/plain": [
-              "Downloading:   0%|          | 0.00/996k [00:00<?, ?B/s]"
-            ],
-            "application/vnd.jupyter.widget-view+json": {
-              "version_major": 2,
-              "version_minor": 0,
-              "model_id": "11c285bed74e46a08fbb7bf88715aafa"
-            }
-          },
-          "metadata": {}
-        },
-        {
-          "output_type": "display_data",
-          "data": {
-            "text/plain": [
-              "Downloading:   0%|          | 0.00/29.0 [00:00<?, ?B/s]"
-            ],
-            "application/vnd.jupyter.widget-view+json": {
-              "version_major": 2,
-              "version_minor": 0,
-              "model_id": "9be44ba364a344f2b6b2546ae9d61ba8"
-            }
-          },
-          "metadata": {}
-        },
-        {
-          "output_type": "display_data",
-          "data": {
-            "text/plain": [
-              "Downloading:   0%|          | 0.00/1.96M [00:00<?, ?B/s]"
-            ],
-            "application/vnd.jupyter.widget-view+json": {
-              "version_major": 2,
-              "version_minor": 0,
-              "model_id": "aa6a7a9106554f85a91150bd65c271d0"
-            }
-          },
-          "metadata": {}
-        },
-        {
-          "output_type": "display_data",
-          "data": {
-            "text/plain": [
-              "Downloading:   0%|          | 0.00/625 [00:00<?, ?B/s]"
-            ],
-            "application/vnd.jupyter.widget-view+json": {
-              "version_major": 2,
-              "version_minor": 0,
-              "model_id": "4c46904f8e944d2b834ba9d384b00a8c"
-            }
-          },
-          "metadata": {}
         }
       ],
       "source": [
@@ -826,11 +607,19 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 29,
+      "execution_count": 11,
       "metadata": {
         "id": "-O6NspVTr0zZ"
       },
-      "outputs": [],
+      "outputs": [
+        {
+          "name": "stderr",
+          "output_type": "stream",
+          "text": [
+            "Token indices sequence length is longer than the specified maximum sequence length for this model (75311 > 512). Running this sequence through the model will result in indexing errors\n"
+          ]
+        }
+      ],
       "source": [
         "data_loader = generate_dataloader(tokenizer, data)"
       ]
@@ -849,7 +638,7 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 30,
+      "execution_count": 15,
       "metadata": {
         "id": "CN8EZst-r0zZ"
       },
@@ -858,12 +647,13 @@
         "#model = torch.load(model_path, map_location=torch.device('mps'))\n",
         "#model.load_state_dict(torch.load(model_path, map_location=torch.device('mps')))\n",
         "\n",
-        "model = BertForSequenceClassification.from_pretrained(model_path).to(\"cuda\")"
+        "#model = BertForSequenceClassification.from_pretrained(model_path).to(\"cuda\")\n",
+        "model = BertForSequenceClassification.from_pretrained(model_path).to(\"mps\")"
       ]
     },
     {
       "cell_type": "code",
-      "execution_count": 31,
+      "execution_count": 16,
       "metadata": {
         "id": "_fzgS5USJeAF"
       },
@@ -874,7 +664,7 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 32,
+      "execution_count": null,
       "metadata": {
         "colab": {
           "base_uri": "https://localhost:8080/"
@@ -882,1029 +672,28 @@
         "id": "ISkijyclr0za",
         "outputId": "8120e858-9950-4380-f887-70ca47360c76"
       },
-      "outputs": [
-        {
-          "output_type": "execute_result",
-          "data": {
-            "text/plain": [
-              "[4,\n",
-              " 1,\n",
-              " 7,\n",
-              " 16,\n",
-              " 5,\n",
-              " 7,\n",
-              " 7,\n",
-              " 8,\n",
-              " 6,\n",
-              " 6,\n",
-              " 0,\n",
-              " 9,\n",
-              " 7,\n",
-              " 5,\n",
-              " 6,\n",
-              " 3,\n",
-              " 11,\n",
-              " 7,\n",
-              " 11,\n",
-              " 9,\n",
-              " 12,\n",
-              " 5,\n",
-              " 5,\n",
-              " 13,\n",
-              " 9,\n",
-              " 16,\n",
-              " 6,\n",
-              " 5,\n",
-              " 9,\n",
-              " 1,\n",
-              " 7,\n",
-              " 11,\n",
-              " 4,\n",
-              " 5,\n",
-              " 6,\n",
-              " 8,\n",
-              " 14,\n",
-              " 1,\n",
-              " 8,\n",
-              " 13,\n",
-              " 14,\n",
-              " 16,\n",
-              " 16,\n",
-              " 13,\n",
-              " 8,\n",
-              " 8,\n",
-              " 8,\n",
-              " 8,\n",
-              " 6,\n",
-              " 8,\n",
-              " 13,\n",
-              " 10,\n",
-              " 13,\n",
-              " 5,\n",
-              " 5,\n",
-              " 13,\n",
-              " 13,\n",
-              " 2,\n",
-              " 1,\n",
-              " 14,\n",
-              " 4,\n",
-              " 13,\n",
-              " 7,\n",
-              " 0,\n",
-              " 1,\n",
-              " 11,\n",
-              " 12,\n",
-              " 9,\n",
-              " 10,\n",
-              " 7,\n",
-              " 12,\n",
-              " 3,\n",
-              " 9,\n",
-              " 5,\n",
-              " 5,\n",
-              " 13,\n",
-              " 11,\n",
-              " 8,\n",
-              " 7,\n",
-              " 6,\n",
-              " 4,\n",
-              " 7,\n",
-              " 7,\n",
-              " 7,\n",
-              " 11,\n",
-              " 7,\n",
-              " 14,\n",
-              " 6,\n",
-              " 5,\n",
-              " 5,\n",
-              " 5,\n",
-              " 4,\n",
-              " 16,\n",
-              " 2,\n",
-              " 13,\n",
-              " 7,\n",
-              " 14,\n",
-              " 2,\n",
-              " 10,\n",
-              " 7,\n",
-              " 8,\n",
-              " 14,\n",
-              " 5,\n",
-              " 1,\n",
-              " 6,\n",
-              " 16,\n",
-              " 14,\n",
-              " 13,\n",
-              " 6,\n",
-              " 7,\n",
-              " 5,\n",
-              " 5,\n",
-              " 11,\n",
-              " 5,\n",
-              " 0,\n",
-              " 6,\n",
-              " 5,\n",
-              " 13,\n",
-              " 9,\n",
-              " 4,\n",
-              " 8,\n",
-              " 7,\n",
-              " 6,\n",
-              " 5,\n",
-              " 13,\n",
-              " 6,\n",
-              " 5,\n",
-              " 5,\n",
-              " 5,\n",
-              " 7,\n",
-              " 11,\n",
-              " 2,\n",
-              " 7,\n",
-              " 8,\n",
-              " 7,\n",
-              " 13,\n",
-              " 5,\n",
-              " 4,\n",
-              " 8,\n",
-              " 6,\n",
-              " 6,\n",
-              " 5,\n",
-              " 12,\n",
-              " 8,\n",
-              " 7,\n",
-              " 13,\n",
-              " 6,\n",
-              " 7,\n",
-              " 9,\n",
-              " 10,\n",
-              " 13,\n",
-              " 7,\n",
-              " 6,\n",
-              " 6,\n",
-              " 9,\n",
-              " 9,\n",
-              " 8,\n",
-              " 8,\n",
-              " 6,\n",
-              " 8,\n",
-              " 13,\n",
-              " 14,\n",
-              " 11,\n",
-              " 13,\n",
-              " 6,\n",
-              " 1,\n",
-              " 11,\n",
-              " 1,\n",
-              " 4,\n",
-              " 8,\n",
-              " 6,\n",
-              " 1,\n",
-              " 9,\n",
-              " 2,\n",
-              " 8,\n",
-              " 6,\n",
-              " 5,\n",
-              " 4,\n",
-              " 8,\n",
-              " 7,\n",
-              " 4,\n",
-              " 7,\n",
-              " 14,\n",
-              " 14,\n",
-              " 8,\n",
-              " 7,\n",
-              " 7,\n",
-              " 16,\n",
-              " 6,\n",
-              " 13,\n",
-              " 9,\n",
-              " 9,\n",
-              " 9,\n",
-              " 16,\n",
-              " 6,\n",
-              " 6,\n",
-              " 14,\n",
-              " 6,\n",
-              " 8,\n",
-              " 6,\n",
-              " 14,\n",
-              " 7,\n",
-              " 8,\n",
-              " 5,\n",
-              " 6,\n",
-              " 6,\n",
-              " 14,\n",
-              " 14,\n",
-              " 6,\n",
-              " 0,\n",
-              " 4,\n",
-              " 10,\n",
-              " 6,\n",
-              " 10,\n",
-              " 14,\n",
-              " 8,\n",
-              " 6,\n",
-              " 2,\n",
-              " 3,\n",
-              " 8,\n",
-              " 3,\n",
-              " 2,\n",
-              " 8,\n",
-              " 8,\n",
-              " 13,\n",
-              " 7,\n",
-              " 6,\n",
-              " 7,\n",
-              " 4,\n",
-              " 8,\n",
-              " 8,\n",
-              " 6,\n",
-              " 13,\n",
-              " 11,\n",
-              " 7,\n",
-              " 6,\n",
-              " 6,\n",
-              " 13,\n",
-              " 5,\n",
-              " 9,\n",
-              " 12,\n",
-              " 11,\n",
-              " 7,\n",
-              " 2,\n",
-              " 11,\n",
-              " 8,\n",
-              " 3,\n",
-              " 3,\n",
-              " 9,\n",
-              " 2,\n",
-              " 8,\n",
-              " 7,\n",
-              " 5,\n",
-              " 7,\n",
-              " 6,\n",
-              " 6,\n",
-              " 13,\n",
-              " 9,\n",
-              " 6,\n",
-              " 14,\n",
-              " 6,\n",
-              " 6,\n",
-              " 6,\n",
-              " 8,\n",
-              " 6,\n",
-              " 5,\n",
-              " 6,\n",
-              " 7,\n",
-              " 3,\n",
-              " 8,\n",
-              " 7,\n",
-              " 5,\n",
-              " 6,\n",
-              " 6,\n",
-              " 6,\n",
-              " 5,\n",
-              " 5,\n",
-              " 8,\n",
-              " 14,\n",
-              " 6,\n",
-              " 6,\n",
-              " 7,\n",
-              " 11,\n",
-              " 8,\n",
-              " 7,\n",
-              " 6,\n",
-              " 1,\n",
-              " 7,\n",
-              " 5,\n",
-              " 2,\n",
-              " 11,\n",
-              " 6,\n",
-              " 11,\n",
-              " 16,\n",
-              " 2,\n",
-              " 7,\n",
-              " 2,\n",
-              " 4,\n",
-              " 2,\n",
-              " 7,\n",
-              " 13,\n",
-              " 6,\n",
-              " 11,\n",
-              " 13,\n",
-              " 13,\n",
-              " 2,\n",
-              " 13,\n",
-              " 11,\n",
-              " 11,\n",
-              " 6,\n",
-              " 11,\n",
-              " 6,\n",
-              " 8,\n",
-              " 8,\n",
-              " 1,\n",
-              " 6,\n",
-              " 9,\n",
-              " 6,\n",
-              " 6,\n",
-              " 6,\n",
-              " 7,\n",
-              " 0,\n",
-              " 6,\n",
-              " 11,\n",
-              " 6,\n",
-              " 7,\n",
-              " 11,\n",
-              " 6,\n",
-              " 6,\n",
-              " 7,\n",
-              " 12,\n",
-              " 9,\n",
-              " 11,\n",
-              " 6,\n",
-              " 6,\n",
-              " 6,\n",
-              " 8,\n",
-              " 14,\n",
-              " 6,\n",
-              " 7,\n",
-              " 6,\n",
-              " 6,\n",
-              " 5,\n",
-              " 7,\n",
-              " 6,\n",
-              " 2,\n",
-              " 7,\n",
-              " 6,\n",
-              " 14,\n",
-              " 5,\n",
-              " 8,\n",
-              " 14,\n",
-              " 8,\n",
-              " 11,\n",
-              " 9,\n",
-              " 9,\n",
-              " 11,\n",
-              " 2,\n",
-              " 14,\n",
-              " 7,\n",
-              " 9,\n",
-              " 7,\n",
-              " 8,\n",
-              " 16,\n",
-              " 11,\n",
-              " 13,\n",
-              " 14,\n",
-              " 5,\n",
-              " 6,\n",
-              " 6,\n",
-              " 14,\n",
-              " 10,\n",
-              " 7,\n",
-              " 7,\n",
-              " 8,\n",
-              " 6,\n",
-              " 2,\n",
-              " 7,\n",
-              " 6,\n",
-              " 13,\n",
-              " 13,\n",
-              " 10,\n",
-              " 6,\n",
-              " 11,\n",
-              " 16,\n",
-              " 6,\n",
-              " 6,\n",
-              " 12,\n",
-              " 2,\n",
-              " 6,\n",
-              " 11,\n",
-              " 13,\n",
-              " 6,\n",
-              " 11,\n",
-              " 2,\n",
-              " 6,\n",
-              " 5,\n",
-              " 13,\n",
-              " 7,\n",
-              " 6,\n",
-              " 11,\n",
-              " 11,\n",
-              " 7,\n",
-              " 6,\n",
-              " 14,\n",
-              " 8,\n",
-              " 8,\n",
-              " 7,\n",
-              " 7,\n",
-              " 7,\n",
-              " 2,\n",
-              " 7,\n",
-              " 7,\n",
-              " 7,\n",
-              " 6,\n",
-              " 7,\n",
-              " 16,\n",
-              " 2,\n",
-              " 2,\n",
-              " 11,\n",
-              " 11,\n",
-              " 10,\n",
-              " 11,\n",
-              " 16,\n",
-              " 3,\n",
-              " 16,\n",
-              " 11,\n",
-              " 7,\n",
-              " 5,\n",
-              " 5,\n",
-              " 3,\n",
-              " 6,\n",
-              " 8,\n",
-              " 1,\n",
-              " 11,\n",
-              " 6,\n",
-              " 13,\n",
-              " 14,\n",
-              " 5,\n",
-              " 5,\n",
-              " 12,\n",
-              " 9,\n",
-              " 14,\n",
-              " 5,\n",
-              " 13,\n",
-              " 6,\n",
-              " 8,\n",
-              " 11,\n",
-              " 14,\n",
-              " 8,\n",
-              " 9,\n",
-              " 7,\n",
-              " 7,\n",
-              " 6,\n",
-              " 3,\n",
-              " 1,\n",
-              " 1,\n",
-              " 6,\n",
-              " 14,\n",
-              " 6,\n",
-              " 5,\n",
-              " 13,\n",
-              " 6,\n",
-              " 8,\n",
-              " 12,\n",
-              " 1,\n",
-              " 6,\n",
-              " 7,\n",
-              " 3,\n",
-              " 7,\n",
-              " 16,\n",
-              " 14,\n",
-              " 3,\n",
-              " 7,\n",
-              " 10,\n",
-              " 5,\n",
-              " 7,\n",
-              " 7,\n",
-              " 7,\n",
-              " 7,\n",
-              " 9,\n",
-              " 7,\n",
-              " 3,\n",
-              " 1,\n",
-              " 1,\n",
-              " 1,\n",
-              " 1,\n",
-              " 5,\n",
-              " 10,\n",
-              " 5,\n",
-              " 7,\n",
-              " 12,\n",
-              " 12,\n",
-              " 6,\n",
-              " 14,\n",
-              " 7,\n",
-              " 6,\n",
-              " 6,\n",
-              " 6,\n",
-              " 8,\n",
-              " 6,\n",
-              " 7,\n",
-              " 6,\n",
-              " 6,\n",
-              " 1,\n",
-              " 6,\n",
-              " 8,\n",
-              " 7,\n",
-              " 14,\n",
-              " 8,\n",
-              " 7,\n",
-              " 2,\n",
-              " 12,\n",
-              " 7,\n",
-              " 16,\n",
-              " 6,\n",
-              " 10,\n",
-              " 8,\n",
-              " 7,\n",
-              " 14,\n",
-              " 6,\n",
-              " 9,\n",
-              " 1,\n",
-              " 9,\n",
-              " 9,\n",
-              " 16,\n",
-              " 13,\n",
-              " 5,\n",
-              " 7,\n",
-              " 6,\n",
-              " 9,\n",
-              " 7,\n",
-              " 6,\n",
-              " 11,\n",
-              " 8,\n",
-              " 9,\n",
-              " 9,\n",
-              " 5,\n",
-              " 2,\n",
-              " 5,\n",
-              " 5,\n",
-              " 9,\n",
-              " 3,\n",
-              " 0,\n",
-              " 5,\n",
-              " 8,\n",
-              " 7,\n",
-              " 2,\n",
-              " 2,\n",
-              " 7,\n",
-              " 11,\n",
-              " 11,\n",
-              " 13,\n",
-              " 13,\n",
-              " 14,\n",
-              " 3,\n",
-              " 13,\n",
-              " 1,\n",
-              " 6,\n",
-              " 7,\n",
-              " 7,\n",
-              " 14,\n",
-              " 7,\n",
-              " 11,\n",
-              " 8,\n",
-              " 16,\n",
-              " 6,\n",
-              " 6,\n",
-              " 1,\n",
-              " 8,\n",
-              " 13,\n",
-              " 7,\n",
-              " 8,\n",
-              " 4,\n",
-              " 11,\n",
-              " 6,\n",
-              " 7,\n",
-              " 5,\n",
-              " 5,\n",
-              " 5,\n",
-              " 4,\n",
-              " 5,\n",
-              " 6,\n",
-              " 5,\n",
-              " 8,\n",
-              " 2,\n",
-              " 13,\n",
-              " 6,\n",
-              " 13,\n",
-              " 12,\n",
-              " 16,\n",
-              " 8,\n",
-              " 14,\n",
-              " 7,\n",
-              " 3,\n",
-              " 13,\n",
-              " 11,\n",
-              " 6,\n",
-              " 7,\n",
-              " 6,\n",
-              " 7,\n",
-              " 6,\n",
-              " 6,\n",
-              " 1,\n",
-              " 7,\n",
-              " 11,\n",
-              " 14,\n",
-              " 7,\n",
-              " 11,\n",
-              " 1,\n",
-              " 9,\n",
-              " 0,\n",
-              " 11,\n",
-              " 5,\n",
-              " 1,\n",
-              " 0,\n",
-              " 5,\n",
-              " 12,\n",
-              " 1,\n",
-              " 14,\n",
-              " 12,\n",
-              " 8,\n",
-              " 13,\n",
-              " 13,\n",
-              " 4,\n",
-              " 12,\n",
-              " 3,\n",
-              " 1,\n",
-              " 6,\n",
-              " 7,\n",
-              " 5,\n",
-              " 5,\n",
-              " 5,\n",
-              " 5,\n",
-              " 13,\n",
-              " 5,\n",
-              " 12,\n",
-              " 7,\n",
-              " 8,\n",
-              " 6,\n",
-              " 2,\n",
-              " 5,\n",
-              " 6,\n",
-              " 9,\n",
-              " 13,\n",
-              " 7,\n",
-              " 16,\n",
-              " 6,\n",
-              " 7,\n",
-              " 7,\n",
-              " 4,\n",
-              " 11,\n",
-              " 6,\n",
-              " 12,\n",
-              " 2,\n",
-              " 7,\n",
-              " 6,\n",
-              " 2,\n",
-              " 14,\n",
-              " 7,\n",
-              " 7,\n",
-              " 14,\n",
-              " 13,\n",
-              " 11,\n",
-              " 5,\n",
-              " 6,\n",
-              " 7,\n",
-              " 13,\n",
-              " 7,\n",
-              " 7,\n",
-              " 8,\n",
-              " 13,\n",
-              " 8,\n",
-              " 6,\n",
-              " 6,\n",
-              " 6,\n",
-              " 8,\n",
-              " 11,\n",
-              " 7,\n",
-              " 6,\n",
-              " 0,\n",
-              " 9,\n",
-              " 7,\n",
-              " 6,\n",
-              " 7,\n",
-              " 7,\n",
-              " 9,\n",
-              " 7,\n",
-              " 6,\n",
-              " 6,\n",
-              " 1,\n",
-              " 4,\n",
-              " 1,\n",
-              " 7,\n",
-              " 6,\n",
-              " 1,\n",
-              " 8,\n",
-              " 9,\n",
-              " 7,\n",
-              " 5,\n",
-              " 5,\n",
-              " 8,\n",
-              " 7,\n",
-              " 0,\n",
-              " 10,\n",
-              " 9,\n",
-              " 9,\n",
-              " 3,\n",
-              " 6,\n",
-              " 9,\n",
-              " 9,\n",
-              " 1,\n",
-              " 9,\n",
-              " 0,\n",
-              " 2,\n",
-              " 2,\n",
-              " 6,\n",
-              " 3,\n",
-              " 8,\n",
-              " 7,\n",
-              " 7,\n",
-              " 3,\n",
-              " 1,\n",
-              " 1,\n",
-              " 6,\n",
-              " 5,\n",
-              " 6,\n",
-              " 1,\n",
-              " 6,\n",
-              " 11,\n",
-              " 9,\n",
-              " 8,\n",
-              " 7,\n",
-              " 5,\n",
-              " 5,\n",
-              " 5,\n",
-              " 5,\n",
-              " 1,\n",
-              " 13,\n",
-              " 6,\n",
-              " 5,\n",
-              " 6,\n",
-              " 7,\n",
-              " 2,\n",
-              " 6,\n",
-              " 6,\n",
-              " 13,\n",
-              " 1,\n",
-              " 7,\n",
-              " 7,\n",
-              " 6,\n",
-              " 6,\n",
-              " 7,\n",
-              " 7,\n",
-              " 6,\n",
-              " 13,\n",
-              " 8,\n",
-              " 8,\n",
-              " 1,\n",
-              " 6,\n",
-              " 2,\n",
-              " 3,\n",
-              " 6,\n",
-              " 6,\n",
-              " 6,\n",
-              " 8,\n",
-              " 7,\n",
-              " 6,\n",
-              " 6,\n",
-              " 7,\n",
-              " 14,\n",
-              " 6,\n",
-              " 4,\n",
-              " 8,\n",
-              " 11,\n",
-              " 7,\n",
-              " 7,\n",
-              " 6,\n",
-              " 7,\n",
-              " 6,\n",
-              " 3,\n",
-              " 6,\n",
-              " 14,\n",
-              " 6,\n",
-              " 6,\n",
-              " 10,\n",
-              " 1,\n",
-              " 14,\n",
-              " 4,\n",
-              " 11,\n",
-              " 12,\n",
-              " 1,\n",
-              " 6,\n",
-              " 7,\n",
-              " 6,\n",
-              " 9,\n",
-              " 6,\n",
-              " 6,\n",
-              " 8,\n",
-              " 6,\n",
-              " 6,\n",
-              " 8,\n",
-              " 2,\n",
-              " 7,\n",
-              " 6,\n",
-              " 5,\n",
-              " 12,\n",
-              " 7,\n",
-              " 1,\n",
-              " 6,\n",
-              " 6,\n",
-              " 6,\n",
-              " 1,\n",
-              " 10,\n",
-              " 16,\n",
-              " 5,\n",
-              " 6,\n",
-              " 6,\n",
-              " 6,\n",
-              " 0,\n",
-              " 12,\n",
-              " 7,\n",
-              " 6,\n",
-              " 6,\n",
-              " 6,\n",
-              " 13,\n",
-              " 6,\n",
-              " 6,\n",
-              " 9,\n",
-              " 3,\n",
-              " 7,\n",
-              " 3,\n",
-              " 13,\n",
-              " 6,\n",
-              " 6,\n",
-              " 7,\n",
-              " 7,\n",
-              " 6,\n",
-              " 8,\n",
-              " 8,\n",
-              " 7,\n",
-              " 7,\n",
-              " 10,\n",
-              " 6,\n",
-              " 16,\n",
-              " 2,\n",
-              " 7,\n",
-              " 6,\n",
-              " 7,\n",
-              " 6,\n",
-              " 6,\n",
-              " 6,\n",
-              " 6,\n",
-              " 13,\n",
-              " 6,\n",
-              " 2,\n",
-              " 6,\n",
-              " 5,\n",
-              " 3,\n",
-              " 12,\n",
-              " 6,\n",
-              " 8,\n",
-              " 4,\n",
-              " 6,\n",
-              " 10,\n",
-              " 11,\n",
-              " 11,\n",
-              " 8,\n",
-              " 5,\n",
-              " 1,\n",
-              " 1,\n",
-              " 13,\n",
-              " 5,\n",
-              " 14,\n",
-              " 6,\n",
-              " 12,\n",
-              " 6,\n",
-              " 11,\n",
-              " 12,\n",
-              " 6,\n",
-              " 0,\n",
-              " 0,\n",
-              " 9,\n",
-              " 11,\n",
-              " 1,\n",
-              " 6,\n",
-              " 6,\n",
-              " 3,\n",
-              " 3,\n",
-              " 8,\n",
-              " 6,\n",
-              " 8,\n",
-              " 6,\n",
-              " 12,\n",
-              " 8,\n",
-              " 9,\n",
-              " 6,\n",
-              " 8,\n",
-              " 7,\n",
-              " 8,\n",
-              " 8,\n",
-              " 1,\n",
-              " 9,\n",
-              " 12,\n",
-              " 8,\n",
-              " 6,\n",
-              " 14,\n",
-              " 12,\n",
-              " 0,\n",
-              " 4,\n",
-              " 6,\n",
-              " 6,\n",
-              " 5,\n",
-              " 7,\n",
-              " 3,\n",
-              " 7,\n",
-              " 7,\n",
-              " 6,\n",
-              " 9,\n",
-              " 9,\n",
-              " 9,\n",
-              " 12,\n",
-              " 3,\n",
-              " 6,\n",
-              " 8,\n",
-              " 8,\n",
-              " 8,\n",
-              " 12,\n",
-              " 12,\n",
-              " 9,\n",
-              " 7,\n",
-              " 7,\n",
-              " 8,\n",
-              " 6,\n",
-              " 14,\n",
-              " 3,\n",
-              " 3,\n",
-              " 5,\n",
-              " 9,\n",
-              " 3,\n",
-              " 1,\n",
-              " 7,\n",
-              " 6,\n",
-              " 6,\n",
-              " 6,\n",
-              " 7,\n",
-              " 7,\n",
-              " 6,\n",
-              " 1,\n",
-              " 6,\n",
-              " 6,\n",
-              " 6,\n",
-              " 1,\n",
-              " 9,\n",
-              " 14,\n",
-              " 9,\n",
-              " 0,\n",
-              " 9,\n",
-              " 1,\n",
-              " 3,\n",
-              " 6,\n",
-              " 7,\n",
-              " 5,\n",
-              " 6,\n",
-              " 8,\n",
-              " 7,\n",
-              " 7,\n",
-              " 4,\n",
-              " 0,\n",
-              " 6,\n",
-              " 6,\n",
-              " 9,\n",
-              " 9,\n",
-              " 9,\n",
-              " 0,\n",
-              " 11,\n",
-              " 8,\n",
-              " 7,\n",
-              " 4,\n",
-              " 5,\n",
-              " 6,\n",
-              " 6,\n",
-              " 8,\n",
-              " 6,\n",
-              " 9,\n",
-              " 10,\n",
-              " 12,\n",
-              " 4,\n",
-              " 14,\n",
-              " 6,\n",
-              " 10,\n",
-              " 5,\n",
-              " 7,\n",
-              " 1,\n",
-              " 1,\n",
-              " 6,\n",
-              " 0,\n",
-              " 6,\n",
-              " 3,\n",
-              " 6,\n",
-              " ...]"
-            ]
-          },
-          "metadata": {},
-          "execution_count": 32
-        }
-      ],
+      "outputs": [],
       "source": [
         "pred"
       ]
     },
     {
       "cell_type": "code",
-      "execution_count": 33,
+      "execution_count": 17,
       "metadata": {
         "id": "fo6k4li1r0za"
       },
-      "outputs": [],
+      "outputs": [
+        {
+          "name": "stderr",
+          "output_type": "stream",
+          "text": [
+            "/opt/homebrew/Caskroom/miniforge/base/envs/geode-classification-py39/lib/python3.9/site-packages/sklearn/base.py:329: UserWarning: Trying to unpickle estimator LabelEncoder from version 1.0.2 when using version 1.1.3. This might lead to breaking code or invalid results. Use at your own risk. For more info please refer to:\n",
+            "https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations\n",
+            "  warnings.warn(\n"
+          ]
+        }
+      ],
       "source": [
         "import pickle \n",
         "#encoder_filename = \"models/label_encoder.pkl\"\n",
@@ -1915,7 +704,7 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 34,
+      "execution_count": 18,
       "metadata": {
         "id": "UU7qg7zVr0zb"
       },
@@ -1926,7 +715,7 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 35,
+      "execution_count": 19,
       "metadata": {
         "id": "w4eHpBztr0zb"
       },
@@ -1937,9 +726,7 @@
     },
     {
       "cell_type": "code",
-      "source": [
-        "df[df.numero == 2835]['content'+corpus].values"
-      ],
+      "execution_count": null,
       "metadata": {
         "colab": {
           "base_uri": "https://localhost:8080/"
@@ -1947,25 +734,14 @@
         "id": "KsJQMhCBxpSF",
         "outputId": "2ffa7475-e6de-4c42-a413-22c0d4b2d45f"
       },
-      "execution_count": null,
-      "outputs": [
-        {
-          "output_type": "execute_result",
-          "data": {
-            "text/plain": [
-              "array([\"\\nQueue, terme de Chancellerie, ce mot se dit de la\\nmaniere de sceller les lettres. Une lettre est scellée à\\nsimple queue, quand le sceau est attaché à un coin du\\nparchemin de la lettre qu'on a fendu exprès ; & elle\\nest scellée à double queue, quand le sceau est pendant\\nà une bande en double de parchemin passée au-travers de la lettre, comme on fait dans les expéditions\\nimportantes.\\n\",\n",
-              "       \"\\nPiquer, v. act. (Charp. & Maçon.) piquer en Charpenterie, c'est marquer un piece de bois, pour la\\ntailler & la façonner. Piquer en Maçonnerie, c'est\\nrustiquer le parement ou les lits d'une pierre, c'est-à-dire que piquer signifie en fait de moilon le tailler\\ngrossierement ; on emploie le moilon piqué de la sorte\\naux voûtes de caves, aux puits & aux murs de clôture.\\nPiquer signifie aussi faire sur les matériaux destinés à \\nla construction extérieure les bâtimens, les petits\\npoints ou creux nécessaires pour leur servir d'ornement ; \\non pique de cette maniere la pierre de taille,\\n\\nle grès & le moilon particulierement pour l'ordre\\ntoscan. (D. J.)\\n\"],\n",
-              "      dtype=object)"
-            ]
-          },
-          "metadata": {},
-          "execution_count": 34
-        }
+      "outputs": [],
+      "source": [
+        "df[df.numero == 2835]['content'+corpus].values"
       ]
     },
     {
       "cell_type": "code",
-      "execution_count": 36,
+      "execution_count": 20,
       "metadata": {
         "colab": {
           "base_uri": "https://localhost:8080/",
@@ -1976,74 +752,9 @@
       },
       "outputs": [
         {
-          "output_type": "execute_result",
           "data": {
-            "text/plain": [
-              "             idLGE  tomeLGE  rankLGE  \\\n",
-              "0            aam-0        1       63   \n",
-              "1          abaco-0        1       92   \n",
-              "2         abacot-0        1       96   \n",
-              "3        abaddon-0        1      104   \n",
-              "4  abandonnement-0        1      138   \n",
-              "5        abantes-0        1      143   \n",
-              "6         abaque-0        1      146   \n",
-              "7   abaremo-temo-0        1      152   \n",
-              "8         abares-0        1      153   \n",
-              "9         abarim-0        1      154   \n",
-              "\n",
-              "                                          contentLGE  volumeEDdA  numeroEDdA  \\\n",
-              "0  AAM. Mesure de capacité pour les liquides en u...           1          31   \n",
-              "1  ABACO, architecte italien du xvi siècle (V. La...           1          42   \n",
-              "2  ABACOT. Double couronne que portaient autrefoi...           1          44   \n",
-              "3  ABADDONou APOLYON le Destructeur. « Elles\\nava...           1          46   \n",
-              "4  ABANDONNEMENT. I. Droit civil. — Ce mot est un...           1          75   \n",
-              "5  ABANTES. Peuplade d’origine douteuse que l’on ...           1          81   \n",
-              "6  ABAQUE. I. Antiquité.— Dans l’antiquité on don...           1          84   \n",
-              "7  ABAREMO-TEMO(Bot.). Nom sous lequel Pison\\n(Br...           1          90   \n",
-              "8  ABARES. Nom de deux peuples distincts, habitan...           1          91   \n",
-              "9  ABARIM. Chaîne de montagnes de la Palestine au...           1          92   \n",
-              "\n",
-              "        headEDdA   authorEDdA normclassEDdA  \\\n",
-              "0            AAM      Diderot  unclassified   \n",
-              "1          ABACO   d'Alembert  unclassified   \n",
-              "2         ABACOT      Diderot  unclassified   \n",
-              "3        ABADDON      Diderot  unclassified   \n",
-              "4  ABANDONNEMENT    Toussaint         Droit   \n",
-              "5        ABANTES      Diderot  unclassified   \n",
-              "6         ABAQUE  d'Alembert2  unclassified   \n",
-              "7   ABAREMO-TEMO      Diderot  unclassified   \n",
-              "8         ABARES      Diderot  unclassified   \n",
-              "9         ABARIM      Diderot  unclassified   \n",
-              "\n",
-              "                                         contentEDdA  nbWordsEDdA  \\\n",
-              "0  \\n* AAM, s. mesure des Liquides, en usage à Am...           18   \n",
-              "1  \\nABACO, s. m. Quelques anciens Auteurs se ser...           26   \n",
-              "2  \\n* ABACOT, s. m. nom de l'ancienne parure dè\\...           22   \n",
-              "3  \\n* ABADDON, s. m. vient d'abad, perdre. C'est...           25   \n",
-              "4  \\nABANDONNEMENT, s. m. en Droit, est le délais...           77   \n",
-              "5  \\n* ABANTES, s. m. pl. Peuples de Thrace qui p...           26   \n",
-              "6  \\nABAQUE, s. m. chez les anciens Mathématicien...           52   \n",
-              "7  \\n* ABAREMO-TEMO, s. m. arbre qui croît, dit-o...           55   \n",
-              "8  \\n* ABARES, restes de la Nation des Huns qui s...           24   \n",
-              "9  \\n* ABARIM, montagne de l'Arabie d'où Moyse vi...           23   \n",
-              "\n",
-              "       superdomainEDdA  superdomainBertEDdA   superdomainBertLGE  \n",
-              "0         Unclassified             Commerce             Commerce  \n",
-              "1         Unclassified             Physique           Beaux-arts  \n",
-              "2         Unclassified             Histoire             Histoire  \n",
-              "3         Unclassified             Histoire             Religion  \n",
-              "4  Droit Jurisprudence  Droit Jurisprudence  Droit Jurisprudence  \n",
-              "5         Unclassified             Histoire             Histoire  \n",
-              "6         Unclassified             Physique             Histoire  \n",
-              "7         Unclassified   Histoire naturelle   Histoire naturelle  \n",
-              "8         Unclassified             Histoire           Géographie  \n",
-              "9         Unclassified           Géographie           Géographie  "
-            ],
             "text/html": [
-              "\n",
-              "  <div id=\"df-825c5672-f5f9-49ed-95eb-fdcae67ba1f1\">\n",
-              "    <div class=\"colab-df-container\">\n",
-              "      <div>\n",
+              "<div>\n",
               "<style scoped>\n",
               "    .dataframe tbody tr th:only-of-type {\n",
               "        vertical-align: middle;\n",
@@ -2061,276 +772,263 @@
               "  <thead>\n",
               "    <tr style=\"text-align: right;\">\n",
               "      <th></th>\n",
-              "      <th>idLGE</th>\n",
-              "      <th>tomeLGE</th>\n",
-              "      <th>rankLGE</th>\n",
-              "      <th>contentLGE</th>\n",
-              "      <th>volumeEDdA</th>\n",
-              "      <th>numeroEDdA</th>\n",
-              "      <th>headEDdA</th>\n",
-              "      <th>authorEDdA</th>\n",
-              "      <th>normclassEDdA</th>\n",
-              "      <th>contentEDdA</th>\n",
-              "      <th>nbWordsEDdA</th>\n",
-              "      <th>superdomainEDdA</th>\n",
-              "      <th>superdomainBertEDdA</th>\n",
-              "      <th>superdomainBertLGE</th>\n",
+              "      <th>volume</th>\n",
+              "      <th>numero</th>\n",
+              "      <th>head</th>\n",
+              "      <th>author</th>\n",
+              "      <th>edda_class</th>\n",
+              "      <th>enccre_id</th>\n",
+              "      <th>enccre_class</th>\n",
+              "      <th>content</th>\n",
+              "      <th>content_without_designant</th>\n",
+              "      <th>first_paragraph</th>\n",
+              "      <th>nb_words</th>\n",
+              "      <th>super_domain</th>\n",
+              "      <th>superdomainBert</th>\n",
               "    </tr>\n",
               "  </thead>\n",
               "  <tbody>\n",
               "    <tr>\n",
               "      <th>0</th>\n",
-              "      <td>aam-0</td>\n",
               "      <td>1</td>\n",
-              "      <td>63</td>\n",
-              "      <td>AAM. Mesure de capacité pour les liquides en u...</td>\n",
               "      <td>1</td>\n",
-              "      <td>31</td>\n",
-              "      <td>AAM</td>\n",
-              "      <td>Diderot</td>\n",
+              "      <td>Title Page</td>\n",
+              "      <td>unsigned</td>\n",
               "      <td>unclassified</td>\n",
-              "      <td>\\n* AAM, s. mesure des Liquides, en usage à Am...</td>\n",
-              "      <td>18</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>\\n\\nENCYCLOPÉDIE,\\nDICTIONNAIRE RAISONNÉ\\nDES ...</td>\n",
+              "      <td>\\n\\nENCYCLOPÉDIE,\\nDICTIONNAIRE RAISONNÉ\\nDES ...</td>\n",
+              "      <td>\\n\\nENCYCLOPÉDIE,\\nDICTIONNAIRE RAISONNÉ\\nDES ...</td>\n",
+              "      <td>151</td>\n",
               "      <td>Unclassified</td>\n",
-              "      <td>Commerce</td>\n",
-              "      <td>Commerce</td>\n",
+              "      <td>Philosophie</td>\n",
               "    </tr>\n",
               "    <tr>\n",
               "      <th>1</th>\n",
-              "      <td>abaco-0</td>\n",
-              "      <td>1</td>\n",
-              "      <td>92</td>\n",
-              "      <td>ABACO, architecte italien du xvi siècle (V. La...</td>\n",
               "      <td>1</td>\n",
-              "      <td>42</td>\n",
-              "      <td>ABACO</td>\n",
-              "      <td>d'Alembert</td>\n",
+              "      <td>2</td>\n",
+              "      <td>A MONSEIGNEUR LE COMTE D'ARGENSON</td>\n",
+              "      <td>Diderot &amp; d'Alembert</td>\n",
               "      <td>unclassified</td>\n",
-              "      <td>\\nABACO, s. m. Quelques anciens Auteurs se ser...</td>\n",
-              "      <td>26</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>\\n\\nA MONSEIGNEUR\\nLE COMTE D'ARGENSON,\\nMINIS...</td>\n",
+              "      <td>\\n\\nA MONSEIGNEUR\\nLE COMTE D'ARGENSON,\\nMINIS...</td>\n",
+              "      <td>\\n\\nA MONSEIGNEUR\\nLE COMTE D'ARGENSON,\\nMINIS...</td>\n",
+              "      <td>208</td>\n",
               "      <td>Unclassified</td>\n",
-              "      <td>Physique</td>\n",
-              "      <td>Beaux-arts</td>\n",
+              "      <td>Philosophie</td>\n",
               "    </tr>\n",
               "    <tr>\n",
               "      <th>2</th>\n",
-              "      <td>abacot-0</td>\n",
-              "      <td>1</td>\n",
-              "      <td>96</td>\n",
-              "      <td>ABACOT. Double couronne que portaient autrefoi...</td>\n",
               "      <td>1</td>\n",
-              "      <td>44</td>\n",
-              "      <td>ABACOT</td>\n",
-              "      <td>Diderot</td>\n",
+              "      <td>3</td>\n",
+              "      <td>DISCOURS PRÉLIMINAIRE DES EDITEURS</td>\n",
+              "      <td>d'Alembert</td>\n",
               "      <td>unclassified</td>\n",
-              "      <td>\\n* ABACOT, s. m. nom de l'ancienne parure dè\\...</td>\n",
-              "      <td>22</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>\\n\\nDISCOURS PRÉLIMINAIRE\\nDES EDITEURS.\\n\\n\\n...</td>\n",
+              "      <td>\\n\\nDISCOURS PRÉLIMINAIRE\\nDES EDITEURS.\\n\\n\\n...</td>\n",
+              "      <td>\\n\\nDISCOURS PRÉLIMINAIRE\\nDES EDITEURS.\\n\\n</td>\n",
+              "      <td>44669</td>\n",
               "      <td>Unclassified</td>\n",
-              "      <td>Histoire</td>\n",
-              "      <td>Histoire</td>\n",
+              "      <td>Belles-lettres</td>\n",
               "    </tr>\n",
               "    <tr>\n",
               "      <th>3</th>\n",
-              "      <td>abaddon-0</td>\n",
-              "      <td>1</td>\n",
-              "      <td>104</td>\n",
-              "      <td>ABADDONou APOLYON le Destructeur. « Elles\\nava...</td>\n",
               "      <td>1</td>\n",
-              "      <td>46</td>\n",
-              "      <td>ABADDON</td>\n",
-              "      <td>Diderot</td>\n",
-              "      <td>unclassified</td>\n",
-              "      <td>\\n* ABADDON, s. m. vient d'abad, perdre. C'est...</td>\n",
-              "      <td>25</td>\n",
-              "      <td>Unclassified</td>\n",
-              "      <td>Histoire</td>\n",
-              "      <td>Religion</td>\n",
+              "      <td>5</td>\n",
+              "      <td>A, a &amp; a</td>\n",
+              "      <td>Dumarsais5</td>\n",
+              "      <td>Grammaire</td>\n",
+              "      <td>v1-1-0</td>\n",
+              "      <td>Grammaire</td>\n",
+              "      <td>\\nA, a &amp; a s.m. (ordre Encyclopéd.\\nEntend. Sc...</td>\n",
+              "      <td>\\nA, a &amp; a s.m. (ordre Encyclopéd.\\nEntend. Sc...</td>\n",
+              "      <td>\\nA, a &amp; a s.m. (ordre Encyclopéd.\\nEntend. Sc...</td>\n",
+              "      <td>711</td>\n",
+              "      <td>Philosophie</td>\n",
+              "      <td>Philosophie</td>\n",
               "    </tr>\n",
               "    <tr>\n",
               "      <th>4</th>\n",
-              "      <td>abandonnement-0</td>\n",
-              "      <td>1</td>\n",
-              "      <td>138</td>\n",
-              "      <td>ABANDONNEMENT. I. Droit civil. — Ce mot est un...</td>\n",
               "      <td>1</td>\n",
-              "      <td>75</td>\n",
-              "      <td>ABANDONNEMENT</td>\n",
-              "      <td>Toussaint</td>\n",
-              "      <td>Droit</td>\n",
-              "      <td>\\nABANDONNEMENT, s. m. en Droit, est le délais...</td>\n",
-              "      <td>77</td>\n",
-              "      <td>Droit Jurisprudence</td>\n",
-              "      <td>Droit Jurisprudence</td>\n",
-              "      <td>Droit Jurisprudence</td>\n",
+              "      <td>6</td>\n",
+              "      <td>A</td>\n",
+              "      <td>Dumarsais5</td>\n",
+              "      <td>unclassified</td>\n",
+              "      <td>v1-1-1</td>\n",
+              "      <td>Grammaire</td>\n",
+              "      <td>\\nA, mot, est 1. la troisieme personne du prés...</td>\n",
+              "      <td>\\nA, mot, est 1. la troisieme personne du prés...</td>\n",
+              "      <td>\\nA, mot, est 1. la troisieme personne du prés...</td>\n",
+              "      <td>238</td>\n",
+              "      <td>Unclassified</td>\n",
+              "      <td>Philosophie</td>\n",
               "    </tr>\n",
               "    <tr>\n",
               "      <th>5</th>\n",
-              "      <td>abantes-0</td>\n",
               "      <td>1</td>\n",
-              "      <td>143</td>\n",
-              "      <td>ABANTES. Peuplade d’origine douteuse que l’on ...</td>\n",
-              "      <td>1</td>\n",
-              "      <td>81</td>\n",
-              "      <td>ABANTES</td>\n",
-              "      <td>Diderot</td>\n",
+              "      <td>7</td>\n",
+              "      <td>A</td>\n",
+              "      <td>Dumarsais</td>\n",
               "      <td>unclassified</td>\n",
-              "      <td>\\n* ABANTES, s. m. pl. Peuples de Thrace qui p...</td>\n",
-              "      <td>26</td>\n",
+              "      <td>v1-1-2</td>\n",
+              "      <td>Grammaire</td>\n",
+              "      <td>\\nA, préposition vient du latin à, à dextris, ...</td>\n",
+              "      <td>\\nA, préposition vient du latin à, à dextris, ...</td>\n",
+              "      <td>\\nA, préposition vient du latin à, à dextris, ...</td>\n",
+              "      <td>1980</td>\n",
               "      <td>Unclassified</td>\n",
-              "      <td>Histoire</td>\n",
-              "      <td>Histoire</td>\n",
+              "      <td>Philosophie</td>\n",
               "    </tr>\n",
               "    <tr>\n",
               "      <th>6</th>\n",
-              "      <td>abaque-0</td>\n",
-              "      <td>1</td>\n",
-              "      <td>146</td>\n",
-              "      <td>ABAQUE. I. Antiquité.— Dans l’antiquité on don...</td>\n",
               "      <td>1</td>\n",
-              "      <td>84</td>\n",
-              "      <td>ABAQUE</td>\n",
-              "      <td>d'Alembert2</td>\n",
+              "      <td>8</td>\n",
+              "      <td>A</td>\n",
+              "      <td>Mallet</td>\n",
               "      <td>unclassified</td>\n",
-              "      <td>\\nABAQUE, s. m. chez les anciens Mathématicien...</td>\n",
-              "      <td>52</td>\n",
+              "      <td>v1-1-3</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>\\nA, étoit une lettre numérale parmi les Ancie...</td>\n",
+              "      <td>\\nA, étoit une lettre numérale parmi les Ancie...</td>\n",
+              "      <td>\\nA, étoit une lettre numérale parmi les Ancie...</td>\n",
+              "      <td>200</td>\n",
               "      <td>Unclassified</td>\n",
-              "      <td>Physique</td>\n",
               "      <td>Histoire</td>\n",
               "    </tr>\n",
               "    <tr>\n",
               "      <th>7</th>\n",
-              "      <td>abaremo-temo-0</td>\n",
               "      <td>1</td>\n",
-              "      <td>152</td>\n",
-              "      <td>ABAREMO-TEMO(Bot.). Nom sous lequel Pison\\n(Br...</td>\n",
-              "      <td>1</td>\n",
-              "      <td>90</td>\n",
-              "      <td>ABAREMO-TEMO</td>\n",
-              "      <td>Diderot</td>\n",
+              "      <td>9</td>\n",
+              "      <td>A, lettre symbolique</td>\n",
+              "      <td>Mallet</td>\n",
               "      <td>unclassified</td>\n",
-              "      <td>\\n* ABAREMO-TEMO, s. m. arbre qui croît, dit-o...</td>\n",
-              "      <td>55</td>\n",
+              "      <td>v1-1-4</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>\\nA, lettre symbolique, étoit un hiéroglyphe c...</td>\n",
+              "      <td>\\nA, lettre symbolique, étoit un hiéroglyphe c...</td>\n",
+              "      <td>\\nA, lettre symbolique, étoit un hiéroglyphe c...</td>\n",
+              "      <td>82</td>\n",
               "      <td>Unclassified</td>\n",
-              "      <td>Histoire naturelle</td>\n",
-              "      <td>Histoire naturelle</td>\n",
+              "      <td>Histoire</td>\n",
               "    </tr>\n",
               "    <tr>\n",
               "      <th>8</th>\n",
-              "      <td>abares-0</td>\n",
               "      <td>1</td>\n",
-              "      <td>153</td>\n",
-              "      <td>ABARES. Nom de deux peuples distincts, habitan...</td>\n",
-              "      <td>1</td>\n",
-              "      <td>91</td>\n",
-              "      <td>ABARES</td>\n",
-              "      <td>Diderot</td>\n",
+              "      <td>10</td>\n",
+              "      <td>A, numismatique ou monétaire</td>\n",
+              "      <td>Mallet</td>\n",
               "      <td>unclassified</td>\n",
-              "      <td>\\n* ABARES, restes de la Nation des Huns qui s...</td>\n",
-              "      <td>24</td>\n",
+              "      <td>v1-1-5</td>\n",
+              "      <td>Médailles</td>\n",
+              "      <td>\\nA, numismatique ou monétaire, sur le revers ...</td>\n",
+              "      <td>\\nA, numismatique ou monétaire, sur le revers ...</td>\n",
+              "      <td>\\nA, numismatique ou monétaire, sur le revers ...</td>\n",
+              "      <td>112</td>\n",
               "      <td>Unclassified</td>\n",
               "      <td>Histoire</td>\n",
-              "      <td>Géographie</td>\n",
               "    </tr>\n",
               "    <tr>\n",
               "      <th>9</th>\n",
-              "      <td>abarim-0</td>\n",
-              "      <td>1</td>\n",
-              "      <td>154</td>\n",
-              "      <td>ABARIM. Chaîne de montagnes de la Palestine au...</td>\n",
               "      <td>1</td>\n",
-              "      <td>92</td>\n",
-              "      <td>ABARIM</td>\n",
-              "      <td>Diderot</td>\n",
+              "      <td>11</td>\n",
+              "      <td>A, lapidaire</td>\n",
+              "      <td>Mallet</td>\n",
               "      <td>unclassified</td>\n",
-              "      <td>\\n* ABARIM, montagne de l'Arabie d'où Moyse vi...</td>\n",
-              "      <td>23</td>\n",
+              "      <td>v1-1-6</td>\n",
+              "      <td>Histoire</td>\n",
+              "      <td>\\nA, lapidaire, dans les anciennes inscription...</td>\n",
+              "      <td>\\nA, lapidaire, dans les anciennes inscription...</td>\n",
+              "      <td>\\nA, lapidaire, dans les anciennes inscription...</td>\n",
+              "      <td>80</td>\n",
               "      <td>Unclassified</td>\n",
-              "      <td>Géographie</td>\n",
-              "      <td>Géographie</td>\n",
+              "      <td>Histoire</td>\n",
               "    </tr>\n",
               "  </tbody>\n",
               "</table>\n",
-              "</div>\n",
-              "      <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-825c5672-f5f9-49ed-95eb-fdcae67ba1f1')\"\n",
-              "              title=\"Convert this dataframe to an interactive table.\"\n",
-              "              style=\"display:none;\">\n",
-              "        \n",
-              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
-              "       width=\"24px\">\n",
-              "    <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n",
-              "    <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n",
-              "  </svg>\n",
-              "      </button>\n",
-              "      \n",
-              "  <style>\n",
-              "    .colab-df-container {\n",
-              "      display:flex;\n",
-              "      flex-wrap:wrap;\n",
-              "      gap: 12px;\n",
-              "    }\n",
-              "\n",
-              "    .colab-df-convert {\n",
-              "      background-color: #E8F0FE;\n",
-              "      border: none;\n",
-              "      border-radius: 50%;\n",
-              "      cursor: pointer;\n",
-              "      display: none;\n",
-              "      fill: #1967D2;\n",
-              "      height: 32px;\n",
-              "      padding: 0 0 0 0;\n",
-              "      width: 32px;\n",
-              "    }\n",
-              "\n",
-              "    .colab-df-convert:hover {\n",
-              "      background-color: #E2EBFA;\n",
-              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
-              "      fill: #174EA6;\n",
-              "    }\n",
+              "</div>"
+            ],
+            "text/plain": [
+              "   volume  numero                                head                author  \\\n",
+              "0       1       1                          Title Page              unsigned   \n",
+              "1       1       2   A MONSEIGNEUR LE COMTE D'ARGENSON  Diderot & d'Alembert   \n",
+              "2       1       3  DISCOURS PRÉLIMINAIRE DES EDITEURS            d'Alembert   \n",
+              "3       1       5                            A, a & a            Dumarsais5   \n",
+              "4       1       6                                   A            Dumarsais5   \n",
+              "5       1       7                                   A             Dumarsais   \n",
+              "6       1       8                                   A                Mallet   \n",
+              "7       1       9                A, lettre symbolique                Mallet   \n",
+              "8       1      10        A, numismatique ou monétaire                Mallet   \n",
+              "9       1      11                        A, lapidaire                Mallet   \n",
               "\n",
-              "    [theme=dark] .colab-df-convert {\n",
-              "      background-color: #3B4455;\n",
-              "      fill: #D2E3FC;\n",
-              "    }\n",
+              "     edda_class enccre_id enccre_class  \\\n",
+              "0  unclassified       NaN          NaN   \n",
+              "1  unclassified       NaN          NaN   \n",
+              "2  unclassified       NaN          NaN   \n",
+              "3     Grammaire    v1-1-0    Grammaire   \n",
+              "4  unclassified    v1-1-1    Grammaire   \n",
+              "5  unclassified    v1-1-2    Grammaire   \n",
+              "6  unclassified    v1-1-3          NaN   \n",
+              "7  unclassified    v1-1-4          NaN   \n",
+              "8  unclassified    v1-1-5    Médailles   \n",
+              "9  unclassified    v1-1-6     Histoire   \n",
               "\n",
-              "    [theme=dark] .colab-df-convert:hover {\n",
-              "      background-color: #434B5C;\n",
-              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
-              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
-              "      fill: #FFFFFF;\n",
-              "    }\n",
-              "  </style>\n",
+              "                                             content  \\\n",
+              "0  \\n\\nENCYCLOPÉDIE,\\nDICTIONNAIRE RAISONNÉ\\nDES ...   \n",
+              "1  \\n\\nA MONSEIGNEUR\\nLE COMTE D'ARGENSON,\\nMINIS...   \n",
+              "2  \\n\\nDISCOURS PRÉLIMINAIRE\\nDES EDITEURS.\\n\\n\\n...   \n",
+              "3  \\nA, a & a s.m. (ordre Encyclopéd.\\nEntend. Sc...   \n",
+              "4  \\nA, mot, est 1. la troisieme personne du prés...   \n",
+              "5  \\nA, préposition vient du latin à, à dextris, ...   \n",
+              "6  \\nA, étoit une lettre numérale parmi les Ancie...   \n",
+              "7  \\nA, lettre symbolique, étoit un hiéroglyphe c...   \n",
+              "8  \\nA, numismatique ou monétaire, sur le revers ...   \n",
+              "9  \\nA, lapidaire, dans les anciennes inscription...   \n",
               "\n",
-              "      <script>\n",
-              "        const buttonEl =\n",
-              "          document.querySelector('#df-825c5672-f5f9-49ed-95eb-fdcae67ba1f1 button.colab-df-convert');\n",
-              "        buttonEl.style.display =\n",
-              "          google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
+              "                           content_without_designant  \\\n",
+              "0  \\n\\nENCYCLOPÉDIE,\\nDICTIONNAIRE RAISONNÉ\\nDES ...   \n",
+              "1  \\n\\nA MONSEIGNEUR\\nLE COMTE D'ARGENSON,\\nMINIS...   \n",
+              "2  \\n\\nDISCOURS PRÉLIMINAIRE\\nDES EDITEURS.\\n\\n\\n...   \n",
+              "3  \\nA, a & a s.m. (ordre Encyclopéd.\\nEntend. Sc...   \n",
+              "4  \\nA, mot, est 1. la troisieme personne du prés...   \n",
+              "5  \\nA, préposition vient du latin à, à dextris, ...   \n",
+              "6  \\nA, étoit une lettre numérale parmi les Ancie...   \n",
+              "7  \\nA, lettre symbolique, étoit un hiéroglyphe c...   \n",
+              "8  \\nA, numismatique ou monétaire, sur le revers ...   \n",
+              "9  \\nA, lapidaire, dans les anciennes inscription...   \n",
               "\n",
-              "        async function convertToInteractive(key) {\n",
-              "          const element = document.querySelector('#df-825c5672-f5f9-49ed-95eb-fdcae67ba1f1');\n",
-              "          const dataTable =\n",
-              "            await google.colab.kernel.invokeFunction('convertToInteractive',\n",
-              "                                                     [key], {});\n",
-              "          if (!dataTable) return;\n",
+              "                                     first_paragraph  nb_words  super_domain  \\\n",
+              "0  \\n\\nENCYCLOPÉDIE,\\nDICTIONNAIRE RAISONNÉ\\nDES ...       151  Unclassified   \n",
+              "1  \\n\\nA MONSEIGNEUR\\nLE COMTE D'ARGENSON,\\nMINIS...       208  Unclassified   \n",
+              "2       \\n\\nDISCOURS PRÉLIMINAIRE\\nDES EDITEURS.\\n\\n     44669  Unclassified   \n",
+              "3  \\nA, a & a s.m. (ordre Encyclopéd.\\nEntend. Sc...       711   Philosophie   \n",
+              "4  \\nA, mot, est 1. la troisieme personne du prés...       238  Unclassified   \n",
+              "5  \\nA, préposition vient du latin à, à dextris, ...      1980  Unclassified   \n",
+              "6  \\nA, étoit une lettre numérale parmi les Ancie...       200  Unclassified   \n",
+              "7  \\nA, lettre symbolique, étoit un hiéroglyphe c...        82  Unclassified   \n",
+              "8  \\nA, numismatique ou monétaire, sur le revers ...       112  Unclassified   \n",
+              "9  \\nA, lapidaire, dans les anciennes inscription...        80  Unclassified   \n",
               "\n",
-              "          const docLinkHtml = 'Like what you see? Visit the ' +\n",
-              "            '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
-              "            + ' to learn more about interactive tables.';\n",
-              "          element.innerHTML = '';\n",
-              "          dataTable['output_type'] = 'display_data';\n",
-              "          await google.colab.output.renderOutput(dataTable, element);\n",
-              "          const docLink = document.createElement('div');\n",
-              "          docLink.innerHTML = docLinkHtml;\n",
-              "          element.appendChild(docLink);\n",
-              "        }\n",
-              "      </script>\n",
-              "    </div>\n",
-              "  </div>\n",
-              "  "
+              "  superdomainBert  \n",
+              "0     Philosophie  \n",
+              "1     Philosophie  \n",
+              "2  Belles-lettres  \n",
+              "3     Philosophie  \n",
+              "4     Philosophie  \n",
+              "5     Philosophie  \n",
+              "6        Histoire  \n",
+              "7        Histoire  \n",
+              "8        Histoire  \n",
+              "9        Histoire  "
             ]
           },
+          "execution_count": 20,
           "metadata": {},
-          "execution_count": 36
+          "output_type": "execute_result"
         }
       ],
       "source": [
@@ -2339,45 +1037,45 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 37,
+      "execution_count": 21,
       "metadata": {
         "id": "J9rObbvVr0zc"
       },
       "outputs": [],
       "source": [
-        "df.to_csv(drive_path + \"/predictions/predictions_parallel_superdomain.tsv\", sep=\"\\t\")"
+        "df.to_csv(drive_path + \"predictions/EDdA_dataset_articles_superdomainBERT_230313.tsv\", sep=\"\\t\")"
       ]
     },
     {
       "cell_type": "code",
-      "source": [
-        "df.drop(columns=['contentLGE', 'contentEDdA'], inplace=True)"
-      ],
+      "execution_count": null,
       "metadata": {
         "id": "8cX6XBq8_F5T"
       },
-      "execution_count": 39,
-      "outputs": []
+      "outputs": [],
+      "source": [
+        "df.drop(columns=['contentLGE', 'contentEDdA'], inplace=True)"
+      ]
     },
     {
       "cell_type": "code",
-      "source": [
-        "df.to_csv(drive_path + \"/predictions/metadata_parallel_predictions_superdomain.csv\", sep=\",\", index=False)"
-      ],
+      "execution_count": null,
       "metadata": {
         "id": "7fx6BPpg0iNc"
       },
-      "execution_count": 41,
-      "outputs": []
+      "outputs": [],
+      "source": [
+        "df.to_csv(drive_path + \"predictions/metadata_parallel_predictions_superdomain.csv\", sep=\",\", index=False)"
+      ]
     },
     {
       "cell_type": "code",
-      "source": [],
+      "execution_count": null,
       "metadata": {
         "id": "7TD1mbKj_fXH"
       },
-      "execution_count": null,
-      "outputs": []
+      "outputs": [],
+      "source": []
     }
   ],
   "metadata": {
@@ -2410,77 +1108,147 @@
     },
     "widgets": {
       "application/vnd.jupyter.widget-state+json": {
-        "11c285bed74e46a08fbb7bf88715aafa": {
+        "0180ffc200e8466191a11a723c82e43f": {
           "model_module": "@jupyter-widgets/controls",
-          "model_name": "HBoxModel",
           "model_module_version": "1.5.0",
+          "model_name": "FloatProgressModel",
           "state": {
             "_dom_classes": [],
             "_model_module": "@jupyter-widgets/controls",
             "_model_module_version": "1.5.0",
-            "_model_name": "HBoxModel",
+            "_model_name": "FloatProgressModel",
             "_view_count": null,
             "_view_module": "@jupyter-widgets/controls",
             "_view_module_version": "1.5.0",
-            "_view_name": "HBoxView",
-            "box_style": "",
-            "children": [
-              "IPY_MODEL_3fde7318ebc3458cb64f8927fdcbaee3",
-              "IPY_MODEL_8d57eb44d9394604981a8f8f97f48b7c",
-              "IPY_MODEL_1cb6ed877c2b455b9463b12c2da877d8"
-            ],
-            "layout": "IPY_MODEL_5e03651dca944a5f91b675c503feeeac"
+            "_view_name": "ProgressView",
+            "bar_style": "success",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_c4ea841cb43747cdbce35f8f9c711cde",
+            "max": 29,
+            "min": 0,
+            "orientation": "horizontal",
+            "style": "IPY_MODEL_2d937fce2e6c4b69816352bd264ded41",
+            "value": 29
           }
         },
-        "3fde7318ebc3458cb64f8927fdcbaee3": {
+        "04a86b4164fa49de8fd47d4d373e1d81": {
           "model_module": "@jupyter-widgets/controls",
-          "model_name": "HTMLModel",
           "model_module_version": "1.5.0",
+          "model_name": "FloatProgressModel",
           "state": {
             "_dom_classes": [],
             "_model_module": "@jupyter-widgets/controls",
             "_model_module_version": "1.5.0",
-            "_model_name": "HTMLModel",
+            "_model_name": "FloatProgressModel",
             "_view_count": null,
             "_view_module": "@jupyter-widgets/controls",
             "_view_module_version": "1.5.0",
-            "_view_name": "HTMLView",
+            "_view_name": "ProgressView",
+            "bar_style": "success",
             "description": "",
             "description_tooltip": null,
-            "layout": "IPY_MODEL_0521c3cc6abd44ae989ac0701100045d",
-            "placeholder": "​",
-            "style": "IPY_MODEL_d12a8ef069af4d79870bd783f2343184",
-            "value": "Downloading: 100%"
+            "layout": "IPY_MODEL_4edc5b66f0eb44a0b05876fda90f0d1b",
+            "max": 1961828,
+            "min": 0,
+            "orientation": "horizontal",
+            "style": "IPY_MODEL_5285a390fb42415289d89585e04c8994",
+            "value": 1961828
           }
         },
-        "8d57eb44d9394604981a8f8f97f48b7c": {
+        "0521c3cc6abd44ae989ac0701100045d": {
+          "model_module": "@jupyter-widgets/base",
+          "model_module_version": "1.2.0",
+          "model_name": "LayoutModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "087ebcb093bb41c28485bdc762fb5da6": {
           "model_module": "@jupyter-widgets/controls",
-          "model_name": "FloatProgressModel",
           "model_module_version": "1.5.0",
+          "model_name": "DescriptionStyleModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        },
+        "11c285bed74e46a08fbb7bf88715aafa": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "HBoxModel",
           "state": {
             "_dom_classes": [],
             "_model_module": "@jupyter-widgets/controls",
             "_model_module_version": "1.5.0",
-            "_model_name": "FloatProgressModel",
+            "_model_name": "HBoxModel",
             "_view_count": null,
             "_view_module": "@jupyter-widgets/controls",
             "_view_module_version": "1.5.0",
-            "_view_name": "ProgressView",
-            "bar_style": "success",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_28d38094dcd54d6694e2efad7fea6abb",
-            "max": 995526,
-            "min": 0,
-            "orientation": "horizontal",
-            "style": "IPY_MODEL_6f80ea06220b4a498e6169e55cd8800f",
-            "value": 995526
+            "_view_name": "HBoxView",
+            "box_style": "",
+            "children": [
+              "IPY_MODEL_3fde7318ebc3458cb64f8927fdcbaee3",
+              "IPY_MODEL_8d57eb44d9394604981a8f8f97f48b7c",
+              "IPY_MODEL_1cb6ed877c2b455b9463b12c2da877d8"
+            ],
+            "layout": "IPY_MODEL_5e03651dca944a5f91b675c503feeeac"
           }
         },
         "1cb6ed877c2b455b9463b12c2da877d8": {
           "model_module": "@jupyter-widgets/controls",
-          "model_name": "HTMLModel",
           "model_module_version": "1.5.0",
+          "model_name": "HTMLModel",
           "state": {
             "_dom_classes": [],
             "_model_module": "@jupyter-widgets/controls",
@@ -2498,10 +1266,10 @@
             "value": " 996k/996k [00:00&lt;00:00, 2.00MB/s]"
           }
         },
-        "5e03651dca944a5f91b675c503feeeac": {
+        "209ff109c8e142dfba37baea2d3d5de7": {
           "model_module": "@jupyter-widgets/base",
-          "model_name": "LayoutModel",
           "model_module_version": "1.2.0",
+          "model_name": "LayoutModel",
           "state": {
             "_model_module": "@jupyter-widgets/base",
             "_model_module_version": "1.2.0",
@@ -2550,10 +1318,10 @@
             "width": null
           }
         },
-        "0521c3cc6abd44ae989ac0701100045d": {
+        "28d38094dcd54d6694e2efad7fea6abb": {
           "model_module": "@jupyter-widgets/base",
-          "model_name": "LayoutModel",
           "model_module_version": "1.2.0",
+          "model_name": "LayoutModel",
           "state": {
             "_model_module": "@jupyter-widgets/base",
             "_model_module_version": "1.2.0",
@@ -2602,10 +1370,26 @@
             "width": null
           }
         },
-        "d12a8ef069af4d79870bd783f2343184": {
+        "2924cdc1348942cfb23f28a5383af3e4": {
           "model_module": "@jupyter-widgets/controls",
-          "model_name": "DescriptionStyleModel",
           "model_module_version": "1.5.0",
+          "model_name": "ProgressStyleModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "ProgressStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "bar_color": null,
+            "description_width": ""
+          }
+        },
+        "2b9b4eac7994405ca9bce38332df2629": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "DescriptionStyleModel",
           "state": {
             "_model_module": "@jupyter-widgets/controls",
             "_model_module_version": "1.5.0",
@@ -2617,10 +1401,26 @@
             "description_width": ""
           }
         },
-        "28d38094dcd54d6694e2efad7fea6abb": {
+        "2d937fce2e6c4b69816352bd264ded41": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "ProgressStyleModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "ProgressStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "bar_color": null,
+            "description_width": ""
+          }
+        },
+        "39636049d60a4bb4bde7d0ef1af25d78": {
           "model_module": "@jupyter-widgets/base",
-          "model_name": "LayoutModel",
           "model_module_version": "1.2.0",
+          "model_name": "LayoutModel",
           "state": {
             "_model_module": "@jupyter-widgets/base",
             "_model_module_version": "1.2.0",
@@ -2669,26 +1469,10 @@
             "width": null
           }
         },
-        "6f80ea06220b4a498e6169e55cd8800f": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "ProgressStyleModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "ProgressStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "bar_color": null,
-            "description_width": ""
-          }
-        },
         "3de8b4b0d6494c058589c535dc24dc3e": {
           "model_module": "@jupyter-widgets/base",
-          "model_name": "LayoutModel",
           "model_module_version": "1.2.0",
+          "model_name": "LayoutModel",
           "state": {
             "_model_module": "@jupyter-widgets/base",
             "_model_module_version": "1.2.0",
@@ -2737,47 +1521,10 @@
             "width": null
           }
         },
-        "e0df5e2d4ebd4eb3b126c16dadb2ba62": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "DescriptionStyleModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "DescriptionStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "description_width": ""
-          }
-        },
-        "9be44ba364a344f2b6b2546ae9d61ba8": {
+        "3fde7318ebc3458cb64f8927fdcbaee3": {
           "model_module": "@jupyter-widgets/controls",
-          "model_name": "HBoxModel",
           "model_module_version": "1.5.0",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HBoxModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HBoxView",
-            "box_style": "",
-            "children": [
-              "IPY_MODEL_fe472df31774495c83aa159e116ba2ee",
-              "IPY_MODEL_0180ffc200e8466191a11a723c82e43f",
-              "IPY_MODEL_a07ac2935a3f4d84971ae9147a854969"
-            ],
-            "layout": "IPY_MODEL_af4ae976808042bf929ab17df10530b2"
-          }
-        },
-        "fe472df31774495c83aa159e116ba2ee": {
-          "model_module": "@jupyter-widgets/controls",
           "model_name": "HTMLModel",
-          "model_module_version": "1.5.0",
           "state": {
             "_dom_classes": [],
             "_model_module": "@jupyter-widgets/controls",
@@ -2789,61 +1536,53 @@
             "_view_name": "HTMLView",
             "description": "",
             "description_tooltip": null,
-            "layout": "IPY_MODEL_b2277b3d600c43f999b3a07215ac2e13",
+            "layout": "IPY_MODEL_0521c3cc6abd44ae989ac0701100045d",
             "placeholder": "​",
-            "style": "IPY_MODEL_ebe5e6f8af1e4e04a8a2b5939ac09039",
+            "style": "IPY_MODEL_d12a8ef069af4d79870bd783f2343184",
             "value": "Downloading: 100%"
           }
         },
-        "0180ffc200e8466191a11a723c82e43f": {
+        "4203b950e245481590e8105f31301782": {
           "model_module": "@jupyter-widgets/controls",
-          "model_name": "FloatProgressModel",
           "model_module_version": "1.5.0",
+          "model_name": "DescriptionStyleModel",
           "state": {
-            "_dom_classes": [],
             "_model_module": "@jupyter-widgets/controls",
             "_model_module_version": "1.5.0",
-            "_model_name": "FloatProgressModel",
+            "_model_name": "DescriptionStyleModel",
             "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "ProgressView",
-            "bar_style": "success",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_c4ea841cb43747cdbce35f8f9c711cde",
-            "max": 29,
-            "min": 0,
-            "orientation": "horizontal",
-            "style": "IPY_MODEL_2d937fce2e6c4b69816352bd264ded41",
-            "value": 29
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
           }
         },
-        "a07ac2935a3f4d84971ae9147a854969": {
+        "4c46904f8e944d2b834ba9d384b00a8c": {
           "model_module": "@jupyter-widgets/controls",
-          "model_name": "HTMLModel",
           "model_module_version": "1.5.0",
+          "model_name": "HBoxModel",
           "state": {
             "_dom_classes": [],
             "_model_module": "@jupyter-widgets/controls",
             "_model_module_version": "1.5.0",
-            "_model_name": "HTMLModel",
+            "_model_name": "HBoxModel",
             "_view_count": null,
             "_view_module": "@jupyter-widgets/controls",
             "_view_module_version": "1.5.0",
-            "_view_name": "HTMLView",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_64b57e3be2c743b3b0e58d338243c656",
-            "placeholder": "​",
-            "style": "IPY_MODEL_6ca9688ac7fa4e638994b91242c0ac87",
-            "value": " 29.0/29.0 [00:00&lt;00:00, 1.88kB/s]"
+            "_view_name": "HBoxView",
+            "box_style": "",
+            "children": [
+              "IPY_MODEL_ef37bbf1f34e4765b1803a607716d0d1",
+              "IPY_MODEL_c2d6041cd6674043953e094791ab9659",
+              "IPY_MODEL_e4c43817f44743388e6fd98b8dbb2eda"
+            ],
+            "layout": "IPY_MODEL_39636049d60a4bb4bde7d0ef1af25d78"
           }
         },
-        "af4ae976808042bf929ab17df10530b2": {
+        "4edc5b66f0eb44a0b05876fda90f0d1b": {
           "model_module": "@jupyter-widgets/base",
-          "model_name": "LayoutModel",
           "model_module_version": "1.2.0",
+          "model_name": "LayoutModel",
           "state": {
             "_model_module": "@jupyter-widgets/base",
             "_model_module_version": "1.2.0",
@@ -2892,10 +1631,26 @@
             "width": null
           }
         },
-        "b2277b3d600c43f999b3a07215ac2e13": {
+        "5285a390fb42415289d89585e04c8994": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "ProgressStyleModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "ProgressStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "bar_color": null,
+            "description_width": ""
+          }
+        },
+        "53643db8401846f2af6f15f5cd0c9998": {
           "model_module": "@jupyter-widgets/base",
-          "model_name": "LayoutModel",
           "model_module_version": "1.2.0",
+          "model_name": "LayoutModel",
           "state": {
             "_model_module": "@jupyter-widgets/base",
             "_model_module_version": "1.2.0",
@@ -2944,25 +1699,10 @@
             "width": null
           }
         },
-        "ebe5e6f8af1e4e04a8a2b5939ac09039": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "DescriptionStyleModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "DescriptionStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "description_width": ""
-          }
-        },
-        "c4ea841cb43747cdbce35f8f9c711cde": {
+        "5e03651dca944a5f91b675c503feeeac": {
           "model_module": "@jupyter-widgets/base",
-          "model_name": "LayoutModel",
           "model_module_version": "1.2.0",
+          "model_name": "LayoutModel",
           "state": {
             "_model_module": "@jupyter-widgets/base",
             "_model_module_version": "1.2.0",
@@ -3011,26 +1751,10 @@
             "width": null
           }
         },
-        "2d937fce2e6c4b69816352bd264ded41": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "ProgressStyleModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "ProgressStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "bar_color": null,
-            "description_width": ""
-          }
-        },
         "64b57e3be2c743b3b0e58d338243c656": {
           "model_module": "@jupyter-widgets/base",
-          "model_name": "LayoutModel",
           "model_module_version": "1.2.0",
+          "model_name": "LayoutModel",
           "state": {
             "_model_module": "@jupyter-widgets/base",
             "_model_module_version": "1.2.0",
@@ -3081,8 +1805,8 @@
         },
         "6ca9688ac7fa4e638994b91242c0ac87": {
           "model_module": "@jupyter-widgets/controls",
-          "model_name": "DescriptionStyleModel",
           "model_module_version": "1.5.0",
+          "model_name": "DescriptionStyleModel",
           "state": {
             "_model_module": "@jupyter-widgets/controls",
             "_model_module_version": "1.5.0",
@@ -3094,98 +1818,26 @@
             "description_width": ""
           }
         },
-        "aa6a7a9106554f85a91150bd65c271d0": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "HBoxModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HBoxModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HBoxView",
-            "box_style": "",
-            "children": [
-              "IPY_MODEL_ea3f471546734f5994edfdc214319368",
-              "IPY_MODEL_04a86b4164fa49de8fd47d4d373e1d81",
-              "IPY_MODEL_be067a8a406f41779e42bd35abcbfcf0"
-            ],
-            "layout": "IPY_MODEL_7df91507e47d4a6992464293ce002a29"
-          }
-        },
-        "ea3f471546734f5994edfdc214319368": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "HTMLModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HTMLModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HTMLView",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_ecef81814a7c4481aa49eb73807bfe4d",
-            "placeholder": "​",
-            "style": "IPY_MODEL_2b9b4eac7994405ca9bce38332df2629",
-            "value": "Downloading: 100%"
-          }
-        },
-        "04a86b4164fa49de8fd47d4d373e1d81": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "FloatProgressModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "FloatProgressModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "ProgressView",
-            "bar_style": "success",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_4edc5b66f0eb44a0b05876fda90f0d1b",
-            "max": 1961828,
-            "min": 0,
-            "orientation": "horizontal",
-            "style": "IPY_MODEL_5285a390fb42415289d89585e04c8994",
-            "value": 1961828
-          }
-        },
-        "be067a8a406f41779e42bd35abcbfcf0": {
+        "6f80ea06220b4a498e6169e55cd8800f": {
           "model_module": "@jupyter-widgets/controls",
-          "model_name": "HTMLModel",
           "model_module_version": "1.5.0",
+          "model_name": "ProgressStyleModel",
           "state": {
-            "_dom_classes": [],
             "_model_module": "@jupyter-widgets/controls",
             "_model_module_version": "1.5.0",
-            "_model_name": "HTMLModel",
+            "_model_name": "ProgressStyleModel",
             "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HTMLView",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_53643db8401846f2af6f15f5cd0c9998",
-            "placeholder": "​",
-            "style": "IPY_MODEL_bc4825e1a43f4a20b496d82ea3687e6f",
-            "value": " 1.96M/1.96M [00:00&lt;00:00, 2.16MB/s]"
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "bar_color": null,
+            "description_width": ""
           }
         },
         "7df91507e47d4a6992464293ce002a29": {
           "model_module": "@jupyter-widgets/base",
-          "model_name": "LayoutModel",
           "model_module_version": "1.2.0",
+          "model_name": "LayoutModel",
           "state": {
             "_model_module": "@jupyter-widgets/base",
             "_model_module_version": "1.2.0",
@@ -3234,77 +1886,99 @@
             "width": null
           }
         },
-        "ecef81814a7c4481aa49eb73807bfe4d": {
-          "model_module": "@jupyter-widgets/base",
-          "model_name": "LayoutModel",
-          "model_module_version": "1.2.0",
+        "8d57eb44d9394604981a8f8f97f48b7c": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "FloatProgressModel",
           "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "FloatProgressModel",
             "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "ProgressView",
+            "bar_style": "success",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_28d38094dcd54d6694e2efad7fea6abb",
+            "max": 995526,
+            "min": 0,
+            "orientation": "horizontal",
+            "style": "IPY_MODEL_6f80ea06220b4a498e6169e55cd8800f",
+            "value": 995526
+          }
+        },
+        "9be44ba364a344f2b6b2546ae9d61ba8": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "HBoxModel",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HBoxModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HBoxView",
+            "box_style": "",
+            "children": [
+              "IPY_MODEL_fe472df31774495c83aa159e116ba2ee",
+              "IPY_MODEL_0180ffc200e8466191a11a723c82e43f",
+              "IPY_MODEL_a07ac2935a3f4d84971ae9147a854969"
+            ],
+            "layout": "IPY_MODEL_af4ae976808042bf929ab17df10530b2"
           }
         },
-        "2b9b4eac7994405ca9bce38332df2629": {
+        "a07ac2935a3f4d84971ae9147a854969": {
           "model_module": "@jupyter-widgets/controls",
-          "model_name": "DescriptionStyleModel",
           "model_module_version": "1.5.0",
+          "model_name": "HTMLModel",
           "state": {
+            "_dom_classes": [],
             "_model_module": "@jupyter-widgets/controls",
             "_model_module_version": "1.5.0",
-            "_model_name": "DescriptionStyleModel",
+            "_model_name": "HTMLModel",
             "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "description_width": ""
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_64b57e3be2c743b3b0e58d338243c656",
+            "placeholder": "​",
+            "style": "IPY_MODEL_6ca9688ac7fa4e638994b91242c0ac87",
+            "value": " 29.0/29.0 [00:00&lt;00:00, 1.88kB/s]"
           }
         },
-        "4edc5b66f0eb44a0b05876fda90f0d1b": {
+        "aa6a7a9106554f85a91150bd65c271d0": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "HBoxModel",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HBoxModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HBoxView",
+            "box_style": "",
+            "children": [
+              "IPY_MODEL_ea3f471546734f5994edfdc214319368",
+              "IPY_MODEL_04a86b4164fa49de8fd47d4d373e1d81",
+              "IPY_MODEL_be067a8a406f41779e42bd35abcbfcf0"
+            ],
+            "layout": "IPY_MODEL_7df91507e47d4a6992464293ce002a29"
+          }
+        },
+        "af4ae976808042bf929ab17df10530b2": {
           "model_module": "@jupyter-widgets/base",
-          "model_name": "LayoutModel",
           "model_module_version": "1.2.0",
+          "model_name": "LayoutModel",
           "state": {
             "_model_module": "@jupyter-widgets/base",
             "_model_module_version": "1.2.0",
@@ -3353,26 +2027,10 @@
             "width": null
           }
         },
-        "5285a390fb42415289d89585e04c8994": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "ProgressStyleModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "ProgressStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "bar_color": null,
-            "description_width": ""
-          }
-        },
-        "53643db8401846f2af6f15f5cd0c9998": {
+        "b2277b3d600c43f999b3a07215ac2e13": {
           "model_module": "@jupyter-widgets/base",
-          "model_name": "LayoutModel",
           "model_module_version": "1.2.0",
+          "model_name": "LayoutModel",
           "state": {
             "_model_module": "@jupyter-widgets/base",
             "_model_module_version": "1.2.0",
@@ -3423,8 +2081,8 @@
         },
         "bc4825e1a43f4a20b496d82ea3687e6f": {
           "model_module": "@jupyter-widgets/controls",
-          "model_name": "DescriptionStyleModel",
           "model_module_version": "1.5.0",
+          "model_name": "DescriptionStyleModel",
           "state": {
             "_model_module": "@jupyter-widgets/controls",
             "_model_module_version": "1.5.0",
@@ -3436,32 +2094,10 @@
             "description_width": ""
           }
         },
-        "4c46904f8e944d2b834ba9d384b00a8c": {
+        "be067a8a406f41779e42bd35abcbfcf0": {
           "model_module": "@jupyter-widgets/controls",
-          "model_name": "HBoxModel",
           "model_module_version": "1.5.0",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HBoxModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HBoxView",
-            "box_style": "",
-            "children": [
-              "IPY_MODEL_ef37bbf1f34e4765b1803a607716d0d1",
-              "IPY_MODEL_c2d6041cd6674043953e094791ab9659",
-              "IPY_MODEL_e4c43817f44743388e6fd98b8dbb2eda"
-            ],
-            "layout": "IPY_MODEL_39636049d60a4bb4bde7d0ef1af25d78"
-          }
-        },
-        "ef37bbf1f34e4765b1803a607716d0d1": {
-          "model_module": "@jupyter-widgets/controls",
           "model_name": "HTMLModel",
-          "model_module_version": "1.5.0",
           "state": {
             "_dom_classes": [],
             "_model_module": "@jupyter-widgets/controls",
@@ -3473,16 +2109,16 @@
             "_view_name": "HTMLView",
             "description": "",
             "description_tooltip": null,
-            "layout": "IPY_MODEL_c3e73d423c2c41c0a942331070fda723",
+            "layout": "IPY_MODEL_53643db8401846f2af6f15f5cd0c9998",
             "placeholder": "​",
-            "style": "IPY_MODEL_087ebcb093bb41c28485bdc762fb5da6",
-            "value": "Downloading: 100%"
+            "style": "IPY_MODEL_bc4825e1a43f4a20b496d82ea3687e6f",
+            "value": " 1.96M/1.96M [00:00&lt;00:00, 2.16MB/s]"
           }
         },
         "c2d6041cd6674043953e094791ab9659": {
           "model_module": "@jupyter-widgets/controls",
-          "model_name": "FloatProgressModel",
           "model_module_version": "1.5.0",
+          "model_name": "FloatProgressModel",
           "state": {
             "_dom_classes": [],
             "_model_module": "@jupyter-widgets/controls",
@@ -3503,31 +2139,10 @@
             "value": 625
           }
         },
-        "e4c43817f44743388e6fd98b8dbb2eda": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "HTMLModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HTMLModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HTMLView",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_209ff109c8e142dfba37baea2d3d5de7",
-            "placeholder": "​",
-            "style": "IPY_MODEL_4203b950e245481590e8105f31301782",
-            "value": " 625/625 [00:00&lt;00:00, 35.2kB/s]"
-          }
-        },
-        "39636049d60a4bb4bde7d0ef1af25d78": {
+        "c3e73d423c2c41c0a942331070fda723": {
           "model_module": "@jupyter-widgets/base",
-          "model_name": "LayoutModel",
           "model_module_version": "1.2.0",
+          "model_name": "LayoutModel",
           "state": {
             "_model_module": "@jupyter-widgets/base",
             "_model_module_version": "1.2.0",
@@ -3576,10 +2191,10 @@
             "width": null
           }
         },
-        "c3e73d423c2c41c0a942331070fda723": {
+        "c4ea841cb43747cdbce35f8f9c711cde": {
           "model_module": "@jupyter-widgets/base",
-          "model_name": "LayoutModel",
           "model_module_version": "1.2.0",
+          "model_name": "LayoutModel",
           "state": {
             "_model_module": "@jupyter-widgets/base",
             "_model_module_version": "1.2.0",
@@ -3628,10 +2243,10 @@
             "width": null
           }
         },
-        "087ebcb093bb41c28485bdc762fb5da6": {
+        "d12a8ef069af4d79870bd783f2343184": {
           "model_module": "@jupyter-widgets/controls",
-          "model_name": "DescriptionStyleModel",
           "model_module_version": "1.5.0",
+          "model_name": "DescriptionStyleModel",
           "state": {
             "_model_module": "@jupyter-widgets/controls",
             "_model_module_version": "1.5.0",
@@ -3645,8 +2260,8 @@
         },
         "de270f0aa8194e0bb470e693a35d7d6e": {
           "model_module": "@jupyter-widgets/base",
-          "model_name": "LayoutModel",
           "model_module_version": "1.2.0",
+          "model_name": "LayoutModel",
           "state": {
             "_model_module": "@jupyter-widgets/base",
             "_model_module_version": "1.2.0",
@@ -3695,26 +2310,82 @@
             "width": null
           }
         },
-        "2924cdc1348942cfb23f28a5383af3e4": {
+        "e0df5e2d4ebd4eb3b126c16dadb2ba62": {
           "model_module": "@jupyter-widgets/controls",
-          "model_name": "ProgressStyleModel",
           "model_module_version": "1.5.0",
+          "model_name": "DescriptionStyleModel",
           "state": {
             "_model_module": "@jupyter-widgets/controls",
             "_model_module_version": "1.5.0",
-            "_model_name": "ProgressStyleModel",
+            "_model_name": "DescriptionStyleModel",
             "_view_count": null,
             "_view_module": "@jupyter-widgets/base",
             "_view_module_version": "1.2.0",
             "_view_name": "StyleView",
-            "bar_color": null,
             "description_width": ""
           }
         },
-        "209ff109c8e142dfba37baea2d3d5de7": {
+        "e4c43817f44743388e6fd98b8dbb2eda": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "HTMLModel",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_209ff109c8e142dfba37baea2d3d5de7",
+            "placeholder": "​",
+            "style": "IPY_MODEL_4203b950e245481590e8105f31301782",
+            "value": " 625/625 [00:00&lt;00:00, 35.2kB/s]"
+          }
+        },
+        "ea3f471546734f5994edfdc214319368": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "HTMLModel",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_ecef81814a7c4481aa49eb73807bfe4d",
+            "placeholder": "​",
+            "style": "IPY_MODEL_2b9b4eac7994405ca9bce38332df2629",
+            "value": "Downloading: 100%"
+          }
+        },
+        "ebe5e6f8af1e4e04a8a2b5939ac09039": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "DescriptionStyleModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        },
+        "ecef81814a7c4481aa49eb73807bfe4d": {
           "model_module": "@jupyter-widgets/base",
-          "model_name": "LayoutModel",
           "model_module_version": "1.2.0",
+          "model_name": "LayoutModel",
           "state": {
             "_model_module": "@jupyter-widgets/base",
             "_model_module_version": "1.2.0",
@@ -3763,19 +2434,46 @@
             "width": null
           }
         },
-        "4203b950e245481590e8105f31301782": {
+        "ef37bbf1f34e4765b1803a607716d0d1": {
           "model_module": "@jupyter-widgets/controls",
-          "model_name": "DescriptionStyleModel",
           "model_module_version": "1.5.0",
+          "model_name": "HTMLModel",
           "state": {
+            "_dom_classes": [],
             "_model_module": "@jupyter-widgets/controls",
             "_model_module_version": "1.5.0",
-            "_model_name": "DescriptionStyleModel",
+            "_model_name": "HTMLModel",
             "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "description_width": ""
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_c3e73d423c2c41c0a942331070fda723",
+            "placeholder": "​",
+            "style": "IPY_MODEL_087ebcb093bb41c28485bdc762fb5da6",
+            "value": "Downloading: 100%"
+          }
+        },
+        "fe472df31774495c83aa159e116ba2ee": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "HTMLModel",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_b2277b3d600c43f999b3a07215ac2e13",
+            "placeholder": "​",
+            "style": "IPY_MODEL_ebe5e6f8af1e4e04a8a2b5939ac09039",
+            "value": "Downloading: 100%"
           }
         }
       }
@@ -3783,4 +2481,4 @@
   },
   "nbformat": 4,
   "nbformat_minor": 0
-}
\ No newline at end of file
+}
-- 
GitLab