diff --git a/notebooks/Classification_BertFineTuning.ipynb b/notebooks/Classification_BertFineTuning.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..692d3b5af2a782eef88210ca4cb8b64052dd80a1
--- /dev/null
+++ b/notebooks/Classification_BertFineTuning.ipynb
@@ -0,0 +1,4367 @@
+{
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "4YCMlsNwOWs0"
+      },
+      "source": [
+        "# BERT fine-tuning for EDdA classification"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "6xdYI9moOQSv"
+      },
+      "source": [
+        "## Set up the Colab environment"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 1,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "WF0qFN_g3ekz",
+        "outputId": "f3a5f049-24ee-418f-fe5e-84c633234ad8"
+      },
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Your runtime has 27.3 gigabytes of available RAM\n",
+            "\n",
+            "You are using a high-RAM runtime!\n"
+          ]
+        }
+      ],
+      "source": [
+        "from psutil import virtual_memory\n",
+        "ram_gb = virtual_memory().total / 1e9\n",
+        "print('Your runtime has {:.1f} gigabytes of available RAM\\n'.format(ram_gb))\n",
+        "\n",
+        "if ram_gb < 20:\n",
+        "  print('Not using a high-RAM runtime')\n",
+        "else:\n",
+        "  print('You are using a high-RAM runtime!')"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 2,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "vL0S-s9Uofvn",
+        "outputId": "4b7efa4d-7f09-4c8e-bc98-99e6099ede32"
+      },
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Mounted at /content/drive\n"
+          ]
+        }
+      ],
+      "source": [
+        "from google.colab import drive\n",
+        "drive.mount('/content/drive')"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "8hzEGHl7gmzk"
+      },
+      "source": [
+        "## Set up the GPU"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 3,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "dPOU-Efhf4ui",
+        "outputId": "121dd21e-f98c-483d-d6d1-2838f732a4e2"
+      },
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "There are 1 GPU(s) available.\n",
+            "We will use the GPU: Tesla P100-PCIE-16GB\n"
+          ]
+        }
+      ],
+      "source": [
+        "import torch\n",
+        "\n",
+        "# If there's a GPU available...\n",
+        "if torch.cuda.is_available():    \n",
+        "\n",
+        "    # Tell PyTorch to use the GPU.    \n",
+        "    device = torch.device(\"cuda\")\n",
+        "\n",
+        "    print('There are %d GPU(s) available.' % torch.cuda.device_count())\n",
+        "\n",
+        "    print('We will use the GPU:', torch.cuda.get_device_name(0))\n",
+        "\n",
+        "# If not...\n",
+        "else:\n",
+        "    print('No GPU available, using the CPU instead.')\n",
+        "    device = torch.device(\"cpu\")"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "Jr-S9yYIgGkA"
+      },
+      "source": [
+        "## Install packages"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 4,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "pwmZ5bBvgGNh",
+        "outputId": "fce0a8bf-1779-4079-c7ac-200ebb2678c5"
+      },
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Collecting transformers==4.10.3\n",
+            "  Downloading transformers-4.10.3-py3-none-any.whl (2.8 MB)\n",
+            "\u001b[K     |â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 2.8 MB 5.2 MB/s \n",
+            "\u001b[?25hRequirement already satisfied: requests in /usr/local/lib/python3.7/dist-packages (from transformers==4.10.3) (2.23.0)\n",
+            "Collecting sacremoses\n",
+            "  Downloading sacremoses-0.0.47-py2.py3-none-any.whl (895 kB)\n",
+            "\u001b[K     |â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 895 kB 47.4 MB/s \n",
+            "\u001b[?25hRequirement already satisfied: importlib-metadata in /usr/local/lib/python3.7/dist-packages (from transformers==4.10.3) (4.11.0)\n",
+            "Requirement already satisfied: packaging in /usr/local/lib/python3.7/dist-packages (from transformers==4.10.3) (21.3)\n",
+            "Collecting tokenizers<0.11,>=0.10.1\n",
+            "  Downloading tokenizers-0.10.3-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (3.3 MB)\n",
+            "\u001b[K     |â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 3.3 MB 49.6 MB/s \n",
+            "\u001b[?25hRequirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.7/dist-packages (from transformers==4.10.3) (2019.12.20)\n",
+            "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.7/dist-packages (from transformers==4.10.3) (1.21.5)\n",
+            "Collecting huggingface-hub>=0.0.12\n",
+            "  Downloading huggingface_hub-0.4.0-py3-none-any.whl (67 kB)\n",
+            "\u001b[K     |â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 67 kB 5.8 MB/s \n",
+            "\u001b[?25hCollecting pyyaml>=5.1\n",
+            "  Downloading PyYAML-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (596 kB)\n",
+            "\u001b[K     |â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 596 kB 46.7 MB/s \n",
+            "\u001b[?25hRequirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.7/dist-packages (from transformers==4.10.3) (4.62.3)\n",
+            "Requirement already satisfied: filelock in /usr/local/lib/python3.7/dist-packages (from transformers==4.10.3) (3.4.2)\n",
+            "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.7/dist-packages (from huggingface-hub>=0.0.12->transformers==4.10.3) (3.10.0.2)\n",
+            "Requirement already satisfied: pyparsing!=3.0.5,>=2.0.2 in /usr/local/lib/python3.7/dist-packages (from packaging->transformers==4.10.3) (3.0.7)\n",
+            "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.7/dist-packages (from importlib-metadata->transformers==4.10.3) (3.7.0)\n",
+            "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests->transformers==4.10.3) (1.24.3)\n",
+            "Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests->transformers==4.10.3) (3.0.4)\n",
+            "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests->transformers==4.10.3) (2.10)\n",
+            "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests->transformers==4.10.3) (2021.10.8)\n",
+            "Requirement already satisfied: joblib in /usr/local/lib/python3.7/dist-packages (from sacremoses->transformers==4.10.3) (1.1.0)\n",
+            "Requirement already satisfied: six in /usr/local/lib/python3.7/dist-packages (from sacremoses->transformers==4.10.3) (1.15.0)\n",
+            "Requirement already satisfied: click in /usr/local/lib/python3.7/dist-packages (from sacremoses->transformers==4.10.3) (7.1.2)\n",
+            "Installing collected packages: pyyaml, tokenizers, sacremoses, huggingface-hub, transformers\n",
+            "  Attempting uninstall: pyyaml\n",
+            "    Found existing installation: PyYAML 3.13\n",
+            "    Uninstalling PyYAML-3.13:\n",
+            "      Successfully uninstalled PyYAML-3.13\n",
+            "Successfully installed huggingface-hub-0.4.0 pyyaml-6.0 sacremoses-0.0.47 tokenizers-0.10.3 transformers-4.10.3\n",
+            "Collecting sentencepiece\n",
+            "  Downloading sentencepiece-0.1.96-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.2 MB)\n",
+            "\u001b[K     |â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 1.2 MB 5.1 MB/s \n",
+            "\u001b[?25hInstalling collected packages: sentencepiece\n",
+            "Successfully installed sentencepiece-0.1.96\n"
+          ]
+        }
+      ],
+      "source": [
+        "!pip install transformers==4.10.3\n",
+        "!pip install sentencepiece"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "wSqbrupGMc1M"
+      },
+      "source": [
+        "## Import libraries"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 5,
+      "metadata": {
+        "id": "SkErnwgMMbRj"
+      },
+      "outputs": [],
+      "source": [
+        "import pandas as pd \n",
+        "import numpy as np\n",
+        "import csv\n",
+        "import os\n",
+        "import pickle\n",
+        "from sklearn import preprocessing\n",
+        "from sklearn.model_selection import train_test_split\n",
+        "from sklearn.metrics import *\n",
+        "\n",
+        "from transformers import BertTokenizer, CamembertTokenizer, BertForSequenceClassification, AdamW, BertConfig, CamembertForSequenceClassification\n",
+        "from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler\n",
+        "from transformers import get_linear_schedule_with_warmup\n",
+        "\n",
+        "import time\n",
+        "import datetime\n",
+        "\n",
+        "import random\n",
+        "\n",
+        "import matplotlib.pyplot as plt\n",
+        "from sklearn.metrics import plot_confusion_matrix\n",
+        "from sklearn.metrics import confusion_matrix\n",
+        "from sklearn.metrics import classification_report\n",
+        "import seaborn as sns"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "12SA-qPFgsVo"
+      },
+      "source": [
+        "## Utility functions"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 28,
+      "metadata": {
+        "id": "WkIVcabUgxIl"
+      },
+      "outputs": [],
+      "source": [
+        "def resample_classes(df, classColumnName, numberOfInstances):\n",
+        "  # Randomly sample at most numberOfInstances rows from each class\n",
+        "  replace = False  # sample without replacement\n",
+        "  fn = lambda obj: obj.loc[np.random.choice(obj.index, numberOfInstances if len(obj) > numberOfInstances else len(obj), replace),:]\n",
+        "  return df.groupby(classColumnName, as_index=False).apply(fn)\n",
+        "    \n",
+        "# Function to calculate the accuracy of our predictions vs labels\n",
+        "def flat_accuracy(preds, labels):\n",
+        "  pred_flat = np.argmax(preds, axis=1).flatten()\n",
+        "  labels_flat = labels.flatten()\n",
+        "  return np.sum(pred_flat == labels_flat) / len(labels_flat) \n",
+        "\n",
+        "def format_time(elapsed):\n",
+        "  '''\n",
+        "  Takes a time in seconds and returns a string hh:mm:ss\n",
+        "  '''\n",
+        "  # Round to the nearest second.\n",
+        "  elapsed_rounded = int(round((elapsed)))\n",
+        "\n",
+        "  # Format as hh:mm:ss\n",
+        "  return str(datetime.timedelta(seconds=elapsed_rounded))"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "c5QKcXulhNJ-"
+      },
+      "source": [
+        "## Load Data"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 6,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "jdCdUVOTZrqh",
+        "outputId": "ac52be55-ed4b-4c50-dc8c-9124ca6b71e5"
+      },
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "--2022-02-18 07:03:04--  https://projet.liris.cnrs.fr/geode/EDdA-Classification/datasets/training_set.tsv\n",
+            "Resolving projet.liris.cnrs.fr (projet.liris.cnrs.fr)... 134.214.142.28\n",
+            "Connecting to projet.liris.cnrs.fr (projet.liris.cnrs.fr)|134.214.142.28|:443... connected.\n",
+            "HTTP request sent, awaiting response... 200 OK\n",
+            "Length: 175634219 (167M) [text/tab-separated-values]\n",
+            "Saving to: ‘training_set.tsv’\n",
+            "\n",
+            "training_set.tsv    100%[===================>] 167.50M  27.7MB/s    in 6.8s    \n",
+            "\n",
+            "2022-02-18 07:03:11 (24.7 MB/s) - ‘training_set.tsv’ saved [175634219/175634219]\n",
+            "\n",
+            "--2022-02-18 07:03:11--  https://projet.liris.cnrs.fr/geode/EDdA-Classification/datasets/test_set.tsv\n",
+            "Resolving projet.liris.cnrs.fr (projet.liris.cnrs.fr)... 134.214.142.28\n",
+            "Connecting to projet.liris.cnrs.fr (projet.liris.cnrs.fr)|134.214.142.28|:443... connected.\n",
+            "HTTP request sent, awaiting response... 200 OK\n",
+            "Length: 42730598 (41M) [text/tab-separated-values]\n",
+            "Saving to: ‘test_set.tsv’\n",
+            "\n",
+            "test_set.tsv        100%[===================>]  40.75M  19.5MB/s    in 2.1s    \n",
+            "\n",
+            "2022-02-18 07:03:14 (19.5 MB/s) - ‘test_set.tsv’ saved [42730598/42730598]\n",
+            "\n"
+          ]
+        }
+      ],
+      "source": [
+        "!wget https://projet.liris.cnrs.fr/geode/EDdA-Classification/datasets/training_set.tsv\n",
+        "!wget https://projet.liris.cnrs.fr/geode/EDdA-Classification/datasets/test_set.tsv"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "9d1IxD_bLEvp"
+      },
+      "source": [
+        "### Loading dataset"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "train_path = 'training_set.tsv'\n",
+        "test_path =  'test_set.tsv'"
+      ],
+      "metadata": {
+        "id": "7JEnKknRoClH"
+      },
+      "execution_count": 7,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 9,
+      "metadata": {
+        "id": "5u1acjunhoxe",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 496
+        },
+        "outputId": "3038048d-6506-473d-85c9-2d3ebdcc6a72"
+      },
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/html": [
+              "\n",
+              "  <div id=\"df-5a25a6b1-21af-4b77-8ab1-a5df357d33f1\">\n",
+              "    <div class=\"colab-df-container\">\n",
+              "      <div>\n",
+              "<style scoped>\n",
+              "    .dataframe tbody tr th:only-of-type {\n",
+              "        vertical-align: middle;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe tbody tr th {\n",
+              "        vertical-align: top;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe thead th {\n",
+              "        text-align: right;\n",
+              "    }\n",
+              "</style>\n",
+              "<table border=\"1\" class=\"dataframe\">\n",
+              "  <thead>\n",
+              "    <tr style=\"text-align: right;\">\n",
+              "      <th></th>\n",
+              "      <th>volume</th>\n",
+              "      <th>numero</th>\n",
+              "      <th>head</th>\n",
+              "      <th>normClass</th>\n",
+              "      <th>classEDdA</th>\n",
+              "      <th>author</th>\n",
+              "      <th>id_enccre</th>\n",
+              "      <th>domaine_enccre</th>\n",
+              "      <th>ensemble_domaine_enccre</th>\n",
+              "      <th>content</th>\n",
+              "      <th>contentWithoutClass</th>\n",
+              "      <th>firstParagraph</th>\n",
+              "      <th>nb_words</th>\n",
+              "    </tr>\n",
+              "  </thead>\n",
+              "  <tbody>\n",
+              "    <tr>\n",
+              "      <th>0</th>\n",
+              "      <td>15</td>\n",
+              "      <td>5</td>\n",
+              "      <td>SENACULE</td>\n",
+              "      <td>Antiquité romaine</td>\n",
+              "      <td>Antiq. rom.</td>\n",
+              "      <td>Jaucourt</td>\n",
+              "      <td>v15-4-0</td>\n",
+              "      <td>antiquité</td>\n",
+              "      <td>Antiquité</td>\n",
+              "      <td>SENACULE, s. m. (Antiq. rom.) senaculum:\\nlieu...</td>\n",
+              "      <td>senacule s. m.   senaculum \\n lieu où tenoit s...</td>\n",
+              "      <td>senacule s. m.   senaculum \\n lieu où tenoit s...</td>\n",
+              "      <td>91</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>1</th>\n",
+              "      <td>8</td>\n",
+              "      <td>3509</td>\n",
+              "      <td>Investir</td>\n",
+              "      <td>Marine</td>\n",
+              "      <td>Marine.</td>\n",
+              "      <td>Le Blond</td>\n",
+              "      <td>v8-2689-1</td>\n",
+              "      <td>marine</td>\n",
+              "      <td>Marine</td>\n",
+              "      <td>Investir, (Marine.) se dit parmi les matelots\\...</td>\n",
+              "      <td>investir   parmi matelot \\n méditerranée échou...</td>\n",
+              "      <td>investir   parmi matelot \\n méditerranée échou...</td>\n",
+              "      <td>30</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>2</th>\n",
+              "      <td>2</td>\n",
+              "      <td>3428</td>\n",
+              "      <td>BOYARDS, ou BOJARES, ou BOJARDS</td>\n",
+              "      <td>Histoire moderne</td>\n",
+              "      <td>Hist. mod.</td>\n",
+              "      <td>Mallet</td>\n",
+              "      <td>v2-2041-0</td>\n",
+              "      <td>histoire</td>\n",
+              "      <td>Histoire</td>\n",
+              "      <td>BOYARDS, ou BOJARES, ou BOJARDS, s. m.\\npl. (H...</td>\n",
+              "      <td>boyard bojares bojards s. m. \\n pl   nom donne...</td>\n",
+              "      <td>boyard bojares bojards s. m. \\n pl   nom donne...</td>\n",
+              "      <td>218</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>3</th>\n",
+              "      <td>2</td>\n",
+              "      <td>6532</td>\n",
+              "      <td>Cerceau</td>\n",
+              "      <td>Tonnelier</td>\n",
+              "      <td>Tonneliers</td>\n",
+              "      <td>unsigned</td>\n",
+              "      <td>v2-4266-3</td>\n",
+              "      <td>tonnelier</td>\n",
+              "      <td>Métiers</td>\n",
+              "      <td>Cerceau, c'est un lien de bois qui se plie fac...</td>\n",
+              "      <td>cerceau lien bois plie facilement \\n   servent...</td>\n",
+              "      <td>cerceau lien bois plie facilement \\n   servent...</td>\n",
+              "      <td>229</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>4</th>\n",
+              "      <td>8</td>\n",
+              "      <td>1827</td>\n",
+              "      <td>HIERONYMITES, ou HERMITES DE S. JEROME</td>\n",
+              "      <td>unclassified</td>\n",
+              "      <td>unclassified</td>\n",
+              "      <td>unsigned</td>\n",
+              "      <td>v8-1404-0</td>\n",
+              "      <td>histoireecclésiastique</td>\n",
+              "      <td>Religion</td>\n",
+              "      <td>HIERONYMITES, ou HERMITES DE S. JEROME, Voyez ...</td>\n",
+              "      <td>hieronymites hermites s. jerome jeronymites he...</td>\n",
+              "      <td>hieronymites hermites s. jerome jeronymites he...</td>\n",
+              "      <td>34</td>\n",
+              "    </tr>\n",
+              "  </tbody>\n",
+              "</table>\n",
+              "</div>\n",
+              "      <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-5a25a6b1-21af-4b77-8ab1-a5df357d33f1')\"\n",
+              "              title=\"Convert this dataframe to an interactive table.\"\n",
+              "              style=\"display:none;\">\n",
+              "        \n",
+              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
+              "       width=\"24px\">\n",
+              "    <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n",
+              "    <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n",
+              "  </svg>\n",
+              "      </button>\n",
+              "      \n",
+              "  <style>\n",
+              "    .colab-df-container {\n",
+              "      display:flex;\n",
+              "      flex-wrap:wrap;\n",
+              "      gap: 12px;\n",
+              "    }\n",
+              "\n",
+              "    .colab-df-convert {\n",
+              "      background-color: #E8F0FE;\n",
+              "      border: none;\n",
+              "      border-radius: 50%;\n",
+              "      cursor: pointer;\n",
+              "      display: none;\n",
+              "      fill: #1967D2;\n",
+              "      height: 32px;\n",
+              "      padding: 0 0 0 0;\n",
+              "      width: 32px;\n",
+              "    }\n",
+              "\n",
+              "    .colab-df-convert:hover {\n",
+              "      background-color: #E2EBFA;\n",
+              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
+              "      fill: #174EA6;\n",
+              "    }\n",
+              "\n",
+              "    [theme=dark] .colab-df-convert {\n",
+              "      background-color: #3B4455;\n",
+              "      fill: #D2E3FC;\n",
+              "    }\n",
+              "\n",
+              "    [theme=dark] .colab-df-convert:hover {\n",
+              "      background-color: #434B5C;\n",
+              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
+              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
+              "      fill: #FFFFFF;\n",
+              "    }\n",
+              "  </style>\n",
+              "\n",
+              "      <script>\n",
+              "        const buttonEl =\n",
+              "          document.querySelector('#df-5a25a6b1-21af-4b77-8ab1-a5df357d33f1 button.colab-df-convert');\n",
+              "        buttonEl.style.display =\n",
+              "          google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
+              "\n",
+              "        async function convertToInteractive(key) {\n",
+              "          const element = document.querySelector('#df-5a25a6b1-21af-4b77-8ab1-a5df357d33f1');\n",
+              "          const dataTable =\n",
+              "            await google.colab.kernel.invokeFunction('convertToInteractive',\n",
+              "                                                     [key], {});\n",
+              "          if (!dataTable) return;\n",
+              "\n",
+              "          const docLinkHtml = 'Like what you see? Visit the ' +\n",
+              "            '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
+              "            + ' to learn more about interactive tables.';\n",
+              "          element.innerHTML = '';\n",
+              "          dataTable['output_type'] = 'display_data';\n",
+              "          await google.colab.output.renderOutput(dataTable, element);\n",
+              "          const docLink = document.createElement('div');\n",
+              "          docLink.innerHTML = docLinkHtml;\n",
+              "          element.appendChild(docLink);\n",
+              "        }\n",
+              "      </script>\n",
+              "    </div>\n",
+              "  </div>\n",
+              "  "
+            ],
+            "text/plain": [
+              "   volume  numero  ...                                     firstParagraph nb_words\n",
+              "0      15       5  ...  senacule s. m.   senaculum \\n lieu où tenoit s...       91\n",
+              "1       8    3509  ...  investir   parmi matelot \\n méditerranée échou...       30\n",
+              "2       2    3428  ...  boyard bojares bojards s. m. \\n pl   nom donne...      218\n",
+              "3       2    6532  ...  cerceau lien bois plie facilement \\n   servent...      229\n",
+              "4       8    1827  ...  hieronymites hermites s. jerome jeronymites he...       34\n",
+              "\n",
+              "[5 rows x 13 columns]"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 9
+        }
+      ],
+      "source": [
+        "df_train = pd.read_csv(train_path, sep=\"\\t\")\n",
+        "df_train.head()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 29,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "zj3JDoJNfx1f",
+        "outputId": "f1ec1fcf-b287-460a-8110-dbb00091c203"
+      },
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "(46807, 13)\n"
+          ]
+        }
+      ],
+      "source": [
+        "print(df_train.shape)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "## Configuration"
+      ],
+      "metadata": {
+        "id": "dADYGtTcn4AB"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "columnText = 'contentWithoutClass'\n",
+        "columnClass = 'ensemble_domaine_enccre'\n",
+        "\n",
+        "maxOfInstancePerClass = 10000\n",
+        "\n",
+        "#model_chosen = \"bert\"\n",
+        "model_chosen = \"camembert\"\n",
+        "\n",
+        "batch_size = 8  # 16 or 32 recommended\n",
+        "max_len = 512\n",
+        "\n",
+        "path = \"drive/MyDrive/Classification-EDdA/\"\n",
+        "encoder_filename = \"label_encoder.pkl\""
+      ],
+      "metadata": {
+        "id": "I0OrfFsBn4Io"
+      },
+      "execution_count": 30,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "## Preprocessing"
+      ],
+      "metadata": {
+        "id": "t3brU-Yvn4XS"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "if maxOfInstancePerClass != 10000:\n",
+        "  df_train = resample_classes(df_train, columnClass, maxOfInstancePerClass)"
+      ],
+      "metadata": {
+        "id": "aQCLJE4Jtm7v"
+      },
+      "execution_count": 31,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 32,
+      "metadata": {
+        "id": "zrjZvs2dhzAy"
+      },
+      "outputs": [],
+      "source": [
+        "labels  = df_train[columnClass]\n",
+        "numberOfClasses = labels.nunique()\n",
+        "\n",
+        "\n",
+        "if os.path.isfile(path+encoder_filename):    \n",
+        "    # load existing encoder \n",
+        "    with open(path+encoder_filename, 'rb') as file:\n",
+        "      encoder = pickle.load(file)\n",
+        "\n",
+        "else:\n",
+        "  encoder = preprocessing.LabelEncoder()\n",
+        "  encoder.fit(labels)\n",
+        "\n",
+        "  with open(path+encoder_filename, 'wb') as file:\n",
+        "      pickle.dump(encoder, file)\n",
+        "\n",
+        "\n",
+        "labels = encoder.transform(labels)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 33,
+      "metadata": {
+        "id": "Xt_PhH_6h1_3"
+      },
+      "outputs": [],
+      "source": [
+        "sentences_train = df_train[columnText].values\n",
+        "labels_train = labels.tolist()\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 34,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "Dq_KF5WAsbpC",
+        "outputId": "7925ce5a-4b9e-4147-fdc1-f2916d0e6600"
+      },
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "array(['senacule s. m.   senaculum \\n lieu oÃ¹ tenoit sÃ©nat rome \\n sÃ©nacules endroit oÃ¹ corps illustre assembloit \\n capitole forum \\n porte capÃ¨ne troisieme prÃ¨s temple \\n bellone cirque flaminien empereur hÃ©liogabale bÃ¢tir lieu assemblÃ©e dame \\n lieu appellÃ© senaculum matronarum d. j.',\n",
+              "       'investir   parmi matelot \\n mÃ©diterranÃ©e Ã©chouer toucher \\n cÃ´te banc sable q',\n",
+              "       'boyard bojares bojards s. m. \\n pl   nom donne grand seigneur \\n moscovie becman boyard \\n chez russiens chose haute noblesse \\n pays auteur ajoÃ»te \\n acte public czar nomme boyard \\n avant waivodes waivodes \\n olÃ©arius voyage moscovie \\n grand principal membre conseil Ã©tat \\n moscou magnifiques hÃ´tel \\n obligÃ© suivre prince voyage \\n jour cÃ©rÃ©monie vÃªtus \\n tunique brocard enrichie perle couvert \\n grand bonnet fourrÃ© renard noir \\n prÃ©sident tribunal justice depuis \\n czar pierre ier a tirÃ© russie grossieretÃ© \\n oÃ¹ plongÃ©e a laissÃ© boyard \\n titre noblesse joÃ¼issent grand \\n considÃ©ration paroÃ®t ayent grand \\n part gouvernement gramme',\n",
+              "       ...,\n",
+              "       'salta gÃ©og mod ville ouverte amÃ©rique mÃ©ridional tueman petit riviere \\n midi s. salvador 15 lieue estreco quoique ville petit commerce \\n beaucoup avantageusement pÃ©rou blÃ© \\n farine bÃ©tail vin chair salÃ©e c. latit \\n mÃ©ridional 24 56 d. j.',\n",
+              "       'hydrochoos s. m.   constellation \\n nomme latin aquarius franÃ§ois \\n verseau signe zodiaque \\n composÃ© trente Ã©toile soleil \\n moi janvier tire nom grec latin \\n ordinairement pluvieux grece \\n italie nom franÃ§ois rÃ©pond \\n idÃ©e verseau d.',\n",
+              "       \"bois chauffage bois chauffage \\n flottÃ© marchand bois \\n embarquent port rivieres navigables \\n bois amenÃ©s charroi empilent \\n ensuite thÃ©atre voit \\n port place ville pari a accordÃ© \\n usage chantier sorte marchand \\n font guere tiers provision \\n ville c. \\n marchand bois flottÃ© font \\n venir bois province Ã©loignÃ©es \\n jettent abord bois perdu ruisseau entrent \\n rivieres lesquelles commerce \\n Ã©tabli ensuite mÃªme rivieres amenent \\n mÃªme encore bois perdu endroit \\n oÃ¹ possible mettre train conduire \\n pari nÃ©anmoins avoir rÃ©tirÃ©s \\n eau avant flotter train avoir \\n sÃ©cher suffisamment sans quoi bois iroit fond \\n marchand font tiers provision \\n a quelques siecles apprÃ©hension \\n pari manquÃ¢t jour bois chauffage forÃªt environs dÃ©truisoient \\n prÃ©voyoit jour faudroit transporter bois \\n province Ã©loignÃ©es rendroit marchandise \\n utile usage gÃ©nÃ©ral prix \\n exorbitant occasionnÃ© coÃ»t charroi \\n demandÃ© alors plÃ»part sentent \\n moins aujourd'hui mÃ©rite invention \\n flottage bois comment pourroit remÃ©dier \\n terrible inconvÃ©nient menacÃ© \\n auroient crois bien embarrassÃ©s accroissement \\n entretien forÃªt \\n apparence unique ressource effet \\n moyen long coÃ»teux pÃ©nibles rÃ©duisit \\n alors prudence gouvernement capitale \\n\\n\\n  point devenir beaucoup moins \\n habitÃ©e chÃ©retÃ© bois nommÃ© jean \\n rouvet bourgeois pari imagina 1549 rassembler \\n eau plusieurs ruisseau rivieres \\n non navigables jetter bois coupÃ© \\n forÃªt Ã©loignÃ©es descendre \\n grand rivieres lÃ  former train \\n amener flot sans bateau pari ose assÃ»rer invention utile \\n royaume plusieurs bataille gagnÃ©es mÃ©ritoit \\n honneur autant moins aucune beau action \\n jean rouvet premier essai morvant \\n rassembla ruisseau contrÃ©e \\n couper bois abandonna hardiment courant \\n eau rÃ©ussit projet traitÃ© folie \\n avant exÃ©cution traversÃ© succÃ¨s \\n coÃ»tume portÃ© perfection \\n reÃ§ut 
Ã©tendue susceptible \\n 1566 renÃ© arnoul article \\n train maniere construire voyent \\n arriver pari longue masse bois effrayÃ©s \\n conduisent approche \\n pont a guere remontent \\n Ã©tendue vÃ»es intrÃ©piditÃ© premier inventeur \\n osa rassembler eau grand frai \\n jetter ensuite reste fortune \\n marchand bois flottÃ© un \\n bourgeois forain a beaucoup \\n bourgeois forain fassent commerce \\n bois vient pays amont contraire \\n a beaucoup forain bourgeois \\n fassent commerce pays aval \\n concerne bois chauffage rÃ©duit \\n faÃ§on tems tirer vente voiture \\n dÃ©chargeage diligence voiture \\n arrivÃ©e vente chantier \\n officier veillent \\n faÃ§on enjoint donner bois \\n brÃ»ler piÃ©s demi longueur bois \\n moule dix-huit pouce tour bois corde \\n quartier traverse autant bois quartier \\n traverse fendu a dix-huit pouce tour \\n mesure moule a va \\n bois corde membrure bois taillis \\n avoir pouce tour bois andelle a \\n grosseur court a piÃ©s \\n demi environ \\n sortie vente marchand tenu \\n couper sortir bois vente tems \\n fixÃ© Ã©gard lieux \\n qualitÃ© arpens \\n voiture permis voiturer depuis forÃªt \\n rivieres travers terre \\n avertissant jour auparavant publication \\n prÃ´ne jetter bois rivieres \\n pousser ruisseau Ã©tang fossÃ© chÃ¢teau \\n c. 
sans puissent empÃªchÃ©s \\n \\n diligence dÃ©fendu sÃ©journer chemin \\n sans nÃ©cessitÃ© dÃ©charger ailleurs pari \\n vente enjoint mettre chantier \\n peuvent vendu ailleurs \\n officier ville commet personne \\n veiller distribution diffÃ©rence \\n a bois chauffage flotte \\n tire taille voiture mesure \\n relativement taille distribue gros bois \\n menu bois voiture bois bois \\n flottÃ© mesure bois moule compte \\n bois corde \\n gros bois compris sou nom gÃ©nÃ©rique \\n bÃ»che chaque bÃ»che bois \\n avoir dÃ©jÃ  \\n piÃ©s demi long \\n grosse bÃ»che nommÃ©es bois moule moulure compte mesurent \\n moule anneau anneau \\n doivent avoir dix-huit pouce tour \\n bois traverse suit immÃ©diatement grosseur \\n bois compte moule avoir dix-sept \\n pouce tour a comprennent sou \\n dÃ©nomination bois blanc \\n appelle bois taillis a \\n pouce tour \\n bois corde avoir moins dix-sept pouce \\n appellÃ© bois corde bucherons plantent corde pieu quarrÃ© \\n cÃ´tÃ© a piÃ©s chaque pieu a piÃ©s \\n haut -lÃ  mesure corde contient \\n voit foi 64 256 piÃ©s cube bois \\n mÃ©thode mesurer bois a durÃ© \\n 1641 ordonnÃ© servir membrure \\n charpente retint nom corde \\n corde membrure \\n menu bois coteret fagot bourrÃ©e \\n a coterets bois taillis fendu coterets bois rond \\n ci viennent yonne doivent \\n avoir un piÃ©s long dixsept \\n dix-huit pouce tour \\n fagot fait branche arbre menues \\n doivent avoir piÃ©s demi long dixsept \\n dix-huit pouce tour \\n bourrÃ©e espece fagot faite \\n brossailles Ã©pine ronce c. \\n voici encore quelques dÃ©nomination donne \\n bois chauffage\"],\n",
+              "      dtype=object)"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 34
+        }
+      ],
+      "source": [
+        "sentences_train"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "Gs4Agx_5h43M"
+      },
+      "source": [
+        "# Model\n",
+        "## Tokenisation & Input Formatting"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 35,
+      "metadata": {
+        "id": "YZ5PhEYZiCEA"
+      },
+      "outputs": [],
+      "source": [
+        "# Pick the pretrained checkpoint identifier for the selected architecture.\n",
+        "# `tokeniser_bert` is the name handed to the tokenizer's from_pretrained();\n",
+        "# `model_bert` carries the same identifier for the model.\n",
+        "if model_chosen == \"bert\":\n",
+        "  tokeniser_bert = 'bert-base-multilingual-cased'\n",
+        "  model_bert = 'bert-base-multilingual-cased'\n",
+        "elif model_chosen == \"camembert\":\n",
+        "  tokeniser_bert = 'camembert-base'\n",
+        "  model_bert = 'camembert-base'\n",
+        "else:\n",
+        "  # Fail here instead of continuing with undefined names, or with values\n",
+        "  # left in the kernel by an earlier run of this cell.\n",
+        "  raise ValueError(\"Unsupported model_chosen: {!r} (expected 'bert' or 'camembert')\".format(model_chosen))\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 36,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 131,
+          "referenced_widgets": [
+            "274e505b5f354efc8de3ef26cc43e617",
+            "f1f9d5b32f60473b86ae6b340d6c0850",
+            "ad5e0e1439a94578a31b80c90dbf3247",
+            "0779c8ea0ed24e64a800ae5dff6bc8ce",
+            "7870340ac12b469c8ac19de3a47b6e67",
+            "5f321455342348f49879a9ca8b392077",
+            "9420a47a2bf44ead8cff283f20566cda",
+            "99b785ea53744868b8b11e5e94936fcc",
+            "8d24b669a39b4876ac0a014dff678db1",
+            "2cf386a8d14d43389374f79bfa922675",
+            "2c44d9c11e704b70aa32904a23d1790c",
+            "0279837673b446b09aac18346213eb7e",
+            "09b5f0bbd5c14bc289b0f92a22bb29ab",
+            "69004a5069094f8c9d59d5136f627bef",
+            "e96a95317b0945c58c8ff0e944c7593e",
+            "68b69c9d3a274900bc2892848f725cb0",
+            "76007b17ffd2478fa4a86f959d4f1766",
+            "cb447c62ce1d4c1ea760175ae619fbb9",
+            "d4ad1a78750d49feaea584a82940bb7d",
+            "a9c47cb226ee41e18812f29f690992eb",
+            "c4c1675163bd4997bb44d7ea3967a356",
+            "5032547e748f45a3b0cdd12fafe1dd05",
+            "8f467553598f4dcc9abf55da79c11018",
+            "9d7a8b3ecfe74f66b4238fe085c05906",
+            "58b4f9e0366f4d4eba7f902af84b8965",
+            "9383e09698ae4bd1820a4bca22e78315",
+            "a189838c4de648198b0f4fc99c29ced8",
+            "c4d981755d1d42b6940396b77bc251bc",
+            "12afa6b6474b401f9ff3f189cc0d3d11",
+            "5978954f56fb40928b970f32d1634aaf",
+            "fe0e3b1df104484c98fbdcd31a04e427",
+            "2d1d632da0f740c38512c9ad779d3173",
+            "df95c20399dd4918bc7559a90886d4aa"
+          ]
+        },
+        "id": "C4bigx_3ibuN",
+        "outputId": "ebcca5ee-85d8-4525-c9ad-9fc3b5c1741d"
+      },
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Loading CamemBERT tokenizer...\n"
+          ]
+        },
+        {
+          "output_type": "display_data",
+          "data": {
+            "application/vnd.jupyter.widget-view+json": {
+              "model_id": "274e505b5f354efc8de3ef26cc43e617",
+              "version_minor": 0,
+              "version_major": 2
+            },
+            "text/plain": [
+              "Downloading:   0%|          | 0.00/811k [00:00<?, ?B/s]"
+            ]
+          },
+          "metadata": {}
+        },
+        {
+          "output_type": "display_data",
+          "data": {
+            "application/vnd.jupyter.widget-view+json": {
+              "model_id": "0279837673b446b09aac18346213eb7e",
+              "version_minor": 0,
+              "version_major": 2
+            },
+            "text/plain": [
+              "Downloading:   0%|          | 0.00/1.40M [00:00<?, ?B/s]"
+            ]
+          },
+          "metadata": {}
+        },
+        {
+          "output_type": "display_data",
+          "data": {
+            "application/vnd.jupyter.widget-view+json": {
+              "model_id": "8f467553598f4dcc9abf55da79c11018",
+              "version_minor": 0,
+              "version_major": 2
+            },
+            "text/plain": [
+              "Downloading:   0%|          | 0.00/508 [00:00<?, ?B/s]"
+            ]
+          },
+          "metadata": {}
+        }
+      ],
+      "source": [
+        "# Load the pretrained tokenizer that matches the selected architecture.\n",
+        "if model_chosen == \"bert\":\n",
+        "  print('Loading BERT tokenizer...')\n",
+        "  tokenizer = BertTokenizer.from_pretrained(tokeniser_bert)\n",
+        "elif model_chosen == \"camembert\":\n",
+        "  print('Loading CamemBERT tokenizer...')\n",
+        "  tokenizer = CamembertTokenizer.from_pretrained(tokeniser_bert)\n",
+        "else:\n",
+        "  # Without this branch `tokenizer` would stay undefined, or silently keep\n",
+        "  # the value from an earlier run of this cell.\n",
+        "  raise ValueError(\"Unsupported model_chosen: {!r} (expected 'bert' or 'camembert')\".format(model_chosen))\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 37,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "5hNod5X9jDZN",
+        "outputId": "bca0db0e-7463-40cd-8052-1712965c7a95"
+      },
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "Token indices sequence length is longer than the specified maximum sequence length for this model (531 > 512). Running this sequence through the model will result in indexing errors\n"
+          ]
+        }
+      ],
+      "source": [
+        "# Encode every training text as a list of token IDs.\n",
+        "# `tokenizer.encode` tokenizes the text, adds the tokenizer's special\n",
+        "# start/end tokens (add_special_tokens=True) and maps each token to its ID.\n",
+        "# Truncation and tensor conversion are deliberately not requested here:\n",
+        "# sequences are cut and padded to a fixed length in a later cell.\n",
+        "input_ids_train = []\n",
+        "for sent in sentences_train:\n",
+        "    encoded_sent_train = tokenizer.encode(str(sent), add_special_tokens=True)\n",
+        "    input_ids_train.append(encoded_sent_train)\n",
+        "\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 38,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "W9EWv5JvjGH3",
+        "outputId": "dde87708-7bcb-47c7-af71-2ec2b2e0c2db"
+      },
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Max sentence length train:  38073\n"
+          ]
+        }
+      ],
+      "source": [
+        "# Length, in token IDs, of the longest encoded training text.\n",
+        "print('Max sentence length train: ', max(map(len, input_ids_train)))"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 39,
+      "metadata": {
+        "id": "xh1TQJyvjOx5"
+      },
+      "outputs": [],
+      "source": [
+        "\n",
+        "# Bring every token-ID sequence to exactly `max_len` entries.\n",
+        "# Padding uses ID 0 because the attention-mask cell treats ID 0 as padding.\n",
+        "padded_train = []\n",
+        "for ids in input_ids_train:\n",
+        "  # list() keeps this cell re-runnable: after the first run\n",
+        "  # `input_ids_train` is a NumPy array, and `ndarray + list` would be an\n",
+        "  # element-wise addition rather than a concatenation.\n",
+        "  ids = list(ids)\n",
+        "  if len(ids) > max_len:\n",
+        "    # Keep the closing special token added by `encode` instead of cutting it off.\n",
+        "    ids = ids[:max_len - 1] + ids[-1:]\n",
+        "  padded_train.append(ids + [0] * (max_len - len(ids)))\n",
+        "\n",
+        "# Both names refer to the padded 2-D array from here on.\n",
+        "padded_train = input_ids_train = np.array(padded_train)\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 40,
+      "metadata": {
+        "id": "ZiwY6gn0jUkD"
+      },
+      "outputs": [],
+      "source": [
+        "# Build one attention mask per padded sequence:\n",
+        "#   1 where the token ID is > 0 (a real token),\n",
+        "#   0 where the token ID is 0 (padding).\n",
+        "attention_masks_train = []\n",
+        "for sent in padded_train:\n",
+        "    att_mask = [1 if token_id > 0 else 0 for token_id in sent]\n",
+        "    attention_masks_train.append(att_mask)\n",
+        "\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 41,
+      "metadata": {
+        "id": "oBTR5AfAjXJe"
+      },
+      "outputs": [],
+      "source": [
+        "# Use 70% for training and 30% for validation.\n",
+        "#train_inputs, validation_inputs, train_labels, validation_labels = train_test_split(padded, labels, \n",
+        "#                                                            random_state=2018, test_size=0.3, stratify = labels)\n",
+        "# Do the same for the masks.\n",
+        "#train_masks, validation_masks, _, _ = train_test_split(attention_masks, labels,\n",
+        "#                                             random_state=2018, test_size=0.3, stratify = labels)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 42,
+      "metadata": {
+        "id": "b9Mw5kq3jhTb"
+      },
+      "outputs": [],
+      "source": [
+        "# Wrap the padded token IDs, the attention masks and the labels in torch\n",
+        "# tensors, the datatype the model consumes.\n",
+        "train_inputs, train_masks, train_labels = map(\n",
+        "    torch.tensor, (padded_train, attention_masks_train, labels_train)\n",
+        ")"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 43,
+      "metadata": {
+        "id": "UfFWzbENjnkw"
+      },
+      "outputs": [],
+      "source": [
+        "# Batch the training tensors. The sampler draws examples in random order;\n",
+        "# `batch_size` is defined elsewhere in the notebook (for fine-tuning BERT\n",
+        "# the authors recommend 16 or 32).\n",
+        "train_data = TensorDataset(train_inputs, train_masks, train_labels)\n",
+        "train_sampler = RandomSampler(train_data)\n",
+        "train_dataloader = DataLoader(\n",
+        "    train_data,\n",
+        "    sampler=train_sampler,\n",
+        "    batch_size=batch_size,\n",
+        ")\n",
+        "\n"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "x45JNGqhkUn2"
+      },
+      "source": [
+        "## Training"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 44,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 1000,
+          "referenced_widgets": [
+            "d09d664839d04303b8fef9ef895f6e4f",
+            "500826e3813b414a820aa260bfde9e23",
+            "70dd7428d78c44409308d62ba04917de",
+            "152a31110bf9477989833eac91794688",
+            "fcde5f4cf49846a0ad1b284aad98a38a",
+            "1bf6a76237454349aafc1e9284376879",
+            "4a23110523184d019a77368116f738f3",
+            "e86a1d4d268c4314897b58f7bba5ec25",
+            "826bd7d0a1f146ea9f7d53584468190c",
+            "3592b1ed1d6d452b93c57b304943a1cb",
+            "a159d62667734657a49ba3a96494f137"
+          ]
+        },
+        "id": "C7M2Er1ajsTf",
+        "outputId": "151034cd-9a77-413e-a61e-561c97b4072e"
+      },
+      "outputs": [
+        {
+          "output_type": "display_data",
+          "data": {
+            "application/vnd.jupyter.widget-view+json": {
+              "model_id": "d09d664839d04303b8fef9ef895f6e4f",
+              "version_minor": 0,
+              "version_major": 2
+            },
+            "text/plain": [
+              "Downloading:   0%|          | 0.00/445M [00:00<?, ?B/s]"
+            ]
+          },
+          "metadata": {}
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "Some weights of the model checkpoint at camembert-base were not used when initializing CamembertForSequenceClassification: ['lm_head.layer_norm.bias', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.bias', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias', 'lm_head.dense.weight', 'lm_head.decoder.weight']\n",
+            "- This IS expected if you are initializing CamembertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
+            "- This IS NOT expected if you are initializing CamembertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
+            "Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight', 'classifier.out_proj.weight']\n",
+            "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
+          ]
+        },
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "CamembertForSequenceClassification(\n",
+              "  (roberta): RobertaModel(\n",
+              "    (embeddings): RobertaEmbeddings(\n",
+              "      (word_embeddings): Embedding(32005, 768, padding_idx=1)\n",
+              "      (position_embeddings): Embedding(514, 768, padding_idx=1)\n",
+              "      (token_type_embeddings): Embedding(1, 768)\n",
+              "      (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n",
+              "      (dropout): Dropout(p=0.1, inplace=False)\n",
+              "    )\n",
+              "    (encoder): RobertaEncoder(\n",
+              "      (layer): ModuleList(\n",
+              "        (0): RobertaLayer(\n",
+              "          (attention): RobertaAttention(\n",
+              "            (self): RobertaSelfAttention(\n",
+              "              (query): Linear(in_features=768, out_features=768, bias=True)\n",
+              "              (key): Linear(in_features=768, out_features=768, bias=True)\n",
+              "              (value): Linear(in_features=768, out_features=768, bias=True)\n",
+              "              (dropout): Dropout(p=0.1, inplace=False)\n",
+              "            )\n",
+              "            (output): RobertaSelfOutput(\n",
+              "              (dense): Linear(in_features=768, out_features=768, bias=True)\n",
+              "              (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n",
+              "              (dropout): Dropout(p=0.1, inplace=False)\n",
+              "            )\n",
+              "          )\n",
+              "          (intermediate): RobertaIntermediate(\n",
+              "            (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
+              "          )\n",
+              "          (output): RobertaOutput(\n",
+              "            (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
+              "            (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n",
+              "            (dropout): Dropout(p=0.1, inplace=False)\n",
+              "          )\n",
+              "        )\n",
+              "        (1): RobertaLayer(\n",
+              "          (attention): RobertaAttention(\n",
+              "            (self): RobertaSelfAttention(\n",
+              "              (query): Linear(in_features=768, out_features=768, bias=True)\n",
+              "              (key): Linear(in_features=768, out_features=768, bias=True)\n",
+              "              (value): Linear(in_features=768, out_features=768, bias=True)\n",
+              "              (dropout): Dropout(p=0.1, inplace=False)\n",
+              "            )\n",
+              "            (output): RobertaSelfOutput(\n",
+              "              (dense): Linear(in_features=768, out_features=768, bias=True)\n",
+              "              (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n",
+              "              (dropout): Dropout(p=0.1, inplace=False)\n",
+              "            )\n",
+              "          )\n",
+              "          (intermediate): RobertaIntermediate(\n",
+              "            (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
+              "          )\n",
+              "          (output): RobertaOutput(\n",
+              "            (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
+              "            (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n",
+              "            (dropout): Dropout(p=0.1, inplace=False)\n",
+              "          )\n",
+              "        )\n",
+              "        (2): RobertaLayer(\n",
+              "          (attention): RobertaAttention(\n",
+              "            (self): RobertaSelfAttention(\n",
+              "              (query): Linear(in_features=768, out_features=768, bias=True)\n",
+              "              (key): Linear(in_features=768, out_features=768, bias=True)\n",
+              "              (value): Linear(in_features=768, out_features=768, bias=True)\n",
+              "              (dropout): Dropout(p=0.1, inplace=False)\n",
+              "            )\n",
+              "            (output): RobertaSelfOutput(\n",
+              "              (dense): Linear(in_features=768, out_features=768, bias=True)\n",
+              "              (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n",
+              "              (dropout): Dropout(p=0.1, inplace=False)\n",
+              "            )\n",
+              "          )\n",
+              "          (intermediate): RobertaIntermediate(\n",
+              "            (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
+              "          )\n",
+              "          (output): RobertaOutput(\n",
+              "            (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
+              "            (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n",
+              "            (dropout): Dropout(p=0.1, inplace=False)\n",
+              "          )\n",
+              "        )\n",
+              "        (3): RobertaLayer(\n",
+              "          (attention): RobertaAttention(\n",
+              "            (self): RobertaSelfAttention(\n",
+              "              (query): Linear(in_features=768, out_features=768, bias=True)\n",
+              "              (key): Linear(in_features=768, out_features=768, bias=True)\n",
+              "              (value): Linear(in_features=768, out_features=768, bias=True)\n",
+              "              (dropout): Dropout(p=0.1, inplace=False)\n",
+              "            )\n",
+              "            (output): RobertaSelfOutput(\n",
+              "              (dense): Linear(in_features=768, out_features=768, bias=True)\n",
+              "              (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n",
+              "              (dropout): Dropout(p=0.1, inplace=False)\n",
+              "            )\n",
+              "          )\n",
+              "          (intermediate): RobertaIntermediate(\n",
+              "            (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
+              "          )\n",
+              "          (output): RobertaOutput(\n",
+              "            (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
+              "            (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n",
+              "            (dropout): Dropout(p=0.1, inplace=False)\n",
+              "          )\n",
+              "        )\n",
+              "        (4): RobertaLayer(\n",
+              "          (attention): RobertaAttention(\n",
+              "            (self): RobertaSelfAttention(\n",
+              "              (query): Linear(in_features=768, out_features=768, bias=True)\n",
+              "              (key): Linear(in_features=768, out_features=768, bias=True)\n",
+              "              (value): Linear(in_features=768, out_features=768, bias=True)\n",
+              "              (dropout): Dropout(p=0.1, inplace=False)\n",
+              "            )\n",
+              "            (output): RobertaSelfOutput(\n",
+              "              (dense): Linear(in_features=768, out_features=768, bias=True)\n",
+              "              (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n",
+              "              (dropout): Dropout(p=0.1, inplace=False)\n",
+              "            )\n",
+              "          )\n",
+              "          (intermediate): RobertaIntermediate(\n",
+              "            (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
+              "          )\n",
+              "          (output): RobertaOutput(\n",
+              "            (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
+              "            (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n",
+              "            (dropout): Dropout(p=0.1, inplace=False)\n",
+              "          )\n",
+              "        )\n",
+              "        (5): RobertaLayer(\n",
+              "          (attention): RobertaAttention(\n",
+              "            (self): RobertaSelfAttention(\n",
+              "              (query): Linear(in_features=768, out_features=768, bias=True)\n",
+              "              (key): Linear(in_features=768, out_features=768, bias=True)\n",
+              "              (value): Linear(in_features=768, out_features=768, bias=True)\n",
+              "              (dropout): Dropout(p=0.1, inplace=False)\n",
+              "            )\n",
+              "            (output): RobertaSelfOutput(\n",
+              "              (dense): Linear(in_features=768, out_features=768, bias=True)\n",
+              "              (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n",
+              "              (dropout): Dropout(p=0.1, inplace=False)\n",
+              "            )\n",
+              "          )\n",
+              "          (intermediate): RobertaIntermediate(\n",
+              "            (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
+              "          )\n",
+              "          (output): RobertaOutput(\n",
+              "            (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
+              "            (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n",
+              "            (dropout): Dropout(p=0.1, inplace=False)\n",
+              "          )\n",
+              "        )\n",
+              "        (6): RobertaLayer(\n",
+              "          (attention): RobertaAttention(\n",
+              "            (self): RobertaSelfAttention(\n",
+              "              (query): Linear(in_features=768, out_features=768, bias=True)\n",
+              "              (key): Linear(in_features=768, out_features=768, bias=True)\n",
+              "              (value): Linear(in_features=768, out_features=768, bias=True)\n",
+              "              (dropout): Dropout(p=0.1, inplace=False)\n",
+              "            )\n",
+              "            (output): RobertaSelfOutput(\n",
+              "              (dense): Linear(in_features=768, out_features=768, bias=True)\n",
+              "              (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n",
+              "              (dropout): Dropout(p=0.1, inplace=False)\n",
+              "            )\n",
+              "          )\n",
+              "          (intermediate): RobertaIntermediate(\n",
+              "            (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
+              "          )\n",
+              "          (output): RobertaOutput(\n",
+              "            (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
+              "            (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n",
+              "            (dropout): Dropout(p=0.1, inplace=False)\n",
+              "          )\n",
+              "        )\n",
+              "        (7): RobertaLayer(\n",
+              "          (attention): RobertaAttention(\n",
+              "            (self): RobertaSelfAttention(\n",
+              "              (query): Linear(in_features=768, out_features=768, bias=True)\n",
+              "              (key): Linear(in_features=768, out_features=768, bias=True)\n",
+              "              (value): Linear(in_features=768, out_features=768, bias=True)\n",
+              "              (dropout): Dropout(p=0.1, inplace=False)\n",
+              "            )\n",
+              "            (output): RobertaSelfOutput(\n",
+              "              (dense): Linear(in_features=768, out_features=768, bias=True)\n",
+              "              (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n",
+              "              (dropout): Dropout(p=0.1, inplace=False)\n",
+              "            )\n",
+              "          )\n",
+              "          (intermediate): RobertaIntermediate(\n",
+              "            (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
+              "          )\n",
+              "          (output): RobertaOutput(\n",
+              "            (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
+              "            (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n",
+              "            (dropout): Dropout(p=0.1, inplace=False)\n",
+              "          )\n",
+              "        )\n",
+              "        (8): RobertaLayer(\n",
+              "          (attention): RobertaAttention(\n",
+              "            (self): RobertaSelfAttention(\n",
+              "              (query): Linear(in_features=768, out_features=768, bias=True)\n",
+              "              (key): Linear(in_features=768, out_features=768, bias=True)\n",
+              "              (value): Linear(in_features=768, out_features=768, bias=True)\n",
+              "              (dropout): Dropout(p=0.1, inplace=False)\n",
+              "            )\n",
+              "            (output): RobertaSelfOutput(\n",
+              "              (dense): Linear(in_features=768, out_features=768, bias=True)\n",
+              "              (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n",
+              "              (dropout): Dropout(p=0.1, inplace=False)\n",
+              "            )\n",
+              "          )\n",
+              "          (intermediate): RobertaIntermediate(\n",
+              "            (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
+              "          )\n",
+              "          (output): RobertaOutput(\n",
+              "            (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
+              "            (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n",
+              "            (dropout): Dropout(p=0.1, inplace=False)\n",
+              "          )\n",
+              "        )\n",
+              "        (9): RobertaLayer(\n",
+              "          (attention): RobertaAttention(\n",
+              "            (self): RobertaSelfAttention(\n",
+              "              (query): Linear(in_features=768, out_features=768, bias=True)\n",
+              "              (key): Linear(in_features=768, out_features=768, bias=True)\n",
+              "              (value): Linear(in_features=768, out_features=768, bias=True)\n",
+              "              (dropout): Dropout(p=0.1, inplace=False)\n",
+              "            )\n",
+              "            (output): RobertaSelfOutput(\n",
+              "              (dense): Linear(in_features=768, out_features=768, bias=True)\n",
+              "              (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n",
+              "              (dropout): Dropout(p=0.1, inplace=False)\n",
+              "            )\n",
+              "          )\n",
+              "          (intermediate): RobertaIntermediate(\n",
+              "            (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
+              "          )\n",
+              "          (output): RobertaOutput(\n",
+              "            (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
+              "            (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n",
+              "            (dropout): Dropout(p=0.1, inplace=False)\n",
+              "          )\n",
+              "        )\n",
+              "        (10): RobertaLayer(\n",
+              "          (attention): RobertaAttention(\n",
+              "            (self): RobertaSelfAttention(\n",
+              "              (query): Linear(in_features=768, out_features=768, bias=True)\n",
+              "              (key): Linear(in_features=768, out_features=768, bias=True)\n",
+              "              (value): Linear(in_features=768, out_features=768, bias=True)\n",
+              "              (dropout): Dropout(p=0.1, inplace=False)\n",
+              "            )\n",
+              "            (output): RobertaSelfOutput(\n",
+              "              (dense): Linear(in_features=768, out_features=768, bias=True)\n",
+              "              (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n",
+              "              (dropout): Dropout(p=0.1, inplace=False)\n",
+              "            )\n",
+              "          )\n",
+              "          (intermediate): RobertaIntermediate(\n",
+              "            (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
+              "          )\n",
+              "          (output): RobertaOutput(\n",
+              "            (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
+              "            (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n",
+              "            (dropout): Dropout(p=0.1, inplace=False)\n",
+              "          )\n",
+              "        )\n",
+              "        (11): RobertaLayer(\n",
+              "          (attention): RobertaAttention(\n",
+              "            (self): RobertaSelfAttention(\n",
+              "              (query): Linear(in_features=768, out_features=768, bias=True)\n",
+              "              (key): Linear(in_features=768, out_features=768, bias=True)\n",
+              "              (value): Linear(in_features=768, out_features=768, bias=True)\n",
+              "              (dropout): Dropout(p=0.1, inplace=False)\n",
+              "            )\n",
+              "            (output): RobertaSelfOutput(\n",
+              "              (dense): Linear(in_features=768, out_features=768, bias=True)\n",
+              "              (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n",
+              "              (dropout): Dropout(p=0.1, inplace=False)\n",
+              "            )\n",
+              "          )\n",
+              "          (intermediate): RobertaIntermediate(\n",
+              "            (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
+              "          )\n",
+              "          (output): RobertaOutput(\n",
+              "            (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
+              "            (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n",
+              "            (dropout): Dropout(p=0.1, inplace=False)\n",
+              "          )\n",
+              "        )\n",
+              "      )\n",
+              "    )\n",
+              "  )\n",
+              "  (classifier): RobertaClassificationHead(\n",
+              "    (dense): Linear(in_features=768, out_features=768, bias=True)\n",
+              "    (dropout): Dropout(p=0.1, inplace=False)\n",
+              "    (out_proj): Linear(in_features=768, out_features=38, bias=True)\n",
+              "  )\n",
+              ")"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 44
+        }
+      ],
+      "source": [
+        "# Load the pretrained checkpoint `model_bert` with a sequence-classification\n",
+        "# head on top. `model_chosen` selects the architecture class and\n",
+        "# `numberOfClasses` sets the size of the output layer.\n",
+        "if model_chosen == \"bert\":\n",
+        "  model_class = BertForSequenceClassification\n",
+        "elif model_chosen == \"camembert\":\n",
+        "  model_class = CamembertForSequenceClassification\n",
+        "else:\n",
+        "  # Fail here with a clear message rather than hitting a NameError (or silently\n",
+        "  # reusing a stale `model` from an earlier run) at `model.cuda()` below.\n",
+        "  raise ValueError(\"Unsupported model_chosen: {!r} (expected 'bert' or 'camembert')\".format(model_chosen))\n",
+        "\n",
+        "model = model_class.from_pretrained(\n",
+        "    model_bert,                   # Name of the pretrained checkpoint to load.\n",
+        "    num_labels = numberOfClasses, # One output logit per target class.\n",
+        "    output_attentions = False,    # Do not return the attention weights.\n",
+        "    output_hidden_states = False, # Do not return all hidden states.\n",
+        ")\n",
+        "\n",
+        "# Tell pytorch to run this model on the GPU.\n",
+        "model.cuda()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 45,
+      "metadata": {
+        "id": "xd_cG-8pj4Iw"
+      },
+      "outputs": [],
+      "source": [
+        "# Optimizer over every parameter of the model.\n",
+        "# NOTE(review): the original note says this AdamW comes from the huggingface\n",
+        "# library rather than from pytorch -- confirm against the imports cell.\n",
+        "optimizer = AdamW(\n",
+        "    model.parameters(),\n",
+        "    lr = 2e-5,   # learning rate used for this fine-tuning run\n",
+        "    eps = 1e-8,  # Adam epsilon\n",
+        ")"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 46,
+      "metadata": {
+        "id": "65G-uHuLj4_6"
+      },
+      "outputs": [],
+      "source": [
+        "# How many full passes to make over the training set.\n",
+        "epochs = 4\n",
+        "\n",
+        "# The training loop steps the scheduler once per batch, so the schedule has to\n",
+        "# span (batches per epoch) * (epochs) steps.\n",
+        "total_steps = epochs * len(train_dataloader)\n",
+        "\n",
+        "# Linear learning-rate schedule without any warmup steps.\n",
+        "scheduler = get_linear_schedule_with_warmup(\n",
+        "    optimizer,\n",
+        "    num_warmup_steps = 0,\n",
+        "    num_training_steps = total_steps,\n",
+        ")"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "SbHBbYpwkKaA",
+        "outputId": "4cd1be4a-6014-4804-df56-f38e98039797"
+      },
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "\n",
+            "======== Epoch 1 / 4 ========\n",
+            "Training...\n",
+            "  Batch    40  of  5,851.    Elapsed: 0:00:19.\n",
+            "  Batch    80  of  5,851.    Elapsed: 0:00:37.\n",
+            "  Batch   120  of  5,851.    Elapsed: 0:00:55.\n",
+            "  Batch   160  of  5,851.    Elapsed: 0:01:14.\n",
+            "  Batch   200  of  5,851.    Elapsed: 0:01:32.\n",
+            "  Batch   240  of  5,851.    Elapsed: 0:01:51.\n",
+            "  Batch   280  of  5,851.    Elapsed: 0:02:09.\n",
+            "  Batch   320  of  5,851.    Elapsed: 0:02:28.\n",
+            "  Batch   360  of  5,851.    Elapsed: 0:02:46.\n",
+            "  Batch   400  of  5,851.    Elapsed: 0:03:04.\n",
+            "  Batch   440  of  5,851.    Elapsed: 0:03:23.\n",
+            "  Batch   480  of  5,851.    Elapsed: 0:03:41.\n",
+            "  Batch   520  of  5,851.    Elapsed: 0:04:00.\n",
+            "  Batch   560  of  5,851.    Elapsed: 0:04:18.\n",
+            "  Batch   600  of  5,851.    Elapsed: 0:04:37.\n",
+            "  Batch   640  of  5,851.    Elapsed: 0:04:55.\n",
+            "  Batch   680  of  5,851.    Elapsed: 0:05:14.\n",
+            "  Batch   720  of  5,851.    Elapsed: 0:05:32.\n",
+            "  Batch   760  of  5,851.    Elapsed: 0:05:50.\n",
+            "  Batch   800  of  5,851.    Elapsed: 0:06:09.\n",
+            "  Batch   840  of  5,851.    Elapsed: 0:06:27.\n",
+            "  Batch   880  of  5,851.    Elapsed: 0:06:46.\n",
+            "  Batch   920  of  5,851.    Elapsed: 0:07:04.\n",
+            "  Batch   960  of  5,851.    Elapsed: 0:07:23.\n",
+            "  Batch 1,000  of  5,851.    Elapsed: 0:07:41.\n",
+            "  Batch 1,040  of  5,851.    Elapsed: 0:08:00.\n",
+            "  Batch 1,080  of  5,851.    Elapsed: 0:08:18.\n",
+            "  Batch 1,120  of  5,851.    Elapsed: 0:08:37.\n",
+            "  Batch 1,160  of  5,851.    Elapsed: 0:08:55.\n",
+            "  Batch 1,200  of  5,851.    Elapsed: 0:09:14.\n",
+            "  Batch 1,240  of  5,851.    Elapsed: 0:09:32.\n",
+            "  Batch 1,280  of  5,851.    Elapsed: 0:09:51.\n",
+            "  Batch 1,320  of  5,851.    Elapsed: 0:10:09.\n",
+            "  Batch 1,360  of  5,851.    Elapsed: 0:10:28.\n",
+            "  Batch 1,400  of  5,851.    Elapsed: 0:10:46.\n",
+            "  Batch 1,440  of  5,851.    Elapsed: 0:11:05.\n",
+            "  Batch 1,480  of  5,851.    Elapsed: 0:11:23.\n",
+            "  Batch 1,520  of  5,851.    Elapsed: 0:11:42.\n",
+            "  Batch 1,560  of  5,851.    Elapsed: 0:12:00.\n",
+            "  Batch 1,600  of  5,851.    Elapsed: 0:12:19.\n",
+            "  Batch 1,640  of  5,851.    Elapsed: 0:12:37.\n",
+            "  Batch 1,680  of  5,851.    Elapsed: 0:12:56.\n",
+            "  Batch 1,720  of  5,851.    Elapsed: 0:13:14.\n",
+            "  Batch 1,760  of  5,851.    Elapsed: 0:13:33.\n",
+            "  Batch 1,800  of  5,851.    Elapsed: 0:13:51.\n",
+            "  Batch 1,840  of  5,851.    Elapsed: 0:14:10.\n",
+            "  Batch 1,880  of  5,851.    Elapsed: 0:14:28.\n",
+            "  Batch 1,920  of  5,851.    Elapsed: 0:14:47.\n",
+            "  Batch 1,960  of  5,851.    Elapsed: 0:15:05.\n",
+            "  Batch 2,000  of  5,851.    Elapsed: 0:15:23.\n",
+            "  Batch 2,040  of  5,851.    Elapsed: 0:15:42.\n",
+            "  Batch 2,080  of  5,851.    Elapsed: 0:16:00.\n",
+            "  Batch 2,120  of  5,851.    Elapsed: 0:16:19.\n",
+            "  Batch 2,160  of  5,851.    Elapsed: 0:16:37.\n",
+            "  Batch 2,200  of  5,851.    Elapsed: 0:16:56.\n",
+            "  Batch 2,240  of  5,851.    Elapsed: 0:17:14.\n",
+            "  Batch 2,280  of  5,851.    Elapsed: 0:17:32.\n",
+            "  Batch 2,320  of  5,851.    Elapsed: 0:17:51.\n",
+            "  Batch 2,360  of  5,851.    Elapsed: 0:18:09.\n",
+            "  Batch 2,400  of  5,851.    Elapsed: 0:18:28.\n",
+            "  Batch 2,440  of  5,851.    Elapsed: 0:18:46.\n",
+            "  Batch 2,480  of  5,851.    Elapsed: 0:19:04.\n",
+            "  Batch 2,520  of  5,851.    Elapsed: 0:19:23.\n",
+            "  Batch 2,560  of  5,851.    Elapsed: 0:19:41.\n",
+            "  Batch 2,600  of  5,851.    Elapsed: 0:20:00.\n",
+            "  Batch 2,640  of  5,851.    Elapsed: 0:20:18.\n",
+            "  Batch 2,680  of  5,851.    Elapsed: 0:20:37.\n",
+            "  Batch 2,720  of  5,851.    Elapsed: 0:20:55.\n",
+            "  Batch 2,760  of  5,851.    Elapsed: 0:21:13.\n",
+            "  Batch 2,800  of  5,851.    Elapsed: 0:21:32.\n",
+            "  Batch 2,840  of  5,851.    Elapsed: 0:21:50.\n",
+            "  Batch 2,880  of  5,851.    Elapsed: 0:22:09.\n",
+            "  Batch 2,920  of  5,851.    Elapsed: 0:22:27.\n",
+            "  Batch 2,960  of  5,851.    Elapsed: 0:22:46.\n",
+            "  Batch 3,000  of  5,851.    Elapsed: 0:23:04.\n",
+            "  Batch 3,040  of  5,851.    Elapsed: 0:23:22.\n",
+            "  Batch 3,080  of  5,851.    Elapsed: 0:23:41.\n",
+            "  Batch 3,120  of  5,851.    Elapsed: 0:23:59.\n",
+            "  Batch 3,160  of  5,851.    Elapsed: 0:24:18.\n",
+            "  Batch 3,200  of  5,851.    Elapsed: 0:24:36.\n",
+            "  Batch 3,240  of  5,851.    Elapsed: 0:24:54.\n",
+            "  Batch 3,280  of  5,851.    Elapsed: 0:25:13.\n",
+            "  Batch 3,320  of  5,851.    Elapsed: 0:25:31.\n",
+            "  Batch 3,360  of  5,851.    Elapsed: 0:25:50.\n",
+            "  Batch 3,400  of  5,851.    Elapsed: 0:26:08.\n",
+            "  Batch 3,440  of  5,851.    Elapsed: 0:26:27.\n",
+            "  Batch 3,480  of  5,851.    Elapsed: 0:26:45.\n",
+            "  Batch 3,520  of  5,851.    Elapsed: 0:27:04.\n",
+            "  Batch 3,560  of  5,851.    Elapsed: 0:27:23.\n",
+            "  Batch 3,600  of  5,851.    Elapsed: 0:27:41.\n",
+            "  Batch 3,640  of  5,851.    Elapsed: 0:27:59.\n",
+            "  Batch 3,680  of  5,851.    Elapsed: 0:28:18.\n",
+            "  Batch 3,720  of  5,851.    Elapsed: 0:28:36.\n",
+            "  Batch 3,760  of  5,851.    Elapsed: 0:28:55.\n",
+            "  Batch 3,800  of  5,851.    Elapsed: 0:29:13.\n"
+          ]
+        }
+      ],
+      "source": [
+        "# Training loop adapted from the huggingface `run_glue.py` example:\n",
+        "# https://github.com/huggingface/transformers/blob/5bfcd0485ece086ebcbed2d008813037968a9e58/examples/run_glue.py#L128\n",
+        "\n",
+        "# Seed every random number generator used here so that runs are repeatable.\n",
+        "seed_val = 42\n",
+        "for seed_fn in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed_all):\n",
+        "    seed_fn(seed_val)\n",
+        "\n",
+        "# Mean training loss of each epoch, kept so the learning curve can be plotted.\n",
+        "loss_values = []\n",
+        "\n",
+        "for epoch_i in range(epochs):\n",
+        "\n",
+        "    # ---------------- one full pass over the training set ----------------\n",
+        "    print(\"\")\n",
+        "    print('======== Epoch {:} / {:} ========'.format(epoch_i + 1, epochs))\n",
+        "    print('Training...')\n",
+        "\n",
+        "    t0 = time.time()   # start of the epoch, used for the elapsed-time reports\n",
+        "    total_loss = 0     # running sum of the per-batch losses\n",
+        "\n",
+        "    # Switch the model to training mode.\n",
+        "    model.train()\n",
+        "\n",
+        "    for step, batch in enumerate(train_dataloader):\n",
+        "\n",
+        "        # Report progress every 40 batches, skipping the very first one.\n",
+        "        if step != 0 and step % 40 == 0:\n",
+        "            elapsed = format_time(time.time() - t0)\n",
+        "            print('  Batch {:>5,}  of  {:>5,}.    Elapsed: {:}.'.format(step, len(train_dataloader), elapsed))\n",
+        "\n",
+        "        # `batch` holds [0] input ids, [1] attention masks and [2] labels;\n",
+        "        # copy each of them to the target device.\n",
+        "        b_input_ids, b_input_mask, b_labels = (batch[k].to(device) for k in range(3))\n",
+        "\n",
+        "        # Gradients accumulate across backward passes, so clear the ones left\n",
+        "        # over from the previous batch first.\n",
+        "        model.zero_grad()\n",
+        "\n",
+        "        # Forward pass. Because `labels` is supplied, the first element of the\n",
+        "        # output is the loss for this batch.\n",
+        "        outputs = model(\n",
+        "            b_input_ids,\n",
+        "            token_type_ids=None,\n",
+        "            attention_mask=b_input_mask,\n",
+        "            labels=b_labels,\n",
+        "        )\n",
+        "        loss = outputs[0]\n",
+        "\n",
+        "        # `.item()` extracts the Python number from the one-element loss tensor.\n",
+        "        total_loss += loss.item()\n",
+        "\n",
+        "        # Backward pass, then clip the gradient norm to 1.0 to guard against\n",
+        "        # exploding gradients.\n",
+        "        loss.backward()\n",
+        "        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)\n",
+        "\n",
+        "        # Apply the parameter update, then advance the learning-rate schedule.\n",
+        "        optimizer.step()\n",
+        "        scheduler.step()\n",
+        "\n",
+        "    # Average loss over all batches of this epoch, stored for the learning curve.\n",
+        "    avg_train_loss = total_loss / len(train_dataloader)\n",
+        "    loss_values.append(avg_train_loss)\n",
+        "\n",
+        "    print(\"\")\n",
+        "    print(\"  Average training loss: {0:.2f}\".format(avg_train_loss))\n",
+        "    print(\"  Training epoch took: {:}\".format(format_time(time.time() - t0)))\n",
+        "\n",
+        "print(\"\")\n",
+        "print(\"Training complete!\")"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "uEe7lPtVKpIY"
+      },
+      "source": [
+        "## Saving model"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "AYCSVm_wKnuM"
+      },
+      "outputs": [],
+      "source": [
+        "import os\n",
+        "\n",
+        "# Base name identifying this run: checkpoint name + per-class instance cap.\n",
+        "name = model_bert+\"_s\"+str(maxOfInstancePerClass)\n",
+        "# os.path.join yields the same file whether or not `path` ends with a\n",
+        "# separator; plain concatenation only worked when it did.\n",
+        "model_path = os.path.join(path, \"model_\"+name+\".pt\")"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "qmsxrOqjCsGo"
+      },
+      "outputs": [],
+      "source": [
+        "# Serialize the whole model object (not only its weights) to `model_path`;\n",
+        "# the loading cells call torch.load on this file and use the result directly.\n",
+        "torch.save(obj=model, f=model_path)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "pM9bSsckCndR"
+      },
+      "source": [
+        "## Loading model"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 21,
+      "metadata": {
+        "id": "cEycmiS8Cnjw"
+      },
+      "outputs": [],
+      "source": [
+        "# map_location=device remaps the stored tensors onto the current device, so a\n",
+        "# checkpoint written on a GPU runtime can also be loaded on a CPU-only one.\n",
+        "# NOTE: torch.load unpickles arbitrary objects -- only load files you trust.\n",
+        "model = torch.load(model_path, map_location=device)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "VJwyfmakkQyj"
+      },
+      "source": [
+        "## Evaluation"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 26,
+      "metadata": {
+        "id": "K9qdtYexIIvk"
+      },
+      "outputs": [],
+      "source": [
+        "def evaluate_bert(data, labels, model, batch_size):\n",
+        "  \"\"\"Predict a class for every text in `data` and pair it with its true label.\n",
+        "\n",
+        "  Relies on the notebook-level `tokenizer`, `max_len` and `device`.\n",
+        "\n",
+        "  Args:\n",
+        "    data: iterable of texts; each item goes through `str()` before encoding.\n",
+        "    labels: label ids aligned with `data` (anything `torch.tensor` accepts).\n",
+        "    model: sequence-classification model whose first output is the logits.\n",
+        "    batch_size: number of sequences per prediction batch.\n",
+        "\n",
+        "  Returns:\n",
+        "    (pred_labels_, true_labels_): two flat lists holding, for every sequence,\n",
+        "    the index of its highest logit and its true label.\n",
+        "  \"\"\"\n",
+        "  # Tokenize each text and map the tokens to their ids, special tokens included.\n",
+        "  encoded = [\n",
+        "      tokenizer.encode(str(sent), add_special_tokens = True)\n",
+        "      for sent in data\n",
+        "  ]\n",
+        "\n",
+        "  # Bring every sequence to exactly `max_len` ids: cut the long ones and\n",
+        "  # right-pad the short ones with 0.\n",
+        "  padded = [\n",
+        "      ids[:max_len] if len(ids) > max_len else ids + [0] * (max_len - len(ids))\n",
+        "      for ids in encoded\n",
+        "  ]\n",
+        "  input_ids = np.array(padded)\n",
+        "\n",
+        "  # Attention mask: 1.0 for every id greater than 0, 0.0 for the padding.\n",
+        "  attention_masks = [[float(token_id > 0) for token_id in row] for row in input_ids]\n",
+        "\n",
+        "  # Wrap everything in tensors and serve it in order, `batch_size` at a time.\n",
+        "  prediction_inputs = torch.tensor(input_ids)\n",
+        "  prediction_masks = torch.tensor(attention_masks)\n",
+        "  prediction_labels = torch.tensor(labels)\n",
+        "\n",
+        "  prediction_data = TensorDataset(prediction_inputs, prediction_masks, prediction_labels)\n",
+        "  prediction_dataloader = DataLoader(\n",
+        "      prediction_data,\n",
+        "      sampler=SequentialSampler(prediction_data),\n",
+        "      batch_size=batch_size,\n",
+        "  )\n",
+        "\n",
+        "  print('Predicting labels for {:,} test sentences...'.format(len(prediction_inputs)))\n",
+        "\n",
+        "  # Put the model in evaluation mode.\n",
+        "  model.eval()\n",
+        "\n",
+        "  # Per-batch logits and true labels, collected as numpy arrays.\n",
+        "  predictions, true_labels = [], []\n",
+        "\n",
+        "  for batch in prediction_dataloader:\n",
+        "    # Move the batch to the target device and unpack it.\n",
+        "    b_input_ids, b_input_mask, b_labels = tuple(t.to(device) for t in batch)\n",
+        "\n",
+        "    # No gradients are needed for prediction.\n",
+        "    with torch.no_grad():\n",
+        "      outputs = model(b_input_ids, token_type_ids=None,\n",
+        "                      attention_mask=b_input_mask)\n",
+        "\n",
+        "    # The first output holds the logits; bring logits and labels back to the CPU.\n",
+        "    predictions.append(outputs[0].detach().cpu().numpy())\n",
+        "    true_labels.append(b_labels.to('cpu').numpy())\n",
+        "\n",
+        "  print('    DONE.')\n",
+        "\n",
+        "  print('Calculating the matrics for each batch...')\n",
+        "\n",
+        "  # Each batch of logits has one column per class; the predicted class of a\n",
+        "  # row is the index of its largest logit.\n",
+        "  pred_labels = [\n",
+        "      np.argmax(batch_logits, axis=1).flatten()\n",
+        "      for batch_logits in predictions\n",
+        "  ]\n",
+        "\n",
+        "  # Flatten the per-batch arrays into one list each.\n",
+        "  pred_labels_ = [label for batch_preds in pred_labels for label in batch_preds]\n",
+        "  true_labels_ = [label for batch_true in true_labels for label in batch_true]\n",
+        "\n",
+        "  return pred_labels_, true_labels_"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "dPjV_5g8DDQy"
+      },
+      "outputs": [],
+      "source": [
+        "import os\n",
+        "\n",
+        "dataset = \"test\"\n",
+        "\n",
+        "df_eval = pd.read_csv(dataset+\"_set.tsv\", sep=\"\\t\")\n",
+        "data_eval = df_eval[columnText].values\n",
+        "\n",
+        "# Turn the gold class names into the integer ids of the already-fitted `encoder`.\n",
+        "y = encoder.transform(df_eval[columnClass])\n",
+        "labels = y.tolist()\n",
+        "\n",
+        "# Derive every artefact name from the configuration instead of prefixing the\n",
+        "# `name` left behind by the saving cell: the cell is then safe to re-run (no\n",
+        "# \"test_test_...\" names) and works on a kernel where the saving cell never ran.\n",
+        "base_name = model_bert+\"_s\"+str(maxOfInstancePerClass)\n",
+        "name = dataset+\"_\"+base_name\n",
+        "\n",
+        "# os.path.join gives the same files whether or not `path` ends with a separator.\n",
+        "# map_location lets a checkpoint saved on a GPU load on a CPU-only runtime too.\n",
+        "model_path = os.path.join(path, \"model_\"+base_name+\".pt\")\n",
+        "model = torch.load(model_path, map_location=device)\n",
+        "\n",
+        "# NOTE(review): for any other checkpoint name the previously defined `tokenizer`\n",
+        "# is used unchanged.\n",
+        "if model_bert == \"bert-base-multilingual-cased\":\n",
+        "  tokenizer = BertTokenizer.from_pretrained(model_bert)\n",
+        "elif model_bert == \"camembert-base\":\n",
+        "  tokenizer = CamembertTokenizer.from_pretrained(model_bert)\n",
+        "\n",
+        "pred_labels_, true_labels_ = evaluate_bert(data_eval, labels, model, batch_size)\n",
+        "\n",
+        "# Pass the full label set explicitly so the report and the confusion matrix\n",
+        "# always have one entry per encoder class, even when a class occurs neither in\n",
+        "# the gold labels nor in the predictions of this evaluation set.\n",
+        "class_ids = encoder.transform(encoder.classes_)\n",
+        "classes = [str(e) for e in class_ids]\n",
+        "classesName = encoder.classes_\n",
+        "\n",
+        "report = classification_report(true_labels_, pred_labels_, labels=class_ids, output_dict = True)\n",
+        "cnf_matrix = confusion_matrix(true_labels_, pred_labels_, labels=class_ids)\n",
+        "\n",
+        "precision = [report[c]['precision'] for c in classes]\n",
+        "recall = [report[c]['recall'] for c in classes]\n",
+        "f1 = [report[c]['f1-score'] for c in classes]\n",
+        "support = [report[c]['support'] for c in classes]\n",
+        "\n",
+        "# Computed directly because, when `labels` is passed, classification_report may\n",
+        "# expose 'micro avg' instead of the 'accuracy' entry.\n",
+        "accuracy = float(np.mean(np.array(true_labels_) == np.array(pred_labels_)))\n",
+        "weighted_avg = report['weighted avg']\n",
+        "\n",
+        "# One-vs-rest counts per class: rows of the matrix are true classes, columns\n",
+        "# are predicted classes.\n",
+        "TP = np.diag(cnf_matrix)\n",
+        "FP = cnf_matrix.sum(axis=0) - TP\n",
+        "FN = cnf_matrix.sum(axis=1) - TP\n",
+        "TN = cnf_matrix.sum() - (FP + FN + TP)\n",
+        "\n",
+        "dff = pd.DataFrame({\n",
+        "    'className': classesName,\n",
+        "    'precision': precision,\n",
+        "    'recall': recall,\n",
+        "    'f1-score': f1,\n",
+        "    'support': support,\n",
+        "    'FP': FP,\n",
+        "    'FN': FN,\n",
+        "    'TP': TP,\n",
+        "    'TN': TN,\n",
+        "})\n",
+        "\n",
+        "print(base_name)\n",
+        "\n",
+        "content = name + \"\\n\"\n",
+        "print(name)\n",
+        "content += str(weighted_avg) + \"\\n\"\n",
+        "\n",
+        "print(weighted_avg)\n",
+        "print(accuracy)\n",
+        "print(dff)\n",
+        "\n",
+        "# Make sure the output sub-directories exist before writing into them.\n",
+        "os.makedirs(os.path.join(path, \"predictions\"), exist_ok=True)\n",
+        "os.makedirs(os.path.join(path, \"reports\"), exist_ok=True)\n",
+        "\n",
+        "dff.to_csv(os.path.join(path, \"report_\"+name+\".csv\"), index=False)\n",
+        "# Save the predictions next to their true labels.\n",
+        "pd.DataFrame({'labels': pd.Series(true_labels_), 'predictions': pd.Series(pred_labels_)}).to_csv(os.path.join(path, \"predictions\", \"predictions_\"+name+\".csv\"))\n",
+        "\n",
+        "with open(os.path.join(path, \"reports\", \"report_\"+name+\".txt\"), 'w') as f:\n",
+        "  f.write(content)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "cVdM4eT6I8g2"
+      },
+      "outputs": [],
+      "source": [
+        ""
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "HzxyFO3knanV"
+      },
+      "outputs": [],
+      "source": [
+        ""
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "KDRPPw4Wnap7"
+      },
+      "outputs": [],
+      "source": [
+        ""
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "DX81R2dcnasF"
+      },
+      "outputs": [],
+      "source": [
+        ""
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "wgfqJFVeJMK1"
+      },
+      "outputs": [],
+      "source": [
+        ""
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "GqEf5_41JMNZ"
+      },
+      "outputs": [],
+      "source": [
+        ""
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "x_n57EvhJMQh"
+      },
+      "outputs": [],
+      "source": [
+        "model_path = \"drive/MyDrive/Classification-EDdA/model_bert-base-multilingual-cased_s10000.pt\""
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "R3_9tA9MI8ju"
+      },
+      "outputs": [],
+      "source": [
+        "# map_location=device remaps the stored tensors onto the current device, so a\n",
+        "# checkpoint written on a GPU runtime can also be loaded on a CPU-only one.\n",
+        "# NOTE: torch.load unpickles arbitrary objects -- only load files you trust.\n",
+        "model = torch.load(model_path, map_location=device)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "_fzgS5USJeAF",
+        "outputId": "be4a5506-76ed-4eef-bb3c-fe2bb77c6e4d"
+      },
+      "outputs": [
+        {
+          "name": "stdout",
+          "output_type": "stream",
+          "text": [
+            "--2021-09-30 19:38:22--  https://projet.liris.cnrs.fr/geode/files/datasets/EDdA/Classification/LGE_withContent.tsv\n",
+            "Resolving projet.liris.cnrs.fr (projet.liris.cnrs.fr)... 134.214.142.28\n",
+            "Connecting to projet.liris.cnrs.fr (projet.liris.cnrs.fr)|134.214.142.28|:443... connected.\n",
+            "HTTP request sent, awaiting response... 200 OK\n",
+            "Length: 356197 (348K) [text/tab-separated-values]\n",
+            "Saving to: â€˜LGE_withContent.tsvâ€™\n",
+            "\n",
+            "LGE_withContent.tsv 100%[===================>] 347.85K   567KB/s    in 0.6s    \n",
+            "\n",
+            "2021-09-30 19:38:24 (567 KB/s) - â€˜LGE_withContent.tsvâ€™ saved [356197/356197]\n",
+            "\n"
+          ]
+        }
+      ],
+      "source": [
+        "!wget https://projet.liris.cnrs.fr/geode/files/datasets/EDdA/Classification/LGE_withContent.tsv"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "8WEJjQC7I8mP"
+      },
+      "outputs": [],
+      "source": [
+        "# Read the tab-separated LGE articles and keep their text column for prediction.\n",
+        "df_LGE = pd.read_csv(\"LGE_withContent.tsv\", sep=\"\\t\")\n",
+        "data_LGE = df_LGE[\"content\"].to_numpy()\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 206
+        },
+        "id": "9qJDTU-6vzkk",
+        "outputId": "1b279f0e-7715-4d23-f524-08e8ba327f6c"
+      },
+      "outputs": [
+        {
+          "data": {
+            "text/html": [
+              "<div>\n",
+              "<style scoped>\n",
+              "    .dataframe tbody tr th:only-of-type {\n",
+              "        vertical-align: middle;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe tbody tr th {\n",
+              "        vertical-align: top;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe thead th {\n",
+              "        text-align: right;\n",
+              "    }\n",
+              "</style>\n",
+              "<table border=\"1\" class=\"dataframe\">\n",
+              "  <thead>\n",
+              "    <tr style=\"text-align: right;\">\n",
+              "      <th></th>\n",
+              "      <th>id</th>\n",
+              "      <th>tome</th>\n",
+              "      <th>rank</th>\n",
+              "      <th>domain</th>\n",
+              "      <th>remark</th>\n",
+              "      <th>content</th>\n",
+              "    </tr>\n",
+              "  </thead>\n",
+              "  <tbody>\n",
+              "    <tr>\n",
+              "      <th>0</th>\n",
+              "      <td>abrabeses-0</td>\n",
+              "      <td>1</td>\n",
+              "      <td>623</td>\n",
+              "      <td>geography</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>ABRABESES. Village dâ€™Espagne de la prov. de Za...</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>1</th>\n",
+              "      <td>accius-0</td>\n",
+              "      <td>1</td>\n",
+              "      <td>1076</td>\n",
+              "      <td>biography</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>ACCIUS, L. ou L. ATTIUS (170-94 av. J.-C.), po...</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>2</th>\n",
+              "      <td>achenbach-2</td>\n",
+              "      <td>1</td>\n",
+              "      <td>1357</td>\n",
+              "      <td>biography</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>ACHENBACH(Henri), administrateur prussien, nÃ© ...</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>3</th>\n",
+              "      <td>acireale-0</td>\n",
+              "      <td>1</td>\n",
+              "      <td>1513</td>\n",
+              "      <td>geography</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>ACIREALE. Yille de Sicile, de la province et d...</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>4</th>\n",
+              "      <td>actÃ©e-0</td>\n",
+              "      <td>1</td>\n",
+              "      <td>1731</td>\n",
+              "      <td>botany</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>ACTÃ‰E(ActÅ“a L.). Genre de plantes de la famill...</td>\n",
+              "    </tr>\n",
+              "  </tbody>\n",
+              "</table>\n",
+              "</div>"
+            ],
+            "text/plain": [
+              "            id  tome  ...  remark                                            content\n",
+              "0  abrabeses-0     1  ...     NaN  ABRABESES. Village dâ€™Espagne de la prov. de Za...\n",
+              "1     accius-0     1  ...     NaN  ACCIUS, L. ou L. ATTIUS (170-94 av. J.-C.), po...\n",
+              "2  achenbach-2     1  ...     NaN  ACHENBACH(Henri), administrateur prussien, nÃ© ...\n",
+              "3   acireale-0     1  ...     NaN  ACIREALE. Yille de Sicile, de la province et d...\n",
+              "4      actÃ©e-0     1  ...     NaN  ACTÃ‰E(ActÅ“a L.). Genre de plantes de la famill...\n",
+              "\n",
+              "[5 rows x 6 columns]"
+            ]
+          },
+          "execution_count": 10,
+          "metadata": {},
+          "output_type": "execute_result"
+        }
+      ],
+      "source": [
+        "df_LGE.head()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "71-fP61-OOwQ",
+        "outputId": "ef08b49e-0a9f-4653-e303-3163250af35b"
+      },
+      "outputs": [
+        {
+          "data": {
+            "text/plain": [
+              "(310, 6)"
+            ]
+          },
+          "execution_count": 15,
+          "metadata": {},
+          "output_type": "execute_result"
+        }
+      ],
+      "source": [
+        "df_LGE.shape"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "lFFed2EAI8oq"
+      },
+      "outputs": [],
+      "source": [
+        "def generate_prediction_dataloader(chosen_model, sentences_to_predict, batch_size = 8, max_len = 512):\n",
+        "    \"\"\"Tokenize raw texts and wrap them in a sequential DataLoader for inference.\n",
+        "\n",
+        "    Args:\n",
+        "        chosen_model: Name of the pretrained tokenizer to load, either\n",
+        "            'bert-base-multilingual-cased' or 'camembert-base'.\n",
+        "        sentences_to_predict: Iterable of texts to encode.\n",
+        "        batch_size: Number of sequences per batch.\n",
+        "        max_len: Fixed length of every encoded sequence; longer inputs are\n",
+        "            cut to this length, shorter ones are right-padded with 0.\n",
+        "\n",
+        "    Returns:\n",
+        "        A DataLoader yielding (input_ids, attention_mask) batches in input order.\n",
+        "\n",
+        "    Raises:\n",
+        "        ValueError: If `chosen_model` is not one of the supported names.\n",
+        "    \"\"\"\n",
+        "    if chosen_model == 'bert-base-multilingual-cased':\n",
+        "        print('Loading Bert Tokenizer...')\n",
+        "        tokenizer = BertTokenizer.from_pretrained(chosen_model)\n",
+        "    elif chosen_model == 'camembert-base':\n",
+        "        print('Loading Camembert Tokenizer...')\n",
+        "        tokenizer = CamembertTokenizer.from_pretrained(chosen_model)\n",
+        "    else:\n",
+        "        # Fail early instead of crashing later on an unbound `tokenizer`.\n",
+        "        raise ValueError('Unsupported model: %r' % (chosen_model,))\n",
+        "\n",
+        "    # Tokenize every text and map the tokens to their IDs; `encode` also adds\n",
+        "    # the special start/end tokens because add_special_tokens is True.\n",
+        "    encoded = [\n",
+        "        tokenizer.encode(sent, add_special_tokens = True)\n",
+        "        for sent in sentences_to_predict\n",
+        "    ]\n",
+        "\n",
+        "    # Bring every sequence to exactly `max_len` IDs: cut the long ones and\n",
+        "    # right-pad the short ones with 0.\n",
+        "    # NOTE(review): slicing drops the trailing special token of over-long inputs;\n",
+        "    # kept as is - presumably it mirrors the preprocessing used for training (confirm).\n",
+        "    padded = [ids[:max_len] + [0] * (max_len - len(ids)) for ids in encoded]\n",
+        "    # Explicit dtype/shape keep the array 2-D int64 even when there is no input.\n",
+        "    input_ids_test = np.array(padded, dtype=np.int64).reshape(len(padded), max_len)\n",
+        "\n",
+        "    # Attention mask: 1.0 for every ID greater than 0, 0.0 for the padding.\n",
+        "    attention_masks = (input_ids_test > 0).astype(np.float32)\n",
+        "\n",
+        "    # Convert to tensors.\n",
+        "    prediction_inputs = torch.tensor(input_ids_test)\n",
+        "    prediction_masks = torch.tensor(attention_masks)\n",
+        "\n",
+        "    # A sequential sampler keeps the batches in the same order as the input texts.\n",
+        "    prediction_data = TensorDataset(prediction_inputs, prediction_masks)\n",
+        "    prediction_sampler = SequentialSampler(prediction_data)\n",
+        "    prediction_dataloader = DataLoader(prediction_data, sampler=prediction_sampler, batch_size=batch_size)\n",
+        "\n",
+        "    return prediction_dataloader\n",
+        "\n",
+        "\n",
+        "\n",
+        "def predict_class_bertFineTuning(model, sentences_to_predict_dataloader):\n",
+        "    \"\"\"Predict a class index for every sequence served by the dataloader.\n",
+        "\n",
+        "    Args:\n",
+        "        model: Fine-tuned classifier, called as\n",
+        "            model(input_ids, token_type_ids=None, attention_mask=mask); the first\n",
+        "            element of its output is read as the logits. It is moved to the\n",
+        "            selected device and switched to evaluation mode.\n",
+        "        sentences_to_predict_dataloader: Iterable of (input_ids, attention_mask)\n",
+        "            tensor batches.\n",
+        "\n",
+        "    Returns:\n",
+        "        Flat list with one predicted class index per input sequence, in\n",
+        "        dataloader order; empty when the dataloader yields no batch.\n",
+        "    \"\"\"\n",
+        "    # Use the GPU when one is available, otherwise fall back to the CPU.\n",
+        "    if torch.cuda.is_available():\n",
+        "        device = torch.device(\"cuda\")\n",
+        "        print('There are %d GPU(s) available.' % torch.cuda.device_count())\n",
+        "        print('We will use the GPU:', torch.cuda.get_device_name(0))\n",
+        "    else:\n",
+        "        print('No GPU available, using the CPU instead.')\n",
+        "        device = torch.device(\"cpu\")\n",
+        "\n",
+        "    # The batches are sent to `device`, so the model has to live there as well.\n",
+        "    model.to(device)\n",
+        "    # Put model in evaluation mode\n",
+        "    model.eval()\n",
+        "\n",
+        "    pred_labels_ = []\n",
+        "    for batch in sentences_to_predict_dataloader:\n",
+        "        # Move the batch to the device and unpack it.\n",
+        "        b_input_ids, b_input_mask = tuple(t.to(device) for t in batch)\n",
+        "\n",
+        "        # No gradients are needed for prediction; skipping them saves memory and time.\n",
+        "        with torch.no_grad():\n",
+        "            outputs = model(b_input_ids, token_type_ids=None,\n",
+        "                            attention_mask=b_input_mask)\n",
+        "\n",
+        "        # Bring the logits back to the CPU as a numpy array.\n",
+        "        logits = outputs[0].detach().cpu().numpy()\n",
+        "\n",
+        "        # One row of logits per sequence: keep the index of the highest score.\n",
+        "        # Each batch is converted exactly once.\n",
+        "        pred_labels_.extend(np.argmax(logits, axis=1).flatten())\n",
+        "\n",
+        "    return pred_labels_\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "O9eer_kgI8rC",
+        "outputId": "94ea7418-14a8-4918-e210-caf0018f5989"
+      },
+      "outputs": [
+        {
+          "name": "stdout",
+          "output_type": "stream",
+          "text": [
+            "Loading Bert Tokenizer...\n"
+          ]
+        },
+        {
+          "name": "stderr",
+          "output_type": "stream",
+          "text": [
+            "Token indices sequence length is longer than the specified maximum sequence length for this model (1204 > 512). Running this sequence through the model will result in indexing errors\n"
+          ]
+        }
+      ],
+      "source": [
+        "data_loader = generate_prediction_dataloader('bert-base-multilingual-cased', data_LGE)\n",
+        "#data_loader = generate_prediction_dataloader('camembert-base', data_LGE)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "sFpAwbrBwF2h",
+        "outputId": "8d210732-619d-41f0-b6e2-ad9d06a85069"
+      },
+      "outputs": [
+        {
+          "name": "stdout",
+          "output_type": "stream",
+          "text": [
+            "There are 1 GPU(s) available.\n",
+            "We will use the GPU: Tesla P100-PCIE-16GB\n"
+          ]
+        }
+      ],
+      "source": [
+        "p = predict_class_bertFineTuning( model, data_loader )"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "51HF6-8UPSTc",
+        "outputId": "26bff792-eb8d-4e1a-efa4-a7a6c9d32bf9"
+      },
+      "outputs": [
+        {
+          "data": {
+            "text/plain": [
+              "310"
+            ]
+          },
+          "execution_count": 30,
+          "metadata": {},
+          "output_type": "execute_result"
+        }
+      ],
+      "source": [
+        "len(p)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "rFFGhaCvQHfh"
+      },
+      "outputs": [],
+      "source": [
+        ""
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "qgJ-O4rcQHiI",
+        "outputId": "bfe93dd6-4d89-4d5c-be0d-45e1c98c6b14"
+      },
+      "outputs": [
+        {
+          "data": {
+            "text/plain": [
+              "LabelEncoder()"
+            ]
+          },
+          "execution_count": 41,
+          "metadata": {},
+          "output_type": "execute_result"
+        }
+      ],
+      "source": [
+        "# The encoder should be saved along with the model;\n",
+        "# otherwise it has to be rebuilt from the training set to recover the class names.\n",
+        "encoder"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "QuST9wJoQHnS"
+      },
+      "outputs": [],
+      "source": [
+        "p2 = list(encoder.inverse_transform(p))"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "6ek7suq9QHqE",
+        "outputId": "6636983a-7eba-48c8-d884-f8fb437294dc"
+      },
+      "outputs": [
+        {
+          "data": {
+            "text/plain": [
+              "['GÃ©ographie',\n",
+              " 'GÃ©ographie',\n",
+              " 'GÃ©ographie',\n",
+              " 'GÃ©ographie',\n",
+              " 'Histoire naturelle',\n",
+              " 'Chimie',\n",
+              " 'Histoire naturelle',\n",
+              " 'GÃ©ographie',\n",
+              " 'MathÃ©matiques',\n",
+              " 'Histoire',\n",
+              " 'GÃ©ographie',\n",
+              " 'Musique',\n",
+              " 'Commerce',\n",
+              " 'Commerce',\n",
+              " 'GÃ©ographie',\n",
+              " 'GÃ©ographie',\n",
+              " 'Histoire',\n",
+              " 'GÃ©ographie',\n",
+              " 'Histoire naturelle',\n",
+              " 'GÃ©ographie',\n",
+              " 'Physique - [Sciences physico-mathÃ©matiques]',\n",
+              " 'Histoire naturelle',\n",
+              " 'Chimie',\n",
+              " 'Histoire',\n",
+              " 'Physique - [Sciences physico-mathÃ©matiques]',\n",
+              " 'Commerce',\n",
+              " 'GÃ©ographie',\n",
+              " 'GÃ©ographie',\n",
+              " 'GÃ©ographie',\n",
+              " 'GÃ©ographie',\n",
+              " 'GÃ©ographie',\n",
+              " 'Histoire',\n",
+              " 'Histoire naturelle',\n",
+              " 'MÃ©decine - Chirurgie',\n",
+              " 'GÃ©ographie',\n",
+              " 'Architecture',\n",
+              " 'Histoire naturelle',\n",
+              " 'Histoire naturelle',\n",
+              " 'GÃ©ographie',\n",
+              " 'Arts et mÃ©tiers',\n",
+              " 'GÃ©ographie',\n",
+              " 'Histoire naturelle',\n",
+              " 'Marine',\n",
+              " 'Histoire',\n",
+              " 'GÃ©ographie',\n",
+              " 'Architecture',\n",
+              " 'Histoire naturelle',\n",
+              " 'Beaux-arts',\n",
+              " 'Commerce',\n",
+              " 'GÃ©ographie',\n",
+              " 'GÃ©ographie',\n",
+              " 'GÃ©ographie',\n",
+              " 'GÃ©ographie',\n",
+              " 'GÃ©ographie',\n",
+              " 'GÃ©ographie',\n",
+              " 'Beaux-arts',\n",
+              " 'GÃ©ographie',\n",
+              " 'GÃ©ographie',\n",
+              " 'MÃ©decine - Chirurgie',\n",
+              " 'GÃ©ographie',\n",
+              " 'Histoire naturelle',\n",
+              " 'Chimie',\n",
+              " 'GÃ©ographie',\n",
+              " 'Commerce',\n",
+              " 'GÃ©ographie',\n",
+              " 'Religion',\n",
+              " 'Histoire naturelle',\n",
+              " 'GÃ©ographie',\n",
+              " 'Commerce',\n",
+              " 'Agriculture - Economie rustique',\n",
+              " 'GÃ©ographie',\n",
+              " 'GÃ©ographie',\n",
+              " 'Jeu',\n",
+              " 'GÃ©ographie',\n",
+              " 'GÃ©ographie',\n",
+              " 'GÃ©ographie',\n",
+              " 'GÃ©ographie',\n",
+              " 'GÃ©ographie',\n",
+              " 'GÃ©ographie',\n",
+              " 'Beaux-arts',\n",
+              " 'GÃ©ographie',\n",
+              " 'GÃ©ographie',\n",
+              " 'Beaux-arts',\n",
+              " 'Histoire naturelle',\n",
+              " 'GÃ©ographie',\n",
+              " 'Histoire naturelle',\n",
+              " 'GÃ©ographie',\n",
+              " 'Commerce',\n",
+              " 'GÃ©ographie',\n",
+              " 'GÃ©ographie',\n",
+              " 'Histoire naturelle',\n",
+              " 'Histoire',\n",
+              " 'Histoire naturelle',\n",
+              " 'Commerce',\n",
+              " 'Histoire',\n",
+              " 'Militaire (Art) - Guerre - Arme',\n",
+              " 'Histoire',\n",
+              " 'GÃ©ographie',\n",
+              " 'Commerce',\n",
+              " 'GÃ©ographie',\n",
+              " 'Histoire',\n",
+              " 'GÃ©ographie',\n",
+              " 'Religion',\n",
+              " 'GÃ©ographie',\n",
+              " 'Commerce',\n",
+              " 'Agriculture - Economie rustique',\n",
+              " 'Histoire',\n",
+              " 'GÃ©ographie',\n",
+              " 'GÃ©ographie',\n",
+              " 'MÃ©tiers',\n",
+              " 'Belles-lettres - PoÃ©sie',\n",
+              " 'Beaux-arts',\n",
+              " 'Religion',\n",
+              " 'Architecture',\n",
+              " 'Architecture',\n",
+              " 'Architecture',\n",
+              " 'GÃ©ographie',\n",
+              " 'Chimie',\n",
+              " 'GÃ©ographie',\n",
+              " 'GÃ©ographie',\n",
+              " 'Beaux-arts',\n",
+              " 'Histoire naturelle',\n",
+              " 'Militaire (Art) - Guerre - Arme',\n",
+              " 'GÃ©ographie',\n",
+              " 'Histoire naturelle',\n",
+              " 'MÃ©decine - Chirurgie',\n",
+              " 'GÃ©ographie',\n",
+              " 'GÃ©ographie',\n",
+              " 'GÃ©ographie',\n",
+              " 'GÃ©ographie',\n",
+              " 'MinÃ©ralogie',\n",
+              " 'Belles-lettres - PoÃ©sie',\n",
+              " 'Histoire naturelle',\n",
+              " 'GÃ©ographie',\n",
+              " 'Commerce',\n",
+              " 'GÃ©ographie',\n",
+              " 'MÃ©decine - Chirurgie',\n",
+              " 'GÃ©ographie',\n",
+              " 'GÃ©ographie',\n",
+              " 'Grammaire',\n",
+              " 'GÃ©ographie',\n",
+              " 'GÃ©ographie',\n",
+              " 'GÃ©ographie',\n",
+              " 'GÃ©ographie',\n",
+              " 'MathÃ©matiques',\n",
+              " 'GÃ©ographie',\n",
+              " 'MÃ©decine - Chirurgie',\n",
+              " 'Blason',\n",
+              " 'GÃ©ographie',\n",
+              " 'Commerce',\n",
+              " 'Histoire naturelle',\n",
+              " 'Militaire (Art) - Guerre - Arme',\n",
+              " 'GÃ©ographie',\n",
+              " 'AntiquitÃ©',\n",
+              " 'Agriculture - Economie rustique',\n",
+              " 'Chimie',\n",
+              " 'GÃ©ographie',\n",
+              " 'GÃ©ographie',\n",
+              " 'GÃ©ographie',\n",
+              " 'GÃ©ographie',\n",
+              " 'GÃ©ographie',\n",
+              " 'Commerce',\n",
+              " 'GÃ©ographie',\n",
+              " 'GÃ©ographie',\n",
+              " 'Histoire naturelle',\n",
+              " 'Belles-lettres - PoÃ©sie',\n",
+              " 'Histoire',\n",
+              " 'GÃ©ographie',\n",
+              " 'MÃ©tiers',\n",
+              " 'GÃ©ographie',\n",
+              " 'Commerce',\n",
+              " 'Arts et mÃ©tiers',\n",
+              " 'GÃ©ographie',\n",
+              " 'GÃ©ographie',\n",
+              " 'GÃ©ographie',\n",
+              " 'Commerce',\n",
+              " 'GÃ©ographie',\n",
+              " 'GÃ©ographie',\n",
+              " 'GÃ©ographie',\n",
+              " 'GÃ©ographie',\n",
+              " 'GÃ©ographie',\n",
+              " 'Beaux-arts',\n",
+              " 'GÃ©ographie',\n",
+              " 'Beaux-arts',\n",
+              " 'GÃ©ographie',\n",
+              " 'Commerce',\n",
+              " 'Musique',\n",
+              " 'MÃ©decine - Chirurgie',\n",
+              " 'Religion',\n",
+              " 'GÃ©ographie',\n",
+              " 'GÃ©ographie',\n",
+              " 'GÃ©ographie',\n",
+              " 'GÃ©ographie',\n",
+              " 'GÃ©ographie',\n",
+              " 'Histoire',\n",
+              " 'Droit - Jurisprudence',\n",
+              " 'Histoire',\n",
+              " 'MÃ©decine - Chirurgie',\n",
+              " 'Histoire',\n",
+              " 'Commerce',\n",
+              " 'GÃ©ographie',\n",
+              " 'GÃ©ographie',\n",
+              " 'GÃ©ographie',\n",
+              " 'Chimie',\n",
+              " 'AntiquitÃ©',\n",
+              " 'GÃ©ographie',\n",
+              " 'Commerce',\n",
+              " 'GÃ©ographie',\n",
+              " 'Histoire',\n",
+              " 'GÃ©ographie',\n",
+              " 'Commerce',\n",
+              " 'GÃ©ographie',\n",
+              " 'Commerce',\n",
+              " 'Beaux-arts',\n",
+              " 'Histoire',\n",
+              " 'GÃ©ographie',\n",
+              " 'Histoire naturelle',\n",
+              " 'AntiquitÃ©',\n",
+              " 'Grammaire',\n",
+              " 'GÃ©ographie',\n",
+              " 'GÃ©ographie',\n",
+              " 'GÃ©ographie',\n",
+              " 'Commerce',\n",
+              " 'GÃ©ographie',\n",
+              " 'Commerce',\n",
+              " 'GÃ©ographie',\n",
+              " 'GÃ©ographie',\n",
+              " 'Beaux-arts',\n",
+              " 'Beaux-arts',\n",
+              " 'GÃ©ographie',\n",
+              " 'Commerce',\n",
+              " 'Commerce',\n",
+              " 'GÃ©ographie',\n",
+              " 'GÃ©ographie',\n",
+              " 'GÃ©ographie',\n",
+              " 'Commerce',\n",
+              " 'GÃ©ographie',\n",
+              " 'GÃ©ographie',\n",
+              " 'GÃ©ographie',\n",
+              " 'GÃ©ographie',\n",
+              " 'GÃ©ographie',\n",
+              " 'GÃ©ographie',\n",
+              " 'Histoire',\n",
+              " 'Architecture',\n",
+              " 'Commerce',\n",
+              " 'AntiquitÃ©',\n",
+              " 'GÃ©ographie',\n",
+              " 'GÃ©ographie',\n",
+              " 'MÃ©decine - Chirurgie',\n",
+              " 'Histoire naturelle',\n",
+              " 'Histoire',\n",
+              " 'Commerce',\n",
+              " 'GÃ©ographie',\n",
+              " 'GÃ©ographie',\n",
+              " 'Commerce',\n",
+              " 'Anatomie',\n",
+              " 'Commerce',\n",
+              " 'Beaux-arts',\n",
+              " 'GÃ©ographie',\n",
+              " 'GÃ©ographie',\n",
+              " 'Commerce',\n",
+              " 'Histoire naturelle',\n",
+              " 'GÃ©ographie',\n",
+              " 'Beaux-arts',\n",
+              " 'Commerce',\n",
+              " 'Architecture',\n",
+              " 'Commerce',\n",
+              " 'AntiquitÃ©',\n",
+              " 'GÃ©ographie',\n",
+              " 'Commerce',\n",
+              " 'GÃ©ographie',\n",
+              " 'GÃ©ographie',\n",
+              " 'MÃ©decine - Chirurgie',\n",
+              " 'GÃ©ographie',\n",
+              " 'GÃ©ographie',\n",
+              " 'Commerce',\n",
+              " 'GÃ©ographie',\n",
+              " 'GÃ©ographie',\n",
+              " 'GÃ©ographie',\n",
+              " 'AntiquitÃ©',\n",
+              " 'GÃ©ographie',\n",
+              " 'GÃ©ographie',\n",
+              " 'Commerce',\n",
+              " 'GÃ©ographie',\n",
+              " 'GÃ©ographie',\n",
+              " 'GÃ©ographie',\n",
+              " 'GÃ©ographie',\n",
+              " 'Histoire',\n",
+              " 'Commerce',\n",
+              " 'GÃ©ographie',\n",
+              " 'GÃ©ographie',\n",
+              " 'Commerce',\n",
+              " 'GÃ©ographie',\n",
+              " 'GÃ©ographie',\n",
+              " 'AntiquitÃ©',\n",
+              " 'GÃ©ographie',\n",
+              " 'Religion',\n",
+              " 'GÃ©ographie',\n",
+              " 'GÃ©ographie',\n",
+              " 'Philosophie',\n",
+              " 'GÃ©ographie',\n",
+              " 'Chimie',\n",
+              " 'GÃ©ographie',\n",
+              " 'GÃ©ographie',\n",
+              " 'GÃ©ographie',\n",
+              " 'Beaux-arts',\n",
+              " 'Commerce',\n",
+              " 'Commerce',\n",
+              " 'GÃ©ographie',\n",
+              " 'GÃ©ographie']"
+            ]
+          },
+          "execution_count": 44,
+          "metadata": {},
+          "output_type": "execute_result"
+        }
+      ],
+      "source": [
+        "p2"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "XvdDj5PBQHtk"
+      },
+      "outputs": [],
+      "source": [
+        ""
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "t39Xs0j7QHXJ"
+      },
+      "outputs": [],
+      "source": [
+        "df_LGE['class_bert'] = p2"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 206
+        },
+        "id": "-VZ7geRmQHaD",
+        "outputId": "350a4122-5b1f-43e2-e372-2f628f665c4a"
+      },
+      "outputs": [
+        {
+          "data": {
+            "text/html": [
+              "<div>\n",
+              "<style scoped>\n",
+              "    .dataframe tbody tr th:only-of-type {\n",
+              "        vertical-align: middle;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe tbody tr th {\n",
+              "        vertical-align: top;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe thead th {\n",
+              "        text-align: right;\n",
+              "    }\n",
+              "</style>\n",
+              "<table border=\"1\" class=\"dataframe\">\n",
+              "  <thead>\n",
+              "    <tr style=\"text-align: right;\">\n",
+              "      <th></th>\n",
+              "      <th>id</th>\n",
+              "      <th>tome</th>\n",
+              "      <th>rank</th>\n",
+              "      <th>domain</th>\n",
+              "      <th>remark</th>\n",
+              "      <th>content</th>\n",
+              "      <th>class_bert</th>\n",
+              "    </tr>\n",
+              "  </thead>\n",
+              "  <tbody>\n",
+              "    <tr>\n",
+              "      <th>0</th>\n",
+              "      <td>abrabeses-0</td>\n",
+              "      <td>1</td>\n",
+              "      <td>623</td>\n",
+              "      <td>geography</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>ABRABESES. Village dâ€™Espagne de la prov. de Za...</td>\n",
+              "      <td>GÃ©ographie</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>1</th>\n",
+              "      <td>accius-0</td>\n",
+              "      <td>1</td>\n",
+              "      <td>1076</td>\n",
+              "      <td>biography</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>ACCIUS, L. ou L. ATTIUS (170-94 av. J.-C.), po...</td>\n",
+              "      <td>GÃ©ographie</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>2</th>\n",
+              "      <td>achenbach-2</td>\n",
+              "      <td>1</td>\n",
+              "      <td>1357</td>\n",
+              "      <td>biography</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>ACHENBACH(Henri), administrateur prussien, nÃ© ...</td>\n",
+              "      <td>GÃ©ographie</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>3</th>\n",
+              "      <td>acireale-0</td>\n",
+              "      <td>1</td>\n",
+              "      <td>1513</td>\n",
+              "      <td>geography</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>ACIREALE. Yille de Sicile, de la province et d...</td>\n",
+              "      <td>GÃ©ographie</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>4</th>\n",
+              "      <td>actÃ©e-0</td>\n",
+              "      <td>1</td>\n",
+              "      <td>1731</td>\n",
+              "      <td>botany</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>ACTÃ‰E(ActÅ“a L.). Genre de plantes de la famill...</td>\n",
+              "      <td>Histoire naturelle</td>\n",
+              "    </tr>\n",
+              "  </tbody>\n",
+              "</table>\n",
+              "</div>"
+            ],
+            "text/plain": [
+              "            id  ...          class_bert\n",
+              "0  abrabeses-0  ...          GÃ©ographie\n",
+              "1     accius-0  ...          GÃ©ographie\n",
+              "2  achenbach-2  ...          GÃ©ographie\n",
+              "3   acireale-0  ...          GÃ©ographie\n",
+              "4      actÃ©e-0  ...  Histoire naturelle\n",
+              "\n",
+              "[5 rows x 7 columns]"
+            ]
+          },
+          "execution_count": 46,
+          "metadata": {},
+          "output_type": "execute_result"
+        }
+      ],
+      "source": [
+        "df_LGE.head()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "3xkzdkrKQHwA"
+      },
+      "outputs": [],
+      "source": [
+        "df_LGE.to_csv(\"drive/MyDrive/Classification-EDdA/classification_LGE.tsv\", sep=\"\\t\")"
+      ]
+    }
+  ],
+  "metadata": {
+    "accelerator": "GPU",
+    "colab": {
+      "collapsed_sections": [],
+      "machine_shape": "hm",
+      "name": "EDdA-Classification_BertFineTuning.ipynb",
+      "provenance": []
+    },
+    "kernelspec": {
+      "display_name": "Python 3",
+      "name": "python3"
+    },
+    "language_info": {
+      "name": "python"
+    },
+    "widgets": {
+      "application/vnd.jupyter.widget-state+json": {
+        "274e505b5f354efc8de3ef26cc43e617": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HBoxModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_view_name": "HBoxView",
+            "_dom_classes": [],
+            "_model_name": "HBoxModel",
+            "_view_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_view_count": null,
+            "_view_module_version": "1.5.0",
+            "box_style": "",
+            "layout": "IPY_MODEL_f1f9d5b32f60473b86ae6b340d6c0850",
+            "_model_module": "@jupyter-widgets/controls",
+            "children": [
+              "IPY_MODEL_ad5e0e1439a94578a31b80c90dbf3247",
+              "IPY_MODEL_0779c8ea0ed24e64a800ae5dff6bc8ce",
+              "IPY_MODEL_7870340ac12b469c8ac19de3a47b6e67"
+            ]
+          }
+        },
+        "f1f9d5b32f60473b86ae6b340d6c0850": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_view_name": "LayoutView",
+            "grid_template_rows": null,
+            "right": null,
+            "justify_content": null,
+            "_view_module": "@jupyter-widgets/base",
+            "overflow": null,
+            "_model_module_version": "1.2.0",
+            "_view_count": null,
+            "flex_flow": null,
+            "width": null,
+            "min_width": null,
+            "border": null,
+            "align_items": null,
+            "bottom": null,
+            "_model_module": "@jupyter-widgets/base",
+            "top": null,
+            "grid_column": null,
+            "overflow_y": null,
+            "overflow_x": null,
+            "grid_auto_flow": null,
+            "grid_area": null,
+            "grid_template_columns": null,
+            "flex": null,
+            "_model_name": "LayoutModel",
+            "justify_items": null,
+            "grid_row": null,
+            "max_height": null,
+            "align_content": null,
+            "visibility": null,
+            "align_self": null,
+            "height": null,
+            "min_height": null,
+            "padding": null,
+            "grid_auto_rows": null,
+            "grid_gap": null,
+            "max_width": null,
+            "order": null,
+            "_view_module_version": "1.2.0",
+            "grid_template_areas": null,
+            "object_position": null,
+            "object_fit": null,
+            "grid_auto_columns": null,
+            "margin": null,
+            "display": null,
+            "left": null
+          }
+        },
+        "ad5e0e1439a94578a31b80c90dbf3247": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HTMLModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_view_name": "HTMLView",
+            "style": "IPY_MODEL_5f321455342348f49879a9ca8b392077",
+            "_dom_classes": [],
+            "description": "",
+            "_model_name": "HTMLModel",
+            "placeholder": "\u200b",
+            "_view_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "value": "Downloading: 100%",
+            "_view_count": null,
+            "_view_module_version": "1.5.0",
+            "description_tooltip": null,
+            "_model_module": "@jupyter-widgets/controls",
+            "layout": "IPY_MODEL_9420a47a2bf44ead8cff283f20566cda"
+          }
+        },
+        "0779c8ea0ed24e64a800ae5dff6bc8ce": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "FloatProgressModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_view_name": "ProgressView",
+            "style": "IPY_MODEL_99b785ea53744868b8b11e5e94936fcc",
+            "_dom_classes": [],
+            "description": "",
+            "_model_name": "FloatProgressModel",
+            "bar_style": "success",
+            "max": 810912,
+            "_view_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "value": 810912,
+            "_view_count": null,
+            "_view_module_version": "1.5.0",
+            "orientation": "horizontal",
+            "min": 0,
+            "description_tooltip": null,
+            "_model_module": "@jupyter-widgets/controls",
+            "layout": "IPY_MODEL_8d24b669a39b4876ac0a014dff678db1"
+          }
+        },
+        "7870340ac12b469c8ac19de3a47b6e67": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HTMLModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_view_name": "HTMLView",
+            "style": "IPY_MODEL_2cf386a8d14d43389374f79bfa922675",
+            "_dom_classes": [],
+            "description": "",
+            "_model_name": "HTMLModel",
+            "placeholder": "\u200b",
+            "_view_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "value": " 811k/811k [00:00&lt;00:00, 2.75MB/s]",
+            "_view_count": null,
+            "_view_module_version": "1.5.0",
+            "description_tooltip": null,
+            "_model_module": "@jupyter-widgets/controls",
+            "layout": "IPY_MODEL_2c44d9c11e704b70aa32904a23d1790c"
+          }
+        },
+        "5f321455342348f49879a9ca8b392077": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "DescriptionStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_view_name": "StyleView",
+            "_model_name": "DescriptionStyleModel",
+            "description_width": "",
+            "_view_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.5.0",
+            "_view_count": null,
+            "_view_module_version": "1.2.0",
+            "_model_module": "@jupyter-widgets/controls"
+          }
+        },
+        "9420a47a2bf44ead8cff283f20566cda": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_view_name": "LayoutView",
+            "grid_template_rows": null,
+            "right": null,
+            "justify_content": null,
+            "_view_module": "@jupyter-widgets/base",
+            "overflow": null,
+            "_model_module_version": "1.2.0",
+            "_view_count": null,
+            "flex_flow": null,
+            "width": null,
+            "min_width": null,
+            "border": null,
+            "align_items": null,
+            "bottom": null,
+            "_model_module": "@jupyter-widgets/base",
+            "top": null,
+            "grid_column": null,
+            "overflow_y": null,
+            "overflow_x": null,
+            "grid_auto_flow": null,
+            "grid_area": null,
+            "grid_template_columns": null,
+            "flex": null,
+            "_model_name": "LayoutModel",
+            "justify_items": null,
+            "grid_row": null,
+            "max_height": null,
+            "align_content": null,
+            "visibility": null,
+            "align_self": null,
+            "height": null,
+            "min_height": null,
+            "padding": null,
+            "grid_auto_rows": null,
+            "grid_gap": null,
+            "max_width": null,
+            "order": null,
+            "_view_module_version": "1.2.0",
+            "grid_template_areas": null,
+            "object_position": null,
+            "object_fit": null,
+            "grid_auto_columns": null,
+            "margin": null,
+            "display": null,
+            "left": null
+          }
+        },
+        "99b785ea53744868b8b11e5e94936fcc": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "ProgressStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_view_name": "StyleView",
+            "_model_name": "ProgressStyleModel",
+            "description_width": "",
+            "_view_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.5.0",
+            "_view_count": null,
+            "_view_module_version": "1.2.0",
+            "bar_color": null,
+            "_model_module": "@jupyter-widgets/controls"
+          }
+        },
+        "8d24b669a39b4876ac0a014dff678db1": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_view_name": "LayoutView",
+            "grid_template_rows": null,
+            "right": null,
+            "justify_content": null,
+            "_view_module": "@jupyter-widgets/base",
+            "overflow": null,
+            "_model_module_version": "1.2.0",
+            "_view_count": null,
+            "flex_flow": null,
+            "width": null,
+            "min_width": null,
+            "border": null,
+            "align_items": null,
+            "bottom": null,
+            "_model_module": "@jupyter-widgets/base",
+            "top": null,
+            "grid_column": null,
+            "overflow_y": null,
+            "overflow_x": null,
+            "grid_auto_flow": null,
+            "grid_area": null,
+            "grid_template_columns": null,
+            "flex": null,
+            "_model_name": "LayoutModel",
+            "justify_items": null,
+            "grid_row": null,
+            "max_height": null,
+            "align_content": null,
+            "visibility": null,
+            "align_self": null,
+            "height": null,
+            "min_height": null,
+            "padding": null,
+            "grid_auto_rows": null,
+            "grid_gap": null,
+            "max_width": null,
+            "order": null,
+            "_view_module_version": "1.2.0",
+            "grid_template_areas": null,
+            "object_position": null,
+            "object_fit": null,
+            "grid_auto_columns": null,
+            "margin": null,
+            "display": null,
+            "left": null
+          }
+        },
+        "2cf386a8d14d43389374f79bfa922675": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "DescriptionStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_view_name": "StyleView",
+            "_model_name": "DescriptionStyleModel",
+            "description_width": "",
+            "_view_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.5.0",
+            "_view_count": null,
+            "_view_module_version": "1.2.0",
+            "_model_module": "@jupyter-widgets/controls"
+          }
+        },
+        "2c44d9c11e704b70aa32904a23d1790c": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_view_name": "LayoutView",
+            "grid_template_rows": null,
+            "right": null,
+            "justify_content": null,
+            "_view_module": "@jupyter-widgets/base",
+            "overflow": null,
+            "_model_module_version": "1.2.0",
+            "_view_count": null,
+            "flex_flow": null,
+            "width": null,
+            "min_width": null,
+            "border": null,
+            "align_items": null,
+            "bottom": null,
+            "_model_module": "@jupyter-widgets/base",
+            "top": null,
+            "grid_column": null,
+            "overflow_y": null,
+            "overflow_x": null,
+            "grid_auto_flow": null,
+            "grid_area": null,
+            "grid_template_columns": null,
+            "flex": null,
+            "_model_name": "LayoutModel",
+            "justify_items": null,
+            "grid_row": null,
+            "max_height": null,
+            "align_content": null,
+            "visibility": null,
+            "align_self": null,
+            "height": null,
+            "min_height": null,
+            "padding": null,
+            "grid_auto_rows": null,
+            "grid_gap": null,
+            "max_width": null,
+            "order": null,
+            "_view_module_version": "1.2.0",
+            "grid_template_areas": null,
+            "object_position": null,
+            "object_fit": null,
+            "grid_auto_columns": null,
+            "margin": null,
+            "display": null,
+            "left": null
+          }
+        },
+        "0279837673b446b09aac18346213eb7e": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HBoxModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_view_name": "HBoxView",
+            "_dom_classes": [],
+            "_model_name": "HBoxModel",
+            "_view_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_view_count": null,
+            "_view_module_version": "1.5.0",
+            "box_style": "",
+            "layout": "IPY_MODEL_09b5f0bbd5c14bc289b0f92a22bb29ab",
+            "_model_module": "@jupyter-widgets/controls",
+            "children": [
+              "IPY_MODEL_69004a5069094f8c9d59d5136f627bef",
+              "IPY_MODEL_e96a95317b0945c58c8ff0e944c7593e",
+              "IPY_MODEL_68b69c9d3a274900bc2892848f725cb0"
+            ]
+          }
+        },
+        "09b5f0bbd5c14bc289b0f92a22bb29ab": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_view_name": "LayoutView",
+            "grid_template_rows": null,
+            "right": null,
+            "justify_content": null,
+            "_view_module": "@jupyter-widgets/base",
+            "overflow": null,
+            "_model_module_version": "1.2.0",
+            "_view_count": null,
+            "flex_flow": null,
+            "width": null,
+            "min_width": null,
+            "border": null,
+            "align_items": null,
+            "bottom": null,
+            "_model_module": "@jupyter-widgets/base",
+            "top": null,
+            "grid_column": null,
+            "overflow_y": null,
+            "overflow_x": null,
+            "grid_auto_flow": null,
+            "grid_area": null,
+            "grid_template_columns": null,
+            "flex": null,
+            "_model_name": "LayoutModel",
+            "justify_items": null,
+            "grid_row": null,
+            "max_height": null,
+            "align_content": null,
+            "visibility": null,
+            "align_self": null,
+            "height": null,
+            "min_height": null,
+            "padding": null,
+            "grid_auto_rows": null,
+            "grid_gap": null,
+            "max_width": null,
+            "order": null,
+            "_view_module_version": "1.2.0",
+            "grid_template_areas": null,
+            "object_position": null,
+            "object_fit": null,
+            "grid_auto_columns": null,
+            "margin": null,
+            "display": null,
+            "left": null
+          }
+        },
+        "69004a5069094f8c9d59d5136f627bef": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HTMLModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_view_name": "HTMLView",
+            "style": "IPY_MODEL_76007b17ffd2478fa4a86f959d4f1766",
+            "_dom_classes": [],
+            "description": "",
+            "_model_name": "HTMLModel",
+            "placeholder": "\u200b",
+            "_view_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "value": "Downloading: 100%",
+            "_view_count": null,
+            "_view_module_version": "1.5.0",
+            "description_tooltip": null,
+            "_model_module": "@jupyter-widgets/controls",
+            "layout": "IPY_MODEL_cb447c62ce1d4c1ea760175ae619fbb9"
+          }
+        },
+        "e96a95317b0945c58c8ff0e944c7593e": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "FloatProgressModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_view_name": "ProgressView",
+            "style": "IPY_MODEL_d4ad1a78750d49feaea584a82940bb7d",
+            "_dom_classes": [],
+            "description": "",
+            "_model_name": "FloatProgressModel",
+            "bar_style": "success",
+            "max": 1395301,
+            "_view_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "value": 1395301,
+            "_view_count": null,
+            "_view_module_version": "1.5.0",
+            "orientation": "horizontal",
+            "min": 0,
+            "description_tooltip": null,
+            "_model_module": "@jupyter-widgets/controls",
+            "layout": "IPY_MODEL_a9c47cb226ee41e18812f29f690992eb"
+          }
+        },
+        "68b69c9d3a274900bc2892848f725cb0": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HTMLModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_view_name": "HTMLView",
+            "style": "IPY_MODEL_c4c1675163bd4997bb44d7ea3967a356",
+            "_dom_classes": [],
+            "description": "",
+            "_model_name": "HTMLModel",
+            "placeholder": "\u200b",
+            "_view_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "value": " 1.40M/1.40M [00:00&lt;00:00, 6.57MB/s]",
+            "_view_count": null,
+            "_view_module_version": "1.5.0",
+            "description_tooltip": null,
+            "_model_module": "@jupyter-widgets/controls",
+            "layout": "IPY_MODEL_5032547e748f45a3b0cdd12fafe1dd05"
+          }
+        },
+        "76007b17ffd2478fa4a86f959d4f1766": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "DescriptionStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_view_name": "StyleView",
+            "_model_name": "DescriptionStyleModel",
+            "description_width": "",
+            "_view_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.5.0",
+            "_view_count": null,
+            "_view_module_version": "1.2.0",
+            "_model_module": "@jupyter-widgets/controls"
+          }
+        },
+        "cb447c62ce1d4c1ea760175ae619fbb9": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_view_name": "LayoutView",
+            "grid_template_rows": null,
+            "right": null,
+            "justify_content": null,
+            "_view_module": "@jupyter-widgets/base",
+            "overflow": null,
+            "_model_module_version": "1.2.0",
+            "_view_count": null,
+            "flex_flow": null,
+            "width": null,
+            "min_width": null,
+            "border": null,
+            "align_items": null,
+            "bottom": null,
+            "_model_module": "@jupyter-widgets/base",
+            "top": null,
+            "grid_column": null,
+            "overflow_y": null,
+            "overflow_x": null,
+            "grid_auto_flow": null,
+            "grid_area": null,
+            "grid_template_columns": null,
+            "flex": null,
+            "_model_name": "LayoutModel",
+            "justify_items": null,
+            "grid_row": null,
+            "max_height": null,
+            "align_content": null,
+            "visibility": null,
+            "align_self": null,
+            "height": null,
+            "min_height": null,
+            "padding": null,
+            "grid_auto_rows": null,
+            "grid_gap": null,
+            "max_width": null,
+            "order": null,
+            "_view_module_version": "1.2.0",
+            "grid_template_areas": null,
+            "object_position": null,
+            "object_fit": null,
+            "grid_auto_columns": null,
+            "margin": null,
+            "display": null,
+            "left": null
+          }
+        },
+        "d4ad1a78750d49feaea584a82940bb7d": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "ProgressStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_view_name": "StyleView",
+            "_model_name": "ProgressStyleModel",
+            "description_width": "",
+            "_view_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.5.0",
+            "_view_count": null,
+            "_view_module_version": "1.2.0",
+            "bar_color": null,
+            "_model_module": "@jupyter-widgets/controls"
+          }
+        },
+        "a9c47cb226ee41e18812f29f690992eb": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_view_name": "LayoutView",
+            "grid_template_rows": null,
+            "right": null,
+            "justify_content": null,
+            "_view_module": "@jupyter-widgets/base",
+            "overflow": null,
+            "_model_module_version": "1.2.0",
+            "_view_count": null,
+            "flex_flow": null,
+            "width": null,
+            "min_width": null,
+            "border": null,
+            "align_items": null,
+            "bottom": null,
+            "_model_module": "@jupyter-widgets/base",
+            "top": null,
+            "grid_column": null,
+            "overflow_y": null,
+            "overflow_x": null,
+            "grid_auto_flow": null,
+            "grid_area": null,
+            "grid_template_columns": null,
+            "flex": null,
+            "_model_name": "LayoutModel",
+            "justify_items": null,
+            "grid_row": null,
+            "max_height": null,
+            "align_content": null,
+            "visibility": null,
+            "align_self": null,
+            "height": null,
+            "min_height": null,
+            "padding": null,
+            "grid_auto_rows": null,
+            "grid_gap": null,
+            "max_width": null,
+            "order": null,
+            "_view_module_version": "1.2.0",
+            "grid_template_areas": null,
+            "object_position": null,
+            "object_fit": null,
+            "grid_auto_columns": null,
+            "margin": null,
+            "display": null,
+            "left": null
+          }
+        },
+        "c4c1675163bd4997bb44d7ea3967a356": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "DescriptionStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_view_name": "StyleView",
+            "_model_name": "DescriptionStyleModel",
+            "description_width": "",
+            "_view_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.5.0",
+            "_view_count": null,
+            "_view_module_version": "1.2.0",
+            "_model_module": "@jupyter-widgets/controls"
+          }
+        },
+        "5032547e748f45a3b0cdd12fafe1dd05": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_view_name": "LayoutView",
+            "grid_template_rows": null,
+            "right": null,
+            "justify_content": null,
+            "_view_module": "@jupyter-widgets/base",
+            "overflow": null,
+            "_model_module_version": "1.2.0",
+            "_view_count": null,
+            "flex_flow": null,
+            "width": null,
+            "min_width": null,
+            "border": null,
+            "align_items": null,
+            "bottom": null,
+            "_model_module": "@jupyter-widgets/base",
+            "top": null,
+            "grid_column": null,
+            "overflow_y": null,
+            "overflow_x": null,
+            "grid_auto_flow": null,
+            "grid_area": null,
+            "grid_template_columns": null,
+            "flex": null,
+            "_model_name": "LayoutModel",
+            "justify_items": null,
+            "grid_row": null,
+            "max_height": null,
+            "align_content": null,
+            "visibility": null,
+            "align_self": null,
+            "height": null,
+            "min_height": null,
+            "padding": null,
+            "grid_auto_rows": null,
+            "grid_gap": null,
+            "max_width": null,
+            "order": null,
+            "_view_module_version": "1.2.0",
+            "grid_template_areas": null,
+            "object_position": null,
+            "object_fit": null,
+            "grid_auto_columns": null,
+            "margin": null,
+            "display": null,
+            "left": null
+          }
+        },
+        "8f467553598f4dcc9abf55da79c11018": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HBoxModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_view_name": "HBoxView",
+            "_dom_classes": [],
+            "_model_name": "HBoxModel",
+            "_view_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_view_count": null,
+            "_view_module_version": "1.5.0",
+            "box_style": "",
+            "layout": "IPY_MODEL_9d7a8b3ecfe74f66b4238fe085c05906",
+            "_model_module": "@jupyter-widgets/controls",
+            "children": [
+              "IPY_MODEL_58b4f9e0366f4d4eba7f902af84b8965",
+              "IPY_MODEL_9383e09698ae4bd1820a4bca22e78315",
+              "IPY_MODEL_a189838c4de648198b0f4fc99c29ced8"
+            ]
+          }
+        },
+        "9d7a8b3ecfe74f66b4238fe085c05906": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_view_name": "LayoutView",
+            "grid_template_rows": null,
+            "right": null,
+            "justify_content": null,
+            "_view_module": "@jupyter-widgets/base",
+            "overflow": null,
+            "_model_module_version": "1.2.0",
+            "_view_count": null,
+            "flex_flow": null,
+            "width": null,
+            "min_width": null,
+            "border": null,
+            "align_items": null,
+            "bottom": null,
+            "_model_module": "@jupyter-widgets/base",
+            "top": null,
+            "grid_column": null,
+            "overflow_y": null,
+            "overflow_x": null,
+            "grid_auto_flow": null,
+            "grid_area": null,
+            "grid_template_columns": null,
+            "flex": null,
+            "_model_name": "LayoutModel",
+            "justify_items": null,
+            "grid_row": null,
+            "max_height": null,
+            "align_content": null,
+            "visibility": null,
+            "align_self": null,
+            "height": null,
+            "min_height": null,
+            "padding": null,
+            "grid_auto_rows": null,
+            "grid_gap": null,
+            "max_width": null,
+            "order": null,
+            "_view_module_version": "1.2.0",
+            "grid_template_areas": null,
+            "object_position": null,
+            "object_fit": null,
+            "grid_auto_columns": null,
+            "margin": null,
+            "display": null,
+            "left": null
+          }
+        },
+        "58b4f9e0366f4d4eba7f902af84b8965": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HTMLModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_view_name": "HTMLView",
+            "style": "IPY_MODEL_c4d981755d1d42b6940396b77bc251bc",
+            "_dom_classes": [],
+            "description": "",
+            "_model_name": "HTMLModel",
+            "placeholder": "\u200b",
+            "_view_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "value": "Downloading: 100%",
+            "_view_count": null,
+            "_view_module_version": "1.5.0",
+            "description_tooltip": null,
+            "_model_module": "@jupyter-widgets/controls",
+            "layout": "IPY_MODEL_12afa6b6474b401f9ff3f189cc0d3d11"
+          }
+        },
+        "9383e09698ae4bd1820a4bca22e78315": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "FloatProgressModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_view_name": "ProgressView",
+            "style": "IPY_MODEL_5978954f56fb40928b970f32d1634aaf",
+            "_dom_classes": [],
+            "description": "",
+            "_model_name": "FloatProgressModel",
+            "bar_style": "success",
+            "max": 508,
+            "_view_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "value": 508,
+            "_view_count": null,
+            "_view_module_version": "1.5.0",
+            "orientation": "horizontal",
+            "min": 0,
+            "description_tooltip": null,
+            "_model_module": "@jupyter-widgets/controls",
+            "layout": "IPY_MODEL_fe0e3b1df104484c98fbdcd31a04e427"
+          }
+        },
+        "a189838c4de648198b0f4fc99c29ced8": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HTMLModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_view_name": "HTMLView",
+            "style": "IPY_MODEL_2d1d632da0f740c38512c9ad779d3173",
+            "_dom_classes": [],
+            "description": "",
+            "_model_name": "HTMLModel",
+            "placeholder": "\u200b",
+            "_view_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "value": " 508/508 [00:00&lt;00:00, 16.9kB/s]",
+            "_view_count": null,
+            "_view_module_version": "1.5.0",
+            "description_tooltip": null,
+            "_model_module": "@jupyter-widgets/controls",
+            "layout": "IPY_MODEL_df95c20399dd4918bc7559a90886d4aa"
+          }
+        },
+        "c4d981755d1d42b6940396b77bc251bc": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "DescriptionStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_view_name": "StyleView",
+            "_model_name": "DescriptionStyleModel",
+            "description_width": "",
+            "_view_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.5.0",
+            "_view_count": null,
+            "_view_module_version": "1.2.0",
+            "_model_module": "@jupyter-widgets/controls"
+          }
+        },
+        "12afa6b6474b401f9ff3f189cc0d3d11": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_view_name": "LayoutView",
+            "grid_template_rows": null,
+            "right": null,
+            "justify_content": null,
+            "_view_module": "@jupyter-widgets/base",
+            "overflow": null,
+            "_model_module_version": "1.2.0",
+            "_view_count": null,
+            "flex_flow": null,
+            "width": null,
+            "min_width": null,
+            "border": null,
+            "align_items": null,
+            "bottom": null,
+            "_model_module": "@jupyter-widgets/base",
+            "top": null,
+            "grid_column": null,
+            "overflow_y": null,
+            "overflow_x": null,
+            "grid_auto_flow": null,
+            "grid_area": null,
+            "grid_template_columns": null,
+            "flex": null,
+            "_model_name": "LayoutModel",
+            "justify_items": null,
+            "grid_row": null,
+            "max_height": null,
+            "align_content": null,
+            "visibility": null,
+            "align_self": null,
+            "height": null,
+            "min_height": null,
+            "padding": null,
+            "grid_auto_rows": null,
+            "grid_gap": null,
+            "max_width": null,
+            "order": null,
+            "_view_module_version": "1.2.0",
+            "grid_template_areas": null,
+            "object_position": null,
+            "object_fit": null,
+            "grid_auto_columns": null,
+            "margin": null,
+            "display": null,
+            "left": null
+          }
+        },
+        "5978954f56fb40928b970f32d1634aaf": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "ProgressStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_view_name": "StyleView",
+            "_model_name": "ProgressStyleModel",
+            "description_width": "",
+            "_view_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.5.0",
+            "_view_count": null,
+            "_view_module_version": "1.2.0",
+            "bar_color": null,
+            "_model_module": "@jupyter-widgets/controls"
+          }
+        },
+        "fe0e3b1df104484c98fbdcd31a04e427": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_view_name": "LayoutView",
+            "grid_template_rows": null,
+            "right": null,
+            "justify_content": null,
+            "_view_module": "@jupyter-widgets/base",
+            "overflow": null,
+            "_model_module_version": "1.2.0",
+            "_view_count": null,
+            "flex_flow": null,
+            "width": null,
+            "min_width": null,
+            "border": null,
+            "align_items": null,
+            "bottom": null,
+            "_model_module": "@jupyter-widgets/base",
+            "top": null,
+            "grid_column": null,
+            "overflow_y": null,
+            "overflow_x": null,
+            "grid_auto_flow": null,
+            "grid_area": null,
+            "grid_template_columns": null,
+            "flex": null,
+            "_model_name": "LayoutModel",
+            "justify_items": null,
+            "grid_row": null,
+            "max_height": null,
+            "align_content": null,
+            "visibility": null,
+            "align_self": null,
+            "height": null,
+            "min_height": null,
+            "padding": null,
+            "grid_auto_rows": null,
+            "grid_gap": null,
+            "max_width": null,
+            "order": null,
+            "_view_module_version": "1.2.0",
+            "grid_template_areas": null,
+            "object_position": null,
+            "object_fit": null,
+            "grid_auto_columns": null,
+            "margin": null,
+            "display": null,
+            "left": null
+          }
+        },
+        "2d1d632da0f740c38512c9ad779d3173": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "DescriptionStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_view_name": "StyleView",
+            "_model_name": "DescriptionStyleModel",
+            "description_width": "",
+            "_view_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.5.0",
+            "_view_count": null,
+            "_view_module_version": "1.2.0",
+            "_model_module": "@jupyter-widgets/controls"
+          }
+        },
+        "df95c20399dd4918bc7559a90886d4aa": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_view_name": "LayoutView",
+            "grid_template_rows": null,
+            "right": null,
+            "justify_content": null,
+            "_view_module": "@jupyter-widgets/base",
+            "overflow": null,
+            "_model_module_version": "1.2.0",
+            "_view_count": null,
+            "flex_flow": null,
+            "width": null,
+            "min_width": null,
+            "border": null,
+            "align_items": null,
+            "bottom": null,
+            "_model_module": "@jupyter-widgets/base",
+            "top": null,
+            "grid_column": null,
+            "overflow_y": null,
+            "overflow_x": null,
+            "grid_auto_flow": null,
+            "grid_area": null,
+            "grid_template_columns": null,
+            "flex": null,
+            "_model_name": "LayoutModel",
+            "justify_items": null,
+            "grid_row": null,
+            "max_height": null,
+            "align_content": null,
+            "visibility": null,
+            "align_self": null,
+            "height": null,
+            "min_height": null,
+            "padding": null,
+            "grid_auto_rows": null,
+            "grid_gap": null,
+            "max_width": null,
+            "order": null,
+            "_view_module_version": "1.2.0",
+            "grid_template_areas": null,
+            "object_position": null,
+            "object_fit": null,
+            "grid_auto_columns": null,
+            "margin": null,
+            "display": null,
+            "left": null
+          }
+        },
+        "d09d664839d04303b8fef9ef895f6e4f": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HBoxModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_view_name": "HBoxView",
+            "_dom_classes": [],
+            "_model_name": "HBoxModel",
+            "_view_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_view_count": null,
+            "_view_module_version": "1.5.0",
+            "box_style": "",
+            "layout": "IPY_MODEL_500826e3813b414a820aa260bfde9e23",
+            "_model_module": "@jupyter-widgets/controls",
+            "children": [
+              "IPY_MODEL_70dd7428d78c44409308d62ba04917de",
+              "IPY_MODEL_152a31110bf9477989833eac91794688",
+              "IPY_MODEL_fcde5f4cf49846a0ad1b284aad98a38a"
+            ]
+          }
+        },
+        "500826e3813b414a820aa260bfde9e23": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_view_name": "LayoutView",
+            "grid_template_rows": null,
+            "right": null,
+            "justify_content": null,
+            "_view_module": "@jupyter-widgets/base",
+            "overflow": null,
+            "_model_module_version": "1.2.0",
+            "_view_count": null,
+            "flex_flow": null,
+            "width": null,
+            "min_width": null,
+            "border": null,
+            "align_items": null,
+            "bottom": null,
+            "_model_module": "@jupyter-widgets/base",
+            "top": null,
+            "grid_column": null,
+            "overflow_y": null,
+            "overflow_x": null,
+            "grid_auto_flow": null,
+            "grid_area": null,
+            "grid_template_columns": null,
+            "flex": null,
+            "_model_name": "LayoutModel",
+            "justify_items": null,
+            "grid_row": null,
+            "max_height": null,
+            "align_content": null,
+            "visibility": null,
+            "align_self": null,
+            "height": null,
+            "min_height": null,
+            "padding": null,
+            "grid_auto_rows": null,
+            "grid_gap": null,
+            "max_width": null,
+            "order": null,
+            "_view_module_version": "1.2.0",
+            "grid_template_areas": null,
+            "object_position": null,
+            "object_fit": null,
+            "grid_auto_columns": null,
+            "margin": null,
+            "display": null,
+            "left": null
+          }
+        },
+        "70dd7428d78c44409308d62ba04917de": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HTMLModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_view_name": "HTMLView",
+            "style": "IPY_MODEL_1bf6a76237454349aafc1e9284376879",
+            "_dom_classes": [],
+            "description": "",
+            "_model_name": "HTMLModel",
+            "placeholder": "â€‹",
+            "_view_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "value": "Downloading: 100%",
+            "_view_count": null,
+            "_view_module_version": "1.5.0",
+            "description_tooltip": null,
+            "_model_module": "@jupyter-widgets/controls",
+            "layout": "IPY_MODEL_4a23110523184d019a77368116f738f3"
+          }
+        },
+        "152a31110bf9477989833eac91794688": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "FloatProgressModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_view_name": "ProgressView",
+            "style": "IPY_MODEL_e86a1d4d268c4314897b58f7bba5ec25",
+            "_dom_classes": [],
+            "description": "",
+            "_model_name": "FloatProgressModel",
+            "bar_style": "success",
+            "max": 445032417,
+            "_view_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "value": 445032417,
+            "_view_count": null,
+            "_view_module_version": "1.5.0",
+            "orientation": "horizontal",
+            "min": 0,
+            "description_tooltip": null,
+            "_model_module": "@jupyter-widgets/controls",
+            "layout": "IPY_MODEL_826bd7d0a1f146ea9f7d53584468190c"
+          }
+        },
+        "fcde5f4cf49846a0ad1b284aad98a38a": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HTMLModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_view_name": "HTMLView",
+            "style": "IPY_MODEL_3592b1ed1d6d452b93c57b304943a1cb",
+            "_dom_classes": [],
+            "description": "",
+            "_model_name": "HTMLModel",
+            "placeholder": "â€‹",
+            "_view_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "value": " 445M/445M [00:14&lt;00:00, 39.2MB/s]",
+            "_view_count": null,
+            "_view_module_version": "1.5.0",
+            "description_tooltip": null,
+            "_model_module": "@jupyter-widgets/controls",
+            "layout": "IPY_MODEL_a159d62667734657a49ba3a96494f137"
+          }
+        },
+        "1bf6a76237454349aafc1e9284376879": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "DescriptionStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_view_name": "StyleView",
+            "_model_name": "DescriptionStyleModel",
+            "description_width": "",
+            "_view_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.5.0",
+            "_view_count": null,
+            "_view_module_version": "1.2.0",
+            "_model_module": "@jupyter-widgets/controls"
+          }
+        },
+        "4a23110523184d019a77368116f738f3": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_view_name": "LayoutView",
+            "grid_template_rows": null,
+            "right": null,
+            "justify_content": null,
+            "_view_module": "@jupyter-widgets/base",
+            "overflow": null,
+            "_model_module_version": "1.2.0",
+            "_view_count": null,
+            "flex_flow": null,
+            "width": null,
+            "min_width": null,
+            "border": null,
+            "align_items": null,
+            "bottom": null,
+            "_model_module": "@jupyter-widgets/base",
+            "top": null,
+            "grid_column": null,
+            "overflow_y": null,
+            "overflow_x": null,
+            "grid_auto_flow": null,
+            "grid_area": null,
+            "grid_template_columns": null,
+            "flex": null,
+            "_model_name": "LayoutModel",
+            "justify_items": null,
+            "grid_row": null,
+            "max_height": null,
+            "align_content": null,
+            "visibility": null,
+            "align_self": null,
+            "height": null,
+            "min_height": null,
+            "padding": null,
+            "grid_auto_rows": null,
+            "grid_gap": null,
+            "max_width": null,
+            "order": null,
+            "_view_module_version": "1.2.0",
+            "grid_template_areas": null,
+            "object_position": null,
+            "object_fit": null,
+            "grid_auto_columns": null,
+            "margin": null,
+            "display": null,
+            "left": null
+          }
+        },
+        "e86a1d4d268c4314897b58f7bba5ec25": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "ProgressStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_view_name": "StyleView",
+            "_model_name": "ProgressStyleModel",
+            "description_width": "",
+            "_view_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.5.0",
+            "_view_count": null,
+            "_view_module_version": "1.2.0",
+            "bar_color": null,
+            "_model_module": "@jupyter-widgets/controls"
+          }
+        },
+        "826bd7d0a1f146ea9f7d53584468190c": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_view_name": "LayoutView",
+            "grid_template_rows": null,
+            "right": null,
+            "justify_content": null,
+            "_view_module": "@jupyter-widgets/base",
+            "overflow": null,
+            "_model_module_version": "1.2.0",
+            "_view_count": null,
+            "flex_flow": null,
+            "width": null,
+            "min_width": null,
+            "border": null,
+            "align_items": null,
+            "bottom": null,
+            "_model_module": "@jupyter-widgets/base",
+            "top": null,
+            "grid_column": null,
+            "overflow_y": null,
+            "overflow_x": null,
+            "grid_auto_flow": null,
+            "grid_area": null,
+            "grid_template_columns": null,
+            "flex": null,
+            "_model_name": "LayoutModel",
+            "justify_items": null,
+            "grid_row": null,
+            "max_height": null,
+            "align_content": null,
+            "visibility": null,
+            "align_self": null,
+            "height": null,
+            "min_height": null,
+            "padding": null,
+            "grid_auto_rows": null,
+            "grid_gap": null,
+            "max_width": null,
+            "order": null,
+            "_view_module_version": "1.2.0",
+            "grid_template_areas": null,
+            "object_position": null,
+            "object_fit": null,
+            "grid_auto_columns": null,
+            "margin": null,
+            "display": null,
+            "left": null
+          }
+        },
+        "3592b1ed1d6d452b93c57b304943a1cb": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "DescriptionStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_view_name": "StyleView",
+            "_model_name": "DescriptionStyleModel",
+            "description_width": "",
+            "_view_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.5.0",
+            "_view_count": null,
+            "_view_module_version": "1.2.0",
+            "_model_module": "@jupyter-widgets/controls"
+          }
+        },
+        "a159d62667734657a49ba3a96494f137": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_view_name": "LayoutView",
+            "grid_template_rows": null,
+            "right": null,
+            "justify_content": null,
+            "_view_module": "@jupyter-widgets/base",
+            "overflow": null,
+            "_model_module_version": "1.2.0",
+            "_view_count": null,
+            "flex_flow": null,
+            "width": null,
+            "min_width": null,
+            "border": null,
+            "align_items": null,
+            "bottom": null,
+            "_model_module": "@jupyter-widgets/base",
+            "top": null,
+            "grid_column": null,
+            "overflow_y": null,
+            "overflow_x": null,
+            "grid_auto_flow": null,
+            "grid_area": null,
+            "grid_template_columns": null,
+            "flex": null,
+            "_model_name": "LayoutModel",
+            "justify_items": null,
+            "grid_row": null,
+            "max_height": null,
+            "align_content": null,
+            "visibility": null,
+            "align_self": null,
+            "height": null,
+            "min_height": null,
+            "padding": null,
+            "grid_auto_rows": null,
+            "grid_gap": null,
+            "max_width": null,
+            "order": null,
+            "_view_module_version": "1.2.0",
+            "grid_template_areas": null,
+            "object_position": null,
+            "object_fit": null,
+            "grid_auto_columns": null,
+            "margin": null,
+            "display": null,
+            "left": null
+          }
+        }
+      }
+    }
+  },
+  "nbformat": 4,
+  "nbformat_minor": 0
+}
\ No newline at end of file