From b1a80f6ab6f48ca71d3dcf554ef13d1ca0ccb614 Mon Sep 17 00:00:00 2001
From: Ludovic Moncla <moncla.ludovic@gmail.com>
Date: Sun, 27 Nov 2022 20:18:14 +0100
Subject: [PATCH] Create Predict_XAI.ipynb

---
 notebooks/Predict_XAI.ipynb | 3031 +++++++++++++++++++++++++++++++++++
 1 file changed, 3031 insertions(+)
 create mode 100644 notebooks/Predict_XAI.ipynb

diff --git a/notebooks/Predict_XAI.ipynb b/notebooks/Predict_XAI.ipynb
new file mode 100644
index 0000000..8cada19
--- /dev/null
+++ b/notebooks/Predict_XAI.ipynb
@@ -0,0 +1,3031 @@
+{
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "# BERT Predict classification\n",
+        "\n",
+        "## 1. Setup the environment\n",
+        "\n",
+        "### 1.1 Setup colab environment\n",
+        "\n",
+        "#### 1.1.1 Install packages"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "pwmZ5bBvgGNh",
+        "outputId": "fce0a8bf-1779-4079-c7ac-200ebb2678c5"
+      },
+      "outputs": [],
+      "source": [
+        "# %pip (unlike !pip) always installs into the environment of the running kernel.\n",
+        "%pip install transformers==4.10.3\n",
+        "%pip install transformers_interpret sentencepiece"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "#### 1.1.2 Check available RAM"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "WF0qFN_g3ekz",
+        "outputId": "f3a5f049-24ee-418f-fe5e-84c633234ad8"
+      },
+      "outputs": [],
+      "source": [
+        "from psutil import virtual_memory\n",
+        "\n",
+        "# Total memory reported for this runtime, in decimal gigabytes (bytes / 1e9).\n",
+        "ram_gb = virtual_memory().total / 1e9\n",
+        "print('Your runtime has {:.1f} gigabytes of available RAM\\n'.format(ram_gb))\n",
+        "\n",
+        "# 20 GB is the cut-off this notebook uses to call a runtime \"high-RAM\".\n",
+        "print('You are using a high-RAM runtime!' if ram_gb >= 20 else 'Not using a high-RAM runtime')"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "#### 1.1.3 Mount GoogleDrive"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "vL0S-s9Uofvn",
+        "outputId": "4b7efa4d-7f09-4c8e-bc98-99e6099ede32"
+      },
+      "outputs": [],
+      "source": [
+        "from google.colab import drive  # only importable inside a Google Colab runtime\n",
+        "drive.mount('/content/drive')  # makes Google Drive available under /content/drive"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "wSqbrupGMc1M"
+      },
+      "source": [
+        "### 1.2 Import libraries"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 7,
+      "metadata": {
+        "id": "SkErnwgMMbRj"
+      },
+      "outputs": [],
+      "source": [
+        "import os\n",
+        "import pandas as pd \n",
+        "import numpy as np\n",
+        "import pickle \n",
+        "import torch\n",
+        "from tqdm import tqdm\n",
+        "\n",
+        "from transformers import BertTokenizer, BertForSequenceClassification, CamembertTokenizer, CamembertForSequenceClassification\n",
+        "from transformers_interpret import SequenceClassificationExplainer\n",
+        "from torch.utils.data import TensorDataset, DataLoader, SequentialSampler"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "8hzEGHl7gmzk"
+      },
+      "source": [
+        "### 1.3 Setup GPU"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 2,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "dPOU-Efhf4ui",
+        "outputId": "121dd21e-f98c-483d-d6d1-2838f732a4e2"
+      },
+      "outputs": [
+        {
+          "name": "stdout",
+          "output_type": "stream",
+          "text": [
+            "We will use the GPU\n"
+          ]
+        }
+      ],
+      "source": [
+        "# Select the compute device: CUDA GPU first, then Apple MPS, otherwise the CPU.\n",
+        "# `torch.backends.mps` only exists in recent PyTorch builds, so it is looked up\n",
+        "# defensively instead of being accessed directly (which raises AttributeError).\n",
+        "mps_backend = getattr(torch.backends, \"mps\", None)\n",
+        "if torch.cuda.is_available():\n",
+        "    gpu_name = \"cuda\"\n",
+        "    print('There are %d GPU(s) available.' % torch.cuda.device_count())\n",
+        "    print('We will use the GPU:', torch.cuda.get_device_name(0))\n",
+        "elif mps_backend is not None and mps_backend.is_available() and mps_backend.is_built():\n",
+        "    # for MacOS\n",
+        "    gpu_name = \"mps\"\n",
+        "    print('We will use the GPU')\n",
+        "else:\n",
+        "    gpu_name = \"cpu\"\n",
+        "    print('No GPU available, using the CPU instead.')\n",
+        "device = torch.device(gpu_name)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 12,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "device = torch.device(\"cpu\")  # NOTE(review): overrides the device chosen in the previous cell\n",
+        "gpu_name = \"cpu\"  # presumably a CPU-only debugging switch; skip this cell to use the GPU — TODO confirm"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "## 2. Utils"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 3,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "def padding(content, max_len):\n",
+        "    \"\"\"Return a new list of exactly `max_len` ids: `content` truncated, or right-padded with 0.\"\"\"\n",
+        "    if len(content) > max_len:\n",
+        "        return content[:max_len]\n",
+        "    # Not longer than max_len: append as many zeros as are missing (none if already full).\n",
+        "    return content + [0] * (max_len - len(content))\n",
+        "\n",
+        "def generate_dataloader(tokenizer, sentences, batch_size = 8, max_len = 512):\n",
+        "    \"\"\"Encode `sentences` and serve them as fixed-length batches, in input order.\n",
+        "\n",
+        "    Each text is encoded with `tokenizer.encode` (special tokens added), then cut\n",
+        "    or right-padded with id 0 so that every row holds exactly `max_len` ids.\n",
+        "\n",
+        "    Args:\n",
+        "        tokenizer: object whose `encode(text, add_special_tokens=True)` returns a\n",
+        "            list of integer token ids.\n",
+        "        sentences: iterable of raw text strings.\n",
+        "        batch_size: number of rows per batch.\n",
+        "        max_len: length every encoded row is truncated or padded to.\n",
+        "\n",
+        "    Returns:\n",
+        "        A DataLoader over `(input_ids, attention_mask)` pairs, read sequentially.\n",
+        "        Ids are int64; masks are float32 with 1.0 on non-zero ids and 0.0 elsewhere.\n",
+        "    \"\"\"\n",
+        "\n",
+        "    # Truncation and padding are done by hand below, so `encode` is called without\n",
+        "    # `max_length`. Two consequences worth knowing:\n",
+        "    #   * the tokenizer may warn about texts longer than the model's maximum length;\n",
+        "    #   * a truncated row is a plain prefix of the encoding, so whatever special\n",
+        "    #     token `encode` appended at the end is cut off with the rest.\n",
+        "    input_ids_test = []\n",
+        "    for sent in sentences:\n",
+        "        encoded_sent = tokenizer.encode(\n",
+        "                            sent,                      # Sentence to encode.\n",
+        "                            add_special_tokens = True, # Add '[CLS]' and '[SEP]'\n",
+        "                    )[:max_len]\n",
+        "\n",
+        "        # Right-pad with id 0 up to max_len (no-op for rows that were truncated).\n",
+        "        n_missing = max_len - len(encoded_sent)\n",
+        "        input_ids_test.append(encoded_sent + [0] * n_missing)\n",
+        "\n",
+        "    # Build the id tensor directly from the padded rows. The explicit dtype keeps\n",
+        "    # the ids integral even when `sentences` is empty.\n",
+        "    inputs = torch.tensor(input_ids_test, dtype=torch.long)\n",
+        "\n",
+        "    # Attention mask: 1.0 where the id is non-zero, 0.0 on padding. Computed with\n",
+        "    # one tensor comparison instead of a Python loop over every token.\n",
+        "    masks = (inputs > 0).float()\n",
+        "\n",
+        "    # Create the DataLoader.\n",
+        "    data = TensorDataset(inputs, masks)\n",
+        "\n",
+        "    # Sequential sampling keeps batches aligned with the order of `sentences`.\n",
+        "    prediction_sampler = SequentialSampler(data)\n",
+        "\n",
+        "    return DataLoader(data, sampler=prediction_sampler, batch_size=batch_size)\n",
+        "\n",
+        "\n",
+        "def predict(model, dataloader, device):\n",
+        "    \"\"\"Run `model` on every batch of `dataloader` and return the predicted class ids.\n",
+        "\n",
+        "    Args:\n",
+        "        model: classifier called as `model(input_ids, token_type_ids=None,\n",
+        "            attention_mask=mask)`; the first element of its output must be the logits.\n",
+        "        dataloader: iterable of `(input_ids, attention_mask)` tensor batches.\n",
+        "        device: device the batch tensors are moved to before the forward pass.\n",
+        "            NOTE(review): the model itself is not moved here — it presumably has to\n",
+        "            be on `device` already; confirm at the call site.\n",
+        "\n",
+        "    Returns:\n",
+        "        A flat list with one predicted class index per input row, in dataloader\n",
+        "        order. The list is empty when `dataloader` yields no batch.\n",
+        "    \"\"\"\n",
+        "\n",
+        "    # Put model in evaluation mode\n",
+        "    model.eval()\n",
+        "\n",
+        "    # Predicted class ids collected across all batches.\n",
+        "    pred_labels_ = []\n",
+        "\n",
+        "    for batch in dataloader:\n",
+        "        # Move the batch to the target device and unpack it.\n",
+        "        b_input_ids, b_input_mask = tuple(t.to(device) for t in batch)\n",
+        "\n",
+        "        # Telling the model not to compute or store gradients, saving memory and\n",
+        "        # speeding up prediction\n",
+        "        with torch.no_grad():\n",
+        "            # Forward pass, calculate logit predictions\n",
+        "            outputs = model(b_input_ids, token_type_ids=None,\n",
+        "                            attention_mask=b_input_mask)\n",
+        "\n",
+        "        # Bring the logits back to the CPU as a NumPy array, one row per input.\n",
+        "        logits = outputs[0].detach().cpu().numpy()\n",
+        "\n",
+        "        # Keep only the arg-max class of each row of *this* batch. Reducing batch by\n",
+        "        # batch keeps the loop linear in the number of batches and means the raw\n",
+        "        # logits never have to be accumulated.\n",
+        "        batch_labels = np.argmax(logits, axis=1).flatten()\n",
+        "        pred_labels_.extend(batch_labels)\n",
+        "\n",
+        "    return pred_labels_\n",
+        "\n",
+        "\n",
+        "def text_folder_to_dataframe(path):\n",
+        "    \"\"\"Read every `<path>/<tome>/<name>.txt` article into one DataFrame row.\n",
+        "    Entries of `path` that are not directories, and files not ending in `.txt`, are\n",
+        "    skipped. Columns: id (tome + name), tome, filename, content, nb_words.\n",
+        "    \"\"\"\n",
+        "    data = []\n",
+        "    for tome in sorted(os.listdir(path)):\n",
+        "        tome_dir = os.path.join(path, tome)\n",
+        "        if not os.path.isdir(tome_dir):\n",
+        "            continue\n",
+        "        for article in tqdm(sorted(os.listdir(tome_dir))):\n",
+        "            filename, extension = article[:-4], article[-4:]\n",
+        "            if extension != \".txt\":\n",
+        "                continue\n",
+        "            # Explicit encoding (assumes the corpus is UTF-8) instead of the platform default.\n",
+        "            with open(os.path.join(tome_dir, article), encoding=\"utf-8\") as f:\n",
+        "                content = f.read()\n",
+        "            data.append([tome + filename, tome, filename, content, len(content.split(' '))])\n",
+        "    return pd.DataFrame(data, columns=['id', 'tome', 'filename', 'content', 'nb_words'])\n"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "c5QKcXulhNJ-"
+      },
+      "source": [
+        "## 3. Load Data"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 3,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "!wget https://api.nakala.fr/data/10.34847/nkl.74eb1xfd/e522413b58b04ab7c283f8fa68642e9cb69ab5c5"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "!unzip e522413b58b04ab7c283f8fa68642e9cb69ab5c5"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 19,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "# Corpus folder: ./Text by default (presumably where the archive above unpacks — confirm); set LGE_TEXT_DIR to use another copy.\n",
+        "input_path = os.environ.get(\"LGE_TEXT_DIR\", \"./Text\")"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 20,
+      "metadata": {},
+      "outputs": [
+        {
+          "name": "stderr",
+          "output_type": "stream",
+          "text": [
+            "100%|██████████| 5201/5201 [00:00<00:00, 6137.32it/s]\n",
+            "100%|██████████| 5704/5704 [00:00<00:00, 6325.35it/s]\n",
+            "100%|██████████| 5214/5214 [00:00<00:00, 5986.96it/s]\n",
+            "100%|██████████| 5528/5528 [00:00<00:00, 6213.04it/s]\n",
+            "100%|██████████| 6963/6963 [00:01<00:00, 5686.82it/s]\n",
+            "100%|██████████| 5983/5983 [00:00<00:00, 6120.28it/s]\n",
+            "100%|██████████| 13713/13713 [00:01<00:00, 7057.45it/s]\n",
+            "100%|██████████| 9202/9202 [00:01<00:00, 7161.23it/s]\n",
+            "100%|██████████| 10704/10704 [00:01<00:00, 7208.53it/s]\n",
+            "100%|██████████| 6378/6378 [00:00<00:00, 6988.23it/s]\n",
+            "100%|██████████| 8476/8476 [00:01<00:00, 7098.48it/s]\n",
+            "100%|██████████| 6576/6576 [00:00<00:00, 6996.19it/s]\n",
+            "100%|██████████| 7797/7797 [00:01<00:00, 6981.47it/s]\n",
+            "100%|██████████| 9027/9027 [00:01<00:00, 6563.44it/s]\n",
+            "100%|██████████| 8383/8383 [00:01<00:00, 7017.88it/s]\n",
+            "100%|██████████| 7319/7319 [00:01<00:00, 7064.77it/s]\n",
+            "100%|██████████| 10269/10269 [00:01<00:00, 6864.36it/s]\n",
+            "100%|██████████| 7512/7512 [00:01<00:00, 6854.61it/s]\n",
+            "100%|██████████| 6701/6701 [00:01<00:00, 6501.17it/s]\n",
+            "100%|██████████| 7343/7343 [00:01<00:00, 6933.17it/s]\n",
+            "100%|██████████| 7273/7273 [00:01<00:00, 6877.68it/s]\n",
+            "100%|██████████| 10877/10877 [00:01<00:00, 6410.62it/s]\n",
+            "100%|██████████| 4731/4731 [00:00<00:00, 6429.83it/s]\n",
+            "100%|██████████| 8698/8698 [00:01<00:00, 6076.43it/s]\n",
+            "100%|██████████| 9675/9675 [00:01<00:00, 6399.53it/s]\n",
+            "100%|██████████| 5710/5710 [00:00<00:00, 6343.15it/s]\n",
+            "100%|██████████| 5664/5664 [00:00<00:00, 6450.75it/s]\n",
+            "100%|██████████| 5828/5828 [00:00<00:00, 6425.49it/s]\n",
+            "100%|██████████| 5721/5721 [00:00<00:00, 6536.62it/s]\n",
+            "100%|██████████| 6110/6110 [00:00<00:00, 6391.42it/s]\n",
+            "100%|██████████| 5195/5195 [00:00<00:00, 6016.13it/s]\n"
+          ]
+        }
+      ],
+      "source": [
+        "# Build the corpus DataFrame from the text folder (alternative source: data/LGE_withContent.tsv, tab-separated).\n",
+        "df_LGE = text_folder_to_dataframe(input_path)\n",
+        "data_LGE = df_LGE[\"content\"].values"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 5,
+      "metadata": {},
+      "outputs": [
+        {
+          "data": {
+            "text/html": [
+              "<div>\n",
+              "<style scoped>\n",
+              "    .dataframe tbody tr th:only-of-type {\n",
+              "        vertical-align: middle;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe tbody tr th {\n",
+              "        vertical-align: top;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe thead th {\n",
+              "        text-align: right;\n",
+              "    }\n",
+              "</style>\n",
+              "<table border=\"1\" class=\"dataframe\">\n",
+              "  <thead>\n",
+              "    <tr style=\"text-align: right;\">\n",
+              "      <th></th>\n",
+              "      <th>id</th>\n",
+              "      <th>tome</th>\n",
+              "      <th>rank</th>\n",
+              "      <th>domain</th>\n",
+              "      <th>remark</th>\n",
+              "      <th>content</th>\n",
+              "    </tr>\n",
+              "  </thead>\n",
+              "  <tbody>\n",
+              "    <tr>\n",
+              "      <th>0</th>\n",
+              "      <td>abrabeses-0</td>\n",
+              "      <td>1</td>\n",
+              "      <td>623</td>\n",
+              "      <td>geography</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>ABRABESES. Village d’Espagne de la prov. de Za...</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>1</th>\n",
+              "      <td>accius-0</td>\n",
+              "      <td>1</td>\n",
+              "      <td>1076</td>\n",
+              "      <td>biography</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>ACCIUS, L. ou L. ATTIUS (170-94 av. J.-C.), po...</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>2</th>\n",
+              "      <td>achenbach-2</td>\n",
+              "      <td>1</td>\n",
+              "      <td>1357</td>\n",
+              "      <td>biography</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>ACHENBACH(Henri), administrateur prussien, né ...</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>3</th>\n",
+              "      <td>acireale-0</td>\n",
+              "      <td>1</td>\n",
+              "      <td>1513</td>\n",
+              "      <td>geography</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>ACIREALE. Yille de Sicile, de la province et d...</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>4</th>\n",
+              "      <td>actée-0</td>\n",
+              "      <td>1</td>\n",
+              "      <td>1731</td>\n",
+              "      <td>botany</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>ACTÉE(Actœa L.). Genre de plantes de la famill...</td>\n",
+              "    </tr>\n",
+              "  </tbody>\n",
+              "</table>\n",
+              "</div>"
+            ],
+            "text/plain": [
+              "            id  tome  rank     domain remark  \\\n",
+              "0  abrabeses-0     1   623  geography    NaN   \n",
+              "1     accius-0     1  1076  biography    NaN   \n",
+              "2  achenbach-2     1  1357  biography    NaN   \n",
+              "3   acireale-0     1  1513  geography    NaN   \n",
+              "4      actée-0     1  1731     botany    NaN   \n",
+              "\n",
+              "                                             content  \n",
+              "0  ABRABESES. Village d’Espagne de la prov. de Za...  \n",
+              "1  ACCIUS, L. ou L. ATTIUS (170-94 av. J.-C.), po...  \n",
+              "2  ACHENBACH(Henri), administrateur prussien, né ...  \n",
+              "3  ACIREALE. Yille de Sicile, de la province et d...  \n",
+              "4  ACTÉE(Actœa L.). Genre de plantes de la famill...  "
+            ]
+          },
+          "execution_count": 5,
+          "metadata": {},
+          "output_type": "execute_result"
+        }
+      ],
+      "source": [
+        "df_LGE.head()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 6,
+      "metadata": {},
+      "outputs": [
+        {
+          "data": {
+            "text/plain": [
+              "(310, 6)"
+            ]
+          },
+          "execution_count": 6,
+          "metadata": {},
+          "output_type": "execute_result"
+        }
+      ],
+      "source": [
+        "df_LGE.shape"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "## 4. Load model and predict\n",
+        "\n",
+        "### 4.1 BERT"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 14,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "# Project root (on Colab: \"drive/MyDrive/Classification-EDdA/\").\n",
+        "path = \"../\"\n",
+        "# Checkpoint family to load; \"camembert-base\" is the other value handled below.\n",
+        "model_name = \"bert-base-multilingual-cased\"\n",
+        "model_path = f\"{path}models/model_{model_name}_s10000.pt\""
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 16,
+      "metadata": {},
+      "outputs": [
+        {
+          "name": "stdout",
+          "output_type": "stream",
+          "text": [
+            "Loading Bert Tokenizer...\n"
+          ]
+        }
+      ],
+      "source": [
+        "# Log label and tokenizer class for each supported checkpoint name.\n",
+        "TOKENIZER_BY_MODEL = {'bert-base-multilingual-cased': ('Bert', BertTokenizer), 'camembert-base': ('Camembert', CamembertTokenizer)}\n",
+        "if model_name not in TOKENIZER_BY_MODEL:\n",
+        "    raise ValueError('Unsupported model_name: %r' % model_name)  # fail here rather than later with a NameError\n",
+        "print('Loading %s Tokenizer...' % TOKENIZER_BY_MODEL[model_name][0])\n",
+        "tokenizer = TOKENIZER_BY_MODEL[model_name][1].from_pretrained(model_name)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 17,
+      "metadata": {},
+      "outputs": [
+        {
+          "name": "stderr",
+          "output_type": "stream",
+          "text": [
+            "Token indices sequence length is longer than the specified maximum sequence length for this model (1204 > 512). Running this sequence through the model will result in indexing errors\n"
+          ]
+        }
+      ],
+      "source": [
+        "data_loader = generate_dataloader(tokenizer, data_LGE)  # uses the defaults batch_size=8, max_len=512"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 18,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "model = (CamembertForSequenceClassification if model_name == 'camembert-base' else BertForSequenceClassification).from_pretrained(model_path).to(device)  # class follows model_name; same device as the one passed to predict()"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "### 4.2 Predict"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 19,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "_fzgS5USJeAF",
+        "outputId": "be4a5506-76ed-4eef-bb3c-fe2bb77c6e4d"
+      },
+      "outputs": [],
+      "source": [
+        "pred = predict(model, data_loader, device)  # flat list of predicted class indices, in data_loader order"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 22,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "encoder_filename = \"models/label_encoder.pkl\"  # pickled label encoder, relative to `path`\n",
+        "with open(path + encoder_filename, 'rb') as file:  # NOTE: unpickling can run arbitrary code; only load a trusted file\n",
+        "      encoder = pickle.load(file)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 23,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "p2 = list(encoder.inverse_transform(pred))  # predicted class indices -> the encoder's original labels"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 24,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "df_LGE['domain'] = p2  # stores the decoded prediction of each row in the 'domain' column (modifies df_LGE in place)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 26,
+      "metadata": {},
+      "outputs": [
+        {
+          "data": {
+            "text/html": [
+              "<div>\n",
+              "<style scoped>\n",
+              "    .dataframe tbody tr th:only-of-type {\n",
+              "        vertical-align: middle;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe tbody tr th {\n",
+              "        vertical-align: top;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe thead th {\n",
+              "        text-align: right;\n",
+              "    }\n",
+              "</style>\n",
+              "<table border=\"1\" class=\"dataframe\">\n",
+              "  <thead>\n",
+              "    <tr style=\"text-align: right;\">\n",
+              "      <th></th>\n",
+              "      <th>id</th>\n",
+              "      <th>tome</th>\n",
+              "      <th>rank</th>\n",
+              "      <th>domain</th>\n",
+              "      <th>remark</th>\n",
+              "      <th>content</th>\n",
+              "      <th>class_bert</th>\n",
+              "    </tr>\n",
+              "  </thead>\n",
+              "  <tbody>\n",
+              "    <tr>\n",
+              "      <th>0</th>\n",
+              "      <td>abrabeses-0</td>\n",
+              "      <td>1</td>\n",
+              "      <td>623</td>\n",
+              "      <td>geography</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>ABRABESES. Village d’Espagne de la prov. de Za...</td>\n",
+              "      <td>Géographie</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>1</th>\n",
+              "      <td>accius-0</td>\n",
+              "      <td>1</td>\n",
+              "      <td>1076</td>\n",
+              "      <td>biography</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>ACCIUS, L. ou L. ATTIUS (170-94 av. J.-C.), po...</td>\n",
+              "      <td>Belles-lettres - Poésie</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>2</th>\n",
+              "      <td>achenbach-2</td>\n",
+              "      <td>1</td>\n",
+              "      <td>1357</td>\n",
+              "      <td>biography</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>ACHENBACH(Henri), administrateur prussien, né ...</td>\n",
+              "      <td>Histoire</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>3</th>\n",
+              "      <td>acireale-0</td>\n",
+              "      <td>1</td>\n",
+              "      <td>1513</td>\n",
+              "      <td>geography</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>ACIREALE. Yille de Sicile, de la province et d...</td>\n",
+              "      <td>Géographie</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>4</th>\n",
+              "      <td>actée-0</td>\n",
+              "      <td>1</td>\n",
+              "      <td>1731</td>\n",
+              "      <td>botany</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>ACTÉE(Actœa L.). Genre de plantes de la famill...</td>\n",
+              "      <td>Histoire naturelle</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>5</th>\n",
+              "      <td>adulteration-0</td>\n",
+              "      <td>1</td>\n",
+              "      <td>2197</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>cross reference</td>\n",
+              "      <td>ADULTERATION. Altération d’un médicament, d’un...</td>\n",
+              "      <td>Chimie</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>6</th>\n",
+              "      <td>aérides-0</td>\n",
+              "      <td>1</td>\n",
+              "      <td>2334</td>\n",
+              "      <td>botany</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>AÉRIDES{Aérides Lour.). Genres de plantes de l...</td>\n",
+              "      <td>Histoire naturelle</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>7</th>\n",
+              "      <td>ager-0</td>\n",
+              "      <td>1</td>\n",
+              "      <td>2710</td>\n",
+              "      <td>biography</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>AGERouAGERIUS (Nicolaus), médecin alsacien, né...</td>\n",
+              "      <td>Histoire</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>8</th>\n",
+              "      <td>aigu-1</td>\n",
+              "      <td>1</td>\n",
+              "      <td>3160</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>cross reference</td>\n",
+              "      <td>AIGU1 LH E (V. Raimond d’).\\n</td>\n",
+              "      <td>Marine</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>9</th>\n",
+              "      <td>alavika-0</td>\n",
+              "      <td>1</td>\n",
+              "      <td>3664</td>\n",
+              "      <td>theology</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>ALAVIKA« qui est d'Alava »(V. ce mot) : Bhikch...</td>\n",
+              "      <td>Religion</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>10</th>\n",
+              "      <td>allassac-0</td>\n",
+              "      <td>2</td>\n",
+              "      <td>755</td>\n",
+              "      <td>geography</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>ALLASSAC. Com. du dép. de la Corrèze, arr. de ...</td>\n",
+              "      <td>Géographie</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>11</th>\n",
+              "      <td>allegretto-0</td>\n",
+              "      <td>2</td>\n",
+              "      <td>786</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>cross reference</td>\n",
+              "      <td>ALLEGRETTO(V. Allegro).\\n</td>\n",
+              "      <td>Musique</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>12</th>\n",
+              "      <td>alleuze-0</td>\n",
+              "      <td>2</td>\n",
+              "      <td>908</td>\n",
+              "      <td>geography</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>ALLEUZE. Com. du dép. du Cantal, arr. et cant....</td>\n",
+              "      <td>Géographie</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>13</th>\n",
+              "      <td>alliat-0</td>\n",
+              "      <td>2</td>\n",
+              "      <td>933</td>\n",
+              "      <td>geography</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>ALLIAT. Com. du dép. de l’Ariège, arr. de Foix...</td>\n",
+              "      <td>Géographie</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>14</th>\n",
+              "      <td>amanty-0</td>\n",
+              "      <td>2</td>\n",
+              "      <td>1651</td>\n",
+              "      <td>geography</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>AMANTY. Corn, du dép. de la Meuse, arr. de Com...</td>\n",
+              "      <td>Géographie</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>15</th>\n",
+              "      <td>âmasserah-0</td>\n",
+              "      <td>2</td>\n",
+              "      <td>1701</td>\n",
+              "      <td>geography</td>\n",
+              "      <td>explicit domain</td>\n",
+              "      <td>ÂMASSERAH, AMASR1 ou AMASRAH (Géogr.). Ville d...</td>\n",
+              "      <td>Géographie</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>16</th>\n",
+              "      <td>a-118</td>\n",
+              "      <td>2</td>\n",
+              "      <td>2971</td>\n",
+              "      <td>history</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>AN Cl LIA. Boucliers sacrés des Romains, au no...</td>\n",
+              "      <td>Antiquité</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>17</th>\n",
+              "      <td>androclès-0</td>\n",
+              "      <td>2</td>\n",
+              "      <td>3261</td>\n",
+              "      <td>mythology</td>\n",
+              "      <td>explicit domain</td>\n",
+              "      <td>ANDROCLÈS(Myth.), un fils d’Eole qui régna sur...</td>\n",
+              "      <td>Antiquité</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>18</th>\n",
+              "      <td>anfouson-0</td>\n",
+              "      <td>2</td>\n",
+              "      <td>3394</td>\n",
+              "      <td>zoology</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>ANFOUSON. Nom donné à Nice au Néron brun\\n(V. ...</td>\n",
+              "      <td>Histoire naturelle</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>19</th>\n",
+              "      <td>anicet-bourgeois-0</td>\n",
+              "      <td>2</td>\n",
+              "      <td>3717</td>\n",
+              "      <td>biography</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>ANICET-BOURGEOIS(Auguste Anicet, connu sous le...</td>\n",
+              "      <td>Belles-lettres - Poésie</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>20</th>\n",
+              "      <td>anomalistique-0</td>\n",
+              "      <td>3</td>\n",
+              "      <td>238</td>\n",
+              "      <td>astronomy</td>\n",
+              "      <td>explicit domain</td>\n",
+              "      <td>ANOMALISTIQUE(Astron.). On appelle révolution\\...</td>\n",
+              "      <td>Physique - [Sciences physico-mathématiques]</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>21</th>\n",
+              "      <td>anostostome-0</td>\n",
+              "      <td>3</td>\n",
+              "      <td>298</td>\n",
+              "      <td>zoology</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>ANOSTOSTOME(Anostostoma Gray). Genre d’insecte...</td>\n",
+              "      <td>Histoire naturelle</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>22</th>\n",
+              "      <td>anthoxanthème-0</td>\n",
+              "      <td>3</td>\n",
+              "      <td>571</td>\n",
+              "      <td>chemistry</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>ANTHOXANTHÈME. L’un des deux principes coloran...</td>\n",
+              "      <td>Pharmacie</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>23</th>\n",
+              "      <td>aod-0</td>\n",
+              "      <td>3</td>\n",
+              "      <td>1024</td>\n",
+              "      <td>theology</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>AOD, plus exactement Ehoud. personnage des com...</td>\n",
+              "      <td>Histoire</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>24</th>\n",
+              "      <td>aphellan-0</td>\n",
+              "      <td>3</td>\n",
+              "      <td>1177</td>\n",
+              "      <td>astronomy</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>APHELLAN(Astron.). Un des noms de l’étoile a2 ...</td>\n",
+              "      <td>Physique - [Sciences physico-mathématiques]</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>25</th>\n",
+              "      <td>appelle-0</td>\n",
+              "      <td>3</td>\n",
+              "      <td>1494</td>\n",
+              "      <td>geography</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>APPELLE. Com. du dép. du Tarn, arr. de Lavaux,...</td>\n",
+              "      <td>Géographie</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>26</th>\n",
+              "      <td>aragona-1</td>\n",
+              "      <td>3</td>\n",
+              "      <td>1841</td>\n",
+              "      <td>biography</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>ARAGONA, cardinal d’origine sicilienne, né en ...</td>\n",
+              "      <td>Religion</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>27</th>\n",
+              "      <td>araujuzon-0</td>\n",
+              "      <td>3</td>\n",
+              "      <td>1940</td>\n",
+              "      <td>geography</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>ARAUJUZON. Com. du dép. des Basses-Pyrénées, a...</td>\n",
+              "      <td>Géographie</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>28</th>\n",
+              "      <td>ardant-0</td>\n",
+              "      <td>3</td>\n",
+              "      <td>2421</td>\n",
+              "      <td>biography</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>ARDANT(Paul-Joseph), général français, né en 1...</td>\n",
+              "      <td>Militaire (Art) - Guerre - Arme</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>29</th>\n",
+              "      <td>ariano-0</td>\n",
+              "      <td>3</td>\n",
+              "      <td>2839</td>\n",
+              "      <td>geography</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>ARIANOdi Puglia. Ville de la prov. de principa...</td>\n",
+              "      <td>Géographie</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>30</th>\n",
+              "      <td>athabaska-0</td>\n",
+              "      <td>4</td>\n",
+              "      <td>1118</td>\n",
+              "      <td>anthropology</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>ATHABASKA. Col, rivière, lac, territoire et fa...</td>\n",
+              "      <td>Géographie</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>31</th>\n",
+              "      <td>aslonnes-0</td>\n",
+              "      <td>4</td>\n",
+              "      <td>446</td>\n",
+              "      <td>geography</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>ASLONNES, corn, du dép. de la Vienne, arr. de ...</td>\n",
+              "      <td>Géographie</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>32</th>\n",
+              "      <td>astr0rh1za-0</td>\n",
+              "      <td>4</td>\n",
+              "      <td>992</td>\n",
+              "      <td>zoology</td>\n",
+              "      <td>explicit domain</td>\n",
+              "      <td>ASTR0RH1ZA(Zool.).Genre deForaminifèresimperfo...</td>\n",
+              "      <td>Histoire naturelle</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>33</th>\n",
+              "      <td>atthidographes-0</td>\n",
+              "      <td>4</td>\n",
+              "      <td>1397</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>cross reference</td>\n",
+              "      <td>ATTHIDOGRAPHES(V. Atthide).\\n</td>\n",
+              "      <td>Géographie</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>34</th>\n",
+              "      <td>aubery-2</td>\n",
+              "      <td>4</td>\n",
+              "      <td>1577</td>\n",
+              "      <td>biography</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>AUBERY(Antoine;, historien français, né le .18...</td>\n",
+              "      <td>Histoire</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>35</th>\n",
+              "      <td>aula-0</td>\n",
+              "      <td>4</td>\n",
+              "      <td>1992</td>\n",
+              "      <td>history</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>AULA. Mot latin signifiant cour, lieu découver...</td>\n",
+              "      <td>Architecture</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>36</th>\n",
+              "      <td>au-113</td>\n",
+              "      <td>4</td>\n",
+              "      <td>2112</td>\n",
+              "      <td>botany</td>\n",
+              "      <td>explicit domain</td>\n",
+              "      <td>AUNÉE (bot.). L'Aunée, Grande Année, Année off...</td>\n",
+              "      <td>Histoire naturelle</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>37</th>\n",
+              "      <td>auriol-4</td>\n",
+              "      <td>4</td>\n",
+              "      <td>2224</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>cross reference</td>\n",
+              "      <td>AURIOL. Nom donné à Marseille au Maquereau (V....</td>\n",
+              "      <td>Histoire naturelle</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>38</th>\n",
+              "      <td>ave-lalleniant-0</td>\n",
+              "      <td>4</td>\n",
+              "      <td>2739</td>\n",
+              "      <td>biography</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>AVE-LALLENIANT(Robert-Christian-Barthold), méd...</td>\n",
+              "      <td>Histoire</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>39</th>\n",
+              "      <td>badin-2</td>\n",
+              "      <td>4</td>\n",
+              "      <td>3857</td>\n",
+              "      <td>biography</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>BADIN(Pierre-Adolphe), peintre français, né à ...</td>\n",
+              "      <td>Arts et métiers</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>40</th>\n",
+              "      <td>baizieux-0</td>\n",
+              "      <td>5</td>\n",
+              "      <td>133</td>\n",
+              "      <td>geography</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>BAIZIEUX(Bacium, Basium). Com. du dép. de la\\n...</td>\n",
+              "      <td>Géographie</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>41</th>\n",
+              "      <td>balsam1te-0</td>\n",
+              "      <td>5</td>\n",
+              "      <td>677</td>\n",
+              "      <td>botany</td>\n",
+              "      <td>explicit domain</td>\n",
+              "      <td>BALSAM1TE(Bot.) (Balsamita Desf.). Genre de Co...</td>\n",
+              "      <td>Histoire naturelle</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>42</th>\n",
+              "      <td>balze-0</td>\n",
+              "      <td>5</td>\n",
+              "      <td>757</td>\n",
+              "      <td>navy</td>\n",
+              "      <td>explicit domain</td>\n",
+              "      <td>BALZE(Mar.). Radeau delà côte occidentale de l...</td>\n",
+              "      <td>Marine</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>43</th>\n",
+              "      <td>bande-2</td>\n",
+              "      <td>5</td>\n",
+              "      <td>880</td>\n",
+              "      <td>history</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>BANDE(Ordre delà) ou de l’ECHARPE.Ordre milita...</td>\n",
+              "      <td>Histoire</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>44</th>\n",
+              "      <td>barbosa-5</td>\n",
+              "      <td>5</td>\n",
+              "      <td>1580</td>\n",
+              "      <td>biography</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>BARBOSA(Antonio), jésuite et orientaliste port...</td>\n",
+              "      <td>Religion</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>45</th>\n",
+              "      <td>bati-0</td>\n",
+              "      <td>5</td>\n",
+              "      <td>2955</td>\n",
+              "      <td>architecture</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>BATIÈRE. Toit en forme de bât se terminant à c...</td>\n",
+              "      <td>Architecture</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>46</th>\n",
+              "      <td>baveuse-0</td>\n",
+              "      <td>5</td>\n",
+              "      <td>3457</td>\n",
+              "      <td>zoology</td>\n",
+              "      <td>explicit domain</td>\n",
+              "      <td>BAVEUSE(Zool.). Nom vulgaire par lequel les\\np...</td>\n",
+              "      <td>Histoire naturelle</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>47</th>\n",
+              "      <td>beard-2</td>\n",
+              "      <td>5</td>\n",
+              "      <td>3728</td>\n",
+              "      <td>biography</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>BEARD(James-Henry), peintre américain contempo...</td>\n",
+              "      <td>Beaux-arts</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>48</th>\n",
+              "      <td>beaufort-4</td>\n",
+              "      <td>5</td>\n",
+              "      <td>3838</td>\n",
+              "      <td>geography</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>BEAUFORT. Com. du dép. de la Meuse, arr. de Mo...</td>\n",
+              "      <td>Géographie</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>49</th>\n",
+              "      <td>beaumont-26</td>\n",
+              "      <td>5</td>\n",
+              "      <td>4018</td>\n",
+              "      <td>biography</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>BEAUMONT(J.-G. Leprevôt de), secrétaire du cle...</td>\n",
+              "      <td>Histoire</td>\n",
+              "    </tr>\n",
+              "  </tbody>\n",
+              "</table>\n",
+              "</div>"
+            ],
+            "text/plain": [
+              "                    id  tome  rank        domain           remark  \\\n",
+              "0          abrabeses-0     1   623     geography              NaN   \n",
+              "1             accius-0     1  1076     biography              NaN   \n",
+              "2          achenbach-2     1  1357     biography              NaN   \n",
+              "3           acireale-0     1  1513     geography              NaN   \n",
+              "4              actée-0     1  1731        botany              NaN   \n",
+              "5       adulteration-0     1  2197           NaN  cross reference   \n",
+              "6            aérides-0     1  2334        botany              NaN   \n",
+              "7               ager-0     1  2710     biography              NaN   \n",
+              "8               aigu-1     1  3160           NaN  cross reference   \n",
+              "9            alavika-0     1  3664      theology              NaN   \n",
+              "10          allassac-0     2   755     geography              NaN   \n",
+              "11        allegretto-0     2   786           NaN  cross reference   \n",
+              "12           alleuze-0     2   908     geography              NaN   \n",
+              "13            alliat-0     2   933     geography              NaN   \n",
+              "14            amanty-0     2  1651     geography              NaN   \n",
+              "15         âmasserah-0     2  1701     geography  explicit domain   \n",
+              "16               a-118     2  2971       history              NaN   \n",
+              "17         androclès-0     2  3261     mythology  explicit domain   \n",
+              "18          anfouson-0     2  3394       zoology              NaN   \n",
+              "19  anicet-bourgeois-0     2  3717     biography              NaN   \n",
+              "20     anomalistique-0     3   238     astronomy  explicit domain   \n",
+              "21       anostostome-0     3   298       zoology              NaN   \n",
+              "22     anthoxanthème-0     3   571     chemistry              NaN   \n",
+              "23               aod-0     3  1024      theology              NaN   \n",
+              "24          aphellan-0     3  1177     astronomy              NaN   \n",
+              "25           appelle-0     3  1494     geography              NaN   \n",
+              "26           aragona-1     3  1841     biography              NaN   \n",
+              "27         araujuzon-0     3  1940     geography              NaN   \n",
+              "28            ardant-0     3  2421     biography              NaN   \n",
+              "29            ariano-0     3  2839     geography              NaN   \n",
+              "30         athabaska-0     4  1118  anthropology              NaN   \n",
+              "31          aslonnes-0     4   446     geography              NaN   \n",
+              "32        astr0rh1za-0     4   992       zoology  explicit domain   \n",
+              "33    atthidographes-0     4  1397           NaN  cross reference   \n",
+              "34            aubery-2     4  1577     biography              NaN   \n",
+              "35              aula-0     4  1992       history              NaN   \n",
+              "36              au-113     4  2112        botany  explicit domain   \n",
+              "37            auriol-4     4  2224           NaN  cross reference   \n",
+              "38    ave-lalleniant-0     4  2739     biography              NaN   \n",
+              "39             badin-2     4  3857     biography              NaN   \n",
+              "40          baizieux-0     5   133     geography              NaN   \n",
+              "41         balsam1te-0     5   677        botany  explicit domain   \n",
+              "42             balze-0     5   757          navy  explicit domain   \n",
+              "43             bande-2     5   880       history              NaN   \n",
+              "44           barbosa-5     5  1580     biography              NaN   \n",
+              "45              bati-0     5  2955  architecture              NaN   \n",
+              "46           baveuse-0     5  3457       zoology  explicit domain   \n",
+              "47             beard-2     5  3728     biography              NaN   \n",
+              "48          beaufort-4     5  3838     geography              NaN   \n",
+              "49         beaumont-26     5  4018     biography              NaN   \n",
+              "\n",
+              "                                              content  \\\n",
+              "0   ABRABESES. Village d’Espagne de la prov. de Za...   \n",
+              "1   ACCIUS, L. ou L. ATTIUS (170-94 av. J.-C.), po...   \n",
+              "2   ACHENBACH(Henri), administrateur prussien, né ...   \n",
+              "3   ACIREALE. Yille de Sicile, de la province et d...   \n",
+              "4   ACTÉE(Actœa L.). Genre de plantes de la famill...   \n",
+              "5   ADULTERATION. Altération d’un médicament, d’un...   \n",
+              "6   AÉRIDES{Aérides Lour.). Genres de plantes de l...   \n",
+              "7   AGERouAGERIUS (Nicolaus), médecin alsacien, né...   \n",
+              "8                       AIGU1 LH E (V. Raimond d’).\\n   \n",
+              "9   ALAVIKA« qui est d'Alava »(V. ce mot) : Bhikch...   \n",
+              "10  ALLASSAC. Com. du dép. de la Corrèze, arr. de ...   \n",
+              "11                          ALLEGRETTO(V. Allegro).\\n   \n",
+              "12  ALLEUZE. Com. du dép. du Cantal, arr. et cant....   \n",
+              "13  ALLIAT. Com. du dép. de l’Ariège, arr. de Foix...   \n",
+              "14  AMANTY. Corn, du dép. de la Meuse, arr. de Com...   \n",
+              "15  ÂMASSERAH, AMASR1 ou AMASRAH (Géogr.). Ville d...   \n",
+              "16  AN Cl LIA. Boucliers sacrés des Romains, au no...   \n",
+              "17  ANDROCLÈS(Myth.), un fils d’Eole qui régna sur...   \n",
+              "18  ANFOUSON. Nom donné à Nice au Néron brun\\n(V. ...   \n",
+              "19  ANICET-BOURGEOIS(Auguste Anicet, connu sous le...   \n",
+              "20  ANOMALISTIQUE(Astron.). On appelle révolution\\...   \n",
+              "21  ANOSTOSTOME(Anostostoma Gray). Genre d’insecte...   \n",
+              "22  ANTHOXANTHÈME. L’un des deux principes coloran...   \n",
+              "23  AOD, plus exactement Ehoud. personnage des com...   \n",
+              "24  APHELLAN(Astron.). Un des noms de l’étoile a2 ...   \n",
+              "25  APPELLE. Com. du dép. du Tarn, arr. de Lavaux,...   \n",
+              "26  ARAGONA, cardinal d’origine sicilienne, né en ...   \n",
+              "27  ARAUJUZON. Com. du dép. des Basses-Pyrénées, a...   \n",
+              "28  ARDANT(Paul-Joseph), général français, né en 1...   \n",
+              "29  ARIANOdi Puglia. Ville de la prov. de principa...   \n",
+              "30  ATHABASKA. Col, rivière, lac, territoire et fa...   \n",
+              "31  ASLONNES, corn, du dép. de la Vienne, arr. de ...   \n",
+              "32  ASTR0RH1ZA(Zool.).Genre deForaminifèresimperfo...   \n",
+              "33                      ATTHIDOGRAPHES(V. Atthide).\\n   \n",
+              "34  AUBERY(Antoine;, historien français, né le .18...   \n",
+              "35  AULA. Mot latin signifiant cour, lieu découver...   \n",
+              "36  AUNÉE (bot.). L'Aunée, Grande Année, Année off...   \n",
+              "37  AURIOL. Nom donné à Marseille au Maquereau (V....   \n",
+              "38  AVE-LALLENIANT(Robert-Christian-Barthold), méd...   \n",
+              "39  BADIN(Pierre-Adolphe), peintre français, né à ...   \n",
+              "40  BAIZIEUX(Bacium, Basium). Com. du dép. de la\\n...   \n",
+              "41  BALSAM1TE(Bot.) (Balsamita Desf.). Genre de Co...   \n",
+              "42  BALZE(Mar.). Radeau delà côte occidentale de l...   \n",
+              "43  BANDE(Ordre delà) ou de l’ECHARPE.Ordre milita...   \n",
+              "44  BARBOSA(Antonio), jésuite et orientaliste port...   \n",
+              "45  BATIÈRE. Toit en forme de bât se terminant à c...   \n",
+              "46  BAVEUSE(Zool.). Nom vulgaire par lequel les\\np...   \n",
+              "47  BEARD(James-Henry), peintre américain contempo...   \n",
+              "48  BEAUFORT. Com. du dép. de la Meuse, arr. de Mo...   \n",
+              "49  BEAUMONT(J.-G. Leprevôt de), secrétaire du cle...   \n",
+              "\n",
+              "                                     class_bert  \n",
+              "0                                    Géographie  \n",
+              "1                       Belles-lettres - Poésie  \n",
+              "2                                      Histoire  \n",
+              "3                                    Géographie  \n",
+              "4                            Histoire naturelle  \n",
+              "5                                        Chimie  \n",
+              "6                            Histoire naturelle  \n",
+              "7                                      Histoire  \n",
+              "8                                        Marine  \n",
+              "9                                      Religion  \n",
+              "10                                   Géographie  \n",
+              "11                                      Musique  \n",
+              "12                                   Géographie  \n",
+              "13                                   Géographie  \n",
+              "14                                   Géographie  \n",
+              "15                                   Géographie  \n",
+              "16                                    Antiquité  \n",
+              "17                                    Antiquité  \n",
+              "18                           Histoire naturelle  \n",
+              "19                      Belles-lettres - Poésie  \n",
+              "20  Physique - [Sciences physico-mathématiques]  \n",
+              "21                           Histoire naturelle  \n",
+              "22                                    Pharmacie  \n",
+              "23                                     Histoire  \n",
+              "24  Physique - [Sciences physico-mathématiques]  \n",
+              "25                                   Géographie  \n",
+              "26                                     Religion  \n",
+              "27                                   Géographie  \n",
+              "28              Militaire (Art) - Guerre - Arme  \n",
+              "29                                   Géographie  \n",
+              "30                                   Géographie  \n",
+              "31                                   Géographie  \n",
+              "32                           Histoire naturelle  \n",
+              "33                                   Géographie  \n",
+              "34                                     Histoire  \n",
+              "35                                 Architecture  \n",
+              "36                           Histoire naturelle  \n",
+              "37                           Histoire naturelle  \n",
+              "38                                     Histoire  \n",
+              "39                              Arts et métiers  \n",
+              "40                                   Géographie  \n",
+              "41                           Histoire naturelle  \n",
+              "42                                       Marine  \n",
+              "43                                     Histoire  \n",
+              "44                                     Religion  \n",
+              "45                                 Architecture  \n",
+              "46                           Histoire naturelle  \n",
+              "47                                   Beaux-arts  \n",
+              "48                                   Géographie  \n",
+              "49                                     Histoire  "
+            ]
+          },
+          "execution_count": 26,
+          "metadata": {},
+          "output_type": "execute_result"
+        }
+      ],
+      "source": [
+        "df_LGE.head(50)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "### 4.3 Save"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 27,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "# Write the classified articles, including their full text, to CSV.\n",
+        "# The delimiter must be a single character, so a plain comma is used\n",
+        "# (the previous backslash-comma value was two characters long).\n",
+        "filepath = path + \"results_LGE/LGE-metadata-withContent.csv\"\n",
+        "df_LGE.to_csv(filepath, sep=\",\")"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "# Write the metadata-only CSV (same frame without the 'content' column).\n",
+        "# The column is dropped on a temporary copy so that df_LGE itself is left\n",
+        "# unchanged: the cell can be re-run and later cells can still read 'content'.\n",
+        "filepath = path + \"results_LGE/LGE-metadata.csv\"\n",
+        "df_LGE.drop(columns=['content']).to_csv(filepath, sep=\",\")"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "## 5. BERT XAI\n",
+        "\n",
+        "Reference notebook: https://www.kaggle.com/code/rizwanhaidar/deep-learning-xai-models-loading-and-predictions"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "# Explainer built from the loaded model and tokenizer; the class names\n",
+        "# held by `encoder` are passed as the labels to use.\n",
+        "cls_explainer = SequenceClassificationExplainer(\n",
+        "    model=model, tokenizer=tokenizer, custom_labels=encoder.classes_.tolist()\n",
+        ")"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 23,
+      "metadata": {},
+      "outputs": [
+        {
+          "data": {
+            "text/html": [
+              "<div>\n",
+              "<style scoped>\n",
+              "    .dataframe tbody tr th:only-of-type {\n",
+              "        vertical-align: middle;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe tbody tr th {\n",
+              "        vertical-align: top;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe thead th {\n",
+              "        text-align: right;\n",
+              "    }\n",
+              "</style>\n",
+              "<table border=\"1\" class=\"dataframe\">\n",
+              "  <thead>\n",
+              "    <tr style=\"text-align: right;\">\n",
+              "      <th></th>\n",
+              "      <th>id</th>\n",
+              "      <th>tome</th>\n",
+              "      <th>filename</th>\n",
+              "      <th>content</th>\n",
+              "      <th>nb_words</th>\n",
+              "    </tr>\n",
+              "  </thead>\n",
+              "  <tbody>\n",
+              "    <tr>\n",
+              "      <th>0</th>\n",
+              "      <td>T1article_1</td>\n",
+              "      <td>T1</td>\n",
+              "      <td>article_1</td>\n",
+              "      <td>F.-Camille DREYFUS, député de la Seine.\\n</td>\n",
+              "      <td>6</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>1</th>\n",
+              "      <td>T1article_10</td>\n",
+              "      <td>T1</td>\n",
+              "      <td>article_10</td>\n",
+              "      <td>quimarque un mouvement en avant de l’esprit hu...</td>\n",
+              "      <td>212</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>2</th>\n",
+              "      <td>T1article_100</td>\n",
+              "      <td>T1</td>\n",
+              "      <td>article_100</td>\n",
+              "      <td>ABACUS. L’abacus ou abaque était un instrument...</td>\n",
+              "      <td>1345</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>3</th>\n",
+              "      <td>T1article_1000</td>\n",
+              "      <td>T1</td>\n",
+              "      <td>article_1000</td>\n",
+              "      <td>H6SS6)\\n1780-1793 Choiseul-Goufficr\\n1780-1793...</td>\n",
+              "      <td>218</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>4</th>\n",
+              "      <td>T1article_1001</td>\n",
+              "      <td>T1</td>\n",
+              "      <td>article_1001</td>\n",
+              "      <td>1803Le Brun.\\n</td>\n",
+              "      <td>2</td>\n",
+              "    </tr>\n",
+              "  </tbody>\n",
+              "</table>\n",
+              "</div>"
+            ],
+            "text/plain": [
+              "               id tome      filename  \\\n",
+              "0     T1article_1   T1     article_1   \n",
+              "1    T1article_10   T1    article_10   \n",
+              "2   T1article_100   T1   article_100   \n",
+              "3  T1article_1000   T1  article_1000   \n",
+              "4  T1article_1001   T1  article_1001   \n",
+              "\n",
+              "                                             content  nb_words  \n",
+              "0          F.-Camille DREYFUS, député de la Seine.\\n         6  \n",
+              "1  quimarque un mouvement en avant de l’esprit hu...       212  \n",
+              "2  ABACUS. L’abacus ou abaque était un instrument...      1345  \n",
+              "3  H6SS6)\\n1780-1793 Choiseul-Goufficr\\n1780-1793...       218  \n",
+              "4                                     1803Le Brun.\\n         2  "
+            ]
+          },
+          "execution_count": 23,
+          "metadata": {},
+          "output_type": "execute_result"
+        }
+      ],
+      "source": [
+        "df_LGE.head(n=5)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 24,
+      "metadata": {},
+      "outputs": [
+        {
+          "data": {
+            "text/plain": [
+              "\"ABACUS. L’abacus ou abaque était un instrument en\\nusage dans l’antiquité pour faciliter les calculs arithmé¬\\ntiques. Il paraît que c’était dans l’origine une petite table\\nsur laquelle on traçait les figures et où l’on exécutait les\\nopérations. Cet instrument semble aussi ancien que l’arith¬\\nmétique, qui d’ailleurs s’appelait abacus au moyen âge ;\\non le trouve chez les Grecs, les Romains, les Chinois, les\\nAllemands et les Français. Sa forme varia avec le temps ;\\nil devint enfin un cadre long divisé par plusieurs cordes\\nparallèles, dans chacune desquelles étaient enfilées dix\\npetites boules. La première ligne à droite était celle des\\nunités, la seconde celle des dizaines, la troisième celle des\\ncentaines, etc. Pour écrire un premier nombre sur l’abacus\\non commençait par relever toutes les boules à la partie\\nsupérieure de l’instrument, et ensuite on abaissait sur\\nchaque ligne, à la partie inférieure, un nombre de boules\\négal aux unités de l’ordre de ces lignes. Ainsi par exemple,\\npour écrire le nombre 3,564, on abaissait 4 boules à la\\npartie inférieure de la première ligne, 6 à celle de la\\nseconde, 5 à celle de la troisième et 3 à celle de la qua¬\\ntrième. Le nombre 3,564 se trouvait ainsi représenté\\ncomme il l’est dans la figure 1. — Ce nombre étant\\nécrit, s’agissait-il de lui ajouter un autre nombre, 53,729,\\non commençait par abaisser 9 boules de la partie supé¬\\nrieure de la première ligne à la partie inférieure ;\\ncomme, dans le cas présent, il n’en restait que 6, après\\navoir abaissé ces 6 boules, on relevait les 10 à la\\npartie supérieure, en abaissant une boule pour cette\\ndizaine à la seconde colonne, et on achevait l’opération\\nsur la première en abaissant 3 boules pour compléter les\\n9 qu’il s’agissait d’abaisser. Passant à la seconde colonne\\non abaissait 2 boules pour le chiffre 2 des dizaines du\\nnombre 53,729. 
Arrivé à la troisième colonne, on abais¬\\nsait d’abord les 5 boules restantes, ensuite on remontait\\nle tout en abaissant pour la dizaine une boule dans la\\nquatrième colonne et on redescendait 2 boules à la troi¬\\nsième colonne pour compléter le chiffre 7. Passant à la\\nuatrième colonne, on abaissait 3 boules pour le chiffre\\ndes mille, et enfin on abaissait 5 boules à la cinquième\\ncolonne pour le chiffre 5 des dizaines de mille. L’appa¬\\nrence finale de l’abacus était, après cette opération, celle\\nde la figure 2, et le nombre 57,293, qui s’y trouve écrit\\nà la partie inférieure, est la somme des deux nombres\\n3,564 et 53,729.Pour ajouter un nouveau nombre à 57,293\\non agirait de la même manière et ainsi de suite. On voit\\ndonc qu’à l’aide de cet instrument, les additions des\\nnombres peuvent s’effectuer avec la plus grande facilité ;\\nil en est de même des soustractions qu’on peut exécuter\\npar une marche inverse de celle que nous venons de\\ndécrire. — L’abacus, abandonné par toutes les nations de\\nl’Europe à l’exception de la Russie, est encore extrême¬\\nment répandu en Chine, où on le trouve dans toutes les\\nmaisons de commerce; il est également en usage dans\\ncertaines parties de l’Inde et dans nos écoles primaires\\nsous le nom de boulier compteur.\\nL’usage de l'abacus suppose, comme on vient de le voir,\\nparfaitement établi le système de numération décimale.\\nA qui sommes-nous redevables de cette invention si\\nféconde, ou du moins de son introduction en Europe ?\\nChasles l’attribue à Doëce (V. ce mot) ; cette opinion a\\nété hautement combattue par Libri. 
— D’après Chasles,\\nBoèce se servait, sous le nom à'apices, de caractères\\nnommés igin, andras, ormis, arbas, quimas, calcis,\\nzénis, témenias et celentis, correspondant à nos chiffres\\n1, 2, 3, 4, 5, 6, 7, 8, 9 ; quant à Yabacus, ce serait\\nun tableau divisé par des lignes horizontales et verticales,\\nformant des cases dans lesquelles devaient être inscrits\\nces caractères, de façon que les unités de même ordre des\\ndifférents nombres sur lesquels devait porter l’opération\\nse trouvassent dans une même colonne verticale ; la case\\ncorrespondante à un certain ordre d’unités devait être\\npassée lorsque le nombre manquait dans cet ordre d’unités.\\nLes opérations d’ailleurs se seraient faites sur ce tableau,\\ncomme nous les faisons aujourd’hui. Ce serait, comme on\\nvoit, un immense progrès sur l’opération purement méca¬\\nnique que nous avons décrite précédemment. — D’après le\\nmême géomètre, le zéro n’aurait pas tardé à apparaître sous\\nle nom desipos, de sorte que les occidentaux auraient eu,\\nlongtemps avant leurs relations avec les Arabes, un système\\nde numération écrite entièrement identique à celui dont\\nnous nous servons aujourd’hui. — D’après Libri, tout\\ncela ne serait que chimères et visions. Notre système de\\nnumération, d’origine hindoue, nous serait venu des Arabes\\nau xii6 siècle ; tous les écrivains de cette époque le\\ndisent ; tous les traités d’arithmétique le proclament ;\\nla question ne saurait donc être douteuse.\\nCependant les preuves alléguées par Chasles sont\\ntirées d’anciennes copies manuscrites de l’Arithmétique de\\nBoëce, qu’il paraît avoir étudiées avec soin ; et il serait\\ndifficile d’admettre que ce qu’il dit y avoir vu ne s’y\\ntrouvât pas. Tout au plus pourrait-on prétendre que les\\nmanuscrits en question contiendraient des interpolations\\nfaites depuis Boëce. 
Mais cette hypothèse présente encore\\ndes difficultés parce que Boëce, d’après Chasles, attribue¬\\nrait la connaissance de ce système aux Grecs (à quelques\\nGrecs bien peu nombreux sans doute, car aucun ouvrage grec\\nantérieur à Boëce, écrit à Athènes, à Alexandrie ou à Con¬\\nstantinople, n’en fait mention, et Eutocius même n’y fait pas\\nallusion). — Que conclure ? Nous croyons avec Libri\\nque notre système de numération nous vient des Hindous\\net nous pensons que Chasles se trompe en lui donnant\\nune origine grecque ou latine. Mais on ne voit pas pourquoi\\nBoëce, qui avait voyagé en Orient, n’aurait pas pu être\\ninitié à l’arithmétique des Hindous par un marchand grec de\\nConstantinople, que ses voyages auraient conduit dans\\nl’Inde.On objecte, il est vrai, que Boëce est antérieur à Arya-\\nbhata, l’auteur du plus ancien traité d’arithmétique indien\\nconnu ; mais il n’est guère probable qu’Aryabhata ait\\ndécouvert seul tout ce qui se trouve dans son ouvrage\\noù, sans doute, figurent bien des choses connues avant\\nl’auteur en Hindoustan. — Cependant, pourquoi la tradi¬\\ntion ne ferait-elle remonter qu’au xne siècle l’introduction\\ndu système décimal de numération parmi les nations occi¬\\ndentales ? Pourquoi surtout cette introduction aurait-elle\\nété regardée alors comme un événement tout nouveau,\\npourquoi aurait-elle fait époque ? Cela s’expliquerait peut-\\nêtre parce que, à partir de Boëce, les ténèbres n’avaient\\nfait que s’épaissir sur toute l’Europe, jusqu’à l’invasion de\\nl’Espagne par les Arabes, et que les connaissances qu’il\\navait pu acquérir en Grèce, n’ayant pas eu le temps\\nde se répandre, avaient fini par ne plus Lisser de\\ntraces.\\nChasles, à l’appui de cette opinion, cite un Traité de\\nl’Abacus, de Raoul, évêque de Laon, où il serait dit\\nque ce système de numération était tombé dans l’oubli\\nchez les nations occidentales et que Gerbert et Hermann\\nl’avaient remis en pratique. 
— Nous ne voyons d’invrai¬\\nsemblable dans tout cela que l’idée de Chasles d’attri¬\\nbuer une origine grecque ou latine à notre système de\\nnumération et de pousser l’exagération de son système\\njusqu’à se demander sérieusement, à propos de l’Arénaire,\\nsi Archimède ne connaissait pas le système de l’abacus. —\\nSi Chasles avait seulement voulu dire qu’Archimède con¬\\nnaissait l’A6aÇ, comptoir, damier, buffet, qui est dénommé\\ndans le premier vers du Jardin des racines grecques,\\nsorte de machine à calculer que nous avons décrite au\\ncommencement de cet article et telle qu’elle existe aujour¬\\nd’hui en Chine, son hypothèse serait plus que probable.\\nMais nous ne pensons pas que ce soit ce qu’a voulu dire\\nChasles ; car alors il ne s’agirait plus d’un fait scien¬\\ntifique comparable à l’invention de la méthode qui permit\\nd’écrire tous les nombres avec neuf caractères seulement\\net un zéro. — Il ne s’agit pas en effet de la numération\\nparlée des Grecs, qui fut toujours décimale, il s’agit de\\nleur numération écrite. Or, que les abax, dans les colonnes\\nou les rainures desquels on faisait mouvoir des cailloux ou\\nde petites boules, rappelassent la numération parlée déci¬\\nmale, cela n’aurait même pas lieu d'étonner, mais ne prou¬\\nverait rien pour la numération écrite. — Au reste, on voit\\nquelquefois les nations perfectionner leurs méthodes,\\njamais on ne les voit en changer totalement les bases.\\nNous sommes assurément bien éloignés de vouloir faire\\naux Grecs, même à Pappus et à Eutocius, l’injure de\\ncroire qu’ils n’eussent pas été mille fois capables d’in-\\nventer le système décimal de numération avec les neuf\\ncliitfres et le zéro, si leur manière d’être et de penser les\\neût davantage portés aux spéculations arithmétiques. 
Il ne\\nfaut pas pour cela beaucoup de génie et ils auraient pu en\\nrevendre aux Latins de la décadence, aux Hindous, aux\\nArabes et à tous nos abacistes ; ce qu’il fallait, c’était une\\ncertaine disposition d’esprit, dépendant d une certaine con¬\\nformation cérébrale.\\n\""
+            ]
+          },
+          "execution_count": 24,
+          "metadata": {},
+          "output_type": "execute_result"
+        }
+      ],
+      "source": [
+        "df_LGE.loc[2, \"content\"]"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 36,
+      "metadata": {},
+      "outputs": [
+        {
+          "data": {
+            "text/plain": [
+              "[('[CLS]', 0.0),\n",
+              " ('Instrument', 0.5555782891531604),\n",
+              " ('de', 0.24325958616394933),\n",
+              " ('musique', 0.7950833530900966),\n",
+              " ('[SEP]', 0.0)]"
+            ]
+          },
+          "execution_count": 36,
+          "metadata": {},
+          "output_type": "execute_result"
+        }
+      ],
+      "source": [
+        "content = \"Instrument de musique\"  # demo input; swap in a corpus article, e.g. df_LGE.content[2]\n",
+        "word_attributions = cls_explainer(content[:512])  # keep at most 512 characters (not tokens); slicing is a no-op on shorter text\n",
+        "word_attributions"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 37,
+      "metadata": {},
+      "outputs": [
+        {
+          "data": {
+            "text/plain": [
+              "'Musique'"
+            ]
+          },
+          "execution_count": 37,
+          "metadata": {},
+          "output_type": "execute_result"
+        }
+      ],
+      "source": [
+        "cls_explainer.predicted_class_name"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 38,
+      "metadata": {},
+      "outputs": [
+        {
+          "data": {
+            "text/html": [
+              "<table width: 100%><div style=\"border-top: 1px solid; margin-top: 5px;             padding-top: 5px; display: inline-block\"><b>Legend: </b><span style=\"display: inline-block; width: 10px; height: 10px;                 border: 1px solid; background-color:                 hsl(0, 75%, 60%)\"></span> Negative  <span style=\"display: inline-block; width: 10px; height: 10px;                 border: 1px solid; background-color:                 hsl(0, 75%, 100%)\"></span> Neutral  <span style=\"display: inline-block; width: 10px; height: 10px;                 border: 1px solid; background-color:                 hsl(120, 75%, 50%)\"></span> Positive  </div><tr><th>True Label</th><th>Predicted Label</th><th>Attribution Label</th><th>Attribution Score</th><th>Word Importance</th><tr><td><text style=\"padding-right:2em\"><b>26</b></text></td><td><text style=\"padding-right:2em\"><b>Musique (0.96)</b></text></td><td><text style=\"padding-right:2em\"><b>Musique</b></text></td><td><text style=\"padding-right:2em\"><b>1.59</b></text></td><td><mark style=\"background-color: hsl(0, 75%, 100%); opacity:1.0;                     line-height:1.75\"><font color=\"black\"> [CLS]                    </font></mark><mark style=\"background-color: hsl(120, 75%, 73%); opacity:1.0;                     line-height:1.75\"><font color=\"black\"> Instrument                    </font></mark><mark style=\"background-color: hsl(120, 75%, 88%); opacity:1.0;                     line-height:1.75\"><font color=\"black\"> de                    </font></mark><mark style=\"background-color: hsl(120, 75%, 61%); opacity:1.0;                     line-height:1.75\"><font color=\"black\"> musique                    </font></mark><mark style=\"background-color: hsl(0, 75%, 100%); opacity:1.0;                     line-height:1.75\"><font color=\"black\"> [SEP]                    </font></mark></td><tr></table>"
+            ],
+            "text/plain": [
+              "<IPython.core.display.HTML object>"
+            ]
+          },
+          "metadata": {},
+          "output_type": "display_data"
+        },
+        {
+          "data": {
+            "text/html": [
+              "<table width: 100%><div style=\"border-top: 1px solid; margin-top: 5px;             padding-top: 5px; display: inline-block\"><b>Legend: </b><span style=\"display: inline-block; width: 10px; height: 10px;                 border: 1px solid; background-color:                 hsl(0, 75%, 60%)\"></span> Negative  <span style=\"display: inline-block; width: 10px; height: 10px;                 border: 1px solid; background-color:                 hsl(0, 75%, 100%)\"></span> Neutral  <span style=\"display: inline-block; width: 10px; height: 10px;                 border: 1px solid; background-color:                 hsl(120, 75%, 50%)\"></span> Positive  </div><tr><th>True Label</th><th>Predicted Label</th><th>Attribution Label</th><th>Attribution Score</th><th>Word Importance</th><tr><td><text style=\"padding-right:2em\"><b>26</b></text></td><td><text style=\"padding-right:2em\"><b>Musique (0.96)</b></text></td><td><text style=\"padding-right:2em\"><b>Musique</b></text></td><td><text style=\"padding-right:2em\"><b>1.59</b></text></td><td><mark style=\"background-color: hsl(0, 75%, 100%); opacity:1.0;                     line-height:1.75\"><font color=\"black\"> [CLS]                    </font></mark><mark style=\"background-color: hsl(120, 75%, 73%); opacity:1.0;                     line-height:1.75\"><font color=\"black\"> Instrument                    </font></mark><mark style=\"background-color: hsl(120, 75%, 88%); opacity:1.0;                     line-height:1.75\"><font color=\"black\"> de                    </font></mark><mark style=\"background-color: hsl(120, 75%, 61%); opacity:1.0;                     line-height:1.75\"><font color=\"black\"> musique                    </font></mark><mark style=\"background-color: hsl(0, 75%, 100%); opacity:1.0;                     line-height:1.75\"><font color=\"black\"> [SEP]                    </font></mark></td><tr></table>"
+            ],
+            "text/plain": [
+              "<IPython.core.display.HTML object>"
+            ]
+          },
+          "execution_count": 38,
+          "metadata": {},
+          "output_type": "execute_result"
+        }
+      ],
+      "source": [
+        "cls_explainer.visualize();  # ';' hides the returned HTML object, which otherwise renders the same table a second time"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "* TODO : récupérer les mots à attribution positive par domaine (EDdA et LGE)\n",
+        "* TODO : générer des nuages de mots et comparer les mots les plus fréquents entre EDdA et LGE (corpus parallèle)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": []
+    }
+  ],
+  "metadata": {
+    "accelerator": "GPU",
+    "colab": {
+      "collapsed_sections": [],
+      "machine_shape": "hm",
+      "name": "EDdA-Classification_BertFineTuning.ipynb",
+      "provenance": []
+    },
+    "kernelspec": {
+      "display_name": "Python 3.9.13 ('geode-classification-py39')",
+      "language": "python",
+      "name": "python3"
+    },
+    "language_info": {
+      "codemirror_mode": {
+        "name": "ipython",
+        "version": 3
+      },
+      "file_extension": ".py",
+      "mimetype": "text/x-python",
+      "name": "python",
+      "nbconvert_exporter": "python",
+      "pygments_lexer": "ipython3",
+      "version": "3.9.13"
+    },
+    "vscode": {
+      "interpreter": {
+        "hash": "16fac9c2d845f8e1f8c6fffffe3d3a0def61c7e42da17a08d00f279ad4dea797"
+      }
+    },
+    "widgets": {
+      "application/vnd.jupyter.widget-state+json": {
+        "0279837673b446b09aac18346213eb7e": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "HBoxModel",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HBoxModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HBoxView",
+            "box_style": "",
+            "children": [
+              "IPY_MODEL_69004a5069094f8c9d59d5136f627bef",
+              "IPY_MODEL_e96a95317b0945c58c8ff0e944c7593e",
+              "IPY_MODEL_68b69c9d3a274900bc2892848f725cb0"
+            ],
+            "layout": "IPY_MODEL_09b5f0bbd5c14bc289b0f92a22bb29ab"
+          }
+        },
+        "0779c8ea0ed24e64a800ae5dff6bc8ce": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "FloatProgressModel",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "FloatProgressModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "ProgressView",
+            "bar_style": "success",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_8d24b669a39b4876ac0a014dff678db1",
+            "max": 810912,
+            "min": 0,
+            "orientation": "horizontal",
+            "style": "IPY_MODEL_99b785ea53744868b8b11e5e94936fcc",
+            "value": 810912
+          }
+        },
+        "09b5f0bbd5c14bc289b0f92a22bb29ab": {
+          "model_module": "@jupyter-widgets/base",
+          "model_module_version": "1.2.0",
+          "model_name": "LayoutModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "12afa6b6474b401f9ff3f189cc0d3d11": {
+          "model_module": "@jupyter-widgets/base",
+          "model_module_version": "1.2.0",
+          "model_name": "LayoutModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "152a31110bf9477989833eac91794688": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "FloatProgressModel",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "FloatProgressModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "ProgressView",
+            "bar_style": "success",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_826bd7d0a1f146ea9f7d53584468190c",
+            "max": 445032417,
+            "min": 0,
+            "orientation": "horizontal",
+            "style": "IPY_MODEL_e86a1d4d268c4314897b58f7bba5ec25",
+            "value": 445032417
+          }
+        },
+        "1bf6a76237454349aafc1e9284376879": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "DescriptionStyleModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        },
+        "274e505b5f354efc8de3ef26cc43e617": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "HBoxModel",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HBoxModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HBoxView",
+            "box_style": "",
+            "children": [
+              "IPY_MODEL_ad5e0e1439a94578a31b80c90dbf3247",
+              "IPY_MODEL_0779c8ea0ed24e64a800ae5dff6bc8ce",
+              "IPY_MODEL_7870340ac12b469c8ac19de3a47b6e67"
+            ],
+            "layout": "IPY_MODEL_f1f9d5b32f60473b86ae6b340d6c0850"
+          }
+        },
+        "2c44d9c11e704b70aa32904a23d1790c": {
+          "model_module": "@jupyter-widgets/base",
+          "model_module_version": "1.2.0",
+          "model_name": "LayoutModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "2cf386a8d14d43389374f79bfa922675": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "DescriptionStyleModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        },
+        "2d1d632da0f740c38512c9ad779d3173": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "DescriptionStyleModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        },
+        "3592b1ed1d6d452b93c57b304943a1cb": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "DescriptionStyleModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        },
+        "4a23110523184d019a77368116f738f3": {
+          "model_module": "@jupyter-widgets/base",
+          "model_module_version": "1.2.0",
+          "model_name": "LayoutModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "500826e3813b414a820aa260bfde9e23": {
+          "model_module": "@jupyter-widgets/base",
+          "model_module_version": "1.2.0",
+          "model_name": "LayoutModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "5032547e748f45a3b0cdd12fafe1dd05": {
+          "model_module": "@jupyter-widgets/base",
+          "model_module_version": "1.2.0",
+          "model_name": "LayoutModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "58b4f9e0366f4d4eba7f902af84b8965": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "HTMLModel",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_12afa6b6474b401f9ff3f189cc0d3d11",
+            "placeholder": "​",
+            "style": "IPY_MODEL_c4d981755d1d42b6940396b77bc251bc",
+            "value": "Downloading: 100%"
+          }
+        },
+        "5978954f56fb40928b970f32d1634aaf": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "ProgressStyleModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "ProgressStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "bar_color": null,
+            "description_width": ""
+          }
+        },
+        "5f321455342348f49879a9ca8b392077": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "DescriptionStyleModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        },
+        "68b69c9d3a274900bc2892848f725cb0": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "HTMLModel",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_5032547e748f45a3b0cdd12fafe1dd05",
+            "placeholder": "​",
+            "style": "IPY_MODEL_c4c1675163bd4997bb44d7ea3967a356",
+            "value": " 1.40M/1.40M [00:00&lt;00:00, 6.57MB/s]"
+          }
+        },
+        "69004a5069094f8c9d59d5136f627bef": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "HTMLModel",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_cb447c62ce1d4c1ea760175ae619fbb9",
+            "placeholder": "​",
+            "style": "IPY_MODEL_76007b17ffd2478fa4a86f959d4f1766",
+            "value": "Downloading: 100%"
+          }
+        },
+        "70dd7428d78c44409308d62ba04917de": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "HTMLModel",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_4a23110523184d019a77368116f738f3",
+            "placeholder": "​",
+            "style": "IPY_MODEL_1bf6a76237454349aafc1e9284376879",
+            "value": "Downloading: 100%"
+          }
+        },
+        "76007b17ffd2478fa4a86f959d4f1766": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "DescriptionStyleModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        },
+        "7870340ac12b469c8ac19de3a47b6e67": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "HTMLModel",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_2c44d9c11e704b70aa32904a23d1790c",
+            "placeholder": "​",
+            "style": "IPY_MODEL_2cf386a8d14d43389374f79bfa922675",
+            "value": " 811k/811k [00:00&lt;00:00, 2.75MB/s]"
+          }
+        },
+        "826bd7d0a1f146ea9f7d53584468190c": {
+          "model_module": "@jupyter-widgets/base",
+          "model_module_version": "1.2.0",
+          "model_name": "LayoutModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "8d24b669a39b4876ac0a014dff678db1": {
+          "model_module": "@jupyter-widgets/base",
+          "model_module_version": "1.2.0",
+          "model_name": "LayoutModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "8f467553598f4dcc9abf55da79c11018": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "HBoxModel",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HBoxModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HBoxView",
+            "box_style": "",
+            "children": [
+              "IPY_MODEL_58b4f9e0366f4d4eba7f902af84b8965",
+              "IPY_MODEL_9383e09698ae4bd1820a4bca22e78315",
+              "IPY_MODEL_a189838c4de648198b0f4fc99c29ced8"
+            ],
+            "layout": "IPY_MODEL_9d7a8b3ecfe74f66b4238fe085c05906"
+          }
+        },
+        "9383e09698ae4bd1820a4bca22e78315": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "FloatProgressModel",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "FloatProgressModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "ProgressView",
+            "bar_style": "success",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_fe0e3b1df104484c98fbdcd31a04e427",
+            "max": 508,
+            "min": 0,
+            "orientation": "horizontal",
+            "style": "IPY_MODEL_5978954f56fb40928b970f32d1634aaf",
+            "value": 508
+          }
+        },
+        "9420a47a2bf44ead8cff283f20566cda": {
+          "model_module": "@jupyter-widgets/base",
+          "model_module_version": "1.2.0",
+          "model_name": "LayoutModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "99b785ea53744868b8b11e5e94936fcc": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "ProgressStyleModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "ProgressStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "bar_color": null,
+            "description_width": ""
+          }
+        },
+        "9d7a8b3ecfe74f66b4238fe085c05906": {
+          "model_module": "@jupyter-widgets/base",
+          "model_module_version": "1.2.0",
+          "model_name": "LayoutModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "a159d62667734657a49ba3a96494f137": {
+          "model_module": "@jupyter-widgets/base",
+          "model_module_version": "1.2.0",
+          "model_name": "LayoutModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "a189838c4de648198b0f4fc99c29ced8": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "HTMLModel",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_df95c20399dd4918bc7559a90886d4aa",
+            "placeholder": "​",
+            "style": "IPY_MODEL_2d1d632da0f740c38512c9ad779d3173",
+            "value": " 508/508 [00:00&lt;00:00, 16.9kB/s]"
+          }
+        },
+        "a9c47cb226ee41e18812f29f690992eb": {
+          "model_module": "@jupyter-widgets/base",
+          "model_module_version": "1.2.0",
+          "model_name": "LayoutModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "ad5e0e1439a94578a31b80c90dbf3247": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "HTMLModel",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_9420a47a2bf44ead8cff283f20566cda",
+            "placeholder": "​",
+            "style": "IPY_MODEL_5f321455342348f49879a9ca8b392077",
+            "value": "Downloading: 100%"
+          }
+        },
+        "c4c1675163bd4997bb44d7ea3967a356": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "DescriptionStyleModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        },
+        "c4d981755d1d42b6940396b77bc251bc": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "DescriptionStyleModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        },
+        "cb447c62ce1d4c1ea760175ae619fbb9": {
+          "model_module": "@jupyter-widgets/base",
+          "model_module_version": "1.2.0",
+          "model_name": "LayoutModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "d09d664839d04303b8fef9ef895f6e4f": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "HBoxModel",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HBoxModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HBoxView",
+            "box_style": "",
+            "children": [
+              "IPY_MODEL_70dd7428d78c44409308d62ba04917de",
+              "IPY_MODEL_152a31110bf9477989833eac91794688",
+              "IPY_MODEL_fcde5f4cf49846a0ad1b284aad98a38a"
+            ],
+            "layout": "IPY_MODEL_500826e3813b414a820aa260bfde9e23"
+          }
+        },
+        "d4ad1a78750d49feaea584a82940bb7d": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "ProgressStyleModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "ProgressStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "bar_color": null,
+            "description_width": ""
+          }
+        },
+        "df95c20399dd4918bc7559a90886d4aa": {
+          "model_module": "@jupyter-widgets/base",
+          "model_module_version": "1.2.0",
+          "model_name": "LayoutModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "e86a1d4d268c4314897b58f7bba5ec25": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "ProgressStyleModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "ProgressStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "bar_color": null,
+            "description_width": ""
+          }
+        },
+        "e96a95317b0945c58c8ff0e944c7593e": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "FloatProgressModel",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "FloatProgressModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "ProgressView",
+            "bar_style": "success",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_a9c47cb226ee41e18812f29f690992eb",
+            "max": 1395301,
+            "min": 0,
+            "orientation": "horizontal",
+            "style": "IPY_MODEL_d4ad1a78750d49feaea584a82940bb7d",
+            "value": 1395301
+          }
+        },
+        "f1f9d5b32f60473b86ae6b340d6c0850": {
+          "model_module": "@jupyter-widgets/base",
+          "model_module_version": "1.2.0",
+          "model_name": "LayoutModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "fcde5f4cf49846a0ad1b284aad98a38a": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "HTMLModel",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_a159d62667734657a49ba3a96494f137",
+            "placeholder": "​",
+            "style": "IPY_MODEL_3592b1ed1d6d452b93c57b304943a1cb",
+            "value": " 445M/445M [00:14&lt;00:00, 39.2MB/s]"
+          }
+        },
+        "fe0e3b1df104484c98fbdcd31a04e427": {
+          "model_module": "@jupyter-widgets/base",
+          "model_module_version": "1.2.0",
+          "model_name": "LayoutModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        }
+      }
+    }
+  },
+  "nbformat": 4,
+  "nbformat_minor": 0
+}
-- 
GitLab