From a6f959d271324391e8636f074fbb545f4ce785ab Mon Sep 17 00:00:00 2001 From: Ludovic Moncla <moncla.ludovic@gmail.com> Date: Tue, 14 Mar 2023 22:16:30 +0100 Subject: [PATCH] Delete Predict_Classification.ipynb --- notebooks/Predict_Classification.ipynb | 3786 ------------------------ 1 file changed, 3786 deletions(-) delete mode 100644 notebooks/Predict_Classification.ipynb diff --git a/notebooks/Predict_Classification.ipynb b/notebooks/Predict_Classification.ipynb deleted file mode 100644 index df05631..0000000 --- a/notebooks/Predict_Classification.ipynb +++ /dev/null @@ -1,3786 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "m39L6DJ2r0zN" - }, - "source": [ - "# BERT Predict classification\n", - "\n", - "## 1. Setup the environment\n", - "\n", - "### 1.1 Setup colab environment\n", - "\n", - "#### 1.1.1 Install packages" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "pwmZ5bBvgGNh", - "outputId": "1a080856-4e47-4e1d-81d1-d38bb58948a5" - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n", - "Collecting transformers==4.10.3\n", - " Downloading transformers-4.10.3-py3-none-any.whl (2.8 MB)\n", - "\u001b[2K \u001b[90mâ”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”\u001b[0m \u001b[32m2.8/2.8 MB\u001b[0m \u001b[31m46.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hRequirement already satisfied: requests in /usr/local/lib/python3.8/dist-packages (from transformers==4.10.3) (2.25.1)\n", - "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.8/dist-packages (from transformers==4.10.3) (1.21.6)\n", - "Collecting sacremoses\n", - " Downloading sacremoses-0.0.53.tar.gz (880 kB)\n", - "\u001b[2K \u001b[90mâ”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”\u001b[0m \u001b[32m880.6/880.6 KB\u001b[0m \u001b[31m45.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", - "Requirement already satisfied: packaging in /usr/local/lib/python3.8/dist-packages (from transformers==4.10.3) (23.0)\n", - "Collecting tokenizers<0.11,>=0.10.1\n", - " Downloading tokenizers-0.10.3-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (3.3 MB)\n", - "\u001b[2K \u001b[90mâ”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”\u001b[0m \u001b[32m3.3/3.3 MB\u001b[0m \u001b[31m57.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hRequirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.8/dist-packages (from transformers==4.10.3) (4.64.1)\n", - "Collecting huggingface-hub>=0.0.12\n", - " Downloading huggingface_hub-0.12.0-py3-none-any.whl (190 kB)\n", - "\u001b[2K \u001b[90mâ”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”\u001b[0m \u001b[32m190.3/190.3 KB\u001b[0m \u001b[31m13.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hRequirement already satisfied: filelock in /usr/local/lib/python3.8/dist-packages (from transformers==4.10.3) (3.9.0)\n", - "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.8/dist-packages (from transformers==4.10.3) (2022.6.2)\n", - "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.8/dist-packages (from transformers==4.10.3) (6.0)\n", - "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.8/dist-packages (from huggingface-hub>=0.0.12->transformers==4.10.3) (4.4.0)\n", - "Requirement already satisfied: chardet<5,>=3.0.2 in /usr/local/lib/python3.8/dist-packages (from requests->transformers==4.10.3) (4.0.0)\n", - "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.8/dist-packages (from requests->transformers==4.10.3) (1.24.3)\n", - "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.8/dist-packages (from requests->transformers==4.10.3) (2.10)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.8/dist-packages (from requests->transformers==4.10.3) (2022.12.7)\n", - "Requirement already satisfied: six in /usr/local/lib/python3.8/dist-packages (from sacremoses->transformers==4.10.3) (1.15.0)\n", - "Requirement already satisfied: click in /usr/local/lib/python3.8/dist-packages (from sacremoses->transformers==4.10.3) (7.1.2)\n", - "Requirement already satisfied: joblib in /usr/local/lib/python3.8/dist-packages (from sacremoses->transformers==4.10.3) (1.2.0)\n", - "Building wheels for collected packages: sacremoses\n", - " Building wheel for sacremoses (setup.py) ... \u001b[?25l\u001b[?25hdone\n", - " Created wheel for sacremoses: filename=sacremoses-0.0.53-py3-none-any.whl size=895260 sha256=1a6d3101ab60a657a64074bebed597b1987c115de1092b993a013ae317d882f9\n", - " Stored in directory: /root/.cache/pip/wheels/82/ab/9b/c15899bf659ba74f623ac776e861cf2eb8608c1825ddec66a4\n", - "Successfully built sacremoses\n", - "Installing collected packages: tokenizers, sacremoses, huggingface-hub, transformers\n", - "Successfully installed huggingface-hub-0.12.0 sacremoses-0.0.53 tokenizers-0.10.3 transformers-4.10.3\n", - "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n", - "Collecting sentencepiece\n", - " Downloading sentencepiece-0.1.97-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)\n", - "\u001b[2K \u001b[90mâ”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m33.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hInstalling collected packages: sentencepiece\n", - "Successfully installed sentencepiece-0.1.97\n" - ] - } - ], - "source": [ - "!pip install transformers==4.10.3\n", - "!pip install sentencepiece" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "57zgbn_jr0zR" - }, - "source": [ - "#### 1.1.2 Use more RAM" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "WF0qFN_g3ekz", - "outputId": "56e76858-932c-42fd-ace0-37bf11c7b4ce" - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Your runtime has 27.3 gigabytes of available RAM\n", - "\n", - "You are using a high-RAM runtime!\n" - ] - } - ], - "source": [ - "from psutil import virtual_memory\n", - "ram_gb = virtual_memory().total / 1e9\n", - "print('Your runtime has {:.1f} gigabytes of available RAM\\n'.format(ram_gb))\n", - "\n", - "if ram_gb < 20:\n", - " print('Not using a high-RAM runtime')\n", - "else:\n", - " print('You are using a high-RAM runtime!')" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "vpr71iWGr0zS" - }, - "source": [ - "#### 1.1.3 Mount GoogleDrive" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "vL0S-s9Uofvn", - "outputId": "dbe3e901-da63-48b5-d8c6-b8cbda503fef" - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Mounted at /content/drive\n" - ] - } - ], - "source": [ - "from google.colab import drive\n", - "drive.mount('/content/drive')" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "8hzEGHl7gmzk" - }, - "source": [ - "### 1.2 Setup GPU" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "dPOU-Efhf4ui", - "outputId": "0bb7fd0e-e2fb-4477-e5f7-b408d0a1ced7" - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "There are 1 GPU(s) available.\n", - "We will use the GPU: Tesla T4\n" - ] - } - ], - "source": [ - "import torch\n", - "\n", - "# If there's a GPU available...\n", - "if torch.cuda.is_available(): \n", - " # Tell PyTorch to use the GPU. \n", - " device = torch.device(\"cuda\")\n", - " print('There are %d GPU(s) available.' % torch.cuda.device_count())\n", - " print('We will use the GPU:', torch.cuda.get_device_name(0))\n", - "\n", - "# for MacOS\n", - "elif torch.backends.mps.is_available() and torch.backends.mps.is_built():\n", - " device = torch.device(\"mps\")\n", - " print('We will use the GPU')\n", - "else:\n", - " device = torch.device(\"cpu\")\n", - " print('No GPU available, using the CPU instead.')" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "wSqbrupGMc1M" - }, - "source": [ - "### 1.3 Import librairies" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "SkErnwgMMbRj" - }, - "outputs": [], - "source": [ - "import pandas as pd \n", - "import numpy as np\n", - "\n", - "from transformers import BertTokenizer, BertForSequenceClassification, CamembertTokenizer, CamembertForSequenceClassification\n", - "from torch.utils.data import TensorDataset, DataLoader, SequentialSampler" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "c5QKcXulhNJ-" - }, - "source": [ - "## 2. Load Data" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "M2awiee1r0zV" - }, - "outputs": [], - "source": [ - "drive_path = \"drive/MyDrive/Classification-EDdA/\"\n", - "path = \"./\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "X1A_J8MGr0zV", - "outputId": "ca5c966c-00a2-4d74-cd1c-576c18f98d3d" - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "--2023-02-15 07:14:06-- https://geode.liris.cnrs.fr/EDdA-Classification/datasets/Parallel_datatset_articles_230215.tsv\n", - "Resolving geode.liris.cnrs.fr (geode.liris.cnrs.fr)... 134.214.142.28\n", - "Connecting to geode.liris.cnrs.fr (geode.liris.cnrs.fr)|134.214.142.28|:443... connected.\n", - "HTTP request sent, awaiting response... 200 OK\n", - "Length: 42343065 (40M) [text/tab-separated-values]\n", - "Saving to: ‘Parallel_datatset_articles_230215.tsv’\n", - "\n", - "Parallel_datatset_a 100%[===================>] 40.38M 74.9MB/s in 0.5s \n", - "\n", - "2023-02-15 07:14:07 (74.9 MB/s) - ‘Parallel_datatset_articles_230215.tsv’ saved [42343065/42343065]\n", - "\n" - ] - } - ], - "source": [ - "#!wget https://geode.liris.cnrs.fr/files/datasets/EDdA/Classification/LGE_withContent.tsv\n", - "#!wget https://geode.liris.cnrs.fr/EDdA-Classification/datasets/EDdA_dataset_articles_no_superdomain.tsv\n", - "!wget https://geode.liris.cnrs.fr/EDdA-Classification/datasets/Parallel_datatset_articles_230215.tsv" - ] - }, - { - "cell_type": "code", - "source": [ - "#filepath = \"data/LGE_withContent.tsv\"\n", - "#filepath = \"EDdA_dataset_articles_no_superdomain.tsv\"\n", - "filepath = \"Parallel_datatset_articles_230215.tsv\"" - ], - "metadata": { - "id": "eea7F4vato1x" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 333 - }, - "id": "erjPU3y8r0zW", - "outputId": "e2b4a39d-a72b-4e7a-8b26-e709eb983df3" - }, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - " idLGE tomeLGE rankLGE \\\n", - "0 aam-0 1 63 \n", - "1 abaco-0 1 92 \n", - "2 abacot-0 1 96 \n", - "3 abaddon-0 1 104 \n", - "4 abandonnement-0 1 138 \n", - "\n", - " contentLGE volumeEDdA numeroEDdA \\\n", - "0 AAM. Mesure de capacité pour les liquides en u... 1 31 \n", - "1 ABACO, architecte italien du xvi siècle (V. La... 1 42 \n", - "2 ABACOT. Double couronne que portaient autrefoi... 1 44 \n", - "3 ABADDONou APOLYON le Destructeur. « Elles\\nava... 1 46 \n", - "4 ABANDONNEMENT. I. Droit civil. — Ce mot est un... 1 75 \n", - "\n", - " headEDdA authorEDdA normclassEDdA \\\n", - "0 AAM Diderot unclassified \n", - "1 ABACO d'Alembert unclassified \n", - "2 ABACOT Diderot unclassified \n", - "3 ABADDON Diderot unclassified \n", - "4 ABANDONNEMENT Toussaint Droit \n", - "\n", - " contentEDdA nbWordsEDdA \\\n", - "0 \\n* AAM, s. mesure des Liquides, en usage à Am... 18 \n", - "1 \\nABACO, s. m. Quelques anciens Auteurs se ser... 26 \n", - "2 \\n* ABACOT, s. m. nom de l'ancienne parure dè\\... 22 \n", - "3 \\n* ABADDON, s. m. vient d'abad, perdre. C'est... 25 \n", - "4 \\nABANDONNEMENT, s. m. en Droit, est le délais... 77 \n", - "\n", - " superdomainEDdA \n", - "0 Unclassified \n", - "1 Unclassified \n", - "2 Unclassified \n", - "3 Unclassified \n", - "4 Droit Jurisprudence " - ], - "text/html": [ - "\n", - " <div id=\"df-be30bfa5-3524-40b4-abed-43faebfa6628\">\n", - " <div class=\"colab-df-container\">\n", - " <div>\n", - "<style scoped>\n", - " .dataframe tbody tr th:only-of-type {\n", - " vertical-align: middle;\n", - " }\n", - "\n", - " .dataframe tbody tr th {\n", - " vertical-align: top;\n", - " }\n", - "\n", - " .dataframe thead th {\n", - " text-align: right;\n", - " }\n", - "</style>\n", - "<table border=\"1\" class=\"dataframe\">\n", - " <thead>\n", - " <tr style=\"text-align: right;\">\n", - " <th></th>\n", - " <th>idLGE</th>\n", - " <th>tomeLGE</th>\n", - " <th>rankLGE</th>\n", - " <th>contentLGE</th>\n", - " <th>volumeEDdA</th>\n", - " <th>numeroEDdA</th>\n", - " <th>headEDdA</th>\n", - " <th>authorEDdA</th>\n", - " <th>normclassEDdA</th>\n", - " <th>contentEDdA</th>\n", - " <th>nbWordsEDdA</th>\n", - " <th>superdomainEDdA</th>\n", - " </tr>\n", - " </thead>\n", - " <tbody>\n", - " <tr>\n", - " <th>0</th>\n", - " <td>aam-0</td>\n", - " <td>1</td>\n", - " <td>63</td>\n", - " <td>AAM. Mesure de capacité pour les liquides en u...</td>\n", - " <td>1</td>\n", - " <td>31</td>\n", - " <td>AAM</td>\n", - " <td>Diderot</td>\n", - " <td>unclassified</td>\n", - " <td>\\n* AAM, s. mesure des Liquides, en usage à Am...</td>\n", - " <td>18</td>\n", - " <td>Unclassified</td>\n", - " </tr>\n", - " <tr>\n", - " <th>1</th>\n", - " <td>abaco-0</td>\n", - " <td>1</td>\n", - " <td>92</td>\n", - " <td>ABACO, architecte italien du xvi siècle (V. La...</td>\n", - " <td>1</td>\n", - " <td>42</td>\n", - " <td>ABACO</td>\n", - " <td>d'Alembert</td>\n", - " <td>unclassified</td>\n", - " <td>\\nABACO, s. m. Quelques anciens Auteurs se ser...</td>\n", - " <td>26</td>\n", - " <td>Unclassified</td>\n", - " </tr>\n", - " <tr>\n", - " <th>2</th>\n", - " <td>abacot-0</td>\n", - " <td>1</td>\n", - " <td>96</td>\n", - " <td>ABACOT. Double couronne que portaient autrefoi...</td>\n", - " <td>1</td>\n", - " <td>44</td>\n", - " <td>ABACOT</td>\n", - " <td>Diderot</td>\n", - " <td>unclassified</td>\n", - " <td>\\n* ABACOT, s. m. nom de l'ancienne parure dè\\...</td>\n", - " <td>22</td>\n", - " <td>Unclassified</td>\n", - " </tr>\n", - " <tr>\n", - " <th>3</th>\n", - " <td>abaddon-0</td>\n", - " <td>1</td>\n", - " <td>104</td>\n", - " <td>ABADDONou APOLYON le Destructeur. « Elles\\nava...</td>\n", - " <td>1</td>\n", - " <td>46</td>\n", - " <td>ABADDON</td>\n", - " <td>Diderot</td>\n", - " <td>unclassified</td>\n", - " <td>\\n* ABADDON, s. m. vient d'abad, perdre. C'est...</td>\n", - " <td>25</td>\n", - " <td>Unclassified</td>\n", - " </tr>\n", - " <tr>\n", - " <th>4</th>\n", - " <td>abandonnement-0</td>\n", - " <td>1</td>\n", - " <td>138</td>\n", - " <td>ABANDONNEMENT. I. Droit civil. — Ce mot est un...</td>\n", - " <td>1</td>\n", - " <td>75</td>\n", - " <td>ABANDONNEMENT</td>\n", - " <td>Toussaint</td>\n", - " <td>Droit</td>\n", - " <td>\\nABANDONNEMENT, s. m. en Droit, est le délais...</td>\n", - " <td>77</td>\n", - " <td>Droit Jurisprudence</td>\n", - " </tr>\n", - " </tbody>\n", - "</table>\n", - "</div>\n", - " <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-be30bfa5-3524-40b4-abed-43faebfa6628')\"\n", - " title=\"Convert this dataframe to an interactive table.\"\n", - " style=\"display:none;\">\n", - " \n", - " <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n", - " width=\"24px\">\n", - " <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n", - " <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n", - " </svg>\n", - " </button>\n", - " \n", - " <style>\n", - " .colab-df-container {\n", - " display:flex;\n", - " flex-wrap:wrap;\n", - " gap: 12px;\n", - " }\n", - "\n", - " .colab-df-convert {\n", - " background-color: #E8F0FE;\n", - " border: none;\n", - " border-radius: 50%;\n", - " cursor: pointer;\n", - " display: none;\n", - " fill: #1967D2;\n", - " height: 32px;\n", - " padding: 0 0 0 0;\n", - " width: 32px;\n", - " }\n", - "\n", - " .colab-df-convert:hover {\n", - " background-color: #E2EBFA;\n", - " box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n", - " fill: #174EA6;\n", - " }\n", - "\n", - " [theme=dark] .colab-df-convert {\n", - " background-color: #3B4455;\n", - " fill: #D2E3FC;\n", - " }\n", - "\n", - " [theme=dark] .colab-df-convert:hover {\n", - " background-color: #434B5C;\n", - " box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n", - " filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n", - " fill: #FFFFFF;\n", - " }\n", - " </style>\n", - "\n", - " <script>\n", - " const buttonEl =\n", - " document.querySelector('#df-be30bfa5-3524-40b4-abed-43faebfa6628 button.colab-df-convert');\n", - " buttonEl.style.display =\n", - " google.colab.kernel.accessAllowed ? 'block' : 'none';\n", - "\n", - " async function convertToInteractive(key) {\n", - " const element = document.querySelector('#df-be30bfa5-3524-40b4-abed-43faebfa6628');\n", - " const dataTable =\n", - " await google.colab.kernel.invokeFunction('convertToInteractive',\n", - " [key], {});\n", - " if (!dataTable) return;\n", - "\n", - " const docLinkHtml = 'Like what you see? Visit the ' +\n", - " '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n", - " + ' to learn more about interactive tables.';\n", - " element.innerHTML = '';\n", - " dataTable['output_type'] = 'display_data';\n", - " await google.colab.output.renderOutput(dataTable, element);\n", - " const docLink = document.createElement('div');\n", - " docLink.innerHTML = docLinkHtml;\n", - " element.appendChild(docLink);\n", - " }\n", - " </script>\n", - " </div>\n", - " </div>\n", - " " - ] - }, - "metadata": {}, - "execution_count": 13 - } - ], - "source": [ - "df = pd.read_csv(path + filepath, sep=\"\\t\")\n", - "df.head()" - ] - }, - { - "cell_type": "code", - "source": [ - "corpus = 'LGE'\n", - "#corpus = 'EDdA'\n", - "data = df['content'+corpus].values\n" - ], - "metadata": { - "id": "Ndw4UtgWt_MJ" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "U6KSUho7r0zX" - }, - "source": [ - "## 3. Load model and predict\n", - "\n", - "### 3.1 BERT / CamemBERT" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "0qDZ86qTr0zX" - }, - "outputs": [], - "source": [ - "model_name = \"bert-base-multilingual-cased\"\n", - "#model_name = \"camembert-base\"\n", - "#model_path = path + \"models/model_\" + model_name + \"_s10000.pt\"\n", - "\n", - "model_path = drive_path + \"models/model_\" + model_name + \"_s10000_superdomains.pt\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "KEljGX0br0zX" - }, - "outputs": [], - "source": [ - "def generate_dataloader(tokenizer, sentences, batch_size = 8, max_len = 512):\n", - "\n", - " # Tokenize all of the sentences and map the tokens to thier word IDs.\n", - " input_ids_test = []\n", - " # For every sentence...\n", - " for sent in sentences:\n", - " # `encode` will:\n", - " # (1) Tokenize the sentence.\n", - " # (2) Prepend the `[CLS]` token to the start.\n", - " # (3) Append the `[SEP]` token to the end.\n", - " # (4) Map tokens to their IDs.\n", - " encoded_sent = tokenizer.encode(\n", - " sent, # Sentence to encode.\n", - " add_special_tokens = True, # Add '[CLS]' and '[SEP]'\n", - " # This function also supports truncation and conversion\n", - " # to pytorch tensors, but I need to do padding, so I\n", - " # can't use these features.\n", - " #max_length = max_len, # Truncate all sentences.\n", - " #return_tensors = 'pt', # Return pytorch tensors.\n", - " )\n", - " input_ids_test.append(encoded_sent)\n", - "\n", - " # Pad our input tokens\n", - " padded_test = []\n", - " for i in input_ids_test:\n", - " if len(i) > max_len:\n", - " padded_test.extend([i[:max_len]])\n", - " else:\n", - " padded_test.extend([i + [0] * (max_len - len(i))])\n", - " input_ids_test = np.array(padded_test)\n", - "\n", - " # Create attention masks\n", - " attention_masks = []\n", - "\n", - " # Create a mask of 1s for each token followed by 0s for padding\n", - " for seq in input_ids_test:\n", - " seq_mask = [float(i>0) for i in seq]\n", - " attention_masks.append(seq_mask)\n", - "\n", - " # Convert to tensors.\n", - " inputs = torch.tensor(input_ids_test)\n", - " masks = torch.tensor(attention_masks)\n", - " #set batch size\n", - "\n", - " # Create the DataLoader.\n", - " data = TensorDataset(inputs, masks)\n", - " prediction_sampler = SequentialSampler(data)\n", - "\n", - " return DataLoader(data, sampler=prediction_sampler, batch_size=batch_size)\n", - "\n", - "\n", - "\n", - "def predict(model, dataloader, device):\n", - "\n", - " # Put model in evaluation mode\n", - " model.eval()\n", - "\n", - " # Tracking variables\n", - " predictions_test , true_labels = [], []\n", - " pred_labels_ = []\n", - " # Predict\n", - " for batch in dataloader:\n", - " # Add batch to GPU\n", - " batch = tuple(t.to(device) for t in batch)\n", - "\n", - " # Unpack the inputs from the dataloader\n", - " b_input_ids, b_input_mask = batch\n", - "\n", - " # Telling the model not to compute or store gradients, saving memory and\n", - " # speeding up prediction\n", - " with torch.no_grad():\n", - " # Forward pass, calculate logit predictions\n", - " outputs = model(b_input_ids, token_type_ids=None,\n", - " attention_mask=b_input_mask)\n", - "\n", - " logits = outputs[0]\n", - " #print(logits)\n", - "\n", - " # Move logits and labels to CPU ???\n", - " logits = logits.detach().cpu().numpy()\n", - " #print(logits)\n", - "\n", - " # Store predictions and true labels\n", - " predictions_test.append(logits)\n", - "\n", - " pred_labels = []\n", - " \n", - " for i in range(len(predictions_test)):\n", - " # The predictions for this batch are a 2-column ndarray (one column for \"0\"\n", - " # and one column for \"1\"). Pick the label with the highest value and turn this\n", - " # in to a list of 0s and 1s.\n", - " pred_labels_i = np.argmax(predictions_test[i], axis=1).flatten()\n", - " pred_labels.append(pred_labels_i)\n", - "\n", - " pred_labels_ += [item for sublist in pred_labels for item in sublist]\n", - " return pred_labels_" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 162, - "referenced_widgets": [ - "11c285bed74e46a08fbb7bf88715aafa", - "3fde7318ebc3458cb64f8927fdcbaee3", - "8d57eb44d9394604981a8f8f97f48b7c", - "1cb6ed877c2b455b9463b12c2da877d8", - "5e03651dca944a5f91b675c503feeeac", - "0521c3cc6abd44ae989ac0701100045d", - "d12a8ef069af4d79870bd783f2343184", - "28d38094dcd54d6694e2efad7fea6abb", - "6f80ea06220b4a498e6169e55cd8800f", - "3de8b4b0d6494c058589c535dc24dc3e", - "e0df5e2d4ebd4eb3b126c16dadb2ba62", - "9be44ba364a344f2b6b2546ae9d61ba8", - "fe472df31774495c83aa159e116ba2ee", - "0180ffc200e8466191a11a723c82e43f", - "a07ac2935a3f4d84971ae9147a854969", - "af4ae976808042bf929ab17df10530b2", - "b2277b3d600c43f999b3a07215ac2e13", - "ebe5e6f8af1e4e04a8a2b5939ac09039", - "c4ea841cb43747cdbce35f8f9c711cde", - "2d937fce2e6c4b69816352bd264ded41", - "64b57e3be2c743b3b0e58d338243c656", - "6ca9688ac7fa4e638994b91242c0ac87", - "aa6a7a9106554f85a91150bd65c271d0", - "ea3f471546734f5994edfdc214319368", - "04a86b4164fa49de8fd47d4d373e1d81", - "be067a8a406f41779e42bd35abcbfcf0", - "7df91507e47d4a6992464293ce002a29", - "ecef81814a7c4481aa49eb73807bfe4d", - "2b9b4eac7994405ca9bce38332df2629", - "4edc5b66f0eb44a0b05876fda90f0d1b", - "5285a390fb42415289d89585e04c8994", - "53643db8401846f2af6f15f5cd0c9998", - "bc4825e1a43f4a20b496d82ea3687e6f", - "4c46904f8e944d2b834ba9d384b00a8c", - "ef37bbf1f34e4765b1803a607716d0d1", - "c2d6041cd6674043953e094791ab9659", - "e4c43817f44743388e6fd98b8dbb2eda", - "39636049d60a4bb4bde7d0ef1af25d78", - "c3e73d423c2c41c0a942331070fda723", - "087ebcb093bb41c28485bdc762fb5da6", - "de270f0aa8194e0bb470e693a35d7d6e", - "2924cdc1348942cfb23f28a5383af3e4", - "209ff109c8e142dfba37baea2d3d5de7", - "4203b950e245481590e8105f31301782" - ] - }, - "id": "eGKU1J9Ar0zY", - "outputId": "0a5f7fe5-7b5e-4c11-8a6e-7e85e8478b92" - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Loading Bert Tokenizer...\n" - ] - }, - { - "output_type": "display_data", - "data": { - "text/plain": [ - "Downloading: 0%| | 0.00/996k [00:00<?, ?B/s]" - ], - "application/vnd.jupyter.widget-view+json": { - "version_major": 2, - "version_minor": 0, - "model_id": "11c285bed74e46a08fbb7bf88715aafa" - } - }, - "metadata": {} - }, - { - "output_type": "display_data", - "data": { - "text/plain": [ - "Downloading: 0%| | 0.00/29.0 [00:00<?, ?B/s]" - ], - "application/vnd.jupyter.widget-view+json": { - "version_major": 2, - "version_minor": 0, - "model_id": "9be44ba364a344f2b6b2546ae9d61ba8" - } - }, - "metadata": {} - }, - { - "output_type": "display_data", - "data": { - "text/plain": [ - "Downloading: 0%| | 0.00/1.96M [00:00<?, ?B/s]" - ], - "application/vnd.jupyter.widget-view+json": { - "version_major": 2, - "version_minor": 0, - "model_id": "aa6a7a9106554f85a91150bd65c271d0" - } - }, - "metadata": {} - }, - { - "output_type": "display_data", - "data": { - "text/plain": [ - "Downloading: 0%| | 0.00/625 [00:00<?, ?B/s]" - ], - "application/vnd.jupyter.widget-view+json": { - "version_major": 2, - "version_minor": 0, - "model_id": "4c46904f8e944d2b834ba9d384b00a8c" - } - }, - "metadata": {} - } - ], - "source": [ - "if model_name == 'bert-base-multilingual-cased' :\n", - " print('Loading Bert Tokenizer...')\n", - " tokenizer = BertTokenizer.from_pretrained(model_name)\n", - "elif model_name == 'camembert-base':\n", - " print('Loading Camembert Tokenizer...')\n", - " tokenizer = CamembertTokenizer.from_pretrained(model_name)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "-O6NspVTr0zZ" - }, - "outputs": [], - "source": [ - "data_loader = generate_dataloader(tokenizer, data)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "4lv8lvUar0zZ" - }, - "source": [ - "\n", - "https://discuss.huggingface.co/t/an-efficient-way-of-loading-a-model-that-was-saved-with-torch-save/9814\n", - "\n", - "https://github.com/huggingface/transformers/issues/2094\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "CN8EZst-r0zZ" - }, - "outputs": [], - "source": [ - "#model = torch.load(model_path, map_location=torch.device('mps'))\n", - "#model.load_state_dict(torch.load(model_path, map_location=torch.device('mps')))\n", - "\n", - "model = BertForSequenceClassification.from_pretrained(model_path).to(\"cuda\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "_fzgS5USJeAF" - }, - "outputs": [], - "source": [ - "pred = predict(model, data_loader, device)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "ISkijyclr0za", - "outputId": "8120e858-9950-4380-f887-70ca47360c76" - }, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "[4,\n", - " 1,\n", - " 7,\n", - " 16,\n", - " 5,\n", - " 7,\n", - " 7,\n", - " 8,\n", - " 6,\n", - " 6,\n", - " 0,\n", - " 9,\n", - " 7,\n", - " 5,\n", - " 6,\n", - " 3,\n", - " 11,\n", - " 7,\n", - " 11,\n", - " 9,\n", - " 12,\n", - " 5,\n", - " 5,\n", - " 13,\n", - " 9,\n", - " 16,\n", - " 6,\n", - " 5,\n", - " 9,\n", - " 1,\n", - " 7,\n", - " 11,\n", - " 4,\n", - " 5,\n", - " 6,\n", - " 8,\n", - " 14,\n", - " 1,\n", - " 8,\n", - " 13,\n", - " 14,\n", - " 16,\n", - " 16,\n", - " 13,\n", - " 8,\n", - " 8,\n", - " 8,\n", - " 8,\n", - " 6,\n", - " 8,\n", - " 13,\n", - " 10,\n", - " 13,\n", - " 5,\n", - " 5,\n", - " 13,\n", - " 13,\n", - " 2,\n", - " 1,\n", - " 14,\n", - " 4,\n", - " 13,\n", - " 7,\n", - " 0,\n", - " 1,\n", - " 11,\n", - " 12,\n", - " 9,\n", - " 10,\n", - " 7,\n", - " 12,\n", - " 3,\n", - " 9,\n", - " 5,\n", - " 5,\n", - " 13,\n", - " 11,\n", - " 8,\n", - " 7,\n", - " 6,\n", - " 4,\n", - " 7,\n", - " 7,\n", - " 7,\n", - " 11,\n", - " 7,\n", - " 14,\n", - " 6,\n", - " 5,\n", - " 5,\n", - " 5,\n", - " 4,\n", - " 16,\n", - " 2,\n", - " 13,\n", - " 7,\n", - " 14,\n", - " 2,\n", - " 10,\n", - " 7,\n", - " 8,\n", - " 14,\n", - " 5,\n", - " 1,\n", - " 6,\n", - " 16,\n", - " 14,\n", - " 13,\n", - " 6,\n", - " 7,\n", - " 5,\n", - " 5,\n", - " 11,\n", - " 5,\n", - " 0,\n", - " 6,\n", - " 5,\n", - " 13,\n", - " 9,\n", - " 4,\n", - " 8,\n", - " 7,\n", - " 6,\n", - " 5,\n", - " 13,\n", - " 6,\n", - " 5,\n", - " 5,\n", - " 5,\n", - " 7,\n", - " 11,\n", - " 2,\n", - " 7,\n", - " 8,\n", - " 7,\n", - " 13,\n", - " 5,\n", - " 4,\n", - " 8,\n", - " 6,\n", - " 6,\n", - " 5,\n", - " 12,\n", - " 8,\n", - " 7,\n", - " 13,\n", - " 6,\n", - " 7,\n", - " 9,\n", - " 10,\n", - " 13,\n", - " 7,\n", - " 6,\n", - " 6,\n", - " 9,\n", - " 9,\n", - " 8,\n", - " 8,\n", - " 6,\n", - " 8,\n", - " 13,\n", - " 14,\n", - " 11,\n", - " 13,\n", - " 6,\n", - " 1,\n", - " 11,\n", - " 1,\n", - " 4,\n", - " 8,\n", - " 6,\n", - " 1,\n", - " 9,\n", - " 2,\n", - " 8,\n", - " 6,\n", - " 5,\n", - " 4,\n", - " 8,\n", - " 7,\n", - " 4,\n", - " 7,\n", - " 14,\n", - " 14,\n", - " 8,\n", - " 7,\n", - " 7,\n", - " 16,\n", - " 6,\n", - " 13,\n", - " 9,\n", - " 9,\n", - " 9,\n", - " 16,\n", - " 6,\n", - " 6,\n", - " 14,\n", - " 6,\n", - " 8,\n", - " 6,\n", - " 14,\n", - " 7,\n", - " 8,\n", - " 5,\n", - " 6,\n", - " 6,\n", - " 14,\n", - " 14,\n", - " 6,\n", - " 0,\n", - " 4,\n", - " 10,\n", - " 6,\n", - " 10,\n", - " 14,\n", - " 8,\n", - " 6,\n", - " 2,\n", - " 3,\n", - " 8,\n", - " 3,\n", - " 2,\n", - " 8,\n", - " 8,\n", - " 13,\n", - " 7,\n", - " 6,\n", - " 7,\n", - " 4,\n", - " 8,\n", - " 8,\n", - " 6,\n", - " 13,\n", - " 11,\n", - " 7,\n", - " 6,\n", - " 6,\n", - " 13,\n", - " 5,\n", - " 9,\n", - " 12,\n", - " 11,\n", - " 7,\n", - " 2,\n", - " 11,\n", - " 8,\n", - " 3,\n", - " 3,\n", - " 9,\n", - " 2,\n", - " 8,\n", - " 7,\n", - " 5,\n", - " 7,\n", - " 6,\n", - " 6,\n", - " 13,\n", - " 9,\n", - " 6,\n", - " 14,\n", - " 6,\n", - " 6,\n", - " 6,\n", - " 8,\n", - " 6,\n", - " 5,\n", - " 6,\n", - " 7,\n", - " 3,\n", - " 8,\n", - " 7,\n", - " 5,\n", - " 6,\n", - " 6,\n", - " 6,\n", - " 5,\n", - " 5,\n", - " 8,\n", - " 14,\n", - " 6,\n", - " 6,\n", - " 7,\n", - " 11,\n", - " 8,\n", - " 7,\n", - " 6,\n", - " 1,\n", - " 7,\n", - " 5,\n", - " 2,\n", - " 11,\n", - " 6,\n", - " 11,\n", - " 16,\n", - " 2,\n", - " 7,\n", - " 2,\n", - " 4,\n", - " 2,\n", - " 7,\n", - " 13,\n", - " 6,\n", - " 11,\n", - " 13,\n", - " 13,\n", - " 2,\n", - " 13,\n", - " 11,\n", - " 11,\n", - " 6,\n", - " 11,\n", - " 6,\n", - " 8,\n", - " 8,\n", - " 1,\n", - " 6,\n", - " 9,\n", - " 6,\n", - " 6,\n", - " 6,\n", - " 7,\n", - " 0,\n", - " 6,\n", - " 11,\n", - " 6,\n", - " 7,\n", - " 11,\n", - " 6,\n", - " 6,\n", - " 7,\n", - " 12,\n", - " 9,\n", - " 11,\n", - " 6,\n", - " 6,\n", - " 6,\n", - " 8,\n", - " 14,\n", - " 6,\n", - " 7,\n", - " 6,\n", - " 6,\n", - " 5,\n", - " 7,\n", - " 6,\n", - " 2,\n", - " 7,\n", - " 6,\n", - " 14,\n", - " 5,\n", - " 8,\n", - " 14,\n", - " 8,\n", - " 11,\n", - " 9,\n", - " 9,\n", - " 11,\n", - " 2,\n", - " 14,\n", - " 7,\n", - " 9,\n", - " 7,\n", - " 8,\n", - " 16,\n", - " 11,\n", - " 13,\n", - " 14,\n", - " 5,\n", - " 6,\n", - " 6,\n", - " 14,\n", - " 10,\n", - " 7,\n", - " 7,\n", - " 8,\n", - " 6,\n", - " 2,\n", - " 7,\n", - " 6,\n", - " 13,\n", - " 13,\n", - " 10,\n", - " 6,\n", - " 11,\n", - " 16,\n", - " 6,\n", - " 6,\n", - " 12,\n", - " 2,\n", - " 6,\n", - " 11,\n", - " 13,\n", - " 6,\n", - " 11,\n", - " 2,\n", - " 6,\n", - " 5,\n", - " 13,\n", - " 7,\n", - " 6,\n", - " 11,\n", - " 11,\n", - " 7,\n", - " 6,\n", - " 14,\n", - " 8,\n", - " 8,\n", - " 7,\n", - " 7,\n", - " 7,\n", - " 2,\n", - " 7,\n", - " 7,\n", - " 7,\n", - " 6,\n", - " 7,\n", - " 16,\n", - " 2,\n", - " 2,\n", - " 11,\n", - " 11,\n", - " 10,\n", - " 11,\n", - " 16,\n", - " 3,\n", - " 16,\n", - " 11,\n", - " 7,\n", - " 5,\n", - " 5,\n", - " 3,\n", - " 6,\n", - " 8,\n", - " 1,\n", - " 11,\n", - " 6,\n", - " 13,\n", - " 14,\n", - " 5,\n", - " 5,\n", - " 12,\n", - " 9,\n", - " 14,\n", - " 5,\n", - " 13,\n", - " 6,\n", - " 8,\n", - " 11,\n", - " 14,\n", - " 8,\n", - " 9,\n", - " 7,\n", - " 7,\n", - " 6,\n", - " 3,\n", - " 1,\n", - " 1,\n", - " 6,\n", - " 14,\n", - " 6,\n", - " 5,\n", - " 13,\n", - " 6,\n", - " 8,\n", - " 12,\n", - " 1,\n", - " 6,\n", - " 7,\n", - " 3,\n", - " 7,\n", - " 16,\n", - " 14,\n", - " 3,\n", - " 7,\n", - " 10,\n", - " 5,\n", - " 7,\n", - " 7,\n", - " 7,\n", - " 7,\n", - " 9,\n", - " 7,\n", - " 3,\n", - " 1,\n", - " 1,\n", - " 1,\n", - " 1,\n", - " 5,\n", - " 10,\n", - " 5,\n", - " 7,\n", - " 12,\n", - " 12,\n", - " 6,\n", - " 14,\n", - " 7,\n", - " 6,\n", - " 6,\n", - " 6,\n", - " 8,\n", - " 6,\n", - " 7,\n", - " 6,\n", - " 6,\n", - " 1,\n", - " 6,\n", - " 8,\n", - " 7,\n", - " 14,\n", - " 8,\n", - " 7,\n", - " 2,\n", - " 12,\n", - " 7,\n", - " 16,\n", - " 6,\n", - " 10,\n", - " 8,\n", - " 7,\n", - " 14,\n", - " 6,\n", - " 9,\n", - " 1,\n", - " 9,\n", - " 9,\n", - " 16,\n", - " 13,\n", - " 5,\n", - " 7,\n", - " 6,\n", - " 9,\n", - " 7,\n", - " 6,\n", - " 11,\n", - " 8,\n", - " 9,\n", - " 9,\n", - " 5,\n", - " 2,\n", - " 5,\n", - " 5,\n", - " 9,\n", - " 3,\n", - " 0,\n", - " 5,\n", - " 8,\n", - " 7,\n", - " 2,\n", - " 2,\n", - " 7,\n", - " 11,\n", - " 11,\n", - " 13,\n", - " 13,\n", - " 14,\n", - " 3,\n", - " 13,\n", - " 1,\n", - " 6,\n", - " 7,\n", - " 7,\n", - " 14,\n", - " 7,\n", - " 11,\n", - " 8,\n", - " 16,\n", - " 6,\n", - " 6,\n", - " 1,\n", - " 8,\n", - " 13,\n", - " 7,\n", - " 8,\n", - " 4,\n", - " 11,\n", - " 6,\n", - " 7,\n", - " 5,\n", - " 5,\n", - " 5,\n", - " 4,\n", - " 5,\n", - " 6,\n", - " 5,\n", - " 8,\n", - " 2,\n", - " 13,\n", - " 6,\n", - " 13,\n", - " 12,\n", - " 16,\n", - " 8,\n", - " 14,\n", - " 7,\n", - " 3,\n", - " 13,\n", - " 11,\n", - " 6,\n", - " 7,\n", - " 6,\n", - " 7,\n", - " 6,\n", - " 6,\n", - " 1,\n", - " 7,\n", - " 11,\n", - " 14,\n", - " 7,\n", - " 11,\n", - " 1,\n", - " 9,\n", - " 0,\n", - " 11,\n", - " 5,\n", - " 1,\n", - " 0,\n", - " 5,\n", - " 12,\n", - " 1,\n", - " 14,\n", - " 12,\n", - " 8,\n", - " 13,\n", - " 13,\n", - " 4,\n", - " 12,\n", - " 3,\n", - " 1,\n", - " 6,\n", - " 7,\n", - " 5,\n", - " 5,\n", - " 5,\n", - " 5,\n", - " 13,\n", - " 5,\n", - " 12,\n", - " 7,\n", - " 8,\n", - " 6,\n", - " 2,\n", - " 5,\n", - " 6,\n", - " 9,\n", - " 13,\n", - " 7,\n", - " 16,\n", - " 6,\n", - " 7,\n", - " 7,\n", - " 4,\n", - " 11,\n", - " 6,\n", - " 12,\n", - " 2,\n", - " 7,\n", - " 6,\n", - " 2,\n", - " 14,\n", - " 7,\n", - " 7,\n", - " 14,\n", - " 13,\n", - " 11,\n", - " 5,\n", - " 6,\n", - " 7,\n", - " 13,\n", - " 7,\n", - " 7,\n", - " 8,\n", - " 13,\n", - " 8,\n", - " 6,\n", - " 6,\n", - " 6,\n", - " 8,\n", - " 11,\n", - " 7,\n", - " 6,\n", - " 0,\n", - " 9,\n", - " 7,\n", - " 6,\n", - " 7,\n", - " 7,\n", - " 9,\n", - " 7,\n", - " 6,\n", - " 6,\n", - " 1,\n", - " 4,\n", - " 1,\n", - " 7,\n", - " 6,\n", - " 1,\n", - " 8,\n", - " 9,\n", - " 7,\n", - " 5,\n", - " 5,\n", - " 8,\n", - " 7,\n", - " 0,\n", - " 10,\n", - " 9,\n", - " 9,\n", - " 3,\n", - " 6,\n", - " 9,\n", - " 9,\n", - " 1,\n", - " 9,\n", - " 0,\n", - " 2,\n", - " 2,\n", - " 6,\n", - " 3,\n", - " 8,\n", - " 7,\n", - " 7,\n", - " 3,\n", - " 1,\n", - " 1,\n", - " 6,\n", - " 5,\n", - " 6,\n", - " 1,\n", - " 6,\n", - " 11,\n", - " 9,\n", - " 8,\n", - " 7,\n", - " 5,\n", - " 5,\n", - " 5,\n", - " 5,\n", - " 1,\n", - " 13,\n", - " 6,\n", - " 5,\n", - " 6,\n", - " 7,\n", - " 2,\n", - " 6,\n", - " 6,\n", - " 13,\n", - " 1,\n", - " 7,\n", - " 7,\n", - " 6,\n", - " 6,\n", - " 7,\n", - " 7,\n", - " 6,\n", - " 13,\n", - " 8,\n", - " 8,\n", - " 1,\n", - " 6,\n", - " 2,\n", - " 3,\n", - " 6,\n", - " 6,\n", - " 6,\n", - " 8,\n", - " 7,\n", - " 6,\n", - " 6,\n", - " 7,\n", - " 14,\n", - " 6,\n", - " 4,\n", - " 8,\n", - " 11,\n", - " 7,\n", - " 7,\n", - " 6,\n", - " 7,\n", - " 6,\n", - " 3,\n", - " 6,\n", - " 14,\n", - " 6,\n", - " 6,\n", - " 10,\n", - " 1,\n", - " 14,\n", - " 4,\n", - " 11,\n", - " 12,\n", - " 1,\n", - " 6,\n", - " 7,\n", - " 6,\n", - " 9,\n", - " 6,\n", - " 6,\n", - " 8,\n", - " 6,\n", - " 6,\n", - " 8,\n", - " 2,\n", - " 7,\n", - " 6,\n", - " 5,\n", - " 12,\n", - " 7,\n", - " 1,\n", - " 6,\n", - " 6,\n", - " 6,\n", - " 1,\n", - " 10,\n", - " 16,\n", - " 5,\n", - " 6,\n", - " 6,\n", - " 6,\n", - " 0,\n", - " 12,\n", - " 7,\n", - " 6,\n", - " 6,\n", - " 6,\n", - " 13,\n", - " 6,\n", - " 6,\n", - " 9,\n", - " 3,\n", - " 7,\n", - " 3,\n", - " 13,\n", - " 6,\n", - " 6,\n", - " 7,\n", - " 7,\n", - " 6,\n", - " 8,\n", - " 8,\n", - " 7,\n", - " 7,\n", - " 10,\n", - " 6,\n", - " 16,\n", - " 2,\n", - " 7,\n", - " 6,\n", - " 7,\n", - " 6,\n", - " 6,\n", - " 6,\n", - " 6,\n", - " 13,\n", - " 6,\n", - " 2,\n", - " 6,\n", - " 5,\n", - " 3,\n", - " 12,\n", - " 6,\n", - " 8,\n", - " 4,\n", - " 6,\n", - " 10,\n", - " 11,\n", - " 11,\n", - " 8,\n", - " 5,\n", - " 1,\n", - " 1,\n", - " 13,\n", - " 5,\n", - " 14,\n", - " 6,\n", - " 12,\n", - " 6,\n", - " 11,\n", - " 12,\n", - " 6,\n", - " 0,\n", - " 0,\n", - " 9,\n", - " 11,\n", - " 1,\n", - " 6,\n", - " 6,\n", - " 3,\n", - " 3,\n", - " 8,\n", - " 6,\n", - " 8,\n", - " 6,\n", - " 12,\n", - " 8,\n", - " 9,\n", - " 6,\n", - " 8,\n", - " 7,\n", - " 8,\n", - " 8,\n", - " 1,\n", - " 9,\n", - " 12,\n", - " 8,\n", - " 6,\n", - " 14,\n", - " 12,\n", - " 0,\n", - " 4,\n", - " 6,\n", - " 6,\n", - " 5,\n", - " 7,\n", - " 3,\n", - " 7,\n", - " 7,\n", - " 6,\n", - " 9,\n", - " 9,\n", - " 9,\n", - " 12,\n", - " 3,\n", - " 6,\n", - " 8,\n", - " 8,\n", - " 8,\n", - " 12,\n", - " 12,\n", - " 9,\n", - " 7,\n", - " 7,\n", - " 8,\n", - " 6,\n", - " 14,\n", - " 3,\n", - " 3,\n", - " 5,\n", - " 9,\n", - " 3,\n", - " 1,\n", - " 7,\n", - " 6,\n", - " 6,\n", - " 6,\n", - " 7,\n", - " 7,\n", - " 6,\n", - " 1,\n", - " 6,\n", - " 6,\n", - " 6,\n", - " 1,\n", - " 9,\n", - " 14,\n", - " 9,\n", - " 0,\n", - " 9,\n", - " 1,\n", - " 3,\n", - " 6,\n", - " 7,\n", - " 5,\n", - " 6,\n", - " 8,\n", - " 7,\n", - " 7,\n", - " 4,\n", - " 0,\n", - " 6,\n", - " 6,\n", - " 9,\n", - " 9,\n", - " 9,\n", - " 0,\n", - " 11,\n", - " 8,\n", - " 7,\n", - " 4,\n", - " 5,\n", - " 6,\n", - " 6,\n", - " 8,\n", - " 6,\n", - " 9,\n", - " 10,\n", - " 12,\n", - " 4,\n", - " 14,\n", - " 6,\n", - " 10,\n", - " 5,\n", - " 7,\n", - " 1,\n", - " 1,\n", - " 6,\n", - " 0,\n", - " 6,\n", - " 3,\n", - " 6,\n", - " ...]" - ] - }, - "metadata": {}, - "execution_count": 32 - } - ], - "source": [ - "pred" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "fo6k4li1r0za" - }, - "outputs": [], - "source": [ - "import pickle \n", - "#encoder_filename = \"models/label_encoder.pkl\"\n", - "encoder_filename = \"models/label_encoder_superdomains.pkl\"\n", - "with open(drive_path + encoder_filename, 'rb') as file:\n", - " encoder = pickle.load(file)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "UU7qg7zVr0zb" - }, - "outputs": [], - "source": [ - "p2 = list(encoder.inverse_transform(pred))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "w4eHpBztr0zb" - }, - "outputs": [], - "source": [ - "df['superdomainBert'+corpus] = p2" - ] - }, - { - "cell_type": "code", - "source": [ - "df[df.numero == 2835]['content'+corpus].values" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "KsJQMhCBxpSF", - "outputId": "2ffa7475-e6de-4c42-a413-22c0d4b2d45f" - }, - "execution_count": null, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "array([\"\\nQueue, terme de Chancellerie, ce mot se dit de la\\nmaniere de sceller les lettres. Une lettre est scellée à \\nsimple queue, quand le sceau est attaché à un coin du\\nparchemin de la lettre qu'on a fendu exprès ; & elle\\nest scellée à double queue, quand le sceau est pendant\\nà une bande en double de parchemin passée au-travers de la lettre, comme on fait dans les expéditions\\nimportantes.\\n\",\n", - " \"\\nPiquer, v. act. (Charp. & Maçon.) piquer en Charpenterie, c'est marquer un piece de bois, pour la\\ntailler & la façonner. Piquer en Maçonnerie, c'est\\nrustiquer le parement ou les lits d'une pierre, c'est-à -dire que piquer signifie en fait de moilon le tailler\\ngrossierement ; on emploie le moilon piqué de la sorte\\naux voûtes de caves, aux puits & aux murs de clôture.\\nPiquer signifie aussi faire sur les matériaux destinés à \\nla construction extérieure les bâtimens, les petits\\npoints ou creux nécessaires pour leur servir d'ornement ; \\non pique de cette maniere la pierre de taille,\\n\\nle grès & le moilon particulierement pour l'ordre\\ntoscan. (D. J.)\\n\"],\n", - " dtype=object)" - ] - }, - "metadata": {}, - "execution_count": 34 - } - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 797 - }, - "id": "OCy54lRLr0zb", - "outputId": "a42d8a75-48b9-431a-9b8e-71e4d7018c6b" - }, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - " idLGE tomeLGE rankLGE \\\n", - "0 aam-0 1 63 \n", - "1 abaco-0 1 92 \n", - "2 abacot-0 1 96 \n", - "3 abaddon-0 1 104 \n", - "4 abandonnement-0 1 138 \n", - "5 abantes-0 1 143 \n", - "6 abaque-0 1 146 \n", - "7 abaremo-temo-0 1 152 \n", - "8 abares-0 1 153 \n", - "9 abarim-0 1 154 \n", - "\n", - " contentLGE volumeEDdA numeroEDdA \\\n", - "0 AAM. Mesure de capacité pour les liquides en u... 1 31 \n", - "1 ABACO, architecte italien du xvi siècle (V. La... 1 42 \n", - "2 ABACOT. Double couronne que portaient autrefoi... 1 44 \n", - "3 ABADDONou APOLYON le Destructeur. « Elles\\nava... 1 46 \n", - "4 ABANDONNEMENT. I. Droit civil. — Ce mot est un... 1 75 \n", - "5 ABANTES. Peuplade d’origine douteuse que l’on ... 1 81 \n", - "6 ABAQUE. I. Antiquité.— Dans l’antiquité on don... 1 84 \n", - "7 ABAREMO-TEMO(Bot.). Nom sous lequel Pison\\n(Br... 1 90 \n", - "8 ABARES. Nom de deux peuples distincts, habitan... 1 91 \n", - "9 ABARIM. Chaîne de montagnes de la Palestine au... 1 92 \n", - "\n", - " headEDdA authorEDdA normclassEDdA \\\n", - "0 AAM Diderot unclassified \n", - "1 ABACO d'Alembert unclassified \n", - "2 ABACOT Diderot unclassified \n", - "3 ABADDON Diderot unclassified \n", - "4 ABANDONNEMENT Toussaint Droit \n", - "5 ABANTES Diderot unclassified \n", - "6 ABAQUE d'Alembert2 unclassified \n", - "7 ABAREMO-TEMO Diderot unclassified \n", - "8 ABARES Diderot unclassified \n", - "9 ABARIM Diderot unclassified \n", - "\n", - " contentEDdA nbWordsEDdA \\\n", - "0 \\n* AAM, s. mesure des Liquides, en usage à Am... 18 \n", - "1 \\nABACO, s. m. Quelques anciens Auteurs se ser... 26 \n", - "2 \\n* ABACOT, s. m. nom de l'ancienne parure dè\\... 22 \n", - "3 \\n* ABADDON, s. m. vient d'abad, perdre. C'est... 25 \n", - "4 \\nABANDONNEMENT, s. m. en Droit, est le délais... 77 \n", - "5 \\n* ABANTES, s. m. pl. Peuples de Thrace qui p... 26 \n", - "6 \\nABAQUE, s. m. chez les anciens Mathématicien... 52 \n", - "7 \\n* ABAREMO-TEMO, s. m. arbre qui croît, dit-o... 55 \n", - "8 \\n* ABARES, restes de la Nation des Huns qui s... 24 \n", - "9 \\n* ABARIM, montagne de l'Arabie d'où Moyse vi... 23 \n", - "\n", - " superdomainEDdA superdomainBertEDdA superdomainBertLGE \n", - "0 Unclassified Commerce Commerce \n", - "1 Unclassified Physique Beaux-arts \n", - "2 Unclassified Histoire Histoire \n", - "3 Unclassified Histoire Religion \n", - "4 Droit Jurisprudence Droit Jurisprudence Droit Jurisprudence \n", - "5 Unclassified Histoire Histoire \n", - "6 Unclassified Physique Histoire \n", - "7 Unclassified Histoire naturelle Histoire naturelle \n", - "8 Unclassified Histoire Géographie \n", - "9 Unclassified Géographie Géographie " - ], - "text/html": [ - "\n", - " <div id=\"df-825c5672-f5f9-49ed-95eb-fdcae67ba1f1\">\n", - " <div class=\"colab-df-container\">\n", - " <div>\n", - "<style scoped>\n", - " .dataframe tbody tr th:only-of-type {\n", - " vertical-align: middle;\n", - " }\n", - "\n", - " .dataframe tbody tr th {\n", - " vertical-align: top;\n", - " }\n", - "\n", - " .dataframe thead th {\n", - " text-align: right;\n", - " }\n", - "</style>\n", - "<table border=\"1\" class=\"dataframe\">\n", - " <thead>\n", - " <tr style=\"text-align: right;\">\n", - " <th></th>\n", - " <th>idLGE</th>\n", - " <th>tomeLGE</th>\n", - " <th>rankLGE</th>\n", - " <th>contentLGE</th>\n", - " <th>volumeEDdA</th>\n", - " <th>numeroEDdA</th>\n", - " <th>headEDdA</th>\n", - " <th>authorEDdA</th>\n", - " <th>normclassEDdA</th>\n", - " <th>contentEDdA</th>\n", - " <th>nbWordsEDdA</th>\n", - " <th>superdomainEDdA</th>\n", - " <th>superdomainBertEDdA</th>\n", - " <th>superdomainBertLGE</th>\n", - " </tr>\n", - " </thead>\n", - " <tbody>\n", - " <tr>\n", - " <th>0</th>\n", - " <td>aam-0</td>\n", - " <td>1</td>\n", - " <td>63</td>\n", - " <td>AAM. Mesure de capacité pour les liquides en u...</td>\n", - " <td>1</td>\n", - " <td>31</td>\n", - " <td>AAM</td>\n", - " <td>Diderot</td>\n", - " <td>unclassified</td>\n", - " <td>\\n* AAM, s. mesure des Liquides, en usage à Am...</td>\n", - " <td>18</td>\n", - " <td>Unclassified</td>\n", - " <td>Commerce</td>\n", - " <td>Commerce</td>\n", - " </tr>\n", - " <tr>\n", - " <th>1</th>\n", - " <td>abaco-0</td>\n", - " <td>1</td>\n", - " <td>92</td>\n", - " <td>ABACO, architecte italien du xvi siècle (V. La...</td>\n", - " <td>1</td>\n", - " <td>42</td>\n", - " <td>ABACO</td>\n", - " <td>d'Alembert</td>\n", - " <td>unclassified</td>\n", - " <td>\\nABACO, s. m. Quelques anciens Auteurs se ser...</td>\n", - " <td>26</td>\n", - " <td>Unclassified</td>\n", - " <td>Physique</td>\n", - " <td>Beaux-arts</td>\n", - " </tr>\n", - " <tr>\n", - " <th>2</th>\n", - " <td>abacot-0</td>\n", - " <td>1</td>\n", - " <td>96</td>\n", - " <td>ABACOT. Double couronne que portaient autrefoi...</td>\n", - " <td>1</td>\n", - " <td>44</td>\n", - " <td>ABACOT</td>\n", - " <td>Diderot</td>\n", - " <td>unclassified</td>\n", - " <td>\\n* ABACOT, s. m. nom de l'ancienne parure dè\\...</td>\n", - " <td>22</td>\n", - " <td>Unclassified</td>\n", - " <td>Histoire</td>\n", - " <td>Histoire</td>\n", - " </tr>\n", - " <tr>\n", - " <th>3</th>\n", - " <td>abaddon-0</td>\n", - " <td>1</td>\n", - " <td>104</td>\n", - " <td>ABADDONou APOLYON le Destructeur. « Elles\\nava...</td>\n", - " <td>1</td>\n", - " <td>46</td>\n", - " <td>ABADDON</td>\n", - " <td>Diderot</td>\n", - " <td>unclassified</td>\n", - " <td>\\n* ABADDON, s. m. vient d'abad, perdre. C'est...</td>\n", - " <td>25</td>\n", - " <td>Unclassified</td>\n", - " <td>Histoire</td>\n", - " <td>Religion</td>\n", - " </tr>\n", - " <tr>\n", - " <th>4</th>\n", - " <td>abandonnement-0</td>\n", - " <td>1</td>\n", - " <td>138</td>\n", - " <td>ABANDONNEMENT. I. Droit civil. — Ce mot est un...</td>\n", - " <td>1</td>\n", - " <td>75</td>\n", - " <td>ABANDONNEMENT</td>\n", - " <td>Toussaint</td>\n", - " <td>Droit</td>\n", - " <td>\\nABANDONNEMENT, s. m. en Droit, est le délais...</td>\n", - " <td>77</td>\n", - " <td>Droit Jurisprudence</td>\n", - " <td>Droit Jurisprudence</td>\n", - " <td>Droit Jurisprudence</td>\n", - " </tr>\n", - " <tr>\n", - " <th>5</th>\n", - " <td>abantes-0</td>\n", - " <td>1</td>\n", - " <td>143</td>\n", - " <td>ABANTES. Peuplade d’origine douteuse que l’on ...</td>\n", - " <td>1</td>\n", - " <td>81</td>\n", - " <td>ABANTES</td>\n", - " <td>Diderot</td>\n", - " <td>unclassified</td>\n", - " <td>\\n* ABANTES, s. m. pl. Peuples de Thrace qui p...</td>\n", - " <td>26</td>\n", - " <td>Unclassified</td>\n", - " <td>Histoire</td>\n", - " <td>Histoire</td>\n", - " </tr>\n", - " <tr>\n", - " <th>6</th>\n", - " <td>abaque-0</td>\n", - " <td>1</td>\n", - " <td>146</td>\n", - " <td>ABAQUE. I. Antiquité.— Dans l’antiquité on don...</td>\n", - " <td>1</td>\n", - " <td>84</td>\n", - " <td>ABAQUE</td>\n", - " <td>d'Alembert2</td>\n", - " <td>unclassified</td>\n", - " <td>\\nABAQUE, s. m. chez les anciens Mathématicien...</td>\n", - " <td>52</td>\n", - " <td>Unclassified</td>\n", - " <td>Physique</td>\n", - " <td>Histoire</td>\n", - " </tr>\n", - " <tr>\n", - " <th>7</th>\n", - " <td>abaremo-temo-0</td>\n", - " <td>1</td>\n", - " <td>152</td>\n", - " <td>ABAREMO-TEMO(Bot.). Nom sous lequel Pison\\n(Br...</td>\n", - " <td>1</td>\n", - " <td>90</td>\n", - " <td>ABAREMO-TEMO</td>\n", - " <td>Diderot</td>\n", - " <td>unclassified</td>\n", - " <td>\\n* ABAREMO-TEMO, s. m. arbre qui croît, dit-o...</td>\n", - " <td>55</td>\n", - " <td>Unclassified</td>\n", - " <td>Histoire naturelle</td>\n", - " <td>Histoire naturelle</td>\n", - " </tr>\n", - " <tr>\n", - " <th>8</th>\n", - " <td>abares-0</td>\n", - " <td>1</td>\n", - " <td>153</td>\n", - " <td>ABARES. Nom de deux peuples distincts, habitan...</td>\n", - " <td>1</td>\n", - " <td>91</td>\n", - " <td>ABARES</td>\n", - " <td>Diderot</td>\n", - " <td>unclassified</td>\n", - " <td>\\n* ABARES, restes de la Nation des Huns qui s...</td>\n", - " <td>24</td>\n", - " <td>Unclassified</td>\n", - " <td>Histoire</td>\n", - " <td>Géographie</td>\n", - " </tr>\n", - " <tr>\n", - " <th>9</th>\n", - " <td>abarim-0</td>\n", - " <td>1</td>\n", - " <td>154</td>\n", - " <td>ABARIM. Chaîne de montagnes de la Palestine au...</td>\n", - " <td>1</td>\n", - " <td>92</td>\n", - " <td>ABARIM</td>\n", - " <td>Diderot</td>\n", - " <td>unclassified</td>\n", - " <td>\\n* ABARIM, montagne de l'Arabie d'où Moyse vi...</td>\n", - " <td>23</td>\n", - " <td>Unclassified</td>\n", - " <td>Géographie</td>\n", - " <td>Géographie</td>\n", - " </tr>\n", - " </tbody>\n", - "</table>\n", - "</div>\n", - " <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-825c5672-f5f9-49ed-95eb-fdcae67ba1f1')\"\n", - " title=\"Convert this dataframe to an interactive table.\"\n", - " style=\"display:none;\">\n", - " \n", - " <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n", - " width=\"24px\">\n", - " <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n", - " <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n", - " </svg>\n", - " </button>\n", - " \n", - " <style>\n", - " .colab-df-container {\n", - " display:flex;\n", - " flex-wrap:wrap;\n", - " gap: 12px;\n", - " }\n", - "\n", - " .colab-df-convert {\n", - " background-color: #E8F0FE;\n", - " border: none;\n", - " border-radius: 50%;\n", - " cursor: pointer;\n", - " display: none;\n", - " fill: #1967D2;\n", - " height: 32px;\n", - " padding: 0 0 0 0;\n", - " width: 32px;\n", - " }\n", - "\n", - " .colab-df-convert:hover {\n", - " background-color: #E2EBFA;\n", - " box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n", - " fill: #174EA6;\n", - " }\n", - "\n", - " [theme=dark] .colab-df-convert {\n", - " background-color: #3B4455;\n", - " fill: #D2E3FC;\n", - " }\n", - "\n", - " [theme=dark] .colab-df-convert:hover {\n", - " background-color: #434B5C;\n", - " box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n", - " filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n", - " fill: #FFFFFF;\n", - " }\n", - " </style>\n", - "\n", - " <script>\n", - " const buttonEl =\n", - " document.querySelector('#df-825c5672-f5f9-49ed-95eb-fdcae67ba1f1 button.colab-df-convert');\n", - " buttonEl.style.display =\n", - " google.colab.kernel.accessAllowed ? 'block' : 'none';\n", - "\n", - " async function convertToInteractive(key) {\n", - " const element = document.querySelector('#df-825c5672-f5f9-49ed-95eb-fdcae67ba1f1');\n", - " const dataTable =\n", - " await google.colab.kernel.invokeFunction('convertToInteractive',\n", - " [key], {});\n", - " if (!dataTable) return;\n", - "\n", - " const docLinkHtml = 'Like what you see? Visit the ' +\n", - " '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n", - " + ' to learn more about interactive tables.';\n", - " element.innerHTML = '';\n", - " dataTable['output_type'] = 'display_data';\n", - " await google.colab.output.renderOutput(dataTable, element);\n", - " const docLink = document.createElement('div');\n", - " docLink.innerHTML = docLinkHtml;\n", - " element.appendChild(docLink);\n", - " }\n", - " </script>\n", - " </div>\n", - " </div>\n", - " " - ] - }, - "metadata": {}, - "execution_count": 36 - } - ], - "source": [ - "df.head(10)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "J9rObbvVr0zc" - }, - "outputs": [], - "source": [ - "df.to_csv(drive_path + \"/predictions/predictions_parallel_superdomain.tsv\", sep=\"\\t\")" - ] - }, - { - "cell_type": "code", - "source": [ - "df.drop(columns=['contentLGE', 'contentEDdA'], inplace=True)" - ], - "metadata": { - "id": "8cX6XBq8_F5T" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "df.to_csv(drive_path + \"/predictions/metadata_parallel_predictions_superdomain.csv\", sep=\",\", index=False)" - ], - "metadata": { - "id": "7fx6BPpg0iNc" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "source": [], - "metadata": { - "id": "7TD1mbKj_fXH" - }, - "execution_count": null, - "outputs": [] - } - ], - "metadata": { - "accelerator": "GPU", - "colab": { - "machine_shape": "hm", - "provenance": [] - }, - "kernelspec": { - "display_name": "Python 3.9.13 ('geode-classification-py39')", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.13" - }, - "vscode": { - "interpreter": { - "hash": "16fac9c2d845f8e1f8c6fffffe3d3a0def61c7e42da17a08d00f279ad4dea797" - } - }, - "widgets": { - "application/vnd.jupyter.widget-state+json": { - "11c285bed74e46a08fbb7bf88715aafa": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_3fde7318ebc3458cb64f8927fdcbaee3", - "IPY_MODEL_8d57eb44d9394604981a8f8f97f48b7c", - "IPY_MODEL_1cb6ed877c2b455b9463b12c2da877d8" - ], - "layout": "IPY_MODEL_5e03651dca944a5f91b675c503feeeac" - } - }, - "3fde7318ebc3458cb64f8927fdcbaee3": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_0521c3cc6abd44ae989ac0701100045d", - "placeholder": "​", - "style": "IPY_MODEL_d12a8ef069af4d79870bd783f2343184", - "value": "Downloading: 100%" - } - }, - "8d57eb44d9394604981a8f8f97f48b7c": { - "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_28d38094dcd54d6694e2efad7fea6abb", - "max": 995526, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_6f80ea06220b4a498e6169e55cd8800f", - "value": 995526 - } - }, - "1cb6ed877c2b455b9463b12c2da877d8": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_3de8b4b0d6494c058589c535dc24dc3e", - "placeholder": "​", - "style": "IPY_MODEL_e0df5e2d4ebd4eb3b126c16dadb2ba62", - "value": " 996k/996k [00:00<00:00, 2.00MB/s]" - } - }, - "5e03651dca944a5f91b675c503feeeac": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "0521c3cc6abd44ae989ac0701100045d": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "d12a8ef069af4d79870bd783f2343184": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "28d38094dcd54d6694e2efad7fea6abb": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "6f80ea06220b4a498e6169e55cd8800f": { - "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "3de8b4b0d6494c058589c535dc24dc3e": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "e0df5e2d4ebd4eb3b126c16dadb2ba62": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "9be44ba364a344f2b6b2546ae9d61ba8": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_fe472df31774495c83aa159e116ba2ee", - "IPY_MODEL_0180ffc200e8466191a11a723c82e43f", - "IPY_MODEL_a07ac2935a3f4d84971ae9147a854969" - ], - "layout": "IPY_MODEL_af4ae976808042bf929ab17df10530b2" - } - }, - "fe472df31774495c83aa159e116ba2ee": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_b2277b3d600c43f999b3a07215ac2e13", - "placeholder": "​", - "style": "IPY_MODEL_ebe5e6f8af1e4e04a8a2b5939ac09039", - "value": "Downloading: 100%" - } - }, - "0180ffc200e8466191a11a723c82e43f": { - "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_c4ea841cb43747cdbce35f8f9c711cde", - "max": 29, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_2d937fce2e6c4b69816352bd264ded41", - "value": 29 - } - }, - "a07ac2935a3f4d84971ae9147a854969": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_64b57e3be2c743b3b0e58d338243c656", - "placeholder": "​", - "style": "IPY_MODEL_6ca9688ac7fa4e638994b91242c0ac87", - "value": " 29.0/29.0 [00:00<00:00, 1.88kB/s]" - } - }, - "af4ae976808042bf929ab17df10530b2": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "b2277b3d600c43f999b3a07215ac2e13": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "ebe5e6f8af1e4e04a8a2b5939ac09039": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "c4ea841cb43747cdbce35f8f9c711cde": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "2d937fce2e6c4b69816352bd264ded41": { - "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "64b57e3be2c743b3b0e58d338243c656": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "6ca9688ac7fa4e638994b91242c0ac87": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "aa6a7a9106554f85a91150bd65c271d0": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_ea3f471546734f5994edfdc214319368", - "IPY_MODEL_04a86b4164fa49de8fd47d4d373e1d81", - "IPY_MODEL_be067a8a406f41779e42bd35abcbfcf0" - ], - "layout": "IPY_MODEL_7df91507e47d4a6992464293ce002a29" - } - }, - "ea3f471546734f5994edfdc214319368": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_ecef81814a7c4481aa49eb73807bfe4d", - "placeholder": "​", - "style": "IPY_MODEL_2b9b4eac7994405ca9bce38332df2629", - "value": "Downloading: 100%" - } - }, - "04a86b4164fa49de8fd47d4d373e1d81": { - "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_4edc5b66f0eb44a0b05876fda90f0d1b", - "max": 1961828, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_5285a390fb42415289d89585e04c8994", - "value": 1961828 - } - }, - "be067a8a406f41779e42bd35abcbfcf0": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_53643db8401846f2af6f15f5cd0c9998", - "placeholder": "​", - "style": "IPY_MODEL_bc4825e1a43f4a20b496d82ea3687e6f", - "value": " 1.96M/1.96M [00:00<00:00, 2.16MB/s]" - } - }, - "7df91507e47d4a6992464293ce002a29": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "ecef81814a7c4481aa49eb73807bfe4d": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "2b9b4eac7994405ca9bce38332df2629": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "4edc5b66f0eb44a0b05876fda90f0d1b": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "5285a390fb42415289d89585e04c8994": { - "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "53643db8401846f2af6f15f5cd0c9998": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "bc4825e1a43f4a20b496d82ea3687e6f": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "4c46904f8e944d2b834ba9d384b00a8c": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_ef37bbf1f34e4765b1803a607716d0d1", - "IPY_MODEL_c2d6041cd6674043953e094791ab9659", - "IPY_MODEL_e4c43817f44743388e6fd98b8dbb2eda" - ], - "layout": "IPY_MODEL_39636049d60a4bb4bde7d0ef1af25d78" - } - }, - "ef37bbf1f34e4765b1803a607716d0d1": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_c3e73d423c2c41c0a942331070fda723", - "placeholder": "​", - "style": "IPY_MODEL_087ebcb093bb41c28485bdc762fb5da6", - "value": "Downloading: 100%" - } - }, - "c2d6041cd6674043953e094791ab9659": { - "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_de270f0aa8194e0bb470e693a35d7d6e", - "max": 625, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_2924cdc1348942cfb23f28a5383af3e4", - "value": 625 - } - }, - "e4c43817f44743388e6fd98b8dbb2eda": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_209ff109c8e142dfba37baea2d3d5de7", - "placeholder": "​", - "style": "IPY_MODEL_4203b950e245481590e8105f31301782", - "value": " 625/625 [00:00<00:00, 35.2kB/s]" - } - }, - "39636049d60a4bb4bde7d0ef1af25d78": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "c3e73d423c2c41c0a942331070fda723": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "087ebcb093bb41c28485bdc762fb5da6": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "de270f0aa8194e0bb470e693a35d7d6e": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "2924cdc1348942cfb23f28a5383af3e4": { - "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "209ff109c8e142dfba37baea2d3d5de7": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "4203b950e245481590e8105f31301782": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - } - } - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} \ No newline at end of file -- GitLab