Predict_LGE.ipynb

{
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "# BERT Predict classification\n",
        "\n",
        "## 1. Setup the environment\n",
        "\n",
        "### 1.1 Setup colab environment\n",
        "\n",
        "#### 1.1.1 Install packages"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "pwmZ5bBvgGNh",
        "outputId": "fce0a8bf-1779-4079-c7ac-200ebb2678c5"
      },
      "outputs": [],
      "source": [
        "!pip install transformers==4.10.3\n",
        "!pip install sentencepiece"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "#### 1.1.2 Use more RAM"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "WF0qFN_g3ekz",
        "outputId": "f3a5f049-24ee-418f-fe5e-84c633234ad8"
      },
      "outputs": [],
      "source": [
        "from psutil import virtual_memory\n",
        "ram_gb = virtual_memory().total / 1e9\n",
        "print('Your runtime has {:.1f} gigabytes of available RAM\\n'.format(ram_gb))\n",
        "\n",
        "if ram_gb < 20:\n",
        "  print('Not using a high-RAM runtime')\n",
        "else:\n",
        "  print('You are using a high-RAM runtime!')"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "#### 1.1.3 Mount GoogleDrive"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "vL0S-s9Uofvn",
        "outputId": "4b7efa4d-7f09-4c8e-bc98-99e6099ede32"
      },
      "outputs": [],
      "source": [
        "from google.colab import drive\n",
        "drive.mount('/content/drive')"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "wSqbrupGMc1M"
      },
      "source": [
        "### 1.2 Import librairies"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "SkErnwgMMbRj"
      },
      "outputs": [],
      "source": [
        "import os\n",
        "import pandas as pd \n",
        "import numpy as np\n",
        "import pickle \n",
        "import torch\n",
        "from tqdm import tqdm\n",
        "\n",
        "from transformers import BertTokenizer, BertForSequenceClassification, CamembertTokenizer, CamembertForSequenceClassification\n",
        "from torch.utils.data import TensorDataset, DataLoader, SequentialSampler"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "8hzEGHl7gmzk"
      },
      "source": [
        "### 1.3 Setup GPU"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "dPOU-Efhf4ui",
        "outputId": "121dd21e-f98c-483d-d6d1-2838f732a4e2"
      },
      "outputs": [],
      "source": [
        "  # If there's a GPU available...\n",
        "if torch.cuda.is_available():    \n",
        "    # Tell PyTorch to use the GPU.    \n",
        "    device = torch.device(\"cuda\")\n",
        "    gpu_name = \"cuda\"\n",
        "    print('There are %d GPU(s) available.' % torch.cuda.device_count())\n",
        "    print('We will use the GPU:', torch.cuda.get_device_name(0))\n",
        "# for MacOS\n",
        "elif torch.backends.mps.is_available() and torch.backends.mps.is_built():\n",
        "    device = torch.device(\"mps\")\n",
        "    gpu_name = \"mps\"\n",
        "    print('We will use the GPU')\n",
        "else:\n",
        "    device = torch.device(\"cpu\")\n",
        "    gpu_name = \"cpu\"\n",
        "    print('No GPU available, using the CPU instead.')"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "## 2. Utils"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "def generate_dataloader(tokenizer, sentences, batch_size = 8, max_len = 512):\n",
        "\n",
        "    # Tokenize all of the sentences and map the tokens to thier word IDs.\n",
        "    input_ids_test = []\n",
        "    # For every sentence...\n",
        "    for sent in sentences:\n",
        "        # `encode` will:\n",
        "        #   (1) Tokenize the sentence.\n",
        "        #   (2) Prepend the `[CLS]` token to the start.\n",
        "        #   (3) Append the `[SEP]` token to the end.\n",
        "        #   (4) Map tokens to their IDs.\n",
        "        encoded_sent = tokenizer.encode(\n",
        "                            sent,                      # Sentence to encode.\n",
        "                            add_special_tokens = True, # Add '[CLS]' and '[SEP]'\n",
        "                            # This function also supports truncation and conversion\n",
        "                            # to pytorch tensors, but I need to do padding, so I\n",
        "                            # can't use these features.\n",
        "                            #max_length = max_len,          # Truncate all sentences.\n",
        "                            #return_tensors = 'pt',     # Return pytorch tensors.\n",
        "                    )\n",
        "        input_ids_test.append(encoded_sent)\n",
        "\n",
        "    # Pad our input tokens\n",
        "    padded_test = []\n",
        "    for i in input_ids_test:\n",
        "        if len(i) > max_len:\n",
        "            padded_test.extend([i[:max_len]])\n",
        "        else:\n",
        "            padded_test.extend([i + [0] * (max_len - len(i))])\n",
        "    input_ids_test = np.array(padded_test)\n",
        "\n",
        "    # Create attention masks\n",
        "    attention_masks = []\n",
        "\n",
        "    # Create a mask of 1s for each token followed by 0s for padding\n",
        "    for seq in input_ids_test:\n",
        "        seq_mask = [float(i>0) for i in seq]\n",
        "        attention_masks.append(seq_mask)\n",
        "\n",
        "    # Convert to tensors.\n",
        "    inputs = torch.tensor(input_ids_test)\n",
        "    masks = torch.tensor(attention_masks)\n",
        "    #set batch size\n",
        "\n",
        "    # Create the DataLoader.\n",
        "    data = TensorDataset(inputs, masks)\n",
        "    prediction_sampler = SequentialSampler(data)\n",
        "\n",
        "    return DataLoader(data, sampler=prediction_sampler, batch_size=batch_size)\n",
        "\n",
        "\n",
        "def predict(model, dataloader, device):\n",
        "\n",
        "    # Put model in evaluation mode\n",
        "    model.eval()\n",
        "\n",
        "    # Tracking variables\n",
        "    predictions_test , true_labels = [], []\n",
        "    pred_labels_ = []\n",
        "    # Predict\n",
        "    for batch in dataloader:\n",
        "    # Add batch to GPU\n",
        "        batch = tuple(t.to(device) for t in batch)\n",
        "\n",
        "        # Unpack the inputs from the dataloader\n",
        "        b_input_ids, b_input_mask = batch\n",
        "\n",
        "        # Telling the model not to compute or store gradients, saving memory and\n",
        "        # speeding up prediction\n",
        "        with torch.no_grad():\n",
        "            # Forward pass, calculate logit predictions\n",
        "            outputs = model(b_input_ids, token_type_ids=None,\n",
        "                            attention_mask=b_input_mask)\n",
        "        logits = outputs[0]\n",
        "        #print(logits)\n",
        "\n",
        "        # Move logits and labels to CPU ???\n",
        "        logits = logits.detach().cpu().numpy()\n",
        "        #print(logits)\n",
        "\n",
        "        # Store predictions and true labels\n",
        "        predictions_test.append(logits)\n",
        "\n",
        "        pred_labels = []\n",
        "        \n",
        "        for i in range(len(predictions_test)):\n",
        "            # The predictions for this batch are a 2-column ndarray (one column for \"0\"\n",
        "            # and one column for \"1\"). Pick the label with the highest value and turn this\n",
        "            # in to a list of 0s and 1s.\n",
        "            pred_labels_i = np.argmax(predictions_test[i], axis=1).flatten()\n",
        "            pred_labels.append(pred_labels_i)\n",
        "\n",
        "    pred_labels_ += [item for sublist in pred_labels for item in sublist]\n",
        "    return pred_labels_\n",
        "\n",
        "\n",
        "def text_folder_to_dataframe(path):\n",
        "\n",
        "  data = []\n",
        "  # id,tome,filename,nb_words,content,domain\n",
        "\n",
        "  for tome in sorted(os.listdir(path)):\n",
        "    try:\n",
        "        for article in tqdm(sorted(os.listdir(path + \"/\" + tome))):\n",
        "            filename = article[:-4]\n",
        "            id = tome + filename\n",
        "\n",
        "            if article[-4:] == \".txt\":\n",
        "                with open(path + \"/\" + tome + \"/\" + article) as f:\n",
        "                    content = f.read()\n",
        "\n",
        "                    data.append([id, tome, filename, content, len(content.split(' '))])\n",
        "    except NotADirectoryError:\n",
        "        pass\n",
        "  return pd.DataFrame(data, columns=['id', 'tome', 'filename', 'content', 'nb_words'])\n"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "c5QKcXulhNJ-"
      },
      "source": [
        "## 3. Load Data"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "### 3.1 LGE (Nakala)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "!wget https://api.nakala.fr/data/10.34847/nkl.74eb1xfd/e522413b58b04ab7c283f8fa68642e9cb69ab5c5"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "!unzip e522413b58b04ab7c283f8fa68642e9cb69ab5c5"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "input_path = \"/Users/lmoncla/Documents/Data/Corpus/LGE/Text\"\n",
        "#input_path = \"./Text\""
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "df_LGE = text_folder_to_dataframe(input_path)\n",
        "#df_LGE = pd.read_csv(path + \"data/LGE_withContent.tsv\", sep=\"\\t\")\n",
        "data_LGE = df_LGE[\"content\"].values"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "df_LGE.head()"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "df_LGE.shape"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "## 3. Load model and predict\n",
        "\n",
        "### 3.1 BERT / CamemBERT"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "#path = \"drive/MyDrive/Classification-EDdA/\"\n",
        "path = \"../\"\n",
        "model_name = \"bert-base-multilingual-cased\"\n",
        "model_path = path + \"models/model_\" + model_name + \"_s10000.pt\""
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "print('Loading Bert Tokenizer...')\n",
        "tokenizer = BertTokenizer.from_pretrained(model_name)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "data_loader = generate_dataloader(tokenizer, data_LGE)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "model = BertForSequenceClassification.from_pretrained(model_path).to(gpu_name) #.to(\"cuda\")"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "_fzgS5USJeAF",
        "outputId": "be4a5506-76ed-4eef-bb3c-fe2bb77c6e4d"
      },
      "outputs": [],
      "source": [
        "pred = predict(model, data_loader, device)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "encoder_filename = \"models/label_encoder.pkl\"\n",
        "with open(path + encoder_filename, 'rb') as file:\n",
        "      encoder = pickle.load(file)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "p2 = list(encoder.inverse_transform(pred))"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "df_LGE['domain'] = p2"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "df_LGE.head(50)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "filepath = path + \"results_LGE/LGE-metadata-withContent.csv\"\n",
        "df_LGE.to_csv(filepath, sep=\"\\,\")"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "df_LGE.drop(columns=['content'], inplace=True)\n",
        "filepath = path + \"results_LGE/LGE-metadata.csv\"\n",
        "df_LGE.to_csv(filepath, sep=\"\\,\")"
      ]
    }
  ],
  "metadata": {
    "accelerator": "GPU",
    "colab": {
      "collapsed_sections": [],
      "machine_shape": "hm",
      "name": "EDdA-Classification_BertFineTuning.ipynb",
      "provenance": []
    },
    "kernelspec": {
      "display_name": "Python 3.9.13 ('geode-classification-py39')",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "version": "3.9.13"
    },
    "vscode": {
      "interpreter": {
        "hash": "16fac9c2d845f8e1f8c6fffffe3d3a0def61c7e42da17a08d00f279ad4dea797"
      }
    },
    "widgets": {
      "application/vnd.jupyter.widget-state+json": {
        "0279837673b446b09aac18346213eb7e": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "HBoxModel",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_69004a5069094f8c9d59d5136f627bef",
              "IPY_MODEL_e96a95317b0945c58c8ff0e944c7593e",
              "IPY_MODEL_68b69c9d3a274900bc2892848f725cb0"
            ],
            "layout": "IPY_MODEL_09b5f0bbd5c14bc289b0f92a22bb29ab"
          }
        },
        "0779c8ea0ed24e64a800ae5dff6bc8ce": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "FloatProgressModel",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "success",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_8d24b669a39b4876ac0a014dff678db1",
            "max": 810912,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_99b785ea53744868b8b11e5e94936fcc",
            "value": 810912
          }
        },
        "09b5f0bbd5c14bc289b0f92a22bb29ab": {
          "model_module": "@jupyter-widgets/base",
          "model_module_version": "1.2.0",
          "model_name": "LayoutModel",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "12afa6b6474b401f9ff3f189cc0d3d11": {
          "model_module": "@jupyter-widgets/base",
          "model_module_version": "1.2.0",
          "model_name": "LayoutModel",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "152a31110bf9477989833eac91794688": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "FloatProgressModel",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "success",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_826bd7d0a1f146ea9f7d53584468190c",
            "max": 445032417,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_e86a1d4d268c4314897b58f7bba5ec25",
            "value": 445032417
          }
        },
        "1bf6a76237454349aafc1e9284376879": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "DescriptionStyleModel",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "274e505b5f354efc8de3ef26cc43e617": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "HBoxModel",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_ad5e0e1439a94578a31b80c90dbf3247",
              "IPY_MODEL_0779c8ea0ed24e64a800ae5dff6bc8ce",
              "IPY_MODEL_7870340ac12b469c8ac19de3a47b6e67"
            ],
            "layout": "IPY_MODEL_f1f9d5b32f60473b86ae6b340d6c0850"
          }
        },
        "2c44d9c11e704b70aa32904a23d1790c": {
          "model_module": "@jupyter-widgets/base",
          "model_module_version": "1.2.0",
          "model_name": "LayoutModel",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "2cf386a8d14d43389374f79bfa922675": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "DescriptionStyleModel",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "2d1d632da0f740c38512c9ad779d3173": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "DescriptionStyleModel",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "3592b1ed1d6d452b93c57b304943a1cb": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "DescriptionStyleModel",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "4a23110523184d019a77368116f738f3": {
          "model_module": "@jupyter-widgets/base",
          "model_module_version": "1.2.0",
          "model_name": "LayoutModel",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "500826e3813b414a820aa260bfde9e23": {
          "model_module": "@jupyter-widgets/base",
          "model_module_version": "1.2.0",
          "model_name": "LayoutModel",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "5032547e748f45a3b0cdd12fafe1dd05": {
          "model_module": "@jupyter-widgets/base",
          "model_module_version": "1.2.0",
          "model_name": "LayoutModel",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "58b4f9e0366f4d4eba7f902af84b8965": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "HTMLModel",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_12afa6b6474b401f9ff3f189cc0d3d11",
            "placeholder": "",
            "style": "IPY_MODEL_c4d981755d1d42b6940396b77bc251bc",
            "value": "Downloading: 100%"
          }
        },
        "5978954f56fb40928b970f32d1634aaf": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "ProgressStyleModel",
          "state": {
            "_model_module": "@jupyter-widgets/controls",