{ "cells": [ { "cell_type": "markdown", "metadata": { "id": "4YCMlsNwOWs0" }, "source": [ "# BERT fine-tuning for EDdA classification" ] }, { "cell_type": "markdown", "metadata": { "id": "6xdYI9moOQSv" }, "source": [ "## Setup colab environment" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "WF0qFN_g3ekz", "outputId": "f3a5f049-24ee-418f-fe5e-84c633234ad8" }, "outputs": [], "source": [ "from psutil import virtual_memory\n", "ram_gb = virtual_memory().total / 1e9\n", "print('Your runtime has {:.1f} gigabytes of available RAM\\n'.format(ram_gb))\n", "\n", "if ram_gb < 20:\n", " print('Not using a high-RAM runtime')\n", "else:\n", " print('You are using a high-RAM runtime!')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "vL0S-s9Uofvn", "outputId": "4b7efa4d-7f09-4c8e-bc98-99e6099ede32" }, "outputs": [], "source": [ "from google.colab import drive\n", "drive.mount('/content/drive')" ] }, { "cell_type": "markdown", "metadata": { "id": "8hzEGHl7gmzk" }, "source": [ "## Setup GPU" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "dPOU-Efhf4ui", "outputId": "121dd21e-f98c-483d-d6d1-2838f732a4e2" }, "outputs": [], "source": [ "import torch\n", "\n", "# If there's a GPU available...\n", "if torch.cuda.is_available(): \n", " # Tell PyTorch to use the GPU. \n", " device = torch.device(\"cuda\")\n", " print('There are %d GPU(s) available.' % torch.cuda.device_count())\n", " print('We will use the GPU:', torch.cuda.get_device_name(0))\n", "\n", "# for MacOS\n", "elif torch.backends.mps.is_available() and torch.backends.mps.is_built():\n", " device = torch.device(\"mps\")\n", " print('We will use the GPU')\n", "else:\n", " device = torch.device(\"cpu\")\n", " print('No GPU available, using the CPU instead.')" ] }, { "cell_type": "markdown", "metadata": { "id": "Jr-S9yYIgGkA" }, "source": [ "## Install packages" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "pwmZ5bBvgGNh", "outputId": "fce0a8bf-1779-4079-c7ac-200ebb2678c5" }, "outputs": [], "source": [ "!pip install transformers==4.10.3\n", "!pip install sentencepiece" ] }, { "cell_type": "markdown", "metadata": { "id": "wSqbrupGMc1M" }, "source": [ "## Import librairies" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "SkErnwgMMbRj" }, "outputs": [], "source": [ "import pandas as pd \n", "import numpy as np\n", "import csv\n", "import os\n", "import pickle\n", "from sklearn import preprocessing\n", "from sklearn.model_selection import train_test_split\n", "from sklearn.metrics import *\n", "\n", "from transformers import BertTokenizer, CamembertTokenizer, BertForSequenceClassification, AdamW, BertConfig, CamembertForSequenceClassification\n", "from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler\n", "from transformers import get_linear_schedule_with_warmup\n", "\n", "import time\n", "import datetime\n", "\n", "import random\n", "\n", "import matplotlib.pyplot as plt\n", "from sklearn.metrics import plot_confusion_matrix\n", "from sklearn.metrics import confusion_matrix\n", "from sklearn.metrics import classification_report\n", "import seaborn as sns" ] }, { "cell_type": "markdown", "metadata": { "id": "12SA-qPFgsVo" }, "source": [ "## Utils functions" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "WkIVcabUgxIl" }, "outputs": [], "source": [ "def resample_classes(df, classColumnName, numberOfInstances):\n", " #random numberOfInstances elements\n", " replace = False # with replacement\n", " fn = lambda obj: obj.loc[np.random.choice(obj.index, numberOfInstances if len(obj) > numberOfInstances else len(obj), replace),:]\n", " return df.groupby(classColumnName, as_index=False).apply(fn)\n", " \n", "# Function to calculate the accuracy of our predictions vs labels\n", "def flat_accuracy(preds, labels):\n", " pred_flat = np.argmax(preds, axis=1).flatten()\n", " labels_flat = labels.flatten()\n", " return np.sum(pred_flat == labels_flat) / len(labels_flat) \n", "\n", "def format_time(elapsed):\n", " '''\n", " Takes a time in seconds and returns a string hh:mm:ss\n", " '''\n", " # Round to the nearest second.\n", " elapsed_rounded = int(round((elapsed)))\n", "\n", " # Format as hh:mm:ss\n", " return str(datetime.timedelta(seconds=elapsed_rounded))" ] }, { "cell_type": "markdown", "metadata": { "id": "c5QKcXulhNJ-" }, "source": [ "## Load Data" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "jdCdUVOTZrqh", "outputId": "ac52be55-ed4b-4c50-dc8c-9124ca6b71e5" }, "outputs": [], "source": [ "!wget https://geode.liris.cnrs.fr/EDdA-Classification/datasets/training_set.tsv\n", "!wget https://geode.liris.cnrs.fr/EDdA-Classification/datasets/test_set.tsv" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "!wget https://geode.liris.cnrs.fr/EDdA-Classification/datasets/training_set_superdomains.tsv\n", "!wget https://geode.liris.cnrs.fr/EDdA-Classification/datasets/test_set_superdomains.tsv" ] }, { "cell_type": "markdown", "metadata": { "id": "9d1IxD_bLEvp" }, "source": [ "### Loading dataset" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "7JEnKknRoClH" }, "outputs": [], "source": [ "#train_path = '../data/training_set.tsv'\n", "#test_path = '../data/test_set.tsv'\n", "\n", "train_path = '../data/training_set_superdomains.tsv'\n", "test_path = '../data/test_set_superdomains.tsv'" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 496 }, "id": "5u1acjunhoxe", "outputId": "3038048d-6506-473d-85c9-2d3ebdcc6a72" }, "outputs": [], "source": [ "df_train = pd.read_csv(train_path, sep=\"\\t\")\n", "df_train.head()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "zj3JDoJNfx1f", "outputId": "f1ec1fcf-b287-460a-8110-dbb00091c203" }, "outputs": [], "source": [ "print(df_train.shape)" ] }, { "cell_type": "markdown", "metadata": { "id": "dADYGtTcn4AB" }, "source": [ "## Configuration" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "I0OrfFsBn4Io" }, "outputs": [], "source": [ "columnText = 'contentWithoutClass'\n", "#columnClass = 'ensemble_domaine_enccre'\n", "columnClass = 'super_domain'\n", "\n", "maxOfInstancePerClass = 10000\n", "\n", "model_chosen = \"bert\"\n", "#model_chosen = \"camembert\"\n", "\n", "batch_size = 16 # 16 or 32 recommended\n", "max_len = 512\n", "\n", "#path = \"drive/MyDrive/Classification-EDdA/\"\n", "path = \"../models/new/\"\n", "encoder_filename = \"label_encoder.pkl\"" ] }, { "cell_type": "markdown", "metadata": { "id": "t3brU-Yvn4XS" }, "source": [ "## Preprocessing" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "aQCLJE4Jtm7v" }, "outputs": [], "source": [ "if maxOfInstancePerClass != 10000:\n", " df_train = resample_classes(df_train, columnClass, maxOfInstancePerClass)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "zrjZvs2dhzAy" }, "outputs": [], "source": [ "labels = df_train[columnClass]\n", "numberOfClasses = labels.nunique()\n", "\n", "\n", "if os.path.isfile(path+encoder_filename): \n", " # load existing encoder \n", " with open(path+encoder_filename, 'rb') as file:\n", " encoder = pickle.load(file)\n", "\n", "else:\n", " encoder = preprocessing.LabelEncoder()\n", " encoder.fit(labels)\n", "\n", " with open(path+encoder_filename, 'wb') as file:\n", " pickle.dump(encoder, file)\n", "\n", "\n", "labels = encoder.transform(labels)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "Xt_PhH_6h1_3" }, "outputs": [], "source": [ "sentences_train = df_train[columnText].values\n", "labels_train = labels.tolist()\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "Dq_KF5WAsbpC", "outputId": "7925ce5a-4b9e-4147-fdc1-f2916d0e6600" }, "outputs": [], "source": [ "sentences_train" ] }, { "cell_type": "markdown", "metadata": { "id": "Gs4Agx_5h43M" }, "source": [ "# Model\n", "## Tokenisation & Input Formatting" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "YZ5PhEYZiCEA" }, "outputs": [], "source": [ "if model_chosen == \"bert\":\n", " tokeniser_bert = 'bert-base-multilingual-cased'\n", " model_bert = \"bert-base-multilingual-cased\"\n", "elif model_chosen == \"camembert\":\n", " tokeniser_bert = 'camembert-base'\n", " model_bert = 'camembert-base'\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 131, "referenced_widgets": [ "274e505b5f354efc8de3ef26cc43e617", "f1f9d5b32f60473b86ae6b340d6c0850", "ad5e0e1439a94578a31b80c90dbf3247", "0779c8ea0ed24e64a800ae5dff6bc8ce", "7870340ac12b469c8ac19de3a47b6e67", "5f321455342348f49879a9ca8b392077", "9420a47a2bf44ead8cff283f20566cda", "99b785ea53744868b8b11e5e94936fcc", "8d24b669a39b4876ac0a014dff678db1", "2cf386a8d14d43389374f79bfa922675", "2c44d9c11e704b70aa32904a23d1790c", "0279837673b446b09aac18346213eb7e", "09b5f0bbd5c14bc289b0f92a22bb29ab", "69004a5069094f8c9d59d5136f627bef", "e96a95317b0945c58c8ff0e944c7593e", "68b69c9d3a274900bc2892848f725cb0", "76007b17ffd2478fa4a86f959d4f1766", "cb447c62ce1d4c1ea760175ae619fbb9", "d4ad1a78750d49feaea584a82940bb7d", "a9c47cb226ee41e18812f29f690992eb", "c4c1675163bd4997bb44d7ea3967a356", "5032547e748f45a3b0cdd12fafe1dd05", "8f467553598f4dcc9abf55da79c11018", "9d7a8b3ecfe74f66b4238fe085c05906", "58b4f9e0366f4d4eba7f902af84b8965", "9383e09698ae4bd1820a4bca22e78315", "a189838c4de648198b0f4fc99c29ced8", "c4d981755d1d42b6940396b77bc251bc", "12afa6b6474b401f9ff3f189cc0d3d11", "5978954f56fb40928b970f32d1634aaf", "fe0e3b1df104484c98fbdcd31a04e427", "2d1d632da0f740c38512c9ad779d3173", "df95c20399dd4918bc7559a90886d4aa" ] }, "id": "C4bigx_3ibuN", "outputId": "ebcca5ee-85d8-4525-c9ad-9fc3b5c1741d" }, "outputs": [], "source": [ "# Load the BERT tokenizer.\n", "if model_chosen == \"bert\":\n", " print('Loading BERT tokenizer...')\n", " tokenizer = BertTokenizer.from_pretrained(tokeniser_bert)\n", "elif model_chosen == \"camembert\":\n", " print('Loading CamemBERT tokenizer...')\n", " tokenizer = CamembertTokenizer.from_pretrained(tokeniser_bert)\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "5hNod5X9jDZN", "outputId": "bca0db0e-7463-40cd-8052-1712965c7a95" }, "outputs": [], "source": [ " # Tokenize all of the sentences and map the tokens to thier word IDs.\n", "input_ids_train = []\n", "\n", "# For every sentence...\n", "for sent in sentences_train:\n", " # `encode` will:\n", " # (1) Tokenize the sentence.\n", " # (2) Prepend the `[CLS]` token to the start.\n", " # (3) Append the `[SEP]` token to the end.\n", " # (4) Map tokens to their IDs.\n", " encoded_sent_train = tokenizer.encode(\n", " str(sent), # Sentence to encode.\n", " add_special_tokens = True, # Add '[CLS]' and '[SEP]'\n", "\n", " # This function also supports truncation and conversion\n", " # to pytorch tensors, but I need to do padding, so I\n", " # can't use these features.\n", " #max_length = 128, # Truncate all sentences.\n", " #return_tensors = 'pt', # Return pytorch tensors.\n", " )\n", " \n", " # Add the encoded sentence to the list.\n", " input_ids_train.append(encoded_sent_train)\n", "\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "W9EWv5JvjGH3", "outputId": "dde87708-7bcb-47c7-af71-2ec2b2e0c2db" }, "outputs": [], "source": [ "print('Max sentence length train: ', max([len(sen) for sen in input_ids_train]))" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "xh1TQJyvjOx5" }, "outputs": [], "source": [ "\n", "padded_train = []\n", "for i in input_ids_train:\n", "\n", " if len(i) > max_len:\n", " padded_train.extend([i[:max_len]])\n", " else:\n", " padded_train.extend([i + [0] * (max_len - len(i))])\n", "\n", "\n", "padded_train = input_ids_train = np.array(padded_train)\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "ZiwY6gn0jUkD" }, "outputs": [], "source": [ " # Create attention masks\n", "attention_masks_train = []\n", "\n", "# For each sentence...\n", "for sent in padded_train:\n", " \n", " # Create the attention mask.\n", " # - If a token ID is 0, then it's padding, set the mask to 0.\n", " # - If a token ID is > 0, then it's a real token, set the mask to 1.\n", " att_mask = [int(token_id > 0) for token_id in sent]\n", " \n", " # Store the attention mask for this sentence.\n", " attention_masks_train.append(att_mask)\n", "\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "oBTR5AfAjXJe" }, "outputs": [], "source": [ "# Use 70% for training and 30% for validation.\n", "#train_inputs, validation_inputs, train_labels, validation_labels = train_test_split(padded, labels, \n", "# random_state=2018, test_size=0.3, stratify = labels)\n", "# Do the same for the masks.\n", "#train_masks, validation_masks, _, _ = train_test_split(attention_masks, labels,\n", "# random_state=2018, test_size=0.3, stratify = labels)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "b9Mw5kq3jhTb" }, "outputs": [], "source": [ "# Convert all inputs and labels into torch tensors, the required datatype \n", "# for my model.\n", "train_inputs = torch.tensor(padded_train)\n", "\n", "train_labels = torch.tensor(labels_train)\n", "\n", "train_masks = torch.tensor(attention_masks_train)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "UfFWzbENjnkw" }, "outputs": [], "source": [ "# The DataLoader needs to know the batch size for training, so I specify it here.\n", "# For fine-tuning BERT on a specific task, the authors recommend a batch size of\n", "# 16 or 32.\n", "\n", "# Create the DataLoader for training set.\n", "train_data = TensorDataset(train_inputs, train_masks, train_labels)\n", "train_sampler = RandomSampler(train_data)\n", "train_dataloader = DataLoader(train_data, sampler=train_sampler, batch_size=batch_size)\n", "\n" ] }, { "cell_type": "markdown", "metadata": { "id": "x45JNGqhkUn2" }, "source": [ "## Training" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 1000, "referenced_widgets": [ "d09d664839d04303b8fef9ef895f6e4f", "500826e3813b414a820aa260bfde9e23", "70dd7428d78c44409308d62ba04917de", "152a31110bf9477989833eac91794688", "fcde5f4cf49846a0ad1b284aad98a38a", "1bf6a76237454349aafc1e9284376879", "4a23110523184d019a77368116f738f3", "e86a1d4d268c4314897b58f7bba5ec25", "826bd7d0a1f146ea9f7d53584468190c", "3592b1ed1d6d452b93c57b304943a1cb", "a159d62667734657a49ba3a96494f137" ] }, "id": "C7M2Er1ajsTf", "outputId": "151034cd-9a77-413e-a61e-561c97b4072e" }, "outputs": [], "source": [ "# Load BertForSequenceClassification, the pretrained BERT model with a single \n", "# linear classification layer on top.\n", "\n", "#model = CamembertForSequenceClassification.from_pretrained(\n", "if model_chosen == \"bert\":\n", " model = BertForSequenceClassification.from_pretrained(\n", " model_bert, # Use the 12-layer BERT model, with an uncased vocab.\n", " num_labels = numberOfClasses, # The number of output labels--2 for binary classification.\n", " # You can increase this for multi-class tasks. \n", " output_attentions = False, # Whether the model returns attentions weights.\n", " output_hidden_states = False, # Whether the model returns all hidden-states.\n", " )\n", "elif model_chosen == \"camembert\":\n", " model = CamembertForSequenceClassification.from_pretrained(\n", " model_bert, # Use the 12-layer BERT model, with an uncased vocab.\n", " num_labels = numberOfClasses, # The number of output labels--2 for binary classification.\n", " # You can increase this for multi-class tasks. \n", " output_attentions = False, # Whether the model returns attentions weights.\n", " output_hidden_states = False, # Whether the model returns all hidden-states.\n", " )\n", "\n", "# Tell pytorch to run this model on the GPU.\n", "#model.cuda()\n", "model.to(\"mps\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "xd_cG-8pj4Iw" }, "outputs": [], "source": [ "#Note: AdamW is a class from the huggingface library (as opposed to pytorch) \n", "# I believe the 'W' stands for 'Weight Decay fix\"\n", "optimizer = AdamW(model.parameters(),\n", " lr = 2e-5, # args.learning_rate - default is 5e-5, our notebook had 2e-5\n", " eps = 1e-8 # args.adam_epsilon - default is 1e-8.\n", " )" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "65G-uHuLj4_6" }, "outputs": [], "source": [ "# Number of training epochs (authors recommend between 2 and 4)\n", "epochs = 4\n", "\n", "# Total number of training steps is number of batches * number of epochs.\n", "total_steps = len(train_dataloader) * epochs\n", "\n", "# Create the learning rate scheduler.\n", "scheduler = get_linear_schedule_with_warmup(optimizer, \n", " num_warmup_steps = 0, # Default value in run_glue.py\n", " num_training_steps = total_steps)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "SbHBbYpwkKaA", "outputId": "4cd1be4a-6014-4804-df56-f38e98039797" }, "outputs": [], "source": [ "# This training code is based on the `run_glue.py` script here:\n", "# https://github.com/huggingface/transformers/blob/5bfcd0485ece086ebcbed2d008813037968a9e58/examples/run_glue.py#L128\n", "\n", "# Set the seed value all over the place to make this reproducible.\n", "seed_val = 42\n", "\n", "random.seed(seed_val)\n", "np.random.seed(seed_val)\n", "torch.manual_seed(seed_val)\n", "torch.cuda.manual_seed_all(seed_val)\n", "\n", "# Store the average loss after each epoch so I can plot them.\n", "loss_values = []\n", "\n", "# For each epoch...\n", "for epoch_i in range(0, epochs):\n", " \n", " # ========================================\n", " # Training\n", " # ========================================\n", " \n", " # Perform one full pass over the training set.\n", "\n", " print(\"\")\n", " print('======== Epoch {:} / {:} ========'.format(epoch_i + 1, epochs))\n", " print('Training...')\n", "\n", " # Measure how long the training epoch takes.\n", " t0 = time.time()\n", "\n", " # Reset the total loss for this epoch.\n", " total_loss = 0\n", "\n", " # Put the model into training mode.\n", " model.train()\n", "\n", " # For each batch of training data...\n", " for step, batch in enumerate(train_dataloader):\n", "\n", " # Progress update every 40 batches.\n", " if step % 5 == 0 and not step == 0:\n", " # Calculate elapsed time in minutes.\n", " elapsed = format_time(time.time() - t0)\n", " \n", " # Report progress.\n", " print(' Batch {:>5,} of {:>5,}. Elapsed: {:}.'.format(step, len(train_dataloader), elapsed))\n", "\n", " # Unpack this training batch from the dataloader. \n", " #\n", " # As I unpack the batch, I'll also copy each tensor to the GPU using the \n", " # `to` method.\n", " #\n", " # `batch` contains three pytorch tensors:\n", " # [0]: input ids \n", " # [1]: attention masks\n", " # [2]: labels \n", " b_input_ids = batch[0].to(device)\n", " b_input_mask = batch[1].to(device)\n", " b_labels = batch[2].to(device)\n", "\n", " # Always clear any previously calculated gradients before performing a\n", " # backward pass. PyTorch doesn't do this automatically because \n", " # accumulating the gradients is \"convenient while training RNNs\". \n", " # (source: https://stackoverflow.com/questions/48001598/why-do-we-need-to-call-zero-grad-in-pytorch)\n", " model.zero_grad() \n", "\n", " # Perform a forward pass (evaluate the model on this training batch).\n", " # This will return the loss (rather than the model output) because I\n", " # have provided the `labels`.\n", " # The documentation for this `model` function is here: \n", " # https://huggingface.co/transformers/v2.2.0/model_doc/bert.html#transformers.BertForSequenceClassification\n", " outputs = model(b_input_ids, \n", " token_type_ids=None, \n", " attention_mask=b_input_mask, \n", " labels=b_labels)\n", " \n", " # The call to `model` always returns a tuple, so I need to pull the \n", " # loss value out of the tuple.\n", " loss = outputs[0]\n", "\n", " # Accumulate the training loss over all of the batches so that I can\n", " # calculate the average loss at the end. `loss` is a Tensor containing a\n", " # single value; the `.item()` function just returns the Python value \n", " # from the tensor.\n", " total_loss += loss.item()\n", "\n", " # Perform a backward pass to calculate the gradients.\n", " loss.backward()\n", "\n", " # Clip the norm of the gradients to 1.0.\n", " # This is to help prevent the \"exploding gradients\" problem.\n", " torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)\n", "\n", " # Update parameters and take a step using the computed gradient.\n", " # The optimizer dictates the \"update rule\"--how the parameters are\n", " # modified based on their gradients, the learning rate, etc.\n", " optimizer.step()\n", "\n", " # Update the learning rate.\n", " scheduler.step()\n", "\n", " # Calculate the average loss over the training data.\n", " avg_train_loss = total_loss / len(train_dataloader) \n", " \n", " # Store the loss value for plotting the learning curve.\n", " loss_values.append(avg_train_loss)\n", "\n", " print(\"\")\n", " print(\" Average training loss: {0:.2f}\".format(avg_train_loss))\n", " print(\" Training epoch took: {:}\".format(format_time(time.time() - t0)))\n", " \n", "print(\"\")\n", "print(\"Training complete!\")" ] }, { "cell_type": "markdown", "metadata": { "id": "uEe7lPtVKpIY" }, "source": [ "## Saving model" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "AYCSVm_wKnuM" }, "outputs": [], "source": [ "name = model_bert + \"_s\" + str(maxOfInstancePerClass)\n", "model_path = path + \"model_\"+name+\".pt\"" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "qmsxrOqjCsGo" }, "outputs": [], "source": [ "#torch.save(model, model_path)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "model.save_pretrained(model_path)\n", "#ludo: changement de la façon de sauver le modèle" ] }, { "cell_type": "markdown", "metadata": { "id": "pM9bSsckCndR" }, "source": [ "## Loading model" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "cEycmiS8Cnjw" }, "outputs": [], "source": [ "#model = torch.load(model_path)\n", "model = BertForSequenceClassification.from_pretrained(model_path).to(\"mps\") #.to(\"cuda\")" ] }, { "cell_type": "markdown", "metadata": { "id": "VJwyfmakkQyj" }, "source": [ "## Evaluation" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "K9qdtYexIIvk" }, "outputs": [], "source": [ "def evaluate_bert(data, labels, model, batch_size):\n", " # Tokenize all of the sentences and map the tokens to thier word IDs.\n", " input_ids = []\n", " # For every sentence...\n", " for sent in data:\n", " # `encode` will:\n", " # (1) Tokenize the sentence.\n", " # (2) Prepend the `[CLS]` token to the start.\n", " # (3) Append the `[SEP]` token to the end.\n", " # (4) Map tokens to their IDs.\n", " encoded_sent = tokenizer.encode(\n", " str(sent), # Sentence to encode.\n", " add_special_tokens = True, # Add '[CLS]' and '[SEP]'\n", " )\n", " \n", " input_ids.append(encoded_sent)\n", "\n", " # Pad our input tokens\n", " padded = []\n", " for i in input_ids:\n", "\n", " if len(i) > max_len:\n", " padded.extend([i[:max_len]])\n", " else:\n", " padded.extend([i + [0] * (max_len - len(i))])\n", " input_ids = np.array(padded)\n", "\n", " # Create attention masks\n", " attention_masks = []\n", "\n", " # Create a mask of 1s for each token followed by 0s for padding\n", " for seq in input_ids:\n", " seq_mask = [float(i>0) for i in seq]\n", " attention_masks.append(seq_mask) \n", "\n", " # Convert to tensors.\n", " prediction_inputs = torch.tensor(input_ids)\n", " prediction_masks = torch.tensor(attention_masks)\n", " prediction_labels = torch.tensor(labels)\n", "\n", " # Create the DataLoader.\n", " prediction_data = TensorDataset(prediction_inputs, prediction_masks, prediction_labels)\n", " prediction_sampler = SequentialSampler(prediction_data)\n", " prediction_dataloader = DataLoader(prediction_data, sampler=prediction_sampler, batch_size=batch_size)\n", "\n", " print('Predicting labels for {:,} test sentences...'.format(len(prediction_inputs)))\n", "\n", " # Put model in evaluation mode\n", " model.eval()\n", "\n", " # Tracking variables \n", " predictions , true_labels = [], []\n", "\n", " # Predict \n", " for batch in prediction_dataloader:\n", " # Add batch to GPU\n", " batch = tuple(t.to(device) for t in batch)\n", " \n", " # Unpack the inputs from the dataloader\n", " b_input_ids, b_input_mask, b_labels = batch\n", " \n", " # Telling the model not to compute or store gradients, saving memory and \n", " # speeding up prediction\n", " with torch.no_grad():\n", " # Forward pass, calculate logit predictions\n", " outputs = model(b_input_ids, token_type_ids=None, \n", " attention_mask=b_input_mask)\n", "\n", " logits = outputs[0]\n", " #print(logits)\n", "\n", " # Move logits and labels to CPU\n", " logits = logits.detach().cpu().numpy()\n", " label_ids = b_labels.to('cpu').numpy()\n", " #print(logits)\n", " \n", " # Store predictions and true labels\n", " predictions.append(logits)\n", " true_labels.append(label_ids)\n", "\n", " print(' DONE.')\n", "\n", "\n", " pred_labels = []\n", "\n", " # Evaluate each test batch using many matrics\n", " print('Calculating the matrics for each batch...')\n", "\n", " for i in range(len(true_labels)):\n", " \n", " # The predictions for this batch are a 2-column ndarray (one column for \"0\" \n", " # and one column for \"1\"). Pick the label with the highest value and turn this\n", " # in to a list of 0s and 1s.\n", " pred_labels_i = np.argmax(predictions[i], axis=1).flatten()\n", " pred_labels.append(pred_labels_i)\n", "\n", "\n", " pred_labels_ = [item for sublist in pred_labels for item in sublist]\n", " true_labels_ = [item for sublist in true_labels for item in sublist]\n", "\n", " return pred_labels_, true_labels_" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "dPjV_5g8DDQy" }, "outputs": [], "source": [ "dataset = \"test\"\n", "\n", "df_eval = pd.read_csv(dataset+\"_set.tsv\", sep=\"\\t\")\n", "\n", "data_eval = df_eval[columnText].values\n", "\n", "y = df_eval[columnClass]\n", "\n", "\n", "\n", "y = encoder.transform(y)\n", "labels = y.tolist()\n", "\n", "\n", "model_path = path+\"/model_\"+model_bert+\"_s\"+str(maxOfInstancePerClass)+\".pt\"\n", "model = torch.load(model_path)\n", "\n", "if model_bert == \"bert-base-multilingual-cased\":\n", " tokenizer = BertTokenizer.from_pretrained(model_bert)\n", "elif model_bert == \"camembert-base\":\n", " tokenizer = CamembertTokenizer.from_pretrained(model_bert)\n", "\n", "pred_labels_, true_labels_ = evaluate_bert(data_eval, labels, model, batch_size)\n", "\n", "\n", "report = classification_report(true_labels_, pred_labels_, output_dict = True)\n", " \n", "classes = [str(e) for e in encoder.transform(encoder.classes_)]\n", "classesName = encoder.classes_\n", "\n", "precision = []\n", "recall = []\n", "f1 = []\n", "support = []\n", "dff = pd.DataFrame(columns= ['className', 'precision', 'recall', 'f1-score', 'support', 'FP', 'FN', 'TP', 'TN'])\n", "for c in classes:\n", " precision.append(report[c]['precision'])\n", " recall.append(report[c]['recall'])\n", " f1.append(report[c]['f1-score'])\n", " support.append(report[c]['support'])\n", "\n", "accuracy = report['accuracy']\n", "weighted_avg = report['weighted avg']\n", "cnf_matrix = confusion_matrix(true_labels_, pred_labels_)\n", "FP = cnf_matrix.sum(axis=0) - np.diag(cnf_matrix)\n", "FN = cnf_matrix.sum(axis=1) - np.diag(cnf_matrix)\n", "TP = np.diag(cnf_matrix)\n", "TN = cnf_matrix.sum() - (FP + FN + TP)\n", "\n", "dff['className'] = classesName\n", "dff['precision'] = precision\n", "dff['recall'] = recall\n", "dff['f1-score'] = f1\n", "dff['support'] = support\n", "dff['FP'] = FP\n", "dff['FN'] = FN\n", "dff['TP'] = TP\n", "dff['TN'] = TN\n", "\n", "print(name)\n", "\n", "name = \"test_\"+ name\n", "content = name + \"\\n\"\n", "print(name)\n", "content += str(weighted_avg) + \"\\n\"\n", "\n", "print(weighted_avg)\n", "print(accuracy)\n", "print(dff)\n", "\n", "dff.to_csv(path+\"/report_\"+name+\".csv\", index=False)\n", "# enregistrer les predictions\n", "pd.DataFrame({'labels': pd.Series(true_labels_), 'predictions': pd.Series(pred_labels_)}).to_csv(path+\"/predictions/predictions_\"+name+\".csv\")\n", "\n", "with open(path+\"reports/report_\"+name+\".txt\", 'w') as f:\n", " f.write(content)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "cVdM4eT6I8g2" }, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "HzxyFO3knanV" }, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "KDRPPw4Wnap7" }, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "DX81R2dcnasF" }, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "wgfqJFVeJMK1" }, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "GqEf5_41JMNZ" }, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "x_n57EvhJMQh" }, "outputs": [], "source": [ "model_path = \"drive/MyDrive/Classification-EDdA/model_bert-base-multilingual-cased_s10000.pt\"" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "R3_9tA9MI8ju" }, "outputs": [], "source": [ "model = torch.load(model_path)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "_fzgS5USJeAF", "outputId": "be4a5506-76ed-4eef-bb3c-fe2bb77c6e4d" }, "outputs": [], "source": [ "!wget https://projet.liris.cnrs.fr/geode/files/datasets/EDdA/Classification/LGE_withContent.tsv" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "8WEJjQC7I8mP" }, "outputs": [], "source": [ "df_LGE = pd.read_csv(\"LGE_withContent.tsv\", sep=\"\\t\")\n", "data_LGE = df_LGE[\"content\"].values\n", "\n", "\n", "#pred_labels_, true_labels_ = evaluate_bert(data_eval, labels, model, batch_size)\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 206 }, "id": "9qJDTU-6vzkk", "outputId": "1b279f0e-7715-4d23-f524-08e8ba327f6c" }, "outputs": [], "source": [ "df_LGE.head()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "71-fP61-OOwQ", "outputId": "ef08b49e-0a9f-4653-e303-3163250af35b" }, "outputs": [], "source": [ "df_LGE.shape" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "lFFed2EAI8oq" }, "outputs": [], "source": [ "def generate_prediction_dataloader(chosen_model, sentences_to_predict, batch_size = 8, max_len = 512):\n", "\n", " if chosen_model == 'bert-base-multilingual-cased' :\n", " print('Loading Bert Tokenizer...')\n", " tokenizer = BertTokenizer.from_pretrained(chosen_model)\n", " elif chosen_model == 'camembert-base':\n", " print('Loading Camembert Tokenizer...')\n", " tokenizer = CamembertTokenizer.from_pretrained(chosen_model)\n", "\n", " # Tokenize all of the sentences and map the tokens to thier word IDs.\n", " input_ids_test = []\n", " # For every sentence...\n", " for sent in sentences_to_predict:\n", " # `encode` will:\n", " # (1) Tokenize the sentence.\n", " # (2) Prepend the `[CLS]` token to the start.\n", " # (3) Append the `[SEP]` token to the end.\n", " # (4) Map tokens to their IDs.\n", " encoded_sent = tokenizer.encode(\n", " sent, # Sentence to encode.\n", " add_special_tokens = True, # Add '[CLS]' and '[SEP]'\n", " )\n", "\n", " input_ids_test.append(encoded_sent)\n", "\n", " # Pad our input tokens\n", " padded_test = []\n", " for i in input_ids_test:\n", "\n", " if len(i) > max_len:\n", " padded_test.extend([i[:max_len]])\n", " else:\n", "\n", " padded_test.extend([i + [0] * (max_len - len(i))])\n", " input_ids_test = np.array(padded_test)\n", "\n", " # Create attention masks\n", " attention_masks = []\n", "\n", " # Create a mask of 1s for each token followed by 0s for padding\n", " for seq in input_ids_test:\n", " seq_mask = [float(i>0) for i in seq]\n", " attention_masks.append(seq_mask)\n", "\n", " # Convert to tensors.\n", " prediction_inputs = torch.tensor(input_ids_test)\n", " prediction_masks = torch.tensor(attention_masks)\n", " #set batch size\n", "\n", "\n", " # Create the DataLoader.\n", " prediction_data = TensorDataset(prediction_inputs, prediction_masks)\n", " prediction_sampler = SequentialSampler(prediction_data)\n", " prediction_dataloader = DataLoader(prediction_data, sampler=prediction_sampler, batch_size=batch_size)\n", "\n", " return prediction_dataloader\n", "\n", "\n", "\n", "def predict_class_bertFineTuning(model, sentences_to_predict_dataloader):\n", "\n", "\n", " # If there's a GPU available...\n", " if torch.cuda.is_available():\n", "\n", " # Tell PyTorch to use the GPU.\n", " device = torch.device(\"cuda\")\n", "\n", " print('There are %d GPU(s) available.' % torch.cuda.device_count())\n", "\n", " print('We will use the GPU:', torch.cuda.get_device_name(0))\n", "\n", " # If not...\n", " else:\n", " print('No GPU available, using the CPU instead.')\n", " device = torch.device(\"cpu\")\n", "\n", " # Put model in evaluation mode\n", " model.eval()\n", "\n", " # Tracking variables\n", " predictions_test , true_labels = [], []\n", " pred_labels_ = []\n", " # Predict\n", " for batch in sentences_to_predict_dataloader:\n", " # Add batch to GPU\n", " batch = tuple(t.to(device) for t in batch)\n", "\n", " # Unpack the inputs from the dataloader\n", " b_input_ids, b_input_mask = batch\n", "\n", " # Telling the model not to compute or store gradients, saving memory and\n", " # speeding up prediction\n", " with torch.no_grad():\n", " # Forward pass, calculate logit predictions\n", " outputs = model(b_input_ids, token_type_ids=None,\n", " attention_mask=b_input_mask)\n", "\n", " logits = outputs[0]\n", " #print(logits)\n", "\n", " # Move logits and labels to CPU\n", " logits = logits.detach().cpu().numpy()\n", " #print(logits)\n", "\n", " # Store predictions and true labels\n", " predictions_test.append(logits)\n", "\n", " #print(' DONE.')\n", "\n", " pred_labels = []\n", " \n", " for i in range(len(predictions_test)):\n", "\n", " # The predictions for this batch are a 2-column ndarray (one column for \"0\"\n", " # and one column for \"1\"). Pick the label with the highest value and turn this\n", " # in to a list of 0s and 1s.\n", " pred_labels_i = np.argmax(predictions_test[i], axis=1).flatten()\n", " pred_labels.append(pred_labels_i)\n", "\n", " pred_labels_ += [item for sublist in pred_labels for item in sublist]\n", " return pred_labels_\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "O9eer_kgI8rC", "outputId": "94ea7418-14a8-4918-e210-caf0018f5989" }, "outputs": [], "source": [ "data_loader = generate_prediction_dataloader('bert-base-multilingual-cased', data_LGE)\n", "#data_loader = generate_prediction_dataloader('camembert-base', data_LGE)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "sFpAwbrBwF2h", "outputId": "8d210732-619d-41f0-b6e2-ad9d06a85069" }, "outputs": [], "source": [ "p = predict_class_bertFineTuning( model, data_loader )" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "51HF6-8UPSTc", "outputId": "26bff792-eb8d-4e1a-efa4-a7a6c9d32bf9" }, "outputs": [], "source": [ "len(p)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "rFFGhaCvQHfh" }, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "qgJ-O4rcQHiI", "outputId": "bfe93dd6-4d89-4d5c-be0d-45e1c98c6b14" }, "outputs": [], "source": [ "# Il faudrait enregistrer l'encoder, \n", "# sinon on est obligé de le refaire à partir du jeu d'entrainement pour récupérer le noms des classes.\n", "encoder" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "QuST9wJoQHnS" }, "outputs": [], "source": [ "p2 = list(encoder.inverse_transform(p))" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "6ek7suq9QHqE", "outputId": "6636983a-7eba-48c8-d884-f8fb437294dc" }, "outputs": [], "source": [ "p2" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "XvdDj5PBQHtk" }, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "t39Xs0j7QHXJ" }, "outputs": [], "source": [ "df_LGE['class_bert'] = p2" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 206 }, "id": "-VZ7geRmQHaD", "outputId": "350a4122-5b1f-43e2-e372-2f628f665c4a" }, "outputs": [], "source": [ "df_LGE.head()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "3xkzdkrKQHwA" }, "outputs": [], "source": [ "df_LGE.to_csv(\"drive/MyDrive/Classification-EDdA/classification_LGE.tsv\", sep=\"\\t\")" ] } ], "metadata": { "accelerator": "GPU", "colab": { "collapsed_sections": [], "machine_shape": "hm", "name": "EDdA-Classification_BertFineTuning.ipynb", "provenance": [] }, "kernelspec": { "display_name": "Python 3.9.13 ('geode-classification-py39')", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.13 | packaged by conda-forge | (main, May 27 2022, 17:01:00) \n[Clang 13.0.1 ]" }, "vscode": { "interpreter": { "hash": "16fac9c2d845f8e1f8c6fffffe3d3a0def61c7e42da17a08d00f279ad4dea797" } }, "widgets": { "application/vnd.jupyter.widget-state+json": { "0279837673b446b09aac18346213eb7e": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HBoxModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_69004a5069094f8c9d59d5136f627bef", "IPY_MODEL_e96a95317b0945c58c8ff0e944c7593e", "IPY_MODEL_68b69c9d3a274900bc2892848f725cb0" ], "layout": "IPY_MODEL_09b5f0bbd5c14bc289b0f92a22bb29ab" } }, "0779c8ea0ed24e64a800ae5dff6bc8ce": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "FloatProgressModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_8d24b669a39b4876ac0a014dff678db1", "max": 810912, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_99b785ea53744868b8b11e5e94936fcc", "value": 810912 } }, "09b5f0bbd5c14bc289b0f92a22bb29ab": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "12afa6b6474b401f9ff3f189cc0d3d11": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "152a31110bf9477989833eac91794688": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "FloatProgressModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_826bd7d0a1f146ea9f7d53584468190c", "max": 445032417, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_e86a1d4d268c4314897b58f7bba5ec25", "value": 445032417 } }, "1bf6a76237454349aafc1e9284376879": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "274e505b5f354efc8de3ef26cc43e617": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HBoxModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_ad5e0e1439a94578a31b80c90dbf3247", "IPY_MODEL_0779c8ea0ed24e64a800ae5dff6bc8ce", "IPY_MODEL_7870340ac12b469c8ac19de3a47b6e67" ], "layout": "IPY_MODEL_f1f9d5b32f60473b86ae6b340d6c0850" } }, "2c44d9c11e704b70aa32904a23d1790c": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "2cf386a8d14d43389374f79bfa922675": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "2d1d632da0f740c38512c9ad779d3173": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "3592b1ed1d6d452b93c57b304943a1cb": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "4a23110523184d019a77368116f738f3": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "500826e3813b414a820aa260bfde9e23": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "5032547e748f45a3b0cdd12fafe1dd05": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "58b4f9e0366f4d4eba7f902af84b8965": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_12afa6b6474b401f9ff3f189cc0d3d11", "placeholder": "", "style": "IPY_MODEL_c4d981755d1d42b6940396b77bc251bc", "value": "Downloading: 100%" } }, "5978954f56fb40928b970f32d1634aaf": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "ProgressStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "5f321455342348f49879a9ca8b392077": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "68b69c9d3a274900bc2892848f725cb0": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_5032547e748f45a3b0cdd12fafe1dd05", "placeholder": "", "style": "IPY_MODEL_c4c1675163bd4997bb44d7ea3967a356", "value": " 1.40M/1.40M [00:00<00:00, 6.57MB/s]" } }, "69004a5069094f8c9d59d5136f627bef": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_cb447c62ce1d4c1ea760175ae619fbb9", "placeholder": "", "style": "IPY_MODEL_76007b17ffd2478fa4a86f959d4f1766", "value": "Downloading: 100%" } }, "70dd7428d78c44409308d62ba04917de": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_4a23110523184d019a77368116f738f3", "placeholder": "", "style": "IPY_MODEL_1bf6a76237454349aafc1e9284376879", "value": "Downloading: 100%" } }, "76007b17ffd2478fa4a86f959d4f1766": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "7870340ac12b469c8ac19de3a47b6e67": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_2c44d9c11e704b70aa32904a23d1790c", "placeholder": "", "style": "IPY_MODEL_2cf386a8d14d43389374f79bfa922675", "value": " 811k/811k [00:00<00:00, 2.75MB/s]" } }, "826bd7d0a1f146ea9f7d53584468190c": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "8d24b669a39b4876ac0a014dff678db1": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "8f467553598f4dcc9abf55da79c11018": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HBoxModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_58b4f9e0366f4d4eba7f902af84b8965", "IPY_MODEL_9383e09698ae4bd1820a4bca22e78315", "IPY_MODEL_a189838c4de648198b0f4fc99c29ced8" ], "layout": "IPY_MODEL_9d7a8b3ecfe74f66b4238fe085c05906" } }, "9383e09698ae4bd1820a4bca22e78315": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "FloatProgressModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_fe0e3b1df104484c98fbdcd31a04e427", "max": 508, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_5978954f56fb40928b970f32d1634aaf", "value": 508 } }, "9420a47a2bf44ead8cff283f20566cda": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "99b785ea53744868b8b11e5e94936fcc": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "ProgressStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "9d7a8b3ecfe74f66b4238fe085c05906": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "a159d62667734657a49ba3a96494f137": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "a189838c4de648198b0f4fc99c29ced8": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_df95c20399dd4918bc7559a90886d4aa", "placeholder": "", "style": "IPY_MODEL_2d1d632da0f740c38512c9ad779d3173", "value": " 508/508 [00:00<00:00, 16.9kB/s]" } }, "a9c47cb226ee41e18812f29f690992eb": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "ad5e0e1439a94578a31b80c90dbf3247": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_9420a47a2bf44ead8cff283f20566cda", "placeholder": "", "style": "IPY_MODEL_5f321455342348f49879a9ca8b392077", "value": "Downloading: 100%" } }, "c4c1675163bd4997bb44d7ea3967a356": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "c4d981755d1d42b6940396b77bc251bc": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "cb447c62ce1d4c1ea760175ae619fbb9": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "d09d664839d04303b8fef9ef895f6e4f": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HBoxModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_70dd7428d78c44409308d62ba04917de", "IPY_MODEL_152a31110bf9477989833eac91794688", "IPY_MODEL_fcde5f4cf49846a0ad1b284aad98a38a" ], "layout": "IPY_MODEL_500826e3813b414a820aa260bfde9e23" } }, "d4ad1a78750d49feaea584a82940bb7d": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "ProgressStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "df95c20399dd4918bc7559a90886d4aa": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "e86a1d4d268c4314897b58f7bba5ec25": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "ProgressStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "e96a95317b0945c58c8ff0e944c7593e": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "FloatProgressModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_a9c47cb226ee41e18812f29f690992eb", "max": 1395301, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_d4ad1a78750d49feaea584a82940bb7d", "value": 1395301 } }, "f1f9d5b32f60473b86ae6b340d6c0850": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "fcde5f4cf49846a0ad1b284aad98a38a": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_a159d62667734657a49ba3a96494f137", "placeholder": "", "style": "IPY_MODEL_3592b1ed1d6d452b93c57b304943a1cb", "value": " 445M/445M [00:14<00:00, 39.2MB/s]" } }, "fe0e3b1df104484c98fbdcd31a04e427": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } } } } }, "nbformat": 4, "nbformat_minor": 0 }