# Notebook: notebooks/Predict_LGE.ipynb

# %% [markdown]
# # BERT Predict classification
#
# ## 1. Setup the environment
#
# ### 1.1 Setup colab environment
#
# #### 1.1.1 Install packages

# %%
import subprocess
import sys

# Install through the interpreter that runs this kernel so the packages land
# in the kernel's own environment; a bare `!pip` uses whichever `pip` comes
# first on PATH, which can belong to a different environment.
# NOTE(review): `sentencepiece` is unpinned, as in the original cell - pin it
# once the version used for fine-tuning is known.
subprocess.check_call(
    [sys.executable, "-m", "pip", "install", "transformers==4.10.3", "sentencepiece"]
)

# %% [markdown]
# #### 1.1.2 Use more RAM

# %%
from psutil import virtual_memory

# `virtual_memory().total` is a byte count; dividing by 1e9 gives gigabytes.
ram_gb = virtual_memory().total / 1e9
print('Your runtime has {:.1f} gigabytes of available RAM\n'.format(ram_gb))

# Below 20 GB the runtime is reported as a standard (not high-RAM) one.
if ram_gb < 20:
    print('Not using a high-RAM runtime')
else:
    print('You are using a high-RAM runtime!')

# %% [markdown]
# #### 1.1.3 Mount GoogleDrive

# %%
# `google.colab` only exists on Colab. Guard the import so that
# "Restart & Run All" also works on a local machine, where the data is read
# from the relative `path` configured in the "Load Data" section instead.
try:
    from google.colab import drive
except ImportError:
    print('Not running on Colab, skipping the Google Drive mount.')
else:
    drive.mount('/content/drive')

# %% [markdown]
# ### 1.2 Setup GPU

# %%
import torch

# `torch.backends.mps` is missing from older PyTorch builds; fetch it
# defensively so a CPU-only machine with such a build reaches the CPU
# fallback instead of raising AttributeError.
mps_backend = getattr(torch.backends, "mps", None)

# If there's a GPU available...
if torch.cuda.is_available():
    # Tell PyTorch to use the GPU.
    device = torch.device("cuda")
    print('There are %d GPU(s) available.' % torch.cuda.device_count())
    print('We will use the GPU:', torch.cuda.get_device_name(0))

# for MacOS
elif mps_backend is not None and mps_backend.is_available() and mps_backend.is_built():
    device = torch.device("mps")
    print('We will use the GPU')
else:
    device = torch.device("cpu")
    print('No GPU available, using the CPU instead.')

# %% [markdown]
# ### 1.3 Import libraries

# %%
import pandas as pd
import numpy as np

from transformers import BertTokenizer, BertForSequenceClassification, CamembertTokenizer, CamembertForSequenceClassification
from torch.utils.data import TensorDataset, DataLoader, SequentialSampler
# %% [markdown]
# ## 2. Load Data

# %%
# Root of the project relative to this notebook. On Colab, point it at the
# Drive folder instead:
#path = "drive/MyDrive/Classification-EDdA/"
path = "../"

# %%
import os
import urllib.request

LGE_URL = "https://projet.liris.cnrs.fr/geode/files/datasets/EDdA/Classification/LGE_withContent.tsv"
lge_file = path + "data/LGE_withContent.tsv"

# Fetch the dataset into the exact location `read_csv` reads from below.
# (The previous `!wget` saved it into the current working directory, which is
# not where it was read from.) The download is skipped when the file already
# exists, so re-running the notebook does not hit the network again.
if not os.path.exists(lge_file):
    os.makedirs(os.path.dirname(lge_file), exist_ok=True)
    urllib.request.urlretrieve(LGE_URL, lge_file)

# %%
df_LGE = pd.read_csv(lge_file, sep="\t")

# Every row is tokenized later on; fail here with a clear message rather than
# inside the tokenizer if an article has no text.
assert df_LGE["content"].notna().all(), "some LGE articles have no content"

data_LGE = df_LGE["content"].values

# %%
df_LGE.head()

# %%
df_LGE.shape
# %% [markdown]
# ## 3. Load model and predict
#
# ### 3.1 BERT / CamemBERT

# %%
model_name = "bert-base-multilingual-cased"
#model_name = "camembert-base"
model_path = path + "models/model_" + model_name + "_s10000.pt"

# %%
def generate_dataloader(tokenizer, sentences, batch_size = 8, max_len = 512):
    """Tokenize `sentences` and wrap them in a sequential DataLoader for inference.

    Each sentence is encoded with `tokenizer.encode(..., add_special_tokens=True)`,
    cut to at most `max_len` token ids and right-padded to exactly `max_len`.

    Args:
        tokenizer: Object exposing `encode(text, add_special_tokens=True)` that
            returns a list of token ids. Its `pad_token_id`, when set, is used
            as the padding id; otherwise 0 is used.
        sentences: Iterable of texts to encode.
        batch_size: Number of rows per batch produced by the DataLoader.
        max_len: Fixed length (in token ids) of every encoded row.

    Returns:
        A `DataLoader` over a `TensorDataset(input_ids, attention_mask)` that
        yields batches in input order. `input_ids` is int64 and
        `attention_mask` is float32, with 1.0 on real tokens and 0.0 on padding.
    """
    # Tokenize all of the sentences and map the tokens to their word IDs.
    # The tokenizer may warn that a sequence exceeds the model maximum; the
    # slice is what enforces the limit.
    # NOTE(review): slicing drops the closing special token of over-long
    # sequences. This is kept as in the original - confirm it matches the
    # preprocessing used for fine-tuning before moving to tokenizer-side
    # truncation.
    encoded_sentences = [
        tokenizer.encode(sent, add_special_tokens=True)[:max_len]
        for sent in sentences
    ]

    # Pad with the tokenizer's own padding id instead of assuming it is 0.
    pad_id = getattr(tokenizer, "pad_token_id", None)
    if pad_id is None:
        pad_id = 0

    input_ids = np.full((len(encoded_sentences), max_len), pad_id, dtype=np.int64)
    attention_masks = np.zeros((len(encoded_sentences), max_len), dtype=np.float32)

    # The mask is derived from each sequence's length rather than from
    # `id > 0`, so it stays correct whatever the padding id is.
    for row, token_ids in enumerate(encoded_sentences):
        input_ids[row, :len(token_ids)] = token_ids
        attention_masks[row, :len(token_ids)] = 1.0

    # Convert to tensors.
    inputs = torch.from_numpy(input_ids)
    masks = torch.from_numpy(attention_masks)

    # Create the DataLoader; the sequential sampler keeps predictions aligned
    # with the order of `sentences`.
    data = TensorDataset(inputs, masks)
    prediction_sampler = SequentialSampler(data)

    return DataLoader(data, sampler=prediction_sampler, batch_size=batch_size)


def predict(model, dataloader, device):
    """Return the predicted class index of every row served by `dataloader`.

    Args:
        model: Sequence-classification model, called as
            `model(input_ids, token_type_ids=None, attention_mask=mask)`; the
            first element of its output is taken as the logits.
        dataloader: Yields `(input_ids, attention_mask)` batches, as built by
            `generate_dataloader`.
        device: Torch device the batches are moved to. The model is moved to
            it as well, so both always sit on the same device.

    Returns:
        A list with one integer class index (arg-max over the logits) per
        input row, in dataloader order. Empty when the dataloader is empty.
    """
    # Make sure the model lives where the batches are sent.
    model.to(device)

    # Put model in evaluation mode
    model.eval()

    batch_logits = []
    for batch in dataloader:
        # Move the batch to the target device and unpack it.
        b_input_ids, b_input_mask = tuple(t.to(device) for t in batch)

        # Telling the model not to compute or store gradients, saving memory
        # and speeding up prediction
        with torch.no_grad():
            # Forward pass, calculate logit predictions
            outputs = model(b_input_ids, token_type_ids=None,
                            attention_mask=b_input_mask)

        # Bring the logits back to the CPU as a numpy array.
        batch_logits.append(outputs[0].detach().cpu().numpy())

    # `np.concatenate` rejects an empty list, so handle "no batches" here.
    if not batch_logits:
        return []

    # One row per input, one column per class: keep the highest-scoring class.
    return np.concatenate(batch_logits, axis=0).argmax(axis=1).tolist()

# %%
if model_name == 'bert-base-multilingual-cased' :
    print('Loading Bert Tokenizer...')
    tokenizer = BertTokenizer.from_pretrained(model_name)
elif model_name == 'camembert-base':
    print('Loading Camembert Tokenizer...')
    tokenizer = CamembertTokenizer.from_pretrained(model_name)
else:
    # Fail here instead of leaving `tokenizer` undefined for the next cells.
    raise ValueError('No tokenizer configured for model_name=%r' % model_name)
# %%
data_loader = generate_dataloader(tokenizer, data_LGE)

# %% [markdown]
#
# https://discuss.huggingface.co/t/an-efficient-way-of-loading-a-model-that-was-saved-with-torch-save/9814
#
# https://github.com/huggingface/transformers/issues/2094

# %%
# Alternatives tried earlier for loading the checkpoint (see the links above):
#model = torch.load(model_path, map_location=device)
#model.load_state_dict(torch.load(model_path, map_location=device))

# Use the classification head that matches `model_name`, and place the model
# on the device selected in the setup section (the batches are sent to that
# same `device` by `predict`) instead of a hard-coded "mps".
if model_name == 'camembert-base':
    model_class = CamembertForSequenceClassification
else:
    model_class = BertForSequenceClassification

model = model_class.from_pretrained(model_path).to(device)

# %%
pred = predict(model, data_loader, device)

# %%
# Preview only: one class index per article, so the full list is long.
pred[:10]

# %%
import pickle

encoder_filename = "models/label_encoder.pkl"
# NOTE(review): unpickling runs arbitrary code from the file - only load an
# encoder produced by this project's own training run.
with open(path+encoder_filename, 'rb') as file:
    encoder = pickle.load(file)

# %%
# Map the predicted class indices back to their label names.
p2 = list(encoder.inverse_transform(pred))

# %%
# One prediction per article is required for the column assignment below.
assert len(p2) == len(df_LGE), "number of predictions differs from number of articles"
df_LGE['class_bert'] = p2
"</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>id</th>\n", + " <th>tome</th>\n", + " <th>rank</th>\n", + " <th>domain</th>\n", + " <th>remark</th>\n", + " <th>content</th>\n", + " <th>class_bert</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>abrabeses-0</td>\n", + " <td>1</td>\n", + " <td>623</td>\n", + " <td>geography</td>\n", + " <td>NaN</td>\n", + " <td>ABRABESES. Village d’Espagne de la prov. de Za...</td>\n", + " <td>Géographie</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>accius-0</td>\n", + " <td>1</td>\n", + " <td>1076</td>\n", + " <td>biography</td>\n", + " <td>NaN</td>\n", + " <td>ACCIUS, L. ou L. ATTIUS (170-94 av. J.-C.), po...</td>\n", + " <td>Belles-lettres - Poésie</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>achenbach-2</td>\n", + " <td>1</td>\n", + " <td>1357</td>\n", + " <td>biography</td>\n", + " <td>NaN</td>\n", + " <td>ACHENBACH(Henri), administrateur prussien, né ...</td>\n", + " <td>Histoire</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>acireale-0</td>\n", + " <td>1</td>\n", + " <td>1513</td>\n", + " <td>geography</td>\n", + " <td>NaN</td>\n", + " <td>ACIREALE. Yille de Sicile, de la province et d...</td>\n", + " <td>Géographie</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>actée-0</td>\n", + " <td>1</td>\n", + " <td>1731</td>\n", + " <td>botany</td>\n", + " <td>NaN</td>\n", + " <td>ACTÉE(Actœa L.). Genre de plantes de la famill...</td>\n", + " <td>Histoire naturelle</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5</th>\n", + " <td>adulteration-0</td>\n", + " <td>1</td>\n", + " <td>2197</td>\n", + " <td>NaN</td>\n", + " <td>cross reference</td>\n", + " <td>ADULTERATION. 
Altération d’un médicament, d’un...</td>\n", + " <td>Chimie</td>\n", + " </tr>\n", + " <tr>\n", + " <th>6</th>\n", + " <td>aérides-0</td>\n", + " <td>1</td>\n", + " <td>2334</td>\n", + " <td>botany</td>\n", + " <td>NaN</td>\n", + " <td>AÉRIDES{Aérides Lour.). Genres de plantes de l...</td>\n", + " <td>Histoire naturelle</td>\n", + " </tr>\n", + " <tr>\n", + " <th>7</th>\n", + " <td>ager-0</td>\n", + " <td>1</td>\n", + " <td>2710</td>\n", + " <td>biography</td>\n", + " <td>NaN</td>\n", + " <td>AGERouAGERIUS (Nicolaus), médecin alsacien, né...</td>\n", + " <td>Histoire</td>\n", + " </tr>\n", + " <tr>\n", + " <th>8</th>\n", + " <td>aigu-1</td>\n", + " <td>1</td>\n", + " <td>3160</td>\n", + " <td>NaN</td>\n", + " <td>cross reference</td>\n", + " <td>AIGU1 LH E (V. Raimond d’).\\n</td>\n", + " <td>Marine</td>\n", + " </tr>\n", + " <tr>\n", + " <th>9</th>\n", + " <td>alavika-0</td>\n", + " <td>1</td>\n", + " <td>3664</td>\n", + " <td>theology</td>\n", + " <td>NaN</td>\n", + " <td>ALAVIKA« qui est d'Alava »(V. ce mot) : Bhikch...</td>\n", + " <td>Religion</td>\n", + " </tr>\n", + " <tr>\n", + " <th>10</th>\n", + " <td>allassac-0</td>\n", + " <td>2</td>\n", + " <td>755</td>\n", + " <td>geography</td>\n", + " <td>NaN</td>\n", + " <td>ALLASSAC. Com. du dép. de la Corrèze, arr. de ...</td>\n", + " <td>Géographie</td>\n", + " </tr>\n", + " <tr>\n", + " <th>11</th>\n", + " <td>allegretto-0</td>\n", + " <td>2</td>\n", + " <td>786</td>\n", + " <td>NaN</td>\n", + " <td>cross reference</td>\n", + " <td>ALLEGRETTO(V. Allegro).\\n</td>\n", + " <td>Musique</td>\n", + " </tr>\n", + " <tr>\n", + " <th>12</th>\n", + " <td>alleuze-0</td>\n", + " <td>2</td>\n", + " <td>908</td>\n", + " <td>geography</td>\n", + " <td>NaN</td>\n", + " <td>ALLEUZE. Com. du dép. du Cantal, arr. 
et cant....</td>\n", + " <td>Géographie</td>\n", + " </tr>\n", + " <tr>\n", + " <th>13</th>\n", + " <td>alliat-0</td>\n", + " <td>2</td>\n", + " <td>933</td>\n", + " <td>geography</td>\n", + " <td>NaN</td>\n", + " <td>ALLIAT. Com. du dép. de l’Ariège, arr. de Foix...</td>\n", + " <td>Géographie</td>\n", + " </tr>\n", + " <tr>\n", + " <th>14</th>\n", + " <td>amanty-0</td>\n", + " <td>2</td>\n", + " <td>1651</td>\n", + " <td>geography</td>\n", + " <td>NaN</td>\n", + " <td>AMANTY. Corn, du dép. de la Meuse, arr. de Com...</td>\n", + " <td>Géographie</td>\n", + " </tr>\n", + " <tr>\n", + " <th>15</th>\n", + " <td>âmasserah-0</td>\n", + " <td>2</td>\n", + " <td>1701</td>\n", + " <td>geography</td>\n", + " <td>explicit domain</td>\n", + " <td>ÂMASSERAH, AMASR1 ou AMASRAH (Géogr.). Ville d...</td>\n", + " <td>Géographie</td>\n", + " </tr>\n", + " <tr>\n", + " <th>16</th>\n", + " <td>a-118</td>\n", + " <td>2</td>\n", + " <td>2971</td>\n", + " <td>history</td>\n", + " <td>NaN</td>\n", + " <td>AN Cl LIA. Boucliers sacrés des Romains, au no...</td>\n", + " <td>Antiquité</td>\n", + " </tr>\n", + " <tr>\n", + " <th>17</th>\n", + " <td>androclès-0</td>\n", + " <td>2</td>\n", + " <td>3261</td>\n", + " <td>mythology</td>\n", + " <td>explicit domain</td>\n", + " <td>ANDROCLÈS(Myth.), un fils d’Eole qui régna sur...</td>\n", + " <td>Antiquité</td>\n", + " </tr>\n", + " <tr>\n", + " <th>18</th>\n", + " <td>anfouson-0</td>\n", + " <td>2</td>\n", + " <td>3394</td>\n", + " <td>zoology</td>\n", + " <td>NaN</td>\n", + " <td>ANFOUSON. Nom donné à Nice au Néron brun\\n(V. 
...</td>\n", + " <td>Histoire naturelle</td>\n", + " </tr>\n", + " <tr>\n", + " <th>19</th>\n", + " <td>anicet-bourgeois-0</td>\n", + " <td>2</td>\n", + " <td>3717</td>\n", + " <td>biography</td>\n", + " <td>NaN</td>\n", + " <td>ANICET-BOURGEOIS(Auguste Anicet, connu sous le...</td>\n", + " <td>Belles-lettres - Poésie</td>\n", + " </tr>\n", + " <tr>\n", + " <th>20</th>\n", + " <td>anomalistique-0</td>\n", + " <td>3</td>\n", + " <td>238</td>\n", + " <td>astronomy</td>\n", + " <td>explicit domain</td>\n", + " <td>ANOMALISTIQUE(Astron.). On appelle révolution\\...</td>\n", + " <td>Physique - [Sciences physico-mathématiques]</td>\n", + " </tr>\n", + " <tr>\n", + " <th>21</th>\n", + " <td>anostostome-0</td>\n", + " <td>3</td>\n", + " <td>298</td>\n", + " <td>zoology</td>\n", + " <td>NaN</td>\n", + " <td>ANOSTOSTOME(Anostostoma Gray). Genre d’insecte...</td>\n", + " <td>Histoire naturelle</td>\n", + " </tr>\n", + " <tr>\n", + " <th>22</th>\n", + " <td>anthoxanthème-0</td>\n", + " <td>3</td>\n", + " <td>571</td>\n", + " <td>chemistry</td>\n", + " <td>NaN</td>\n", + " <td>ANTHOXANTHÈME. L’un des deux principes coloran...</td>\n", + " <td>Pharmacie</td>\n", + " </tr>\n", + " <tr>\n", + " <th>23</th>\n", + " <td>aod-0</td>\n", + " <td>3</td>\n", + " <td>1024</td>\n", + " <td>theology</td>\n", + " <td>NaN</td>\n", + " <td>AOD, plus exactement Ehoud. personnage des com...</td>\n", + " <td>Histoire</td>\n", + " </tr>\n", + " <tr>\n", + " <th>24</th>\n", + " <td>aphellan-0</td>\n", + " <td>3</td>\n", + " <td>1177</td>\n", + " <td>astronomy</td>\n", + " <td>NaN</td>\n", + " <td>APHELLAN(Astron.). Un des noms de l’étoile a2 ...</td>\n", + " <td>Physique - [Sciences physico-mathématiques]</td>\n", + " </tr>\n", + " <tr>\n", + " <th>25</th>\n", + " <td>appelle-0</td>\n", + " <td>3</td>\n", + " <td>1494</td>\n", + " <td>geography</td>\n", + " <td>NaN</td>\n", + " <td>APPELLE. Com. du dép. du Tarn, arr. 
de Lavaux,...</td>\n", + " <td>Géographie</td>\n", + " </tr>\n", + " <tr>\n", + " <th>26</th>\n", + " <td>aragona-1</td>\n", + " <td>3</td>\n", + " <td>1841</td>\n", + " <td>biography</td>\n", + " <td>NaN</td>\n", + " <td>ARAGONA, cardinal d’origine sicilienne, né en ...</td>\n", + " <td>Religion</td>\n", + " </tr>\n", + " <tr>\n", + " <th>27</th>\n", + " <td>araujuzon-0</td>\n", + " <td>3</td>\n", + " <td>1940</td>\n", + " <td>geography</td>\n", + " <td>NaN</td>\n", + " <td>ARAUJUZON. Com. du dép. des Basses-Pyrénées, a...</td>\n", + " <td>Géographie</td>\n", + " </tr>\n", + " <tr>\n", + " <th>28</th>\n", + " <td>ardant-0</td>\n", + " <td>3</td>\n", + " <td>2421</td>\n", + " <td>biography</td>\n", + " <td>NaN</td>\n", + " <td>ARDANT(Paul-Joseph), général français, né en 1...</td>\n", + " <td>Militaire (Art) - Guerre - Arme</td>\n", + " </tr>\n", + " <tr>\n", + " <th>29</th>\n", + " <td>ariano-0</td>\n", + " <td>3</td>\n", + " <td>2839</td>\n", + " <td>geography</td>\n", + " <td>NaN</td>\n", + " <td>ARIANOdi Puglia. Ville de la prov. de principa...</td>\n", + " <td>Géographie</td>\n", + " </tr>\n", + " <tr>\n", + " <th>30</th>\n", + " <td>athabaska-0</td>\n", + " <td>4</td>\n", + " <td>1118</td>\n", + " <td>anthropology</td>\n", + " <td>NaN</td>\n", + " <td>ATHABASKA. Col, rivière, lac, territoire et fa...</td>\n", + " <td>Géographie</td>\n", + " </tr>\n", + " <tr>\n", + " <th>31</th>\n", + " <td>aslonnes-0</td>\n", + " <td>4</td>\n", + " <td>446</td>\n", + " <td>geography</td>\n", + " <td>NaN</td>\n", + " <td>ASLONNES, corn, du dép. de la Vienne, arr. 
de ...</td>\n", + " <td>Géographie</td>\n", + " </tr>\n", + " <tr>\n", + " <th>32</th>\n", + " <td>astr0rh1za-0</td>\n", + " <td>4</td>\n", + " <td>992</td>\n", + " <td>zoology</td>\n", + " <td>explicit domain</td>\n", + " <td>ASTR0RH1ZA(Zool.).Genre deForaminifèresimperfo...</td>\n", + " <td>Histoire naturelle</td>\n", + " </tr>\n", + " <tr>\n", + " <th>33</th>\n", + " <td>atthidographes-0</td>\n", + " <td>4</td>\n", + " <td>1397</td>\n", + " <td>NaN</td>\n", + " <td>cross reference</td>\n", + " <td>ATTHIDOGRAPHES(V. Atthide).\\n</td>\n", + " <td>Géographie</td>\n", + " </tr>\n", + " <tr>\n", + " <th>34</th>\n", + " <td>aubery-2</td>\n", + " <td>4</td>\n", + " <td>1577</td>\n", + " <td>biography</td>\n", + " <td>NaN</td>\n", + " <td>AUBERY(Antoine;, historien français, né le .18...</td>\n", + " <td>Histoire</td>\n", + " </tr>\n", + " <tr>\n", + " <th>35</th>\n", + " <td>aula-0</td>\n", + " <td>4</td>\n", + " <td>1992</td>\n", + " <td>history</td>\n", + " <td>NaN</td>\n", + " <td>AULA. Mot latin signifiant cour, lieu découver...</td>\n", + " <td>Architecture</td>\n", + " </tr>\n", + " <tr>\n", + " <th>36</th>\n", + " <td>au-113</td>\n", + " <td>4</td>\n", + " <td>2112</td>\n", + " <td>botany</td>\n", + " <td>explicit domain</td>\n", + " <td>AUNÉE (bot.). L'Aunée, Grande Année, Année off...</td>\n", + " <td>Histoire naturelle</td>\n", + " </tr>\n", + " <tr>\n", + " <th>37</th>\n", + " <td>auriol-4</td>\n", + " <td>4</td>\n", + " <td>2224</td>\n", + " <td>NaN</td>\n", + " <td>cross reference</td>\n", + " <td>AURIOL. 
Nom donné à Marseille au Maquereau (V....</td>\n", + " <td>Histoire naturelle</td>\n", + " </tr>\n", + " <tr>\n", + " <th>38</th>\n", + " <td>ave-lalleniant-0</td>\n", + " <td>4</td>\n", + " <td>2739</td>\n", + " <td>biography</td>\n", + " <td>NaN</td>\n", + " <td>AVE-LALLENIANT(Robert-Christian-Barthold), méd...</td>\n", + " <td>Histoire</td>\n", + " </tr>\n", + " <tr>\n", + " <th>39</th>\n", + " <td>badin-2</td>\n", + " <td>4</td>\n", + " <td>3857</td>\n", + " <td>biography</td>\n", + " <td>NaN</td>\n", + " <td>BADIN(Pierre-Adolphe), peintre français, né à ...</td>\n", + " <td>Arts et métiers</td>\n", + " </tr>\n", + " <tr>\n", + " <th>40</th>\n", + " <td>baizieux-0</td>\n", + " <td>5</td>\n", + " <td>133</td>\n", + " <td>geography</td>\n", + " <td>NaN</td>\n", + " <td>BAIZIEUX(Bacium, Basium). Com. du dép. de la\\n...</td>\n", + " <td>Géographie</td>\n", + " </tr>\n", + " <tr>\n", + " <th>41</th>\n", + " <td>balsam1te-0</td>\n", + " <td>5</td>\n", + " <td>677</td>\n", + " <td>botany</td>\n", + " <td>explicit domain</td>\n", + " <td>BALSAM1TE(Bot.) (Balsamita Desf.). Genre de Co...</td>\n", + " <td>Histoire naturelle</td>\n", + " </tr>\n", + " <tr>\n", + " <th>42</th>\n", + " <td>balze-0</td>\n", + " <td>5</td>\n", + " <td>757</td>\n", + " <td>navy</td>\n", + " <td>explicit domain</td>\n", + " <td>BALZE(Mar.). 
Radeau delà côte occidentale de l...</td>\n", + " <td>Marine</td>\n", + " </tr>\n", + " <tr>\n", + " <th>43</th>\n", + " <td>bande-2</td>\n", + " <td>5</td>\n", + " <td>880</td>\n", + " <td>history</td>\n", + " <td>NaN</td>\n", + " <td>BANDE(Ordre delà ) ou de l’ECHARPE.Ordre milita...</td>\n", + " <td>Histoire</td>\n", + " </tr>\n", + " <tr>\n", + " <th>44</th>\n", + " <td>barbosa-5</td>\n", + " <td>5</td>\n", + " <td>1580</td>\n", + " <td>biography</td>\n", + " <td>NaN</td>\n", + " <td>BARBOSA(Antonio), jésuite et orientaliste port...</td>\n", + " <td>Religion</td>\n", + " </tr>\n", + " <tr>\n", + " <th>45</th>\n", + " <td>bati-0</td>\n", + " <td>5</td>\n", + " <td>2955</td>\n", + " <td>architecture</td>\n", + " <td>NaN</td>\n", + " <td>BATIÈRE. Toit en forme de bât se terminant à c...</td>\n", + " <td>Architecture</td>\n", + " </tr>\n", + " <tr>\n", + " <th>46</th>\n", + " <td>baveuse-0</td>\n", + " <td>5</td>\n", + " <td>3457</td>\n", + " <td>zoology</td>\n", + " <td>explicit domain</td>\n", + " <td>BAVEUSE(Zool.). Nom vulgaire par lequel les\\np...</td>\n", + " <td>Histoire naturelle</td>\n", + " </tr>\n", + " <tr>\n", + " <th>47</th>\n", + " <td>beard-2</td>\n", + " <td>5</td>\n", + " <td>3728</td>\n", + " <td>biography</td>\n", + " <td>NaN</td>\n", + " <td>BEARD(James-Henry), peintre américain contempo...</td>\n", + " <td>Beaux-arts</td>\n", + " </tr>\n", + " <tr>\n", + " <th>48</th>\n", + " <td>beaufort-4</td>\n", + " <td>5</td>\n", + " <td>3838</td>\n", + " <td>geography</td>\n", + " <td>NaN</td>\n", + " <td>BEAUFORT. Com. du dép. de la Meuse, arr. de Mo...</td>\n", + " <td>Géographie</td>\n", + " </tr>\n", + " <tr>\n", + " <th>49</th>\n", + " <td>beaumont-26</td>\n", + " <td>5</td>\n", + " <td>4018</td>\n", + " <td>biography</td>\n", + " <td>NaN</td>\n", + " <td>BEAUMONT(J.-G. 
Leprevôt de), secrétaire du cle...</td>\n", + " <td>Histoire</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " id tome rank domain remark \\\n", + "0 abrabeses-0 1 623 geography NaN \n", + "1 accius-0 1 1076 biography NaN \n", + "2 achenbach-2 1 1357 biography NaN \n", + "3 acireale-0 1 1513 geography NaN \n", + "4 actée-0 1 1731 botany NaN \n", + "5 adulteration-0 1 2197 NaN cross reference \n", + "6 aérides-0 1 2334 botany NaN \n", + "7 ager-0 1 2710 biography NaN \n", + "8 aigu-1 1 3160 NaN cross reference \n", + "9 alavika-0 1 3664 theology NaN \n", + "10 allassac-0 2 755 geography NaN \n", + "11 allegretto-0 2 786 NaN cross reference \n", + "12 alleuze-0 2 908 geography NaN \n", + "13 alliat-0 2 933 geography NaN \n", + "14 amanty-0 2 1651 geography NaN \n", + "15 âmasserah-0 2 1701 geography explicit domain \n", + "16 a-118 2 2971 history NaN \n", + "17 androclès-0 2 3261 mythology explicit domain \n", + "18 anfouson-0 2 3394 zoology NaN \n", + "19 anicet-bourgeois-0 2 3717 biography NaN \n", + "20 anomalistique-0 3 238 astronomy explicit domain \n", + "21 anostostome-0 3 298 zoology NaN \n", + "22 anthoxanthème-0 3 571 chemistry NaN \n", + "23 aod-0 3 1024 theology NaN \n", + "24 aphellan-0 3 1177 astronomy NaN \n", + "25 appelle-0 3 1494 geography NaN \n", + "26 aragona-1 3 1841 biography NaN \n", + "27 araujuzon-0 3 1940 geography NaN \n", + "28 ardant-0 3 2421 biography NaN \n", + "29 ariano-0 3 2839 geography NaN \n", + "30 athabaska-0 4 1118 anthropology NaN \n", + "31 aslonnes-0 4 446 geography NaN \n", + "32 astr0rh1za-0 4 992 zoology explicit domain \n", + "33 atthidographes-0 4 1397 NaN cross reference \n", + "34 aubery-2 4 1577 biography NaN \n", + "35 aula-0 4 1992 history NaN \n", + "36 au-113 4 2112 botany explicit domain \n", + "37 auriol-4 4 2224 NaN cross reference \n", + "38 ave-lalleniant-0 4 2739 biography NaN \n", + "39 badin-2 4 3857 biography NaN \n", + "40 baizieux-0 5 133 geography NaN \n", 
+ "41 balsam1te-0 5 677 botany explicit domain \n", + "42 balze-0 5 757 navy explicit domain \n", + "43 bande-2 5 880 history NaN \n", + "44 barbosa-5 5 1580 biography NaN \n", + "45 bati-0 5 2955 architecture NaN \n", + "46 baveuse-0 5 3457 zoology explicit domain \n", + "47 beard-2 5 3728 biography NaN \n", + "48 beaufort-4 5 3838 geography NaN \n", + "49 beaumont-26 5 4018 biography NaN \n", + "\n", + " content \\\n", + "0 ABRABESES. Village d’Espagne de la prov. de Za... \n", + "1 ACCIUS, L. ou L. ATTIUS (170-94 av. J.-C.), po... \n", + "2 ACHENBACH(Henri), administrateur prussien, né ... \n", + "3 ACIREALE. Yille de Sicile, de la province et d... \n", + "4 ACTÉE(Actœa L.). Genre de plantes de la famill... \n", + "5 ADULTERATION. Altération d’un médicament, d’un... \n", + "6 AÉRIDES{Aérides Lour.). Genres de plantes de l... \n", + "7 AGERouAGERIUS (Nicolaus), médecin alsacien, né... \n", + "8 AIGU1 LH E (V. Raimond d’).\\n \n", + "9 ALAVIKA« qui est d'Alava »(V. ce mot) : Bhikch... \n", + "10 ALLASSAC. Com. du dép. de la Corrèze, arr. de ... \n", + "11 ALLEGRETTO(V. Allegro).\\n \n", + "12 ALLEUZE. Com. du dép. du Cantal, arr. et cant.... \n", + "13 ALLIAT. Com. du dép. de l’Ariège, arr. de Foix... \n", + "14 AMANTY. Corn, du dép. de la Meuse, arr. de Com... \n", + "15 ÂMASSERAH, AMASR1 ou AMASRAH (Géogr.). Ville d... \n", + "16 AN Cl LIA. Boucliers sacrés des Romains, au no... \n", + "17 ANDROCLÈS(Myth.), un fils d’Eole qui régna sur... \n", + "18 ANFOUSON. Nom donné à Nice au Néron brun\\n(V. ... \n", + "19 ANICET-BOURGEOIS(Auguste Anicet, connu sous le... \n", + "20 ANOMALISTIQUE(Astron.). On appelle révolution\\... \n", + "21 ANOSTOSTOME(Anostostoma Gray). Genre d’insecte... \n", + "22 ANTHOXANTHÈME. L’un des deux principes coloran... \n", + "23 AOD, plus exactement Ehoud. personnage des com... \n", + "24 APHELLAN(Astron.). Un des noms de l’étoile a2 ... \n", + "25 APPELLE. Com. du dép. du Tarn, arr. de Lavaux,... 
\n", + "26 ARAGONA, cardinal d’origine sicilienne, né en ... \n", + "27 ARAUJUZON. Com. du dép. des Basses-Pyrénées, a... \n", + "28 ARDANT(Paul-Joseph), général français, né en 1... \n", + "29 ARIANOdi Puglia. Ville de la prov. de principa... \n", + "30 ATHABASKA. Col, rivière, lac, territoire et fa... \n", + "31 ASLONNES, corn, du dép. de la Vienne, arr. de ... \n", + "32 ASTR0RH1ZA(Zool.).Genre deForaminifèresimperfo... \n", + "33 ATTHIDOGRAPHES(V. Atthide).\\n \n", + "34 AUBERY(Antoine;, historien français, né le .18... \n", + "35 AULA. Mot latin signifiant cour, lieu découver... \n", + "36 AUNÉE (bot.). L'Aunée, Grande Année, Année off... \n", + "37 AURIOL. Nom donné à Marseille au Maquereau (V.... \n", + "38 AVE-LALLENIANT(Robert-Christian-Barthold), méd... \n", + "39 BADIN(Pierre-Adolphe), peintre français, né à ... \n", + "40 BAIZIEUX(Bacium, Basium). Com. du dép. de la\\n... \n", + "41 BALSAM1TE(Bot.) (Balsamita Desf.). Genre de Co... \n", + "42 BALZE(Mar.). Radeau delà côte occidentale de l... \n", + "43 BANDE(Ordre delà ) ou de l’ECHARPE.Ordre milita... \n", + "44 BARBOSA(Antonio), jésuite et orientaliste port... \n", + "45 BATIÈRE. Toit en forme de bât se terminant à c... \n", + "46 BAVEUSE(Zool.). Nom vulgaire par lequel les\\np... \n", + "47 BEARD(James-Henry), peintre américain contempo... \n", + "48 BEAUFORT. Com. du dép. de la Meuse, arr. de Mo... \n", + "49 BEAUMONT(J.-G. Leprevôt de), secrétaire du cle... 
\n", + "\n", + " class_bert \n", + "0 Géographie \n", + "1 Belles-lettres - Poésie \n", + "2 Histoire \n", + "3 Géographie \n", + "4 Histoire naturelle \n", + "5 Chimie \n", + "6 Histoire naturelle \n", + "7 Histoire \n", + "8 Marine \n", + "9 Religion \n", + "10 Géographie \n", + "11 Musique \n", + "12 Géographie \n", + "13 Géographie \n", + "14 Géographie \n", + "15 Géographie \n", + "16 Antiquité \n", + "17 Antiquité \n", + "18 Histoire naturelle \n", + "19 Belles-lettres - Poésie \n", + "20 Physique - [Sciences physico-mathématiques] \n", + "21 Histoire naturelle \n", + "22 Pharmacie \n", + "23 Histoire \n", + "24 Physique - [Sciences physico-mathématiques] \n", + "25 Géographie \n", + "26 Religion \n", + "27 Géographie \n", + "28 Militaire (Art) - Guerre - Arme \n", + "29 Géographie \n", + "30 Géographie \n", + "31 Géographie \n", + "32 Histoire naturelle \n", + "33 Géographie \n", + "34 Histoire \n", + "35 Architecture \n", + "36 Histoire naturelle \n", + "37 Histoire naturelle \n", + "38 Histoire \n", + "39 Arts et métiers \n", + "40 Géographie \n", + "41 Histoire naturelle \n", + "42 Marine \n", + "43 Histoire \n", + "44 Religion \n", + "45 Architecture \n", + "46 Histoire naturelle \n", + "47 Beaux-arts \n", + "48 Géographie \n", + "49 Histoire " + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_LGE.head(50)" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [], + "source": [ + "df_LGE.to_csv(path + \"reports/classification_LGE.tsv\", sep=\"\\t\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [], + "machine_shape": "hm", + "name": "EDdA-Classification_BertFineTuning.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3.8.5", + "language": "python", + "name": "python3" + }, + 
"language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.5" + }, + "vscode": { + "interpreter": { + "hash": "5a66862d1e699d22749b730d4d12326d6986b018faa2bf0b5fca0506fffc064f" + } + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "0279837673b446b09aac18346213eb7e": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_69004a5069094f8c9d59d5136f627bef", + "IPY_MODEL_e96a95317b0945c58c8ff0e944c7593e", + "IPY_MODEL_68b69c9d3a274900bc2892848f725cb0" + ], + "layout": "IPY_MODEL_09b5f0bbd5c14bc289b0f92a22bb29ab" + } + }, + "0779c8ea0ed24e64a800ae5dff6bc8ce": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_8d24b669a39b4876ac0a014dff678db1", + "max": 810912, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_99b785ea53744868b8b11e5e94936fcc", + "value": 810912 + } + }, + "09b5f0bbd5c14bc289b0f92a22bb29ab": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": 
"LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "12afa6b6474b401f9ff3f189cc0d3d11": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": 
null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "152a31110bf9477989833eac91794688": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_826bd7d0a1f146ea9f7d53584468190c", + "max": 445032417, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_e86a1d4d268c4314897b58f7bba5ec25", + "value": 445032417 + } + }, + "1bf6a76237454349aafc1e9284376879": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "274e505b5f354efc8de3ef26cc43e617": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + 
"box_style": "", + "children": [ + "IPY_MODEL_ad5e0e1439a94578a31b80c90dbf3247", + "IPY_MODEL_0779c8ea0ed24e64a800ae5dff6bc8ce", + "IPY_MODEL_7870340ac12b469c8ac19de3a47b6e67" + ], + "layout": "IPY_MODEL_f1f9d5b32f60473b86ae6b340d6c0850" + } + }, + "2c44d9c11e704b70aa32904a23d1790c": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "2cf386a8d14d43389374f79bfa922675": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "2d1d632da0f740c38512c9ad779d3173": { + 
"model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "3592b1ed1d6d452b93c57b304943a1cb": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "4a23110523184d019a77368116f738f3": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + 
"overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "500826e3813b414a820aa260bfde9e23": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "5032547e748f45a3b0cdd12fafe1dd05": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": 
null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "58b4f9e0366f4d4eba7f902af84b8965": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_12afa6b6474b401f9ff3f189cc0d3d11", + "placeholder": "​", + "style": "IPY_MODEL_c4d981755d1d42b6940396b77bc251bc", + "value": "Downloading: 100%" + } + }, + "5978954f56fb40928b970f32d1634aaf": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "5f321455342348f49879a9ca8b392077": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": 
"@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "68b69c9d3a274900bc2892848f725cb0": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_5032547e748f45a3b0cdd12fafe1dd05", + "placeholder": "​", + "style": "IPY_MODEL_c4c1675163bd4997bb44d7ea3967a356", + "value": " 1.40M/1.40M [00:00<00:00, 6.57MB/s]" + } + }, + "69004a5069094f8c9d59d5136f627bef": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_cb447c62ce1d4c1ea760175ae619fbb9", + "placeholder": "​", + "style": "IPY_MODEL_76007b17ffd2478fa4a86f959d4f1766", + "value": "Downloading: 100%" + } + }, + "70dd7428d78c44409308d62ba04917de": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + 
"_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_4a23110523184d019a77368116f738f3", + "placeholder": "​", + "style": "IPY_MODEL_1bf6a76237454349aafc1e9284376879", + "value": "Downloading: 100%" + } + }, + "76007b17ffd2478fa4a86f959d4f1766": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "7870340ac12b469c8ac19de3a47b6e67": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_2c44d9c11e704b70aa32904a23d1790c", + "placeholder": "​", + "style": "IPY_MODEL_2cf386a8d14d43389374f79bfa922675", + "value": " 811k/811k [00:00<00:00, 2.75MB/s]" + } + }, + "826bd7d0a1f146ea9f7d53584468190c": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": 
null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "8d24b669a39b4876ac0a014dff678db1": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "8f467553598f4dcc9abf55da79c11018": { + "model_module": 
"@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_58b4f9e0366f4d4eba7f902af84b8965", + "IPY_MODEL_9383e09698ae4bd1820a4bca22e78315", + "IPY_MODEL_a189838c4de648198b0f4fc99c29ced8" + ], + "layout": "IPY_MODEL_9d7a8b3ecfe74f66b4238fe085c05906" + } + }, + "9383e09698ae4bd1820a4bca22e78315": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_fe0e3b1df104484c98fbdcd31a04e427", + "max": 508, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_5978954f56fb40928b970f32d1634aaf", + "value": 508 + } + }, + "9420a47a2bf44ead8cff283f20566cda": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + 
"grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "99b785ea53744868b8b11e5e94936fcc": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "9d7a8b3ecfe74f66b4238fe085c05906": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + 
"margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "a159d62667734657a49ba3a96494f137": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "a189838c4de648198b0f4fc99c29ced8": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": 
"HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_df95c20399dd4918bc7559a90886d4aa", + "placeholder": "​", + "style": "IPY_MODEL_2d1d632da0f740c38512c9ad779d3173", + "value": " 508/508 [00:00<00:00, 16.9kB/s]" + } + }, + "a9c47cb226ee41e18812f29f690992eb": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "ad5e0e1439a94578a31b80c90dbf3247": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": 
"IPY_MODEL_9420a47a2bf44ead8cff283f20566cda", + "placeholder": "​", + "style": "IPY_MODEL_5f321455342348f49879a9ca8b392077", + "value": "Downloading: 100%" + } + }, + "c4c1675163bd4997bb44d7ea3967a356": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "c4d981755d1d42b6940396b77bc251bc": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "cb447c62ce1d4c1ea760175ae619fbb9": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + 
"left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "d09d664839d04303b8fef9ef895f6e4f": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_70dd7428d78c44409308d62ba04917de", + "IPY_MODEL_152a31110bf9477989833eac91794688", + "IPY_MODEL_fcde5f4cf49846a0ad1b284aad98a38a" + ], + "layout": "IPY_MODEL_500826e3813b414a820aa260bfde9e23" + } + }, + "d4ad1a78750d49feaea584a82940bb7d": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "df95c20399dd4918bc7559a90886d4aa": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": 
null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "e86a1d4d268c4314897b58f7bba5ec25": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "e96a95317b0945c58c8ff0e944c7593e": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_a9c47cb226ee41e18812f29f690992eb", + "max": 1395301, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_d4ad1a78750d49feaea584a82940bb7d", + "value": 1395301 + } + }, + 
"f1f9d5b32f60473b86ae6b340d6c0850": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "fcde5f4cf49846a0ad1b284aad98a38a": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_a159d62667734657a49ba3a96494f137", + "placeholder": "​", + "style": "IPY_MODEL_3592b1ed1d6d452b93c57b304943a1cb", + "value": " 445M/445M [00:14<00:00, 39.2MB/s]" + } + }, + "fe0e3b1df104484c98fbdcd31a04e427": { + "model_module": 
"@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + } + } + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} -- GitLab