diff --git a/notebooks/EDdA_Classification_CNN_Conv2D.ipynb b/notebooks/EDdA_Classification_CNN_Conv2D.ipynb
deleted file mode 100644
index 8cd845358022acab2d5f7464f3eb95d3d0dcd5da..0000000000000000000000000000000000000000
--- a/notebooks/EDdA_Classification_CNN_Conv2D.ipynb
+++ /dev/null
@@ -1,1899 +0,0 @@
-{
-  "nbformat": 4,
-  "nbformat_minor": 0,
-  "metadata": {
-    "colab": {
-      "name": "EDdA-Classification_CNN_Conv2D.ipynb",
-      "provenance": [],
-      "collapsed_sections": []
-    },
-    "kernelspec": {
-      "display_name": "Python 3",
-      "name": "python3"
-    },
-    "language_info": {
-      "name": "python"
-    }
-  },
-  "cells": [
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "0yFsoHXX8Iyy"
-      },
-      "source": [
-        "# Deep learning for EDdA classification"
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "tFlUCDL2778i"
-      },
-      "source": [
-        "## Setup colab environment"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "Sp8d_Uus7SHJ",
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        },
-        "outputId": "2791f79e-a849-4b95-92c5-72150ea4d6e2"
-      },
-      "source": [
-        "from google.colab import drive\n",
-        "drive.mount('/content/drive')"
-      ],
-      "execution_count": 1,
-      "outputs": [
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "Mounted at /content/drive\n"
-          ]
-        }
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "jQBu-p6hBU-j"
-      },
-      "source": [
-        "### Install packages"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "bTIXsF6kBUdh"
-      },
-      "source": [
-        "#!pip install zeugma\n",
-        "#!pip install plot_model"
-      ],
-      "execution_count": null,
-      "outputs": []
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "56-04SNF8BMx"
-      },
-      "source": [
-        "### Import libraries"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "HwWkSznz7SEv"
-      },
-      "source": [
-        "import pandas as pd\n",
-        "import numpy as np\n",
-        "import matplotlib.pyplot as plt\n",
-        "import pickle\n",
-        "import os\n",
-        "\n",
-        "from tqdm import tqdm\n",
-        "import requests, zipfile, io\n",
-        "import codecs\n",
-        "\n",
-        "from sklearn import preprocessing # LabelEncoder\n",
-        "from sklearn.metrics import classification_report\n",
-        "from sklearn.metrics import confusion_matrix\n",
-        "\n",
-        "from keras.preprocessing import sequence\n",
-        "from keras.preprocessing.text import Tokenizer\n",
-        "\n",
-        "from keras.layers import BatchNormalization, Input, Reshape, Conv2D, MaxPool2D, Concatenate\n",
-        "from keras.layers import Embedding, Dropout, Flatten, Dense\n",
-        "from keras.models import Model, load_model\n",
-        "from keras.callbacks import ModelCheckpoint\n"
-      ],
-      "execution_count": 2,
-      "outputs": []
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "xrekV6W978l4"
-      },
-      "source": [
-        "### Utility functions"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "4LJ5blQR7PUe"
-      },
-      "source": [
-        "\n",
-        "def resample_classes(df, classColumnName, numberOfInstances):\n",
-        "  # randomly keep at most numberOfInstances rows per class\n",
-        "  replace = False  # sample without replacement\n",
-        "  fn = lambda obj: obj.loc[np.random.choice(obj.index, numberOfInstances if len(obj) > numberOfInstances else len(obj), replace),:]\n",
-        "  return df.groupby(classColumnName, as_index=False).apply(fn)\n",
-        "    \n"
-      ],
-      "execution_count": 3,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "-Rh3JMDh7zYd"
-      },
-      "source": [
-        ""
-      ],
-      "execution_count": 3,
-      "outputs": []
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "MtLr35eM753e"
-      },
-      "source": [
-        "## Load Data"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "FnbNT4NF7zal",
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        },
-        "outputId": "ca2c2751-6ac2-4ec8-ec1d-03927e9bd358"
-      },
-      "source": [
-        "!wget https://projet.liris.cnrs.fr/geode/EDdA-Classification/datasets/training_set.tsv\n",
-        "!wget https://projet.liris.cnrs.fr/geode/EDdA-Classification/datasets/test_set.tsv"
-      ],
-      "execution_count": 4,
-      "outputs": [
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "--2022-02-18 07:27:50--  https://projet.liris.cnrs.fr/geode/EDdA-Classification/datasets/training_set.tsv\n",
-            "Resolving projet.liris.cnrs.fr (projet.liris.cnrs.fr)... 134.214.142.28\n",
-            "Connecting to projet.liris.cnrs.fr (projet.liris.cnrs.fr)|134.214.142.28|:443... connected.\n",
-            "HTTP request sent, awaiting response... 200 OK\n",
-            "Length: 175634219 (167M) [text/tab-separated-values]\n",
-            "Saving to: ‘training_set.tsv’\n",
-            "\n",
-            "training_set.tsv    100%[===================>] 167.50M  23.4MB/s    in 7.7s    \n",
-            "\n",
-            "2022-02-18 07:27:58 (21.8 MB/s) - ‘training_set.tsv’ saved [175634219/175634219]\n",
-            "\n",
-            "--2022-02-18 07:27:58--  https://projet.liris.cnrs.fr/geode/EDdA-Classification/datasets/test_set.tsv\n",
-            "Resolving projet.liris.cnrs.fr (projet.liris.cnrs.fr)... 134.214.142.28\n",
-            "Connecting to projet.liris.cnrs.fr (projet.liris.cnrs.fr)|134.214.142.28|:443... connected.\n",
-            "HTTP request sent, awaiting response... 200 OK\n",
-            "Length: 42730598 (41M) [text/tab-separated-values]\n",
-            "Saving to: ‘test_set.tsv’\n",
-            "\n",
-            "test_set.tsv        100%[===================>]  40.75M  17.1MB/s    in 2.4s    \n",
-            "\n",
-            "2022-02-18 07:28:01 (17.1 MB/s) - ‘test_set.tsv’ saved [42730598/42730598]\n",
-            "\n"
-          ]
-        }
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "source": [
-        "### Loading dataset"
-      ],
-      "metadata": {
-        "id": "UHushJ1XfUj9"
-      }
-    },
-    {
-      "cell_type": "code",
-      "source": [
-        "train_path = 'training_set.tsv'\n",
-        "test_path =  'test_set.tsv'"
-      ],
-      "metadata": {
-        "id": "Q4te2c0bfvaJ"
-      },
-      "execution_count": 5,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "nRLaQUO97zcq"
-      },
-      "source": [
-        "df_train = pd.read_csv(train_path, sep=\"\\t\")\n"
-      ],
-      "execution_count": 6,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "source": [
-        "df_train.sample(5)"
-      ],
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/",
-          "height": 461
-        },
-        "id": "2MvHEc7zVK1N",
-        "outputId": "e5cfc8df-4ed9-4dd4-ce64-3350a8f9f6bc"
-      },
-      "execution_count": 7,
-      "outputs": [
-        {
-          "output_type": "execute_result",
-          "data": {
-            "text/html": [
-              "\n",
-              "  <div id=\"df-3f5f81f9-5804-4f8c-8424-4b90ee905792\">\n",
-              "    <div class=\"colab-df-container\">\n",
-              "      <div>\n",
-              "<style scoped>\n",
-              "    .dataframe tbody tr th:only-of-type {\n",
-              "        vertical-align: middle;\n",
-              "    }\n",
-              "\n",
-              "    .dataframe tbody tr th {\n",
-              "        vertical-align: top;\n",
-              "    }\n",
-              "\n",
-              "    .dataframe thead th {\n",
-              "        text-align: right;\n",
-              "    }\n",
-              "</style>\n",
-              "<table border=\"1\" class=\"dataframe\">\n",
-              "  <thead>\n",
-              "    <tr style=\"text-align: right;\">\n",
-              "      <th></th>\n",
-              "      <th>volume</th>\n",
-              "      <th>numero</th>\n",
-              "      <th>head</th>\n",
-              "      <th>normClass</th>\n",
-              "      <th>classEDdA</th>\n",
-              "      <th>author</th>\n",
-              "      <th>id_enccre</th>\n",
-              "      <th>domaine_enccre</th>\n",
-              "      <th>ensemble_domaine_enccre</th>\n",
-              "      <th>content</th>\n",
-              "      <th>contentWithoutClass</th>\n",
-              "      <th>firstParagraph</th>\n",
-              "      <th>nb_words</th>\n",
-              "    </tr>\n",
-              "  </thead>\n",
-              "  <tbody>\n",
-              "    <tr>\n",
-              "      <th>5965</th>\n",
-              "      <td>9</td>\n",
-              "      <td>712</td>\n",
-              "      <td>KNAWEL</td>\n",
-              "      <td>Botanique</td>\n",
-              "      <td>Botan.</td>\n",
-              "      <td>Jaucourt</td>\n",
-              "      <td>v9-452-0</td>\n",
-              "      <td>botanique</td>\n",
-              "      <td>Histoire naturelle</td>\n",
-              "      <td>KNAWEL, (Botan.) genre de plante ainsi nommée ...</td>\n",
-              "      <td>knawel   genre plante nommée \\n gérard ray par...</td>\n",
-              "      <td>knawel   genre plante nommée \\n gérard ray par...</td>\n",
-              "      <td>169</td>\n",
-              "    </tr>\n",
-              "    <tr>\n",
-              "      <th>21406</th>\n",
-              "      <td>4</td>\n",
-              "      <td>3605</td>\n",
-              "      <td>DECRETE</td>\n",
-              "      <td>Jurisprudence</td>\n",
-              "      <td>Jurispr.</td>\n",
-              "      <td>Boucher d'Argis</td>\n",
-              "      <td>v4-1826-0</td>\n",
-              "      <td>jurisprudence</td>\n",
-              "      <td>Droit - Jurisprudence</td>\n",
-              "      <td>DECRETE, adj. (Jurispr.) se dit communément\\nd...</td>\n",
-              "      <td>decrete adj   communément \\n contre a ordonné ...</td>\n",
-              "      <td>decrete adj   communément \\n contre a ordonné ...</td>\n",
-              "      <td>80</td>\n",
-              "    </tr>\n",
-              "    <tr>\n",
-              "      <th>46481</th>\n",
-              "      <td>12</td>\n",
-              "      <td>2389</td>\n",
-              "      <td>Piece nette</td>\n",
-              "      <td>Artillerie</td>\n",
-              "      <td>Artillerie.</td>\n",
-              "      <td>Jaucourt</td>\n",
-              "      <td>v12-1440-16</td>\n",
-              "      <td>artillerie</td>\n",
-              "      <td>Militaire (Art) - Guerre - Arme</td>\n",
-              "      <td>Piece nette, (Artillerie.) on appelle pieces n...</td>\n",
-              "      <td>piece nette   appelle pieces nestes pieces art...</td>\n",
-              "      <td>piece nette   appelle pieces nestes pieces art...</td>\n",
-              "      <td>68</td>\n",
-              "    </tr>\n",
-              "    <tr>\n",
-              "      <th>32540</th>\n",
-              "      <td>7</td>\n",
-              "      <td>1375</td>\n",
-              "      <td>Gale</td>\n",
-              "      <td>Manège | Maréchallerie</td>\n",
-              "      <td>Manége &amp; Maréchallerie.</td>\n",
-              "      <td>Bourgelat</td>\n",
-              "      <td>v7-622-1</td>\n",
-              "      <td>manège</td>\n",
-              "      <td>Maréchage - Manège</td>\n",
-              "      <td>Gale, (Manége &amp; Maréchallerie.) maladie prurig...</td>\n",
-              "      <td>gale   maladie prurigineuse \\n cutanée manifes...</td>\n",
-              "      <td>gale   maladie prurigineuse \\n cutanée manifes...</td>\n",
-              "      <td>3052</td>\n",
-              "    </tr>\n",
-              "    <tr>\n",
-              "      <th>27748</th>\n",
-              "      <td>13</td>\n",
-              "      <td>4039</td>\n",
-              "      <td>Récit historique</td>\n",
-              "      <td>Histoire</td>\n",
-              "      <td>Histoire.</td>\n",
-              "      <td>unsigned</td>\n",
-              "      <td>v13-2396-2</td>\n",
-              "      <td>histoire</td>\n",
-              "      <td>Histoire</td>\n",
-              "      <td>Récit historique, (Histoire.) le récit histori...</td>\n",
-              "      <td>récit historique   récit historique \\n exposé ...</td>\n",
-              "      <td>récit historique   récit historique \\n exposé ...</td>\n",
-              "      <td>122</td>\n",
-              "    </tr>\n",
-              "  </tbody>\n",
-              "</table>\n",
-              "</div>\n",
-              "      <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-3f5f81f9-5804-4f8c-8424-4b90ee905792')\"\n",
-              "              title=\"Convert this dataframe to an interactive table.\"\n",
-              "              style=\"display:none;\">\n",
-              "        \n",
-              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
-              "       width=\"24px\">\n",
-              "    <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n",
-              "    <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n",
-              "  </svg>\n",
-              "      </button>\n",
-              "      \n",
-              "  <style>\n",
-              "    .colab-df-container {\n",
-              "      display:flex;\n",
-              "      flex-wrap:wrap;\n",
-              "      gap: 12px;\n",
-              "    }\n",
-              "\n",
-              "    .colab-df-convert {\n",
-              "      background-color: #E8F0FE;\n",
-              "      border: none;\n",
-              "      border-radius: 50%;\n",
-              "      cursor: pointer;\n",
-              "      display: none;\n",
-              "      fill: #1967D2;\n",
-              "      height: 32px;\n",
-              "      padding: 0 0 0 0;\n",
-              "      width: 32px;\n",
-              "    }\n",
-              "\n",
-              "    .colab-df-convert:hover {\n",
-              "      background-color: #E2EBFA;\n",
-              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
-              "      fill: #174EA6;\n",
-              "    }\n",
-              "\n",
-              "    [theme=dark] .colab-df-convert {\n",
-              "      background-color: #3B4455;\n",
-              "      fill: #D2E3FC;\n",
-              "    }\n",
-              "\n",
-              "    [theme=dark] .colab-df-convert:hover {\n",
-              "      background-color: #434B5C;\n",
-              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
-              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
-              "      fill: #FFFFFF;\n",
-              "    }\n",
-              "  </style>\n",
-              "\n",
-              "      <script>\n",
-              "        const buttonEl =\n",
-              "          document.querySelector('#df-3f5f81f9-5804-4f8c-8424-4b90ee905792 button.colab-df-convert');\n",
-              "        buttonEl.style.display =\n",
-              "          google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
-              "\n",
-              "        async function convertToInteractive(key) {\n",
-              "          const element = document.querySelector('#df-3f5f81f9-5804-4f8c-8424-4b90ee905792');\n",
-              "          const dataTable =\n",
-              "            await google.colab.kernel.invokeFunction('convertToInteractive',\n",
-              "                                                     [key], {});\n",
-              "          if (!dataTable) return;\n",
-              "\n",
-              "          const docLinkHtml = 'Like what you see? Visit the ' +\n",
-              "            '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
-              "            + ' to learn more about interactive tables.';\n",
-              "          element.innerHTML = '';\n",
-              "          dataTable['output_type'] = 'display_data';\n",
-              "          await google.colab.output.renderOutput(dataTable, element);\n",
-              "          const docLink = document.createElement('div');\n",
-              "          docLink.innerHTML = docLinkHtml;\n",
-              "          element.appendChild(docLink);\n",
-              "        }\n",
-              "      </script>\n",
-              "    </div>\n",
-              "  </div>\n",
-              "  "
-            ],
-            "text/plain": [
-              "       volume  ...  nb_words\n",
-              "5965        9  ...       169\n",
-              "21406       4  ...        80\n",
-              "46481      12  ...        68\n",
-              "32540       7  ...      3052\n",
-              "27748      13  ...       122\n",
-              "\n",
-              "[5 rows x 13 columns]"
-            ]
-          },
-          "metadata": {},
-          "execution_count": 7
-        }
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "source": [
-        "## Configuration\n"
-      ],
-      "metadata": {
-        "id": "-63bh_cKfN4p"
-      }
-    },
-    {
-      "cell_type": "code",
-      "source": [
-        "columnText = 'contentWithoutClass'\n",
-        "columnClass = 'ensemble_domaine_enccre'\n",
-        "\n",
-        "maxOfInstancePerClass = 1500\n",
-        "\n",
-        "batch_size = 64\n",
-        "validation_split = 0.20\n",
-        "max_nb_words = 20000      # vocabulary size\n",
-        "max_sequence_length = 512  # maximum length of a 'document' \n",
-        "epochs = 10\n",
-        "\n",
-        "#embedding_name = \"fasttext\" \n",
-        "#embedding_dim = 300 \n",
-        "\n",
-        "embedding_name = \"glove.6B.100d\"\n",
-        "embedding_dim = 100 \n",
-        "\n",
-        "path = \"drive/MyDrive/Classification-EDdA/\"\n",
-        "\n",
-        "encoder_filename = \"label_encoder.pkl\"\n",
-        "tokenizer_filename = \"tokenizer_keras.pkl\""
-      ],
-      "metadata": {
-        "id": "nsRuyzYUfOBg"
-      },
-      "execution_count": 8,
-      "outputs": []
-    },
-    {
-      "cell_type": "markdown",
-      "source": [
-        "## Preprocessing\n"
-      ],
-      "metadata": {
-        "id": "ZDz-Y1LCfQt0"
-      }
-    },
-    {
-      "cell_type": "code",
-      "source": [
-        "if maxOfInstancePerClass != 10000:\n",
-        "  df_train = resample_classes(df_train, columnClass, maxOfInstancePerClass)"
-      ],
-      "metadata": {
-        "id": "4r41Z6T_yNND"
-      },
-      "execution_count": 9,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "vGWAgBH87ze8"
-      },
-      "source": [
-        "labels  = df_train[columnClass]\n",
-        "numberOfClasses = labels.nunique()\n",
-        "\n",
-        "if os.path.isfile(path+encoder_filename):    \n",
-        "    # load existing encoder \n",
-        "    with open(path+encoder_filename, 'rb') as file:\n",
-        "      encoder = pickle.load(file)\n",
-        "\n",
-        "else:\n",
-        "  encoder = preprocessing.LabelEncoder()\n",
-        "  encoder.fit(labels)\n",
-        "\n",
-        "  with open(path+encoder_filename, 'wb') as file:\n",
-        "      pickle.dump(encoder, file)\n",
-        "\n",
-        "\n",
-        "labels = encoder.transform(labels)"
-      ],
-      "execution_count": 10,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "source": [
-        "encoder.classes_"
-      ],
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        },
-        "id": "SME4vvVhW9Sn",
-        "outputId": "5774cc43-ef3f-4874-bbef-c68f28ca9880"
-      },
-      "execution_count": 11,
-      "outputs": [
-        {
-          "output_type": "execute_result",
-          "data": {
-            "text/plain": [
-              "array(['Agriculture - Economie rustique', 'Anatomie', 'Antiquité',\n",
-              "       'Architecture', 'Arts et métiers', 'Beaux-arts',\n",
-              "       'Belles-lettres - Poésie', 'Blason', 'Caractères', 'Chasse',\n",
-              "       'Chimie', 'Commerce', 'Droit - Jurisprudence',\n",
-              "       'Economie domestique', 'Grammaire', 'Géographie', 'Histoire',\n",
-              "       'Histoire naturelle', 'Jeu', 'Marine', 'Maréchage - Manège',\n",
-              "       'Mathématiques', 'Mesure', 'Militaire (Art) - Guerre - Arme',\n",
-              "       'Minéralogie', 'Monnaie', 'Musique', 'Médailles',\n",
-              "       'Médecine - Chirurgie', 'Métiers', 'Pharmacie', 'Philosophie',\n",
-              "       'Physique - [Sciences physico-mathématiques]', 'Politique',\n",
-              "       'Pêche', 'Religion', 'Spectacle', 'Superstition'], dtype=object)"
-            ]
-          },
-          "metadata": {},
-          "execution_count": 11
-        }
-      ]
-    },
-    {
-      "cell_type": "code",
-      "source": [
-        "labels_index = dict(zip(list(encoder.classes_), encoder.transform(list(encoder.classes_))))"
-      ],
-      "metadata": {
-        "id": "nIzWQ2VbW_UO"
-      },
-      "execution_count": 12,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "source": [
-        "labels_index"
-      ],
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        },
-        "id": "4e7ggEGiXC_W",
-        "outputId": "77c1fdd0-1c20-479f-eb47-9473484b424e"
-      },
-      "execution_count": 13,
-      "outputs": [
-        {
-          "output_type": "execute_result",
-          "data": {
-            "text/plain": [
-              "{'Agriculture - Economie rustique': 0,\n",
-              " 'Anatomie': 1,\n",
-              " 'Antiquité': 2,\n",
-              " 'Architecture': 3,\n",
-              " 'Arts et métiers': 4,\n",
-              " 'Beaux-arts': 5,\n",
-              " 'Belles-lettres - Poésie': 6,\n",
-              " 'Blason': 7,\n",
-              " 'Caractères': 8,\n",
-              " 'Chasse': 9,\n",
-              " 'Chimie': 10,\n",
-              " 'Commerce': 11,\n",
-              " 'Droit - Jurisprudence': 12,\n",
-              " 'Economie domestique': 13,\n",
-              " 'Grammaire': 14,\n",
-              " 'Géographie': 15,\n",
-              " 'Histoire': 16,\n",
-              " 'Histoire naturelle': 17,\n",
-              " 'Jeu': 18,\n",
-              " 'Marine': 19,\n",
-              " 'Maréchage - Manège': 20,\n",
-              " 'Mathématiques': 21,\n",
-              " 'Mesure': 22,\n",
-              " 'Militaire (Art) - Guerre - Arme': 23,\n",
-              " 'Minéralogie': 24,\n",
-              " 'Monnaie': 25,\n",
-              " 'Musique': 26,\n",
-              " 'Médailles': 27,\n",
-              " 'Médecine - Chirurgie': 28,\n",
-              " 'Métiers': 29,\n",
-              " 'Pharmacie': 30,\n",
-              " 'Philosophie': 31,\n",
-              " 'Physique - [Sciences physico-mathématiques]': 32,\n",
-              " 'Politique': 33,\n",
-              " 'Pêche': 34,\n",
-              " 'Religion': 35,\n",
-              " 'Spectacle': 36,\n",
-              " 'Superstition': 37}"
-            ]
-          },
-          "metadata": {},
-          "execution_count": 13
-        }
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "source": [
-        "### Loading pre-trained embeddings\n",
-        "\n",
-        "#### FastText"
-      ],
-      "metadata": {
-        "id": "Xo47i2WdIP7M"
-      }
-    },
-    {
-      "cell_type": "code",
-      "source": [
-        "# download FastText (too large to keep on the drive)\n",
-        "zip_file_url = \"https://dl.fbaipublicfiles.com/fasttext/vectors-english/crawl-300d-2M.vec.zip\"\n",
-        "r = requests.get(zip_file_url)\n",
-        "z = zipfile.ZipFile(io.BytesIO(r.content))\n",
-        "z.extractall()"
-      ],
-      "metadata": {
-        "id": "1yLSez4GIPu9"
-      },
-      "execution_count": null,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "source": [
-        "print('loading word embeddings FastText...')\n",
-        "\n",
-        "embeddings_index = {}\n",
-        "f = codecs.open('crawl-300d-2M.vec', encoding='utf-8')\n",
-        "\n",
-        "for line in tqdm(f):\n",
-        "    values = line.rstrip().rsplit(' ')\n",
-        "    word = values[0]\n",
-        "    coefs = np.asarray(values[1:], dtype='float32')\n",
-        "    embeddings_index[word] = coefs\n",
-        "f.close()\n",
-        "\n",
-        "print('found %s word vectors' % len(embeddings_index))"
-      ],
-      "metadata": {
-        "id": "3HqbPxx6IPsV"
-      },
-      "execution_count": null,
-      "outputs": []
-    },
-    {
-      "cell_type": "markdown",
-      "source": [
-        "#### GLOVE"
-      ],
-      "metadata": {
-        "id": "y359qxKWIPam"
-      }
-    },
-    {
-      "cell_type": "code",
-      "source": [
-        "# download Glove\n",
-        "#zip_file_url = \"https://nlp.stanford.edu/data/glove.6B.zip\"\n",
-        "#r = requests.get(zip_file_url)\n",
-        "#z = zipfile.ZipFile(io.BytesIO(r.content))\n",
-        "#z.extractall()"
-      ],
-      "metadata": {
-        "id": "5e_yuRDFIPKL"
-      },
-      "execution_count": null,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "source": [
-        "print('loading word embeddings GLOVE...')\n",
-        "\n",
-        "embeddings_index = {}\n",
-        "f = open(path+\"embeddings/\"+embedding_name+\".txt\", encoding='utf-8')\n",
-        "for line in tqdm(f):\n",
-        "    values = line.split()\n",
-        "    word = values[0]\n",
-        "    coefs = np.asarray(values[1:], dtype='float32')\n",
-        "    embeddings_index[word] = coefs\n",
-        "f.close()\n",
-        "\n",
-        "print('Found %s word vectors.' % len(embeddings_index))"
-      ],
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        },
-        "id": "9rEI90qGIPHm",
-        "outputId": "8e8abb83-97a8-4465-d10f-ae0e5715f9df"
-      },
-      "execution_count": 15,
-      "outputs": [
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "loading word embeddings GLOVE...\n"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stderr",
-          "text": [
-            "400000it [00:13, 30217.64it/s]"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "Found 400000 word vectors.\n"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stderr",
-          "text": [
-            "\n"
-          ]
-        }
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "HuUVfklf-dSR"
-      },
-      "source": [
-        "## Training models"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        },
-        "id": "NTNh6kMTp_eU",
-        "outputId": "aab501a9-16f7-47a9-bf4c-3134a431c114"
-      },
-      "source": [
-        "\n",
-        "raw_docs_train = df_train[columnText].tolist()\n",
-        "\n",
-        "\n",
-        "print(\"pre-processing train data...\")\n",
-        "\n",
-        "if os.path.isfile(path+tokenizer_filename):\n",
-        "  with open(path+tokenizer_filename, 'rb') as file:\n",
-        "      tokenizer = pickle.load(file)\n",
-        "else:\n",
-        "  tokenizer = Tokenizer(num_words = max_nb_words)\n",
-        "  tokenizer.fit_on_texts(raw_docs_train) \n",
-        "\n",
-        "  with open(path+tokenizer_filename, 'wb') as file:\n",
-        "        pickle.dump(tokenizer, file)\n",
-        "\n",
-        "sequences = tokenizer.texts_to_sequences(raw_docs_train)\n",
-        "\n",
-        "word_index = tokenizer.word_index\n",
-        "print(\"dictionary size: \", len(word_index))\n",
-        "\n",
-        "#pad sequences\n",
-        "data = sequence.pad_sequences(sequences, maxlen=max_sequence_length)\n",
-        "\n",
-        "print('Shape of data tensor:', data.shape)\n",
-        "print('Shape of label tensor:', labels.shape)\n",
-        "print(labels)"
-      ],
-      "execution_count": 16,
-      "outputs": [
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "pre-processing train data...\n",
-            "dictionary size:  190508\n",
-            "Shape of data tensor: (27381, 512)\n",
-            "Shape of label tensor: (27381,)\n",
-            "[ 0  0  0 ... 37 37 37]\n"
-          ]
-        }
-      ]
-    },
-    {
-      "cell_type": "code",
-      "source": [
-        "# split the data into a training set and a validation set\n",
-        "\n",
-        "indices = np.arange(data.shape[0])\n",
-        "np.random.shuffle(indices)\n",
-        "data = data[indices]\n",
-        "labels = labels[indices]\n",
-        "\n",
-        "nb_validation_samples = int(validation_split * data.shape[0])\n",
-        "\n",
-        "x_train = data[:-nb_validation_samples]\n",
-        "y_train = labels[:-nb_validation_samples]\n",
-        "x_val = data[-nb_validation_samples:]\n",
-        "y_val = labels[-nb_validation_samples:]\n"
-      ],
-      "metadata": {
-        "id": "sHYJ4P-YDfFb"
-      },
-      "execution_count": 17,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        },
-        "id": "wGjQI0YgpQAS",
-        "outputId": "0eb50fb4-92b2-4fca-e277-c531b7b474e9"
-      },
-      "source": [
-        "#embedding matrix\n",
-        "\n",
-        "print('preparing embedding matrix...')\n",
-        "\n",
-        "embedding_matrix = np.zeros((len(word_index)+1, embedding_dim))\n",
-        "\n",
-        "for word, i in word_index.items():\n",
-        "    embedding_vector = embeddings_index.get(word)\n",
-        "    if embedding_vector is not None : \n",
-        "        embedding_matrix[i] = embedding_vector\n"
-      ],
-      "execution_count": 18,
-      "outputs": [
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "preparing embedding matrix...\n"
-          ]
-        }
-      ]
-    },
-    {
-      "cell_type": "code",
-      "source": [
-        "# CNN text classifier: frozen pretrained embeddings feed three parallel Conv2D\n",
-        "# branches (one per entry of filter_sizes). Each branch is max-pooled over its\n",
-        "# full height, the pooled tensors are concatenated, flattened, passed through\n",
-        "# dropout and classified by a softmax layer with one unit per label.\n",
-        "# Architecture source: https://github.com/elvinaqa/Text-Classification-GloVe-CNN\n",
-        "\n",
-        "filter_sizes = [2,  3, 5]\n",
-        "drop = 0.5\n",
-        "# The number of convolution filters per branch is taken from max_sequence_length,\n",
-        "# exactly as before; the separate name only makes the two roles explicit.\n",
-        "num_filters = max_sequence_length\n",
-        "\n",
-        "embedding_layer = Embedding(len(word_index)+1, embedding_dim, input_length = max_sequence_length,\n",
-        "                    weights=[embedding_matrix], trainable=False)\n",
-        "# `shape` has to be a tuple; `(max_sequence_length)` without the trailing comma\n",
-        "# is a plain int.\n",
-        "inputs = Input(shape=(max_sequence_length,), dtype='int32')\n",
-        "embedding = embedding_layer(inputs)\n",
-        "\n",
-        "print(embedding.shape)\n",
-        "# Add a trailing channel axis so the embedded sequence can go through Conv2D.\n",
-        "reshape = Reshape((max_sequence_length, embedding_dim, 1))(embedding)\n",
-        "print(reshape.shape)\n",
-        "\n",
-        "# One convolution per filter size; each kernel spans the full embedding width.\n",
-        "conv_branches = [\n",
-        "    Conv2D(num_filters, kernel_size=(size, embedding_dim), padding='valid',\n",
-        "           kernel_initializer='normal', activation='relu')(reshape)\n",
-        "    for size in filter_sizes\n",
-        "]\n",
-        "\n",
-        "# With 'valid' padding a branch has max_sequence_length - size + 1 rows, so this\n",
-        "# pool size reduces every branch to a single row.\n",
-        "pooled_branches = [\n",
-        "    MaxPool2D(pool_size=(max_sequence_length - size + 1, 1), strides=(1,1), padding='valid')(conv)\n",
-        "    for size, conv in zip(filter_sizes, conv_branches)\n",
-        "]\n",
-        "\n",
-        "concatenated_tensor = Concatenate(axis=1)(pooled_branches)\n",
-        "flatten = Flatten()(concatenated_tensor)\n",
-        "dropout = Dropout(drop)(flatten)\n",
-        "output = Dense(len(labels_index), activation='softmax')(dropout)\n",
-        "\n",
-        "# Assemble the functional model from the integer input to the softmax output.\n",
-        "model = Model(inputs=inputs, outputs=output)\n",
-        "\n",
-        "# Save the model to disk each time the validation accuracy improves.\n",
-        "checkpoint = ModelCheckpoint('weights_cnn_sentece.hdf5', monitor='val_acc', verbose=1, save_best_only=True, mode='auto')\n",
-        "\n",
-        "# Labels are integer class indices, hence the sparse categorical loss.\n",
-        "model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['acc'])\n",
-        "model.summary()"
-      ],
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        },
-        "id": "OUphqlYJCC9n",
-        "outputId": "d51c889c-5582-411e-a039-ef5911fbeb9a"
-      },
-      "execution_count": 19,
-      "outputs": [
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "(None, 512, 100)\n",
-            "(None, 512, 100, 1)\n",
-            "Model: \"model\"\n",
-            "__________________________________________________________________________________________________\n",
-            " Layer (type)                   Output Shape         Param #     Connected to                     \n",
-            "==================================================================================================\n",
-            " input_1 (InputLayer)           [(None, 512)]        0           []                               \n",
-            "                                                                                                  \n",
-            " embedding (Embedding)          (None, 512, 100)     19050900    ['input_1[0][0]']                \n",
-            "                                                                                                  \n",
-            " reshape (Reshape)              (None, 512, 100, 1)  0           ['embedding[0][0]']              \n",
-            "                                                                                                  \n",
-            " conv2d (Conv2D)                (None, 511, 1, 512)  102912      ['reshape[0][0]']                \n",
-            "                                                                                                  \n",
-            " conv2d_1 (Conv2D)              (None, 510, 1, 512)  154112      ['reshape[0][0]']                \n",
-            "                                                                                                  \n",
-            " conv2d_2 (Conv2D)              (None, 508, 1, 512)  256512      ['reshape[0][0]']                \n",
-            "                                                                                                  \n",
-            " max_pooling2d (MaxPooling2D)   (None, 1, 1, 512)    0           ['conv2d[0][0]']                 \n",
-            "                                                                                                  \n",
-            " max_pooling2d_1 (MaxPooling2D)  (None, 1, 1, 512)   0           ['conv2d_1[0][0]']               \n",
-            "                                                                                                  \n",
-            " max_pooling2d_2 (MaxPooling2D)  (None, 1, 1, 512)   0           ['conv2d_2[0][0]']               \n",
-            "                                                                                                  \n",
-            " concatenate (Concatenate)      (None, 3, 1, 512)    0           ['max_pooling2d[0][0]',          \n",
-            "                                                                  'max_pooling2d_1[0][0]',        \n",
-            "                                                                  'max_pooling2d_2[0][0]']        \n",
-            "                                                                                                  \n",
-            " flatten (Flatten)              (None, 1536)         0           ['concatenate[0][0]']            \n",
-            "                                                                                                  \n",
-            " dropout (Dropout)              (None, 1536)         0           ['flatten[0][0]']                \n",
-            "                                                                                                  \n",
-            " dense (Dense)                  (None, 38)           58406       ['dropout[0][0]']                \n",
-            "                                                                                                  \n",
-            "==================================================================================================\n",
-            "Total params: 19,622,842\n",
-            "Trainable params: 571,942\n",
-            "Non-trainable params: 19,050,900\n",
-            "__________________________________________________________________________________________________\n"
-          ]
-        }
-      ]
-    },
-    {
-      "cell_type": "code",
-      "source": [
-        "# Train the model; `checkpoint` runs as a callback and the held-out split is\n",
-        "# passed as validation data.\n",
-        "fit_options = dict(\n",
-        "    batch_size=batch_size,\n",
-        "    epochs=epochs,\n",
-        "    verbose=1,\n",
-        "    callbacks=[checkpoint],\n",
-        "    validation_data=(x_val, y_val),\n",
-        ")\n",
-        "history = model.fit(x_train, y_train, **fit_options)"
-      ],
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        },
-        "id": "3aUBdLdNCEGK",
-        "outputId": "421839de-e503-4c04-e96f-ed3496c8e0ee"
-      },
-      "execution_count": null,
-      "outputs": [
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "Epoch 1/10\n",
-            "343/343 [==============================] - ETA: 0s - loss: 2.6879 - acc: 0.3116\n",
-            "Epoch 1: val_acc improved from -inf to 0.54164, saving model to weights_cnn_sentece.hdf5\n",
-            "343/343 [==============================] - 573s 2s/step - loss: 2.6879 - acc: 0.3116 - val_loss: 1.7624 - val_acc: 0.5416\n",
-            "Epoch 2/10\n",
-            "343/343 [==============================] - ETA: 0s - loss: 1.6911 - acc: 0.5385\n",
-            "Epoch 2: val_acc improved from 0.54164 to 0.61158, saving model to weights_cnn_sentece.hdf5\n",
-            "343/343 [==============================] - 559s 2s/step - loss: 1.6911 - acc: 0.5385 - val_loss: 1.4473 - val_acc: 0.6116\n",
-            "Epoch 3/10\n",
-            "177/343 [==============>...............] - ETA: 4:07 - loss: 1.3971 - acc: 0.6153"
-          ]
-        }
-      ]
-    },
-    {
-      "cell_type": "code",
-      "source": [
-        ""
-      ],
-      "metadata": {
-        "id": "ZcYbQsQEJKLq"
-      },
-      "execution_count": null,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "source": [
-        "# Plot the training and validation curves recorded in `history`: first the\n",
-        "# accuracy, then the loss. Each tuple is\n",
-        "# (train key, validation key, title, y-axis label, legend location).\n",
-        "curves = [\n",
-        "    ('acc', 'val_acc', 'model accuracy', 'accuracy', 'lower right'),\n",
-        "    ('loss', 'val_loss', 'model loss', 'loss', 'upper right'),\n",
-        "]\n",
-        "\n",
-        "for train_key, val_key, title, ylabel, legend_loc in curves:\n",
-        "    plt.plot(history.history[train_key])\n",
-        "    plt.plot(history.history[val_key])\n",
-        "    plt.title(title)\n",
-        "    plt.ylabel(ylabel)\n",
-        "    plt.xlabel('epoch')\n",
-        "    plt.legend(['train', 'validation'], loc=legend_loc)\n",
-        "    plt.show()"
-      ],
-      "metadata": {
-        "id": "Job-3uMvJKN_"
-      },
-      "execution_count": null,
-      "outputs": []
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "Uw6YR76p_AF0"
-      },
-      "source": [
-        "## Saving models"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "source": [
-        "# Base name shared by the saved model, the reports and the prediction files.\n",
-        "name = f\"cnn_conv2D_{embedding_name}_s{maxOfInstancePerClass}\""
-      ],
-      "metadata": {
-        "id": "TEzya-KCIyE7"
-      },
-      "execution_count": null,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "ykTp9lyRaAma"
-      },
-      "source": [
-        "model.save(path+name+\".h5\")\n"
-      ],
-      "execution_count": null,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "5J4xDoqRUSfS"
-      },
-      "source": [
-        "# save embeddings\n",
-        "\n",
-        "# saving embeddings index \n"
-      ],
-      "execution_count": null,
-      "outputs": []
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "HHlEtipG_Cp0"
-      },
-      "source": [
-        "## Loading models"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "fKt8ft1t_Cxx"
-      },
-      "source": [
-        "def _load_pickle(filename):\n",
-        "  \"\"\"Unpickle and return the object stored at `path + filename`.\n",
-        "\n",
-        "  NOTE(review): pickle.load executes arbitrary code from the file; only use it\n",
-        "  on files produced by this project.\n",
-        "  \"\"\"\n",
-        "  with open(path+filename, 'rb') as file:\n",
-        "    return pickle.load(file)\n",
-        "\n",
-        "\n",
-        "# Restore the saved model, then the tokenizer and the label encoder.\n",
-        "model = load_model(path+name+\".h5\")\n",
-        "tokenizer = _load_pickle(tokenizer_filename)\n",
-        "encoder = _load_pickle(encoder_filename)\n"
-      ],
-      "execution_count": null,
-      "outputs": []
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "zbS4poso-3k7"
-      },
-      "source": [
-        "## Evaluation"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "source": [
-        "df_test = pd.read_csv(test_path, sep=\"\\t\")\n"
-      ],
-      "metadata": {
-        "id": "KWORvsadvBbr"
-      },
-      "execution_count": null,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "source": [
-        "# Convert the test texts to padded index sequences with the loaded tokenizer,\n",
-        "# then keep the index of the highest-scoring class for each text.\n",
-        "test_texts = df_test[columnText].tolist()\n",
-        "test_labels  = df_test[columnClass].tolist()\n",
-        "\n",
-        "test_sequences = tokenizer.texts_to_sequences(test_texts)\n",
-        "test_input = sequence.pad_sequences(\n",
-        "    test_sequences,\n",
-        "    maxlen=max_sequence_length,\n",
-        ")\n",
-        "\n",
-        "# Get predictions\n",
-        "test_predictions_probas = model.predict(test_input)\n",
-        "test_predictions = np.argmax(test_predictions_probas, axis=-1)"
-      ],
-      "metadata": {
-        "id": "Xr0o-0i5t38G"
-      },
-      "execution_count": null,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "source": [
-        "# Map the predicted class indices back to class names and compare them with the\n",
-        "# true labels of the test set.\n",
-        "test_intent_predictions = encoder.inverse_transform(test_predictions)\n",
-        "\n",
-        "n_correct = sum(test_intent_predictions == test_labels)\n",
-        "print('accuracy: ', n_correct / len(test_labels))\n",
-        "\n",
-        "text_report = classification_report(test_labels, test_intent_predictions)\n",
-        "print(\"Precision, Recall and F1-Score:\\n\\n\", text_report)\n",
-        "\n"
-      ],
-      "metadata": {
-        "id": "lSn8yZ0gt3-d"
-      },
-      "execution_count": null,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "source": [
-        "# Per-class evaluation table: precision / recall / F1 / support taken from\n",
-        "# classification_report, plus FP / FN / TP / TN derived from the confusion matrix.\n",
-        "# The table, the predictions and a short text summary are written under `path`.\n",
-        "\n",
-        "report = classification_report(test_labels, test_intent_predictions, output_dict = True)\n",
-        "\n",
-        "accuracy = report['accuracy']\n",
-        "weighted_avg = report['weighted avg']\n",
-        "\n",
-        "# The report only has an entry for classes that occur in the test labels or in\n",
-        "# the predictions; use an all-zero row for the others instead of a KeyError.\n",
-        "empty_row = {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 0}\n",
-        "per_class = [report.get(c, empty_row) for c in encoder.classes_]\n",
-        "\n",
-        "precision = [row['precision'] for row in per_class]\n",
-        "recall = [row['recall'] for row in per_class]\n",
-        "f1 = [row['f1-score'] for row in per_class]\n",
-        "support = [row['support'] for row in per_class]\n",
-        "\n",
-        "# Pin the label order to encoder.classes_ so the matrix always has one row and\n",
-        "# one column per class, aligned with the rows of the table below.\n",
-        "cnf_matrix = confusion_matrix(test_labels, test_intent_predictions, labels=encoder.classes_)\n",
-        "TP = np.diag(cnf_matrix)\n",
-        "FP = cnf_matrix.sum(axis=0) - TP  # column total minus the diagonal\n",
-        "FN = cnf_matrix.sum(axis=1) - TP  # row total minus the diagonal\n",
-        "TN = cnf_matrix.sum() - (FP + FN + TP)\n",
-        "\n",
-        "dff = pd.DataFrame({\n",
-        "    'className': encoder.classes_,\n",
-        "    'precision': precision,\n",
-        "    'recall': recall,\n",
-        "    'f1-score': f1,\n",
-        "    'support': support,\n",
-        "    'FP': FP,\n",
-        "    'FN': FN,\n",
-        "    'TP': TP,\n",
-        "    'TN': TN,\n",
-        "})\n",
-        "\n",
-        "content = name + \"\\n\"\n",
-        "print(name)\n",
-        "content += str(weighted_avg) + \"\\n\"\n",
-        "print(weighted_avg)\n",
-        "print(accuracy)\n",
-        "print(dff)\n",
-        "\n",
-        "dff.to_csv(path+\"/reports/report_\"+name+\".csv\", index=False)\n",
-        "\n",
-        "# save the predictions next to the true labels\n",
-        "pd.DataFrame({'labels': pd.Series(df_test[columnClass]), 'predictions': pd.Series(test_intent_predictions)}).to_csv(path+\"/predictions/predictions_\"+name+\".csv\")\n",
-        "\n",
-        "with open(path+\"/reports/report_\"+name+\".txt\", 'w') as f:\n",
-        "  f.write(content)\n"
-      ],
-      "metadata": {
-        "id": "RQ0LYGuOt4A4"
-      },
-      "execution_count": null,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "source": [
-        ""
-      ],
-      "metadata": {
-        "id": "Lbwg2H8sJRe7"
-      },
-      "execution_count": null,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "source": [
-        ""
-      ],
-      "metadata": {
-        "id": "4mX5g55AJRhj"
-      },
-      "execution_count": null,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "source": [
-        ""
-      ],
-      "metadata": {
-        "id": "bIzOUHZnu8s_"
-      },
-      "execution_count": null,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "source": [
-        ""
-      ],
-      "metadata": {
-        "id": "AsDsTNWdu8vf"
-      },
-      "execution_count": null,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "source": [
-        ""
-      ],
-      "metadata": {
-        "id": "m9S-2wbeu8yK"
-      },
-      "execution_count": null,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "source": [
-        ""
-      ],
-      "metadata": {
-        "id": "qDpAbSWKu80r"
-      },
-      "execution_count": null,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "source": [
-        ""
-      ],
-      "metadata": {
-        "id": "-D8Gj6kzJRjv"
-      },
-      "execution_count": null,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "source": [
-        ""
-      ],
-      "metadata": {
-        "id": "Lwz5cO2eJRmD"
-      },
-      "execution_count": null,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "source": [
-        ""
-      ],
-      "metadata": {
-        "id": "yr_UWq14JRoI"
-      },
-      "execution_count": null,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "source": [
-        ""
-      ],
-      "metadata": {
-        "id": "WJM6J6_EJRqx"
-      },
-      "execution_count": null,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "source": [
-        ""
-      ],
-      "metadata": {
-        "id": "CtYR7NTvJRs2"
-      },
-      "execution_count": null,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "source": [
-        ""
-      ],
-      "metadata": {
-        "id": "Qppm6jATJRvM"
-      },
-      "execution_count": null,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "source": [
-        ""
-      ],
-      "metadata": {
-        "id": "rK5nK4gyJRx0"
-      },
-      "execution_count": null,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "source": [
-        ""
-      ],
-      "metadata": {
-        "id": "vSjWwcQKJRz7"
-      },
-      "execution_count": null,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "G9pjdMdNW_KS"
-      },
-      "source": [
-        "# Predicted class index for each validation sample (arg-max over the model output).\n",
-        "predictions = np.argmax(model.predict(word_seq_validation), axis=1)"
-      ],
-      "execution_count": null,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        },
-        "id": "IHpVJ79IW_M0",
-        "outputId": "2e1657b3-04d1-42f1-ea8b-9bbcd4744108"
-      },
-      "source": [
-        "# classification_report expects (y_true, y_pred), ground truth first. With the\n",
-        "# arguments swapped, precision and recall are exchanged and `support` counts\n",
-        "# predictions instead of true samples.\n",
-        "report = classification_report(y_validation, predictions, output_dict = True)\n",
-        "\n",
-        "accuracy = report['accuracy']\n",
-        "weighted_avg = report['weighted avg']\n",
-        "\n",
-        "print(accuracy, weighted_avg)"
-      ],
-      "execution_count": null,
-      "outputs": [
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "0.5726683109527725 {'precision': 0.6118028288513718, 'recall': 0.5726683109527725, 'f1-score': 0.5870482221489528, 'support': 10947}\n"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stderr",
-          "text": [
-            "/usr/local/lib/python3.7/dist-packages/sklearn/metrics/_classification.py:1308: UndefinedMetricWarning: Recall and F-score are ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.\n",
-            "  _warn_prf(average, modifier, msg_start, len(result))\n",
-            "/usr/local/lib/python3.7/dist-packages/sklearn/metrics/_classification.py:1308: UndefinedMetricWarning: Recall and F-score are ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.\n",
-            "  _warn_prf(average, modifier, msg_start, len(result))\n",
-            "/usr/local/lib/python3.7/dist-packages/sklearn/metrics/_classification.py:1308: UndefinedMetricWarning: Recall and F-score are ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.\n",
-            "  _warn_prf(average, modifier, msg_start, len(result))\n"
-          ]
-        }
-      ]
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "9SKjWffUW_PC"
-      },
-      "source": [
-        ""
-      ],
-      "execution_count": null,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "LpgkGq-fW_RN"
-      },
-      "source": [
-        "df_test = pd.read_csv(test_path, sep=\"\\t\")\n",
-        "\n",
-        "# Encode the test labels with the encoder that was loaded alongside the model\n",
-        "# (presumably the one fitted on the training labels - TODO confirm). Fitting a\n",
-        "# fresh LabelEncoder here would derive the class <-> index mapping from the test\n",
-        "# labels alone, which stops matching the model's output indices as soon as a\n",
-        "# class is missing from the test set, and would overwrite the loaded encoder.\n",
-        "# `transform` raises a ValueError on a label the encoder has never seen.\n",
-        "y_test = encoder.transform(df_test[columnClass])\n"
-      ],
-      "execution_count": null,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "Q9eYqi5SW_Ta",
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        },
-        "outputId": "31e45f20-583a-4ca6-eac8-21863f6fef5b"
-      },
-      "source": [
-        "# Pre-process the test texts (French tokenisation + stop-word removal) and turn\n",
-        "# them into padded index sequences with the existing tokenizer.\n",
-        "# NOTE(review): this cell pads to `max_len` while other cells use\n",
-        "# `max_sequence_length` - confirm both are defined and equal.\n",
-        "raw_docs_test = df_test[columnText].tolist()\n",
-        "\n",
-        "print(\"pre-processing test data...\")\n",
-        "\n",
-        "stop_words = set(stopwords.words('french'))\n",
-        "\n",
-        "processed_docs_test = [\n",
-        "    \" \".join(word for word in word_tokenize(doc, language='french') if word not in stop_words)\n",
-        "    for doc in tqdm(raw_docs_test)\n",
-        "]\n",
-        "\n",
-        "print(\"tokenizing input data...\")\n",
-        "# The tokenizer is reused as-is; it is not re-fitted on the test documents.\n",
-        "word_seq_test = sequence.pad_sequences(\n",
-        "    tokenizer.texts_to_sequences(processed_docs_test),\n",
-        "    maxlen=max_len,\n",
-        ")"
-      ],
-      "execution_count": null,
-      "outputs": [
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "pre-processing test data...\n"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stderr",
-          "text": [
-            "100%|██████████| 13137/13137 [00:09<00:00, 1331.48it/s]\n"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "tokenizing input data...\n"
-          ]
-        }
-      ]
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "_WjpJN-Bqjeb"
-      },
-      "source": [
-        "# Predicted class index for each test sample (arg-max over the model output).\n",
-        "predictions = np.argmax(model.predict(word_seq_test), axis=1)"
-      ],
-      "execution_count": null,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        },
-        "id": "zUwjL_dQqjgx",
-        "outputId": "912642ad-95eb-413a-d074-8d4881a57359"
-      },
-      "source": [
-        "# classification_report expects (y_true, y_pred), ground truth first. With the\n",
-        "# arguments swapped, precision and recall are exchanged and `support` counts\n",
-        "# predictions instead of true samples.\n",
-        "report = classification_report(y_test, predictions, output_dict = True)\n",
-        "\n",
-        "accuracy = report['accuracy']\n",
-        "weighted_avg = report['weighted avg']\n",
-        "\n",
-        "print(accuracy, weighted_avg)"
-      ],
-      "execution_count": null,
-      "outputs": [
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "0.5698409073608891 {'precision': 0.6081680700148677, 'recall': 0.5698409073608891, 'f1-score': 0.5847417616022411, 'support': 13137}\n"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stderr",
-          "text": [
-            "/usr/local/lib/python3.7/dist-packages/sklearn/metrics/_classification.py:1308: UndefinedMetricWarning: Recall and F-score are ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.\n",
-            "  _warn_prf(average, modifier, msg_start, len(result))\n",
-            "/usr/local/lib/python3.7/dist-packages/sklearn/metrics/_classification.py:1308: UndefinedMetricWarning: Recall and F-score are ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.\n",
-            "  _warn_prf(average, modifier, msg_start, len(result))\n",
-            "/usr/local/lib/python3.7/dist-packages/sklearn/metrics/_classification.py:1308: UndefinedMetricWarning: Recall and F-score are ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.\n",
-            "  _warn_prf(average, modifier, msg_start, len(result))\n"
-          ]
-        }
-      ]
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        },
-        "id": "ka6DcPe7qqvg",
-        "outputId": "0c8cfbe6-178d-4208-98ba-4ba688e32939"
-      },
-      "source": [
-        "from sklearn.metrics import confusion_matrix\n",
-        "\n",
-        "# Per-class table for the test set: precision / recall / F1 / support from\n",
-        "# classification_report, plus FP / FN / TP / TN from the confusion matrix.\n",
-        "\n",
-        "classesName = encoder.classes_\n",
-        "# Integer label of every class, in the same order as classesName; the report is\n",
-        "# keyed by the string form of these labels.\n",
-        "class_ids = encoder.transform(encoder.classes_)\n",
-        "classes = [str(e) for e in class_ids]\n",
-        "\n",
-        "# Compute the report in this cell with the ground truth first:\n",
-        "# classification_report(y_true, y_pred). With the arguments swapped, the\n",
-        "# 'precision' column would hold recall (and vice versa) and 'support' would\n",
-        "# count predictions.\n",
-        "report = classification_report(y_test, predictions, output_dict = True)\n",
-        "\n",
-        "# Classes that occur neither in y_test nor in the predictions have no entry in\n",
-        "# the report; use an all-zero row for them instead of raising a KeyError.\n",
-        "empty_row = {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 0}\n",
-        "per_class = [report.get(c, empty_row) for c in classes]\n",
-        "\n",
-        "precision = [row['precision'] for row in per_class]\n",
-        "recall = [row['recall'] for row in per_class]\n",
-        "f1 = [row['f1-score'] for row in per_class]\n",
-        "support = [row['support'] for row in per_class]\n",
-        "\n",
-        "accuracy = report['accuracy']\n",
-        "weighted_avg = report['weighted avg']\n",
-        "\n",
-        "# Pin the label order so the matrix has one row and one column per class.\n",
-        "cnf_matrix = confusion_matrix(y_test, predictions, labels=class_ids)\n",
-        "TP = np.diag(cnf_matrix)\n",
-        "FP = cnf_matrix.sum(axis=0) - TP  # column total minus the diagonal\n",
-        "FN = cnf_matrix.sum(axis=1) - TP  # row total minus the diagonal\n",
-        "TN = cnf_matrix.sum() - (FP + FN + TP)\n",
-        "\n",
-        "dff = pd.DataFrame({\n",
-        "    'className': classesName,\n",
-        "    'precision': precision,\n",
-        "    'recall': recall,\n",
-        "    'f1-score': f1,\n",
-        "    'support': support,\n",
-        "    'FP': FP,\n",
-        "    'FN': FN,\n",
-        "    'TP': TP,\n",
-        "    'TN': TN,\n",
-        "})\n",
-        "\n",
-        "print(\"test_cnn_s\"+str(maxOfInstancePerClass))\n",
-        "\n",
-        "print(weighted_avg)\n",
-        "print(accuracy)\n",
-        "print(dff)\n",
-        "\n",
-        "# NOTE(review): this path is hard-coded while other cells write under `path`.\n",
-        "dff.to_csv(\"drive/MyDrive/Classification-EDdA/report_test_cnn_s\"+str(maxOfInstancePerClass)+\".csv\", index=False)"
-      ],
-      "execution_count": null,
-      "outputs": [
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "test_cnn_s10000\n",
-            "{'precision': 0.6081680700148677, 'recall': 0.5698409073608891, 'f1-score': 0.5847417616022411, 'support': 13137}\n",
-            "0.5698409073608891\n",
-            "                                      className  precision  ...    TP     TN\n",
-            "0               Agriculture - Economie rustique   0.216535  ...    55  12636\n",
-            "1                                      Anatomie   0.459821  ...   103  12768\n",
-            "2                                     Antiquité   0.287975  ...    91  12710\n",
-            "3                                  Architecture   0.339623  ...   108  12722\n",
-            "4                               Arts et métiers   0.015504  ...     2  12995\n",
-            "5                                    Beaux-arts   0.060000  ...     6  13018\n",
-            "6                       Belles-lettres - Poésie   0.127660  ...    30  12761\n",
-            "7                                        Blason   0.228571  ...    24  12993\n",
-            "8                                    Caractères   0.037037  ...     1  13110\n",
-            "9                                        Chasse   0.221311  ...    27  12962\n",
-            "10                                       Chimie   0.160714  ...    18  12991\n",
-            "11                                     Commerce   0.443418  ...   192  12490\n",
-            "12                        Droit - Jurisprudence   0.762879  ...  1081  11263\n",
-            "13                          Economie domestique   0.000000  ...     0  13102\n",
-            "14                                    Grammaire   0.408929  ...   229  12254\n",
-            "15                                   Géographie   0.917312  ...  2607   9910\n",
-            "16                                     Histoire   0.405063  ...   288  11777\n",
-            "17                           Histoire naturelle   0.743292  ...   831  11661\n",
-            "18                                          Jeu   0.061538  ...     4  13067\n",
-            "19                                       Marine   0.590805  ...   257  12549\n",
-            "20                           Maréchage - Manège   0.620690  ...    72  13001\n",
-            "21                                Mathématiques   0.549669  ...    83  12903\n",
-            "22                                       Mesure   0.095238  ...     4  13087\n",
-            "23              Militaire (Art) - Guerre - Arme   0.476351  ...   141  12704\n",
-            "24                                  Minéralogie   0.000000  ...     0  13111\n",
-            "25                                      Monnaie   0.054795  ...     4  13051\n",
-            "26                                      Musique   0.287500  ...    46  12904\n",
-            "27                                    Médailles   0.000000  ...     0  13107\n",
-            "28                         Médecine - Chirurgie   0.376218  ...   193  12149\n",
-            "29                                      Métiers   0.605634  ...   731  11047\n",
-            "30                                    Pharmacie   0.070423  ...     5  13045\n",
-            "31                                  Philosophie   0.071429  ...     8  12996\n",
-            "32  Physique - [Sciences physico-mathématiques]   0.378378  ...   112  12674\n",
-            "33                                    Politique   0.000000  ...     0  13110\n",
-            "34                                        Pêche   0.170213  ...     8  13069\n",
-            "35                                     Religion   0.326371  ...   125  12488\n",
-            "36                                    Spectacle   0.000000  ...     0  13121\n",
-            "37                                 Superstition   0.000000  ...     0  13112\n",
-            "\n",
-            "[38 rows x 9 columns]\n"
-          ]
-        }
-      ]
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "BqJ1_hUUqqx5"
-      },
-      "source": [
-        ""
-      ],
-      "execution_count": null,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "bhfuGNwIqrOQ"
-      },
-      "source": [
-        ""
-      ],
-      "execution_count": null,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "NkL3MopyqrQk"
-      },
-      "source": [
-        ""
-      ],
-      "execution_count": null,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "XLHl-pvzqjjI"
-      },
-      "source": [
-        ""
-      ],
-      "execution_count": null,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "lLR_Xvi9qjlo"
-      },
-      "source": [
-        ""
-      ],
-      "execution_count": null,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "8cGcLOFTqjoP"
-      },
-      "source": [
-        ""
-      ],
-      "execution_count": null,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "vLGTnit_W_V8"
-      },
-      "source": [
-        ""
-      ],
-      "execution_count": null,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "R-3lBXjDD9wE"
-      },
-      "source": [
-        "def predict(data, max_len):\n",
-        "  \"\"\"Return the predicted class index for every text in `data`.\n",
-        "\n",
-        "  `data` and `max_len` are handed to `prepare_sequence`; the first element\n",
-        "  of its result is scored by the global `model`, and the position of the\n",
-        "  highest score along axis 1 is returned for each row.\n",
-        "  \"\"\"\n",
-        "  prepared_inputs, _unused = prepare_sequence(data, max_len)\n",
-        "  class_scores = model.predict(prepared_inputs)\n",
-        "  predicted_ids = np.argmax(class_scores, axis=1)\n",
-        "  return predicted_ids\n",
-        "\n",
-        "\n",
-        "def eval(data, labels, max_len):\n",
-        "  \"\"\"Print the accuracy and weighted-average metrics of `model` on `data`.\n",
-        "\n",
-        "  NOTE: this name shadows Python's built-in `eval`; it is kept unchanged so\n",
-        "  that existing calls in the notebook keep working.\n",
-        "\n",
-        "  Args:\n",
-        "    data: texts to classify, forwarded to `predict`.\n",
-        "    labels: true class labels aligned with `data`, in the same encoding as\n",
-        "      the indices returned by `predict`.\n",
-        "    max_len: forwarded to `predict`.\n",
-        "  \"\"\"\n",
-        "  pred_labels_ = predict(data, max_len)\n",
-        "  # scikit-learn expects (y_true, y_pred): passing them reversed swaps\n",
-        "  # precision and recall and weights the averages by predicted class counts\n",
-        "  # instead of the true ones.\n",
-        "  report = classification_report(labels, pred_labels_, output_dict = True)\n",
-        "\n",
-        "  accuracy = report['accuracy']\n",
-        "  weighted_avg = report['weighted avg']\n",
-        "  \n",
-        "  print(accuracy, weighted_avg)"
-      ],
-      "execution_count": null,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        },
-        "id": "6T3kAvKvExgc",
-        "outputId": "c6d4560e-fc64-4579-9adb-79c2e36d2386"
-      },
-      "source": [
-        "# Evaluate on the validation set\n",
-        "eval(df_validation[columnText], y_validation, max_len)"
-      ],
-      "execution_count": null,
-      "outputs": [
-        {
-          "output_type": "stream",
-          "name": "stderr",
-          "text": [
-            "/usr/local/lib/python3.7/dist-packages/zeugma/keras_transformers.py:33: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray\n",
-            "  return np.array(self.texts_to_sequences(texts))\n"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "0.06925290207361841 {'precision': 0.09108131158125257, 'recall': 0.06925290207361841, 'f1-score': 0.06099084715237025, 'support': 10079}\n"
-          ]
-        }
-      ]
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        },
-        "id": "pTDJA03_-8yu",
-        "outputId": "d8bcdf73-c4c3-4c88-b063-90bd1cad5122"
-      },
-      "source": [
-        "# Evaluate on the test set\n",
-        "df_test = pd.read_csv(test_path, sep=\"\\t\")\n",
-        "#df_test = resample_classes(df_test, columnClass, maxOfInstancePerClass)\n",
-        "\n",
-        "# NOTE(review): a new LabelEncoder is fitted on the test labels here; its ids\n",
-        "# only line up with the model's outputs if the test set contains the same set\n",
-        "# of classes as the data the model was trained on - TODO confirm.\n",
-        "encoder = preprocessing.LabelEncoder()\n",
-        "y_test = encoder.fit_transform(df_test[columnClass])\n",
-        "\n",
-        "eval(df_test[columnText], y_test, max_len)\n"
-      ],
-      "execution_count": null,
-      "outputs": [
-        {
-          "output_type": "stream",
-          "name": "stderr",
-          "text": [
-            "/usr/local/lib/python3.7/dist-packages/zeugma/keras_transformers.py:33: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray\n",
-            "  return np.array(self.texts_to_sequences(texts))\n"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "0.07231483595950369 {'precision': 0.081194635559303, 'recall': 0.07231483595950369, 'f1-score': 0.06322383877903374, 'support': 13137}\n"
-          ]
-        }
-      ]
-    }
-  ]
-}
\ No newline at end of file