From cb0b695f407feb1c688aece313293156dff278d5 Mon Sep 17 00:00:00 2001
From: Ludovic Moncla <moncla.ludovic@gmail.com>
Date: Mon, 16 Jan 2023 15:04:30 +0100
Subject: [PATCH] Update Classification_Zero-Shot-Learning.ipynb

---
 .../Classification_Zero-Shot-Learning.ipynb   | 269 ++++++++++++++++--
 1 file changed, 247 insertions(+), 22 deletions(-)

diff --git a/notebooks/Classification_Zero-Shot-Learning.ipynb b/notebooks/Classification_Zero-Shot-Learning.ipynb
index 673bab2..a42e4f4 100644
--- a/notebooks/Classification_Zero-Shot-Learning.ipynb
+++ b/notebooks/Classification_Zero-Shot-Learning.ipynb
@@ -123,7 +123,7 @@
     },
     {
       "cell_type": "code",
-      "execution_count": null,
+      "execution_count": 2,
       "metadata": {},
       "outputs": [],
       "source": [
@@ -138,7 +138,7 @@
     },
     {
       "cell_type": "code",
-      "execution_count": null,
+      "execution_count": 3,
       "metadata": {
         "colab": {
           "base_uri": "https://localhost:8080/"
@@ -146,7 +146,170 @@
         "id": "LRKJzWmf3pCg",
         "outputId": "686c3ef4-8267-4266-95af-7193725aadca"
       },
-      "outputs": [],
+      "outputs": [
+        {
+          "data": {
+            "text/html": [
+              "<div>\n",
+              "<style scoped>\n",
+              "    .dataframe tbody tr th:only-of-type {\n",
+              "        vertical-align: middle;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe tbody tr th {\n",
+              "        vertical-align: top;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe thead th {\n",
+              "        text-align: right;\n",
+              "    }\n",
+              "</style>\n",
+              "<table border=\"1\" class=\"dataframe\">\n",
+              "  <thead>\n",
+              "    <tr style=\"text-align: right;\">\n",
+              "      <th></th>\n",
+              "      <th>volume</th>\n",
+              "      <th>numero</th>\n",
+              "      <th>head</th>\n",
+              "      <th>normClass</th>\n",
+              "      <th>classEDdA</th>\n",
+              "      <th>author</th>\n",
+              "      <th>id_enccre</th>\n",
+              "      <th>domaine_enccre</th>\n",
+              "      <th>ensemble_domaine_enccre</th>\n",
+              "      <th>content</th>\n",
+              "      <th>contentWithoutClass</th>\n",
+              "      <th>firstParagraph</th>\n",
+              "      <th>nb_word</th>\n",
+              "    </tr>\n",
+              "  </thead>\n",
+              "  <tbody>\n",
+              "    <tr>\n",
+              "      <th>0</th>\n",
+              "      <td>11</td>\n",
+              "      <td>2973</td>\n",
+              "      <td>ORNIS</td>\n",
+              "      <td>Commerce</td>\n",
+              "      <td>Comm.</td>\n",
+              "      <td>unsigned</td>\n",
+              "      <td>v11-1767-0</td>\n",
+              "      <td>commerce</td>\n",
+              "      <td>Commerce</td>\n",
+              "      <td>ORNIS, s. m. toile des Indes, (Comm.) sortes d...</td>\n",
+              "      <td>ORNIS, s. m. toile des Indes, () sortes de\\nto...</td>\n",
+              "      <td>ORNIS, s. m. toile des Indes, () sortes de\\nto...</td>\n",
+              "      <td>45</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>1</th>\n",
+              "      <td>3</td>\n",
+              "      <td>3525</td>\n",
+              "      <td>COMPRENDRE</td>\n",
+              "      <td>Philosophie</td>\n",
+              "      <td>terme de Philosophie,</td>\n",
+              "      <td>Diderot</td>\n",
+              "      <td>v3-1722-0</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>* COMPRENDRE, v. act. terme de Philosophie,\\nc...</td>\n",
+              "      <td>* COMPRENDRE, v. act. \\nc'est appercevoir la l...</td>\n",
+              "      <td>* COMPRENDRE, v. act. \\nc'est appercevoir la l...</td>\n",
+              "      <td>92</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>2</th>\n",
+              "      <td>1</td>\n",
+              "      <td>2560</td>\n",
+              "      <td>ANCRE</td>\n",
+              "      <td>Marine</td>\n",
+              "      <td>Marine</td>\n",
+              "      <td>d'Alembert &amp; Diderot</td>\n",
+              "      <td>v1-1865-0</td>\n",
+              "      <td>marine</td>\n",
+              "      <td>Marine</td>\n",
+              "      <td>ANCRE, s. f. (Marine.) est un instrument de fe...</td>\n",
+              "      <td>ANCRE, s. f. (.) est un instrument de fer\\nABC...</td>\n",
+              "      <td>ANCRE, s. f. (.) est un instrument de fer\\nABC...</td>\n",
+              "      <td>3327</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>3</th>\n",
+              "      <td>16</td>\n",
+              "      <td>4241</td>\n",
+              "      <td>VAKEBARO</td>\n",
+              "      <td>Géographie moderne</td>\n",
+              "      <td>Géog. mod.</td>\n",
+              "      <td>unsigned</td>\n",
+              "      <td>v16-2587-0</td>\n",
+              "      <td>géographie</td>\n",
+              "      <td>Géographie</td>\n",
+              "      <td>VAKEBARO, (Géog. mod.) vallée du royaume\\nd'Es...</td>\n",
+              "      <td>VAKEBARO, () vallée du royaume\\nd'Espagne dans...</td>\n",
+              "      <td>VAKEBARO, () vallée du royaume\\nd'Espagne dans...</td>\n",
+              "      <td>34</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>4</th>\n",
+              "      <td>8</td>\n",
+              "      <td>3281</td>\n",
+              "      <td>INSPECTEUR</td>\n",
+              "      <td>Histoire ancienne</td>\n",
+              "      <td>Hist. anc.</td>\n",
+              "      <td>unsigned</td>\n",
+              "      <td>v8-2533-0</td>\n",
+              "      <td>histoire</td>\n",
+              "      <td>Histoire</td>\n",
+              "      <td>INSPECTEUR, s. m. inspector ; (Hist. anc.) cel...</td>\n",
+              "      <td>INSPECTEUR, s. m. inspector ; () celui \\nà qui...</td>\n",
+              "      <td>INSPECTEUR, s. m. inspector ; () celui \\nà qui...</td>\n",
+              "      <td>102</td>\n",
+              "    </tr>\n",
+              "  </tbody>\n",
+              "</table>\n",
+              "</div>"
+            ],
+            "text/plain": [
+              "   volume  numero        head           normClass              classEDdA  \\\n",
+              "0      11    2973       ORNIS            Commerce                  Comm.   \n",
+              "1       3    3525  COMPRENDRE         Philosophie  terme de Philosophie,   \n",
+              "2       1    2560       ANCRE              Marine                 Marine   \n",
+              "3      16    4241    VAKEBARO  Géographie moderne             Géog. mod.   \n",
+              "4       8    3281  INSPECTEUR   Histoire ancienne             Hist. anc.   \n",
+              "\n",
+              "                 author   id_enccre domaine_enccre ensemble_domaine_enccre  \\\n",
+              "0              unsigned  v11-1767-0       commerce                Commerce   \n",
+              "1               Diderot   v3-1722-0            NaN                     NaN   \n",
+              "2  d'Alembert & Diderot   v1-1865-0         marine                  Marine   \n",
+              "3              unsigned  v16-2587-0     géographie              Géographie   \n",
+              "4              unsigned   v8-2533-0       histoire                Histoire   \n",
+              "\n",
+              "                                             content  \\\n",
+              "0  ORNIS, s. m. toile des Indes, (Comm.) sortes d...   \n",
+              "1  * COMPRENDRE, v. act. terme de Philosophie,\\nc...   \n",
+              "2  ANCRE, s. f. (Marine.) est un instrument de fe...   \n",
+              "3  VAKEBARO, (Géog. mod.) vallée du royaume\\nd'Es...   \n",
+              "4  INSPECTEUR, s. m. inspector ; (Hist. anc.) cel...   \n",
+              "\n",
+              "                                 contentWithoutClass  \\\n",
+              "0  ORNIS, s. m. toile des Indes, () sortes de\\nto...   \n",
+              "1  * COMPRENDRE, v. act. \\nc'est appercevoir la l...   \n",
+              "2  ANCRE, s. f. (.) est un instrument de fer\\nABC...   \n",
+              "3  VAKEBARO, () vallée du royaume\\nd'Espagne dans...   \n",
+              "4  INSPECTEUR, s. m. inspector ; () celui \\nà qui...   \n",
+              "\n",
+              "                                      firstParagraph  nb_word  \n",
+              "0  ORNIS, s. m. toile des Indes, () sortes de\\nto...       45  \n",
+              "1  * COMPRENDRE, v. act. \\nc'est appercevoir la l...       92  \n",
+              "2  ANCRE, s. f. (.) est un instrument de fer\\nABC...     3327  \n",
+              "3  VAKEBARO, () vallée du royaume\\nd'Espagne dans...       34  \n",
+              "4  INSPECTEUR, s. m. inspector ; () celui \\nà qui...      102  "
+            ]
+          },
+          "execution_count": 3,
+          "metadata": {},
+          "output_type": "execute_result"
+        }
+      ],
       "source": [
         "df = pd.read_csv(input_path + test_set_path, sep=\"\\t\")\n",
         "df.head()"
@@ -154,16 +317,27 @@
     },
     {
       "cell_type": "code",
-      "execution_count": null,
+      "execution_count": 4,
       "metadata": {},
-      "outputs": [],
+      "outputs": [
+        {
+          "data": {
+            "text/plain": [
+              "(15854, 13)"
+            ]
+          },
+          "execution_count": 4,
+          "metadata": {},
+          "output_type": "execute_result"
+        }
+      ],
       "source": [
         "df.shape"
       ]
     },
     {
       "cell_type": "code",
-      "execution_count": null,
+      "execution_count": 5,
       "metadata": {},
       "outputs": [],
       "source": [
@@ -174,7 +348,7 @@
     },
     {
       "cell_type": "code",
-      "execution_count": null,
+      "execution_count": 6,
       "metadata": {},
       "outputs": [],
       "source": [
@@ -183,18 +357,77 @@
     },
     {
       "cell_type": "code",
-      "execution_count": null,
+      "execution_count": 7,
       "metadata": {},
-      "outputs": [],
+      "outputs": [
+        {
+          "data": {
+            "text/plain": [
+              "(13441, 13)"
+            ]
+          },
+          "execution_count": 7,
+          "metadata": {},
+          "output_type": "execute_result"
+        }
+      ],
       "source": [
         "df.shape"
       ]
     },
     {
       "cell_type": "code",
-      "execution_count": null,
+      "execution_count": 8,
       "metadata": {},
-      "outputs": [],
+      "outputs": [
+        {
+          "data": {
+            "text/plain": [
+              "['Commerce',\n",
+              " 'Marine',\n",
+              " 'Géographie',\n",
+              " 'Histoire',\n",
+              " 'Belles-lettres - Poésie',\n",
+              " 'Economie domestique',\n",
+              " 'Droit - Jurisprudence',\n",
+              " 'Médecine - Chirurgie',\n",
+              " 'Militaire (Art) - Guerre - Arme',\n",
+              " 'Beaux-arts',\n",
+              " 'Antiquité',\n",
+              " 'Histoire naturelle',\n",
+              " 'Grammaire',\n",
+              " 'Philosophie',\n",
+              " 'Arts et métiers',\n",
+              " 'Pharmacie',\n",
+              " 'Religion',\n",
+              " 'Pêche',\n",
+              " 'Anatomie',\n",
+              " 'Architecture',\n",
+              " 'Musique',\n",
+              " 'Jeu',\n",
+              " 'Caractères',\n",
+              " 'Métiers',\n",
+              " 'Physique - [Sciences physico-mathématiques]',\n",
+              " 'Maréchage - Manège',\n",
+              " 'Chimie',\n",
+              " 'Blason',\n",
+              " 'Chasse',\n",
+              " 'Mathématiques',\n",
+              " 'Médailles',\n",
+              " 'Superstition',\n",
+              " 'Agriculture - Economie rustique',\n",
+              " 'Mesure',\n",
+              " 'Monnaie',\n",
+              " 'Minéralogie',\n",
+              " 'Politique',\n",
+              " 'Spectacle']"
+            ]
+          },
+          "execution_count": 8,
+          "metadata": {},
+          "output_type": "execute_result"
+        }
+      ],
       "source": [
         "classes = df[column_class].unique().tolist()\n",
         "classes"
@@ -218,7 +451,7 @@
     },
     {
       "cell_type": "code",
-      "execution_count": null,
+      "execution_count": 9,
       "metadata": {},
       "outputs": [],
       "source": [
@@ -259,7 +492,7 @@
     },
     {
       "cell_type": "code",
-      "execution_count": null,
+      "execution_count": 10,
       "metadata": {},
       "outputs": [],
       "source": [
@@ -318,15 +551,7 @@
       "metadata": {},
       "outputs": [],
       "source": [
-        "df[column_text].tolist()[0]"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {},
-      "outputs": [],
-      "source": [
+        "# test\n",
         "premise = df[column_text].tolist()[0]\n",
         "\n",
         "true_probs = zero_shot_prediction(premise, classes)\n",
-- 
GitLab