diff --git a/notebooks/Classification_Zero-Shot-Learning.ipynb b/notebooks/Classification_Zero-Shot-Learning.ipynb index 673bab2bdcbf525f967104dfa5e5f466e6db39ac..a42e4f47dfc932dcbb78a454aa256d29542c75f6 100644 --- a/notebooks/Classification_Zero-Shot-Learning.ipynb +++ b/notebooks/Classification_Zero-Shot-Learning.ipynb @@ -123,7 +123,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -138,7 +138,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -146,7 +146,170 @@ "id": "LRKJzWmf3pCg", "outputId": "686c3ef4-8267-4266-95af-7193725aadca" }, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>volume</th>\n", + " <th>numero</th>\n", + " <th>head</th>\n", + " <th>normClass</th>\n", + " <th>classEDdA</th>\n", + " <th>author</th>\n", + " <th>id_enccre</th>\n", + " <th>domaine_enccre</th>\n", + " <th>ensemble_domaine_enccre</th>\n", + " <th>content</th>\n", + " <th>contentWithoutClass</th>\n", + " <th>firstParagraph</th>\n", + " <th>nb_word</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>11</td>\n", + " <td>2973</td>\n", + " <td>ORNIS</td>\n", + " <td>Commerce</td>\n", + " <td>Comm.</td>\n", + " <td>unsigned</td>\n", + " <td>v11-1767-0</td>\n", + " <td>commerce</td>\n", + " <td>Commerce</td>\n", + " <td>ORNIS, s. m. toile des Indes, (Comm.) sortes d...</td>\n", + " <td>ORNIS, s. m. toile des Indes, () sortes de\\nto...</td>\n", + " <td>ORNIS, s. m. toile des Indes, () sortes de\\nto...</td>\n", + " <td>45</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>3</td>\n", + " <td>3525</td>\n", + " <td>COMPRENDRE</td>\n", + " <td>Philosophie</td>\n", + " <td>terme de Philosophie,</td>\n", + " <td>Diderot</td>\n", + " <td>v3-1722-0</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>* COMPRENDRE, v. act. terme de Philosophie,\\nc...</td>\n", + " <td>* COMPRENDRE, v. act. \\nc'est appercevoir la l...</td>\n", + " <td>* COMPRENDRE, v. act. \\nc'est appercevoir la l...</td>\n", + " <td>92</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>1</td>\n", + " <td>2560</td>\n", + " <td>ANCRE</td>\n", + " <td>Marine</td>\n", + " <td>Marine</td>\n", + " <td>d'Alembert & Diderot</td>\n", + " <td>v1-1865-0</td>\n", + " <td>marine</td>\n", + " <td>Marine</td>\n", + " <td>ANCRE, s. f. (Marine.) est un instrument de fe...</td>\n", + " <td>ANCRE, s. f. (.) est un instrument de fer\\nABC...</td>\n", + " <td>ANCRE, s. f. (.) est un instrument de fer\\nABC...</td>\n", + " <td>3327</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>16</td>\n", + " <td>4241</td>\n", + " <td>VAKEBARO</td>\n", + " <td>Géographie moderne</td>\n", + " <td>Géog. mod.</td>\n", + " <td>unsigned</td>\n", + " <td>v16-2587-0</td>\n", + " <td>géographie</td>\n", + " <td>Géographie</td>\n", + " <td>VAKEBARO, (Géog. mod.) vallée du royaume\\nd'Es...</td>\n", + " <td>VAKEBARO, () vallée du royaume\\nd'Espagne dans...</td>\n", + " <td>VAKEBARO, () vallée du royaume\\nd'Espagne dans...</td>\n", + " <td>34</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>8</td>\n", + " <td>3281</td>\n", + " <td>INSPECTEUR</td>\n", + " <td>Histoire ancienne</td>\n", + " <td>Hist. anc.</td>\n", + " <td>unsigned</td>\n", + " <td>v8-2533-0</td>\n", + " <td>histoire</td>\n", + " <td>Histoire</td>\n", + " <td>INSPECTEUR, s. m. inspector ; (Hist. anc.) cel...</td>\n", + " <td>INSPECTEUR, s. m. inspector ; () celui \\nà qui...</td>\n", + " <td>INSPECTEUR, s. m. inspector ; () celui \\nà qui...</td>\n", + " <td>102</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " volume numero head normClass classEDdA \\\n", + "0 11 2973 ORNIS Commerce Comm. \n", + "1 3 3525 COMPRENDRE Philosophie terme de Philosophie, \n", + "2 1 2560 ANCRE Marine Marine \n", + "3 16 4241 VAKEBARO Géographie moderne Géog. mod. \n", + "4 8 3281 INSPECTEUR Histoire ancienne Hist. anc. \n", + "\n", + " author id_enccre domaine_enccre ensemble_domaine_enccre \\\n", + "0 unsigned v11-1767-0 commerce Commerce \n", + "1 Diderot v3-1722-0 NaN NaN \n", + "2 d'Alembert & Diderot v1-1865-0 marine Marine \n", + "3 unsigned v16-2587-0 géographie Géographie \n", + "4 unsigned v8-2533-0 histoire Histoire \n", + "\n", + " content \\\n", + "0 ORNIS, s. m. toile des Indes, (Comm.) sortes d... \n", + "1 * COMPRENDRE, v. act. terme de Philosophie,\\nc... \n", + "2 ANCRE, s. f. (Marine.) est un instrument de fe... \n", + "3 VAKEBARO, (Géog. mod.) vallée du royaume\\nd'Es... \n", + "4 INSPECTEUR, s. m. inspector ; (Hist. anc.) cel... \n", + "\n", + " contentWithoutClass \\\n", + "0 ORNIS, s. m. toile des Indes, () sortes de\\nto... \n", + "1 * COMPRENDRE, v. act. \\nc'est appercevoir la l... \n", + "2 ANCRE, s. f. (.) est un instrument de fer\\nABC... \n", + "3 VAKEBARO, () vallée du royaume\\nd'Espagne dans... \n", + "4 INSPECTEUR, s. m. inspector ; () celui \\nà qui... \n", + "\n", + " firstParagraph nb_word \n", + "0 ORNIS, s. m. toile des Indes, () sortes de\\nto... 45 \n", + "1 * COMPRENDRE, v. act. \\nc'est appercevoir la l... 92 \n", + "2 ANCRE, s. f. (.) est un instrument de fer\\nABC... 3327 \n", + "3 VAKEBARO, () vallée du royaume\\nd'Espagne dans... 34 \n", + "4 INSPECTEUR, s. m. inspector ; () celui \\nà qui... 102 " + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "df = pd.read_csv(input_path + test_set_path, sep=\"\\t\")\n", "df.head()" @@ -154,16 +317,27 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "(15854, 13)" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "df.shape" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -174,7 +348,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ @@ -183,18 +357,77 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "(13441, 13)" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "df.shape" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "['Commerce',\n", + " 'Marine',\n", + " 'Géographie',\n", + " 'Histoire',\n", + " 'Belles-lettres - Poésie',\n", + " 'Economie domestique',\n", + " 'Droit - Jurisprudence',\n", + " 'Médecine - Chirurgie',\n", + " 'Militaire (Art) - Guerre - Arme',\n", + " 'Beaux-arts',\n", + " 'Antiquité',\n", + " 'Histoire naturelle',\n", + " 'Grammaire',\n", + " 'Philosophie',\n", + " 'Arts et métiers',\n", + " 'Pharmacie',\n", + " 'Religion',\n", + " 'Pêche',\n", + " 'Anatomie',\n", + " 'Architecture',\n", + " 'Musique',\n", + " 'Jeu',\n", + " 'Caractères',\n", + " 'Métiers',\n", + " 'Physique - [Sciences physico-mathématiques]',\n", + " 'Maréchage - Manège',\n", + " 'Chimie',\n", + " 'Blason',\n", + " 'Chasse',\n", + " 'Mathématiques',\n", + " 'Médailles',\n", + " 'Superstition',\n", + " 'Agriculture - Economie rustique',\n", + " 'Mesure',\n", + " 'Monnaie',\n", + " 'Minéralogie',\n", + " 'Politique',\n", + " 'Spectacle']" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "classes = df[column_class].unique().tolist()\n", "classes" @@ -218,7 +451,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ @@ -259,7 +492,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "metadata": {}, "outputs": [], "source": [ @@ -318,15 +551,7 @@ "metadata": {}, "outputs": [], "source": [ - "df[column_text].tolist()[0]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ + "# test\n", "premise = df[column_text].tolist()[0]\n", "\n", "true_probs = zero_shot_prediction(premise, classes)\n",