From 3c9033281ca80b6b0ffbf9e09602264fc46a2a06 Mon Sep 17 00:00:00 2001
From: Ludovic Moncla <moncla.ludovic@gmail.com>
Date: Fri, 18 Nov 2022 10:06:03 +0100
Subject: [PATCH] Update Classification_BertFineTuning.ipynb

---
 notebooks/Classification_BertFineTuning.ipynb | 635 +-----------------
 1 file changed, 25 insertions(+), 610 deletions(-)

diff --git a/notebooks/Classification_BertFineTuning.ipynb b/notebooks/Classification_BertFineTuning.ipynb
index 6e533e7..784f96c 100644
--- a/notebooks/Classification_BertFineTuning.ipynb
+++ b/notebooks/Classification_BertFineTuning.ipynb
@@ -67,7 +67,7 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 1,
+      "execution_count": null,
       "metadata": {
         "colab": {
           "base_uri": "https://localhost:8080/"
@@ -75,15 +75,7 @@
         "id": "dPOU-Efhf4ui",
         "outputId": "121dd21e-f98c-483d-d6d1-2838f732a4e2"
       },
-      "outputs": [
-        {
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "We will use the GPU\n"
-          ]
-        }
-      ],
+      "outputs": [],
       "source": [
         "import torch\n",
         "\n",
@@ -139,7 +131,7 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 2,
+      "execution_count": null,
       "metadata": {
         "id": "SkErnwgMMbRj"
       },
@@ -181,7 +173,7 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 3,
+      "execution_count": null,
       "metadata": {
         "id": "WkIVcabUgxIl"
       },
@@ -246,7 +238,7 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 4,
+      "execution_count": null,
       "metadata": {
         "id": "7JEnKknRoClH"
       },
@@ -258,7 +250,7 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 5,
+      "execution_count": null,
       "metadata": {
         "colab": {
           "base_uri": "https://localhost:8080/",
@@ -267,170 +259,7 @@
         "id": "5u1acjunhoxe",
         "outputId": "3038048d-6506-473d-85c9-2d3ebdcc6a72"
       },
-      "outputs": [
-        {
-          "data": {
-            "text/html": [
-              "<div>\n",
-              "<style scoped>\n",
-              "    .dataframe tbody tr th:only-of-type {\n",
-              "        vertical-align: middle;\n",
-              "    }\n",
-              "\n",
-              "    .dataframe tbody tr th {\n",
-              "        vertical-align: top;\n",
-              "    }\n",
-              "\n",
-              "    .dataframe thead th {\n",
-              "        text-align: right;\n",
-              "    }\n",
-              "</style>\n",
-              "<table border=\"1\" class=\"dataframe\">\n",
-              "  <thead>\n",
-              "    <tr style=\"text-align: right;\">\n",
-              "      <th></th>\n",
-              "      <th>volume</th>\n",
-              "      <th>numero</th>\n",
-              "      <th>head</th>\n",
-              "      <th>normClass</th>\n",
-              "      <th>classEDdA</th>\n",
-              "      <th>author</th>\n",
-              "      <th>id_enccre</th>\n",
-              "      <th>domaine_enccre</th>\n",
-              "      <th>ensemble_domaine_enccre</th>\n",
-              "      <th>content</th>\n",
-              "      <th>contentWithoutClass</th>\n",
-              "      <th>firstParagraph</th>\n",
-              "      <th>nb_words</th>\n",
-              "    </tr>\n",
-              "  </thead>\n",
-              "  <tbody>\n",
-              "    <tr>\n",
-              "      <th>0</th>\n",
-              "      <td>15</td>\n",
-              "      <td>5</td>\n",
-              "      <td>SENACULE</td>\n",
-              "      <td>AntiquitÃ© romaine</td>\n",
-              "      <td>Antiq. rom.</td>\n",
-              "      <td>Jaucourt</td>\n",
-              "      <td>v15-4-0</td>\n",
-              "      <td>antiquitÃ©</td>\n",
-              "      <td>AntiquitÃ©</td>\n",
-              "      <td>SENACULE, s. m. (Antiq. rom.) senaculum:\\nlieu...</td>\n",
-              "      <td>senacule s. m.   senaculum \\n lieu oÃ¹ tenoit s...</td>\n",
-              "      <td>senacule s. m.   senaculum \\n lieu oÃ¹ tenoit s...</td>\n",
-              "      <td>91</td>\n",
-              "    </tr>\n",
-              "    <tr>\n",
-              "      <th>1</th>\n",
-              "      <td>8</td>\n",
-              "      <td>3509</td>\n",
-              "      <td>Investir</td>\n",
-              "      <td>Marine</td>\n",
-              "      <td>Marine.</td>\n",
-              "      <td>Le Blond</td>\n",
-              "      <td>v8-2689-1</td>\n",
-              "      <td>marine</td>\n",
-              "      <td>Marine</td>\n",
-              "      <td>Investir, (Marine.) se dit parmi les matelots\\...</td>\n",
-              "      <td>investir   parmi matelot \\n mÃ©diterranÃ©e Ã©chou...</td>\n",
-              "      <td>investir   parmi matelot \\n mÃ©diterranÃ©e Ã©chou...</td>\n",
-              "      <td>30</td>\n",
-              "    </tr>\n",
-              "    <tr>\n",
-              "      <th>2</th>\n",
-              "      <td>2</td>\n",
-              "      <td>3428</td>\n",
-              "      <td>BOYARDS, ou BOJARES, ou BOJARDS</td>\n",
-              "      <td>Histoire moderne</td>\n",
-              "      <td>Hist. mod.</td>\n",
-              "      <td>Mallet</td>\n",
-              "      <td>v2-2041-0</td>\n",
-              "      <td>histoire</td>\n",
-              "      <td>Histoire</td>\n",
-              "      <td>BOYARDS, ou BOJARES, ou BOJARDS, s. m.\\npl. (H...</td>\n",
-              "      <td>boyard bojares bojards s. m. \\n pl   nom donne...</td>\n",
-              "      <td>boyard bojares bojards s. m. \\n pl   nom donne...</td>\n",
-              "      <td>218</td>\n",
-              "    </tr>\n",
-              "    <tr>\n",
-              "      <th>3</th>\n",
-              "      <td>2</td>\n",
-              "      <td>6532</td>\n",
-              "      <td>Cerceau</td>\n",
-              "      <td>Tonnelier</td>\n",
-              "      <td>Tonneliers</td>\n",
-              "      <td>unsigned</td>\n",
-              "      <td>v2-4266-3</td>\n",
-              "      <td>tonnelier</td>\n",
-              "      <td>MÃ©tiers</td>\n",
-              "      <td>Cerceau, c'est un lien de bois qui se plie fac...</td>\n",
-              "      <td>cerceau lien bois plie facilement \\n   servent...</td>\n",
-              "      <td>cerceau lien bois plie facilement \\n   servent...</td>\n",
-              "      <td>229</td>\n",
-              "    </tr>\n",
-              "    <tr>\n",
-              "      <th>4</th>\n",
-              "      <td>8</td>\n",
-              "      <td>1827</td>\n",
-              "      <td>HIERONYMITES, ou HERMITES DE S. JEROME</td>\n",
-              "      <td>unclassified</td>\n",
-              "      <td>unclassified</td>\n",
-              "      <td>unsigned</td>\n",
-              "      <td>v8-1404-0</td>\n",
-              "      <td>histoireecclÃ©siastique</td>\n",
-              "      <td>Religion</td>\n",
-              "      <td>HIERONYMITES, ou HERMITES DE S. JEROME, Voyez ...</td>\n",
-              "      <td>hieronymites hermites s. jerome jeronymites he...</td>\n",
-              "      <td>hieronymites hermites s. jerome jeronymites he...</td>\n",
-              "      <td>34</td>\n",
-              "    </tr>\n",
-              "  </tbody>\n",
-              "</table>\n",
-              "</div>"
-            ],
-            "text/plain": [
-              "   volume  numero                                    head          normClass  \\\n",
-              "0      15       5                                SENACULE  AntiquitÃ© romaine   \n",
-              "1       8    3509                                Investir             Marine   \n",
-              "2       2    3428         BOYARDS, ou BOJARES, ou BOJARDS   Histoire moderne   \n",
-              "3       2    6532                                 Cerceau          Tonnelier   \n",
-              "4       8    1827  HIERONYMITES, ou HERMITES DE S. JEROME       unclassified   \n",
-              "\n",
-              "      classEDdA    author  id_enccre          domaine_enccre  \\\n",
-              "0   Antiq. rom.  Jaucourt    v15-4-0               antiquitÃ©   \n",
-              "1       Marine.  Le Blond  v8-2689-1                  marine   \n",
-              "2    Hist. mod.    Mallet  v2-2041-0                histoire   \n",
-              "3    Tonneliers  unsigned  v2-4266-3               tonnelier   \n",
-              "4  unclassified  unsigned  v8-1404-0  histoireecclÃ©siastique   \n",
-              "\n",
-              "  ensemble_domaine_enccre                                            content  \\\n",
-              "0               AntiquitÃ©  SENACULE, s. m. (Antiq. rom.) senaculum:\\nlieu...   \n",
-              "1                  Marine  Investir, (Marine.) se dit parmi les matelots\\...   \n",
-              "2                Histoire  BOYARDS, ou BOJARES, ou BOJARDS, s. m.\\npl. (H...   \n",
-              "3                 MÃ©tiers  Cerceau, c'est un lien de bois qui se plie fac...   \n",
-              "4                Religion  HIERONYMITES, ou HERMITES DE S. JEROME, Voyez ...   \n",
-              "\n",
-              "                                 contentWithoutClass  \\\n",
-              "0  senacule s. m.   senaculum \\n lieu oÃ¹ tenoit s...   \n",
-              "1  investir   parmi matelot \\n mÃ©diterranÃ©e Ã©chou...   \n",
-              "2  boyard bojares bojards s. m. \\n pl   nom donne...   \n",
-              "3  cerceau lien bois plie facilement \\n   servent...   \n",
-              "4  hieronymites hermites s. jerome jeronymites he...   \n",
-              "\n",
-              "                                      firstParagraph  nb_words  \n",
-              "0  senacule s. m.   senaculum \\n lieu oÃ¹ tenoit s...        91  \n",
-              "1  investir   parmi matelot \\n mÃ©diterranÃ©e Ã©chou...        30  \n",
-              "2  boyard bojares bojards s. m. \\n pl   nom donne...       218  \n",
-              "3  cerceau lien bois plie facilement \\n   servent...       229  \n",
-              "4  hieronymites hermites s. jerome jeronymites he...        34  "
-            ]
-          },
-          "execution_count": 5,
-          "metadata": {},
-          "output_type": "execute_result"
-        }
-      ],
+      "outputs": [],
       "source": [
         "df_train = pd.read_csv(train_path, sep=\"\\t\")\n",
         "df_train.head()"
@@ -438,7 +267,7 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 6,
+      "execution_count": null,
       "metadata": {
         "colab": {
           "base_uri": "https://localhost:8080/"
@@ -446,15 +275,7 @@
         "id": "zj3JDoJNfx1f",
         "outputId": "f1ec1fcf-b287-460a-8110-dbb00091c203"
       },
-      "outputs": [
-        {
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "(46807, 13)\n"
-          ]
-        }
-      ],
+      "outputs": [],
       "source": [
         "print(df_train.shape)"
       ]
@@ -470,7 +291,7 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 24,
+      "execution_count": null,
       "metadata": {
         "id": "I0OrfFsBn4Io"
       },
@@ -515,7 +336,7 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 25,
+      "execution_count": null,
       "metadata": {
         "id": "zrjZvs2dhzAy"
       },
@@ -543,7 +364,7 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 26,
+      "execution_count": null,
       "metadata": {
         "id": "Xt_PhH_6h1_3"
       },
@@ -555,7 +376,7 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 27,
+      "execution_count": null,
       "metadata": {
         "colab": {
           "base_uri": "https://localhost:8080/"
@@ -563,25 +384,7 @@
         "id": "Dq_KF5WAsbpC",
         "outputId": "7925ce5a-4b9e-4147-fdc1-f2916d0e6600"
       },
-      "outputs": [
-        {
-          "data": {
-            "text/plain": [
-              "array(['senacule s. m.   senaculum \\n lieu oÃ¹ tenoit sÃ©nat rome \\n sÃ©nacules endroit oÃ¹ corps illustre assembloit \\n capitole forum \\n porte capÃ¨ne troisieme prÃ¨s temple \\n bellone cirque flaminien empereur hÃ©liogabale bÃ¢tir lieu assemblÃ©e dame \\n lieu appellÃ© senaculum matronarum d. j.',\n",
-              "       'investir   parmi matelot \\n mÃ©diterranÃ©e Ã©chouer toucher \\n cÃ´te banc sable q',\n",
-              "       'boyard bojares bojards s. m. \\n pl   nom donne grand seigneur \\n moscovie becman boyard \\n chez russiens chose haute noblesse \\n pays auteur ajoÃ»te \\n acte public czar nomme boyard \\n avant waivodes waivodes \\n olÃ©arius voyage moscovie \\n grand principal membre conseil Ã©tat \\n moscou magnifiques hÃ´tel \\n obligÃ© suivre prince voyage \\n jour cÃ©rÃ©monie vÃªtus \\n tunique brocard enrichie perle couvert \\n grand bonnet fourrÃ© renard noir \\n prÃ©sident tribunal justice depuis \\n czar pierre ier a tirÃ© russie grossieretÃ© \\n oÃ¹ plongÃ©e a laissÃ© boyard \\n titre noblesse joÃ¼issent grand \\n considÃ©ration paroÃ®t ayent grand \\n part gouvernement gramme',\n",
-              "       ...,\n",
-              "       'salta gÃ©og mod ville ouverte amÃ©rique mÃ©ridional tueman petit riviere \\n midi s. salvador 15 lieue estreco quoique ville petit commerce \\n beaucoup avantageusement pÃ©rou blÃ© \\n farine bÃ©tail vin chair salÃ©e c. latit \\n mÃ©ridional 24 56 d. j.',\n",
-              "       'hydrochoos s. m.   constellation \\n nomme latin aquarius franÃ§ois \\n verseau signe zodiaque \\n composÃ© trente Ã©toile soleil \\n moi janvier tire nom grec latin \\n ordinairement pluvieux grece \\n italie nom franÃ§ois rÃ©pond \\n idÃ©e verseau d.',\n",
-              "       \"bois chauffage bois chauffage \\n flottÃ© marchand bois \\n embarquent port rivieres navigables \\n bois amenÃ©s charroi empilent \\n ensuite thÃ©atre voit \\n port place ville pari a accordÃ© \\n usage chantier sorte marchand \\n font guere tiers provision \\n ville c. \\n marchand bois flottÃ© font \\n venir bois province Ã©loignÃ©es \\n jettent abord bois perdu ruisseau entrent \\n rivieres lesquelles commerce \\n Ã©tabli ensuite mÃªme rivieres amenent \\n mÃªme encore bois perdu endroit \\n oÃ¹ possible mettre train conduire \\n pari nÃ©anmoins avoir rÃ©tirÃ©s \\n eau avant flotter train avoir \\n sÃ©cher suffisamment sans quoi bois iroit fond \\n marchand font tiers provision \\n a quelques siecles apprÃ©hension \\n pari manquÃ¢t jour bois chauffage forÃªt environs dÃ©truisoient \\n prÃ©voyoit jour faudroit transporter bois \\n province Ã©loignÃ©es rendroit marchandise \\n utile usage gÃ©nÃ©ral prix \\n exorbitant occasionnÃ© coÃ»t charroi \\n demandÃ© alors plÃ»part sentent \\n moins aujourd'hui mÃ©rite invention \\n flottage bois comment pourroit remÃ©dier \\n terrible inconvÃ©nient menacÃ© \\n auroient crois bien embarrassÃ©s accroissement \\n entretien forÃªt \\n apparence unique ressource effet \\n moyen long coÃ»teux pÃ©nibles rÃ©duisit \\n alors prudence gouvernement capitale \\n\\n\\n  point devenir beaucoup moins \\n habitÃ©e chÃ©retÃ© bois nommÃ© jean \\n rouvet bourgeois pari imagina 1549 rassembler \\n eau plusieurs ruisseau rivieres \\n non navigables jetter bois coupÃ© \\n forÃªt Ã©loignÃ©es descendre \\n grand rivieres lÃ  former train \\n amener flot sans bateau pari ose assÃ»rer invention utile \\n royaume plusieurs bataille gagnÃ©es mÃ©ritoit \\n honneur autant moins aucune beau action \\n jean rouvet premier essai morvant \\n rassembla ruisseau contrÃ©e \\n couper bois abandonna hardiment courant \\n eau rÃ©ussit projet traitÃ© folie \\n avant exÃ©cution traversÃ© succÃ¨s \\n coÃ»tume portÃ© perfection \\n reÃ§ut 
Ã©tendue susceptible \\n 1566 renÃ© arnoul article \\n train maniere construire voyent \\n arriver pari longue masse bois effrayÃ©s \\n conduisent approche \\n pont a guere remontent \\n Ã©tendue vÃ»es intrÃ©piditÃ© premier inventeur \\n osa rassembler eau grand frai \\n jetter ensuite reste fortune \\n marchand bois flottÃ© un \\n bourgeois forain a beaucoup \\n bourgeois forain fassent commerce \\n bois vient pays amont contraire \\n a beaucoup forain bourgeois \\n fassent commerce pays aval \\n concerne bois chauffage rÃ©duit \\n faÃ§on tems tirer vente voiture \\n dÃ©chargeage diligence voiture \\n arrivÃ©e vente chantier \\n officier veillent \\n faÃ§on enjoint donner bois \\n brÃ»ler piÃ©s demi longueur bois \\n moule dix-huit pouce tour bois corde \\n quartier traverse autant bois quartier \\n traverse fendu a dix-huit pouce tour \\n mesure moule a va \\n bois corde membrure bois taillis \\n avoir pouce tour bois andelle a \\n grosseur court a piÃ©s \\n demi environ \\n sortie vente marchand tenu \\n couper sortir bois vente tems \\n fixÃ© Ã©gard lieux \\n qualitÃ© arpens \\n voiture permis voiturer depuis forÃªt \\n rivieres travers terre \\n avertissant jour auparavant publication \\n prÃ´ne jetter bois rivieres \\n pousser ruisseau Ã©tang fossÃ© chÃ¢teau \\n c. 
sans puissent empÃªchÃ©s \\n \\n diligence dÃ©fendu sÃ©journer chemin \\n sans nÃ©cessitÃ© dÃ©charger ailleurs pari \\n vente enjoint mettre chantier \\n peuvent vendu ailleurs \\n officier ville commet personne \\n veiller distribution diffÃ©rence \\n a bois chauffage flotte \\n tire taille voiture mesure \\n relativement taille distribue gros bois \\n menu bois voiture bois bois \\n flottÃ© mesure bois moule compte \\n bois corde \\n gros bois compris sou nom gÃ©nÃ©rique \\n bÃ»che chaque bÃ»che bois \\n avoir dÃ©jÃ  \\n piÃ©s demi long \\n grosse bÃ»che nommÃ©es bois moule moulure compte mesurent \\n moule anneau anneau \\n doivent avoir dix-huit pouce tour \\n bois traverse suit immÃ©diatement grosseur \\n bois compte moule avoir dix-sept \\n pouce tour a comprennent sou \\n dÃ©nomination bois blanc \\n appelle bois taillis a \\n pouce tour \\n bois corde avoir moins dix-sept pouce \\n appellÃ© bois corde bucherons plantent corde pieu quarrÃ© \\n cÃ´tÃ© a piÃ©s chaque pieu a piÃ©s \\n haut -lÃ  mesure corde contient \\n voit foi 64 256 piÃ©s cube bois \\n mÃ©thode mesurer bois a durÃ© \\n 1641 ordonnÃ© servir membrure \\n charpente retint nom corde \\n corde membrure \\n menu bois coteret fagot bourrÃ©e \\n a coterets bois taillis fendu coterets bois rond \\n ci viennent yonne doivent \\n avoir un piÃ©s long dixsept \\n dix-huit pouce tour \\n fagot fait branche arbre menues \\n doivent avoir piÃ©s demi long dixsept \\n dix-huit pouce tour \\n bourrÃ©e espece fagot faite \\n brossailles Ã©pine ronce c. \\n voici encore quelques dÃ©nomination donne \\n bois chauffage\"],\n",
-              "      dtype=object)"
-            ]
-          },
-          "execution_count": 27,
-          "metadata": {},
-          "output_type": "execute_result"
-        }
-      ],
+      "outputs": [],
       "source": [
         "sentences_train"
       ]
@@ -598,7 +401,7 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 28,
+      "execution_count": null,
       "metadata": {
         "id": "YZ5PhEYZiCEA"
       },
@@ -614,7 +417,7 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 29,
+      "execution_count": null,
       "metadata": {
         "colab": {
           "base_uri": "https://localhost:8080/",
@@ -658,15 +461,7 @@
         "id": "C4bigx_3ibuN",
         "outputId": "ebcca5ee-85d8-4525-c9ad-9fc3b5c1741d"
       },
-      "outputs": [
-        {
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "Loading BERT tokenizer...\n"
-          ]
-        }
-      ],
+      "outputs": [],
       "source": [
         "# Load the BERT tokenizer.\n",
         "if model_chosen == \"bert\":\n",
@@ -679,7 +474,7 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 30,
+      "execution_count": null,
       "metadata": {
         "colab": {
           "base_uri": "https://localhost:8080/"
@@ -687,15 +482,7 @@
         "id": "5hNod5X9jDZN",
         "outputId": "bca0db0e-7463-40cd-8052-1712965c7a95"
       },
-      "outputs": [
-        {
-          "name": "stderr",
-          "output_type": "stream",
-          "text": [
-            "Token indices sequence length is longer than the specified maximum sequence length for this model (667 > 512). Running this sequence through the model will result in indexing errors\n"
-          ]
-        }
-      ],
+      "outputs": [],
       "source": [
         " # Tokenize all of the sentences and map the tokens to thier word IDs.\n",
         "input_ids_train = []\n",
@@ -733,15 +520,7 @@
         "id": "W9EWv5JvjGH3",
         "outputId": "dde87708-7bcb-47c7-af71-2ec2b2e0c2db"
       },
-      "outputs": [
-        {
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "Max sentence length train:  45443\n"
-          ]
-        }
-      ],
+      "outputs": [],
       "source": [
         "print('Max sentence length train: ', max([len(sen) for sen in input_ids_train]))"
       ]
@@ -876,337 +655,7 @@
         "id": "C7M2Er1ajsTf",
         "outputId": "151034cd-9a77-413e-a61e-561c97b4072e"
       },
-      "outputs": [
-        {
-          "name": "stderr",
-          "output_type": "stream",
-          "text": [
-            "Some weights of the model checkpoint at bert-base-multilingual-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.seq_relationship.bias']\n",
-            "- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
-            "- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
-            "Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']\n",
-            "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
-          ]
-        },
-        {
-          "data": {
-            "text/plain": [
-              "BertForSequenceClassification(\n",
-              "  (bert): BertModel(\n",
-              "    (embeddings): BertEmbeddings(\n",
-              "      (word_embeddings): Embedding(119547, 768, padding_idx=0)\n",
-              "      (position_embeddings): Embedding(512, 768)\n",
-              "      (token_type_embeddings): Embedding(2, 768)\n",
-              "      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
-              "      (dropout): Dropout(p=0.1, inplace=False)\n",
-              "    )\n",
-              "    (encoder): BertEncoder(\n",
-              "      (layer): ModuleList(\n",
-              "        (0): BertLayer(\n",
-              "          (attention): BertAttention(\n",
-              "            (self): BertSelfAttention(\n",
-              "              (query): Linear(in_features=768, out_features=768, bias=True)\n",
-              "              (key): Linear(in_features=768, out_features=768, bias=True)\n",
-              "              (value): Linear(in_features=768, out_features=768, bias=True)\n",
-              "              (dropout): Dropout(p=0.1, inplace=False)\n",
-              "            )\n",
-              "            (output): BertSelfOutput(\n",
-              "              (dense): Linear(in_features=768, out_features=768, bias=True)\n",
-              "              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
-              "              (dropout): Dropout(p=0.1, inplace=False)\n",
-              "            )\n",
-              "          )\n",
-              "          (intermediate): BertIntermediate(\n",
-              "            (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
-              "            (intermediate_act_fn): GELUActivation()\n",
-              "          )\n",
-              "          (output): BertOutput(\n",
-              "            (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
-              "            (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
-              "            (dropout): Dropout(p=0.1, inplace=False)\n",
-              "          )\n",
-              "        )\n",
-              "        (1): BertLayer(\n",
-              "          (attention): BertAttention(\n",
-              "            (self): BertSelfAttention(\n",
-              "              (query): Linear(in_features=768, out_features=768, bias=True)\n",
-              "              (key): Linear(in_features=768, out_features=768, bias=True)\n",
-              "              (value): Linear(in_features=768, out_features=768, bias=True)\n",
-              "              (dropout): Dropout(p=0.1, inplace=False)\n",
-              "            )\n",
-              "            (output): BertSelfOutput(\n",
-              "              (dense): Linear(in_features=768, out_features=768, bias=True)\n",
-              "              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
-              "              (dropout): Dropout(p=0.1, inplace=False)\n",
-              "            )\n",
-              "          )\n",
-              "          (intermediate): BertIntermediate(\n",
-              "            (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
-              "            (intermediate_act_fn): GELUActivation()\n",
-              "          )\n",
-              "          (output): BertOutput(\n",
-              "            (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
-              "            (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
-              "            (dropout): Dropout(p=0.1, inplace=False)\n",
-              "          )\n",
-              "        )\n",
-              "        (2): BertLayer(\n",
-              "          (attention): BertAttention(\n",
-              "            (self): BertSelfAttention(\n",
-              "              (query): Linear(in_features=768, out_features=768, bias=True)\n",
-              "              (key): Linear(in_features=768, out_features=768, bias=True)\n",
-              "              (value): Linear(in_features=768, out_features=768, bias=True)\n",
-              "              (dropout): Dropout(p=0.1, inplace=False)\n",
-              "            )\n",
-              "            (output): BertSelfOutput(\n",
-              "              (dense): Linear(in_features=768, out_features=768, bias=True)\n",
-              "              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
-              "              (dropout): Dropout(p=0.1, inplace=False)\n",
-              "            )\n",
-              "          )\n",
-              "          (intermediate): BertIntermediate(\n",
-              "            (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
-              "            (intermediate_act_fn): GELUActivation()\n",
-              "          )\n",
-              "          (output): BertOutput(\n",
-              "            (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
-              "            (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
-              "            (dropout): Dropout(p=0.1, inplace=False)\n",
-              "          )\n",
-              "        )\n",
-              "        (3): BertLayer(\n",
-              "          (attention): BertAttention(\n",
-              "            (self): BertSelfAttention(\n",
-              "              (query): Linear(in_features=768, out_features=768, bias=True)\n",
-              "              (key): Linear(in_features=768, out_features=768, bias=True)\n",
-              "              (value): Linear(in_features=768, out_features=768, bias=True)\n",
-              "              (dropout): Dropout(p=0.1, inplace=False)\n",
-              "            )\n",
-              "            (output): BertSelfOutput(\n",
-              "              (dense): Linear(in_features=768, out_features=768, bias=True)\n",
-              "              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
-              "              (dropout): Dropout(p=0.1, inplace=False)\n",
-              "            )\n",
-              "          )\n",
-              "          (intermediate): BertIntermediate(\n",
-              "            (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
-              "            (intermediate_act_fn): GELUActivation()\n",
-              "          )\n",
-              "          (output): BertOutput(\n",
-              "            (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
-              "            (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
-              "            (dropout): Dropout(p=0.1, inplace=False)\n",
-              "          )\n",
-              "        )\n",
-              "        (4): BertLayer(\n",
-              "          (attention): BertAttention(\n",
-              "            (self): BertSelfAttention(\n",
-              "              (query): Linear(in_features=768, out_features=768, bias=True)\n",
-              "              (key): Linear(in_features=768, out_features=768, bias=True)\n",
-              "              (value): Linear(in_features=768, out_features=768, bias=True)\n",
-              "              (dropout): Dropout(p=0.1, inplace=False)\n",
-              "            )\n",
-              "            (output): BertSelfOutput(\n",
-              "              (dense): Linear(in_features=768, out_features=768, bias=True)\n",
-              "              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
-              "              (dropout): Dropout(p=0.1, inplace=False)\n",
-              "            )\n",
-              "          )\n",
-              "          (intermediate): BertIntermediate(\n",
-              "            (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
-              "            (intermediate_act_fn): GELUActivation()\n",
-              "          )\n",
-              "          (output): BertOutput(\n",
-              "            (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
-              "            (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
-              "            (dropout): Dropout(p=0.1, inplace=False)\n",
-              "          )\n",
-              "        )\n",
-              "        (5): BertLayer(\n",
-              "          (attention): BertAttention(\n",
-              "            (self): BertSelfAttention(\n",
-              "              (query): Linear(in_features=768, out_features=768, bias=True)\n",
-              "              (key): Linear(in_features=768, out_features=768, bias=True)\n",
-              "              (value): Linear(in_features=768, out_features=768, bias=True)\n",
-              "              (dropout): Dropout(p=0.1, inplace=False)\n",
-              "            )\n",
-              "            (output): BertSelfOutput(\n",
-              "              (dense): Linear(in_features=768, out_features=768, bias=True)\n",
-              "              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
-              "              (dropout): Dropout(p=0.1, inplace=False)\n",
-              "            )\n",
-              "          )\n",
-              "          (intermediate): BertIntermediate(\n",
-              "            (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
-              "            (intermediate_act_fn): GELUActivation()\n",
-              "          )\n",
-              "          (output): BertOutput(\n",
-              "            (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
-              "            (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
-              "            (dropout): Dropout(p=0.1, inplace=False)\n",
-              "          )\n",
-              "        )\n",
-              "        (6): BertLayer(\n",
-              "          (attention): BertAttention(\n",
-              "            (self): BertSelfAttention(\n",
-              "              (query): Linear(in_features=768, out_features=768, bias=True)\n",
-              "              (key): Linear(in_features=768, out_features=768, bias=True)\n",
-              "              (value): Linear(in_features=768, out_features=768, bias=True)\n",
-              "              (dropout): Dropout(p=0.1, inplace=False)\n",
-              "            )\n",
-              "            (output): BertSelfOutput(\n",
-              "              (dense): Linear(in_features=768, out_features=768, bias=True)\n",
-              "              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
-              "              (dropout): Dropout(p=0.1, inplace=False)\n",
-              "            )\n",
-              "          )\n",
-              "          (intermediate): BertIntermediate(\n",
-              "            (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
-              "            (intermediate_act_fn): GELUActivation()\n",
-              "          )\n",
-              "          (output): BertOutput(\n",
-              "            (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
-              "            (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
-              "            (dropout): Dropout(p=0.1, inplace=False)\n",
-              "          )\n",
-              "        )\n",
-              "        (7): BertLayer(\n",
-              "          (attention): BertAttention(\n",
-              "            (self): BertSelfAttention(\n",
-              "              (query): Linear(in_features=768, out_features=768, bias=True)\n",
-              "              (key): Linear(in_features=768, out_features=768, bias=True)\n",
-              "              (value): Linear(in_features=768, out_features=768, bias=True)\n",
-              "              (dropout): Dropout(p=0.1, inplace=False)\n",
-              "            )\n",
-              "            (output): BertSelfOutput(\n",
-              "              (dense): Linear(in_features=768, out_features=768, bias=True)\n",
-              "              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
-              "              (dropout): Dropout(p=0.1, inplace=False)\n",
-              "            )\n",
-              "          )\n",
-              "          (intermediate): BertIntermediate(\n",
-              "            (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
-              "            (intermediate_act_fn): GELUActivation()\n",
-              "          )\n",
-              "          (output): BertOutput(\n",
-              "            (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
-              "            (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
-              "            (dropout): Dropout(p=0.1, inplace=False)\n",
-              "          )\n",
-              "        )\n",
-              "        (8): BertLayer(\n",
-              "          (attention): BertAttention(\n",
-              "            (self): BertSelfAttention(\n",
-              "              (query): Linear(in_features=768, out_features=768, bias=True)\n",
-              "              (key): Linear(in_features=768, out_features=768, bias=True)\n",
-              "              (value): Linear(in_features=768, out_features=768, bias=True)\n",
-              "              (dropout): Dropout(p=0.1, inplace=False)\n",
-              "            )\n",
-              "            (output): BertSelfOutput(\n",
-              "              (dense): Linear(in_features=768, out_features=768, bias=True)\n",
-              "              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
-              "              (dropout): Dropout(p=0.1, inplace=False)\n",
-              "            )\n",
-              "          )\n",
-              "          (intermediate): BertIntermediate(\n",
-              "            (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
-              "            (intermediate_act_fn): GELUActivation()\n",
-              "          )\n",
-              "          (output): BertOutput(\n",
-              "            (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
-              "            (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
-              "            (dropout): Dropout(p=0.1, inplace=False)\n",
-              "          )\n",
-              "        )\n",
-              "        (9): BertLayer(\n",
-              "          (attention): BertAttention(\n",
-              "            (self): BertSelfAttention(\n",
-              "              (query): Linear(in_features=768, out_features=768, bias=True)\n",
-              "              (key): Linear(in_features=768, out_features=768, bias=True)\n",
-              "              (value): Linear(in_features=768, out_features=768, bias=True)\n",
-              "              (dropout): Dropout(p=0.1, inplace=False)\n",
-              "            )\n",
-              "            (output): BertSelfOutput(\n",
-              "              (dense): Linear(in_features=768, out_features=768, bias=True)\n",
-              "              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
-              "              (dropout): Dropout(p=0.1, inplace=False)\n",
-              "            )\n",
-              "          )\n",
-              "          (intermediate): BertIntermediate(\n",
-              "            (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
-              "            (intermediate_act_fn): GELUActivation()\n",
-              "          )\n",
-              "          (output): BertOutput(\n",
-              "            (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
-              "            (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
-              "            (dropout): Dropout(p=0.1, inplace=False)\n",
-              "          )\n",
-              "        )\n",
-              "        (10): BertLayer(\n",
-              "          (attention): BertAttention(\n",
-              "            (self): BertSelfAttention(\n",
-              "              (query): Linear(in_features=768, out_features=768, bias=True)\n",
-              "              (key): Linear(in_features=768, out_features=768, bias=True)\n",
-              "              (value): Linear(in_features=768, out_features=768, bias=True)\n",
-              "              (dropout): Dropout(p=0.1, inplace=False)\n",
-              "            )\n",
-              "            (output): BertSelfOutput(\n",
-              "              (dense): Linear(in_features=768, out_features=768, bias=True)\n",
-              "              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
-              "              (dropout): Dropout(p=0.1, inplace=False)\n",
-              "            )\n",
-              "          )\n",
-              "          (intermediate): BertIntermediate(\n",
-              "            (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
-              "            (intermediate_act_fn): GELUActivation()\n",
-              "          )\n",
-              "          (output): BertOutput(\n",
-              "            (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
-              "            (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
-              "            (dropout): Dropout(p=0.1, inplace=False)\n",
-              "          )\n",
-              "        )\n",
-              "        (11): BertLayer(\n",
-              "          (attention): BertAttention(\n",
-              "            (self): BertSelfAttention(\n",
-              "              (query): Linear(in_features=768, out_features=768, bias=True)\n",
-              "              (key): Linear(in_features=768, out_features=768, bias=True)\n",
-              "              (value): Linear(in_features=768, out_features=768, bias=True)\n",
-              "              (dropout): Dropout(p=0.1, inplace=False)\n",
-              "            )\n",
-              "            (output): BertSelfOutput(\n",
-              "              (dense): Linear(in_features=768, out_features=768, bias=True)\n",
-              "              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
-              "              (dropout): Dropout(p=0.1, inplace=False)\n",
-              "            )\n",
-              "          )\n",
-              "          (intermediate): BertIntermediate(\n",
-              "            (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
-              "            (intermediate_act_fn): GELUActivation()\n",
-              "          )\n",
-              "          (output): BertOutput(\n",
-              "            (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
-              "            (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
-              "            (dropout): Dropout(p=0.1, inplace=False)\n",
-              "          )\n",
-              "        )\n",
-              "      )\n",
-              "    )\n",
-              "    (pooler): BertPooler(\n",
-              "      (dense): Linear(in_features=768, out_features=768, bias=True)\n",
-              "      (activation): Tanh()\n",
-              "    )\n",
-              "  )\n",
-              "  (dropout): Dropout(p=0.1, inplace=False)\n",
-              "  (classifier): Linear(in_features=768, out_features=38, bias=True)\n",
-              ")"
-            ]
-          },
-          "execution_count": 20,
-          "metadata": {},
-          "output_type": "execute_result"
-        }
-      ],
+      "outputs": [],
       "source": [
         "# Load BertForSequenceClassification, the pretrained BERT model with a single \n",
         "# linear classification layer on top.\n",
@@ -1240,16 +689,7 @@
       "metadata": {
         "id": "xd_cG-8pj4Iw"
       },
-      "outputs": [
-        {
-          "name": "stderr",
-          "output_type": "stream",
-          "text": [
-            "/opt/homebrew/Caskroom/miniforge/base/envs/geode-classification-py39/lib/python3.9/site-packages/transformers/optimization.py:306: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
-            "  warnings.warn(\n"
-          ]
-        }
-      ],
+      "outputs": [],
       "source": [
         "#Note: AdamW is a class from the huggingface library (as opposed to pytorch) \n",
         "# I believe the 'W' stands for 'Weight Decay fix\"\n",
@@ -1289,31 +729,7 @@
         "id": "SbHBbYpwkKaA",
         "outputId": "4cd1be4a-6014-4804-df56-f38e98039797"
       },
-      "outputs": [
-        {
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "\n",
-            "======== Epoch 1 / 4 ========\n",
-            "Training...\n"
-          ]
-        },
-        {
-          "ename": "KeyboardInterrupt",
-          "evalue": "",
-          "output_type": "error",
-          "traceback": [
-            "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
-            "\u001b[0;31mKeyboardInterrupt\u001b[0m                         Traceback (most recent call last)",
-            "Cell \u001b[0;32mIn [23], line 92\u001b[0m\n\u001b[1;32m     88\u001b[0m loss\u001b[39m.\u001b[39mbackward()\n\u001b[1;32m     90\u001b[0m \u001b[39m# Clip the norm of the gradients to 1.0.\u001b[39;00m\n\u001b[1;32m     91\u001b[0m \u001b[39m# This is to help prevent the \"exploding gradients\" problem.\u001b[39;00m\n\u001b[0;32m---> 92\u001b[0m torch\u001b[39m.\u001b[39;49mnn\u001b[39m.\u001b[39;49mutils\u001b[39m.\u001b[39;49mclip_grad_norm_(model\u001b[39m.\u001b[39;49mparameters(), \u001b[39m1.0\u001b[39;49m)\n\u001b[1;32m     94\u001b[0m \u001b[39m# Update parameters and take a step using the computed gradient.\u001b[39;00m\n\u001b[1;32m     95\u001b[0m \u001b[39m# The optimizer dictates the \"update rule\"--how the parameters are\u001b[39;00m\n\u001b[1;32m     96\u001b[0m \u001b[39m# modified based on their gradients, the learning rate, etc.\u001b[39;00m\n\u001b[1;32m     97\u001b[0m optimizer\u001b[39m.\u001b[39mstep()\n",
-            "File \u001b[0;32m/opt/homebrew/Caskroom/miniforge/base/envs/geode-classification-py39/lib/python3.9/site-packages/torch/nn/utils/clip_grad.py:43\u001b[0m, in \u001b[0;36mclip_grad_norm_\u001b[0;34m(parameters, max_norm, norm_type, error_if_nonfinite)\u001b[0m\n\u001b[1;32m     41\u001b[0m     total_norm \u001b[39m=\u001b[39m norms[\u001b[39m0\u001b[39m] \u001b[39mif\u001b[39;00m \u001b[39mlen\u001b[39m(norms) \u001b[39m==\u001b[39m \u001b[39m1\u001b[39m \u001b[39melse\u001b[39;00m torch\u001b[39m.\u001b[39mmax(torch\u001b[39m.\u001b[39mstack(norms))\n\u001b[1;32m     42\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[0;32m---> 43\u001b[0m     total_norm \u001b[39m=\u001b[39m torch\u001b[39m.\u001b[39mnorm(torch\u001b[39m.\u001b[39mstack([torch\u001b[39m.\u001b[39mnorm(g\u001b[39m.\u001b[39mdetach(), norm_type)\u001b[39m.\u001b[39mto(device) \u001b[39mfor\u001b[39;00m g \u001b[39min\u001b[39;00m grads]), norm_type)\n\u001b[1;32m     44\u001b[0m \u001b[39mif\u001b[39;00m error_if_nonfinite \u001b[39mand\u001b[39;00m torch\u001b[39m.\u001b[39mlogical_or(total_norm\u001b[39m.\u001b[39misnan(), total_norm\u001b[39m.\u001b[39misinf()):\n\u001b[1;32m     45\u001b[0m     \u001b[39mraise\u001b[39;00m \u001b[39mRuntimeError\u001b[39;00m(\n\u001b[1;32m     46\u001b[0m         \u001b[39mf\u001b[39m\u001b[39m'\u001b[39m\u001b[39mThe total norm of order \u001b[39m\u001b[39m{\u001b[39;00mnorm_type\u001b[39m}\u001b[39;00m\u001b[39m for gradients from \u001b[39m\u001b[39m'\u001b[39m\n\u001b[1;32m     47\u001b[0m         \u001b[39m'\u001b[39m\u001b[39m`parameters` is non-finite, so it cannot be clipped. To disable \u001b[39m\u001b[39m'\u001b[39m\n\u001b[1;32m     48\u001b[0m         \u001b[39m'\u001b[39m\u001b[39mthis error and scale the gradients by the non-finite norm anyway, \u001b[39m\u001b[39m'\u001b[39m\n\u001b[1;32m     49\u001b[0m         \u001b[39m'\u001b[39m\u001b[39mset `error_if_nonfinite=False`\u001b[39m\u001b[39m'\u001b[39m)\n",
-            "File \u001b[0;32m/opt/homebrew/Caskroom/miniforge/base/envs/geode-classification-py39/lib/python3.9/site-packages/torch/nn/utils/clip_grad.py:43\u001b[0m, in \u001b[0;36m<listcomp>\u001b[0;34m(.0)\u001b[0m\n\u001b[1;32m     41\u001b[0m     total_norm \u001b[39m=\u001b[39m norms[\u001b[39m0\u001b[39m] \u001b[39mif\u001b[39;00m \u001b[39mlen\u001b[39m(norms) \u001b[39m==\u001b[39m \u001b[39m1\u001b[39m \u001b[39melse\u001b[39;00m torch\u001b[39m.\u001b[39mmax(torch\u001b[39m.\u001b[39mstack(norms))\n\u001b[1;32m     42\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[0;32m---> 43\u001b[0m     total_norm \u001b[39m=\u001b[39m torch\u001b[39m.\u001b[39mnorm(torch\u001b[39m.\u001b[39mstack([torch\u001b[39m.\u001b[39;49mnorm(g\u001b[39m.\u001b[39;49mdetach(), norm_type)\u001b[39m.\u001b[39mto(device) \u001b[39mfor\u001b[39;00m g \u001b[39min\u001b[39;00m grads]), norm_type)\n\u001b[1;32m     44\u001b[0m \u001b[39mif\u001b[39;00m error_if_nonfinite \u001b[39mand\u001b[39;00m torch\u001b[39m.\u001b[39mlogical_or(total_norm\u001b[39m.\u001b[39misnan(), total_norm\u001b[39m.\u001b[39misinf()):\n\u001b[1;32m     45\u001b[0m     \u001b[39mraise\u001b[39;00m \u001b[39mRuntimeError\u001b[39;00m(\n\u001b[1;32m     46\u001b[0m         \u001b[39mf\u001b[39m\u001b[39m'\u001b[39m\u001b[39mThe total norm of order \u001b[39m\u001b[39m{\u001b[39;00mnorm_type\u001b[39m}\u001b[39;00m\u001b[39m for gradients from \u001b[39m\u001b[39m'\u001b[39m\n\u001b[1;32m     47\u001b[0m         \u001b[39m'\u001b[39m\u001b[39m`parameters` is non-finite, so it cannot be clipped. To disable \u001b[39m\u001b[39m'\u001b[39m\n\u001b[1;32m     48\u001b[0m         \u001b[39m'\u001b[39m\u001b[39mthis error and scale the gradients by the non-finite norm anyway, \u001b[39m\u001b[39m'\u001b[39m\n\u001b[1;32m     49\u001b[0m         \u001b[39m'\u001b[39m\u001b[39mset `error_if_nonfinite=False`\u001b[39m\u001b[39m'\u001b[39m)\n",
-            "File \u001b[0;32m/opt/homebrew/Caskroom/miniforge/base/envs/geode-classification-py39/lib/python3.9/site-packages/torch/functional.py:1485\u001b[0m, in \u001b[0;36mnorm\u001b[0;34m(input, p, dim, keepdim, out, dtype)\u001b[0m\n\u001b[1;32m   1483\u001b[0m     \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39misinstance\u001b[39m(p, \u001b[39mstr\u001b[39m):\n\u001b[1;32m   1484\u001b[0m         _dim \u001b[39m=\u001b[39m [i \u001b[39mfor\u001b[39;00m i \u001b[39min\u001b[39;00m \u001b[39mrange\u001b[39m(ndim)]  \u001b[39m# noqa: C416 TODO: rewrite as list(range(m))\u001b[39;00m\n\u001b[0;32m-> 1485\u001b[0m         \u001b[39mreturn\u001b[39;00m _VF\u001b[39m.\u001b[39;49mnorm(\u001b[39minput\u001b[39;49m, p, dim\u001b[39m=\u001b[39;49m_dim, keepdim\u001b[39m=\u001b[39;49mkeepdim)  \u001b[39m# type: ignore[attr-defined]\u001b[39;00m\n\u001b[1;32m   1487\u001b[0m \u001b[39m# TODO: when https://github.com/pytorch/pytorch/issues/33782 is fixed\u001b[39;00m\n\u001b[1;32m   1488\u001b[0m \u001b[39m# remove the overloads where dim is an int and replace with BraodcastingList1\u001b[39;00m\n\u001b[1;32m   1489\u001b[0m \u001b[39m# and remove next four lines, replace _dim with dim\u001b[39;00m\n\u001b[1;32m   1490\u001b[0m \u001b[39mif\u001b[39;00m dim \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n",
-            "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
-          ]
-        }
-      ],
+      "outputs": [],
       "source": [
         "# This training code is based on the `run_glue.py` script here:\n",
         "# https://github.com/huggingface/transformers/blob/5bfcd0485ece086ebcbed2d008813037968a9e58/examples/run_glue.py#L128\n",
@@ -1447,8 +863,8 @@
       },
       "outputs": [],
       "source": [
-        "name = model_bert+\"_s\"+str(maxOfInstancePerClass)\n",
-        "model_path = path+\"model_\"+name+\".pt\""
+        "name = model_bert + \"_s\" + str(maxOfInstancePerClass)\n",
+        "model_path = path + \"model_\"+name+\".pt\""
       ]
     },
     {
@@ -1469,7 +885,6 @@
       "outputs": [],
       "source": [
         "model.save_pretrained(model_path)\n",
-        "tokenizer.save_pretrained(model_path)\n",
         "#ludo: changement de la façon de sauver le modèle"
       ]
     },
-- 
GitLab