From 3c9033281ca80b6b0ffbf9e09602264fc46a2a06 Mon Sep 17 00:00:00 2001 From: Ludovic Moncla <moncla.ludovic@gmail.com> Date: Fri, 18 Nov 2022 10:06:03 +0100 Subject: [PATCH] Update Classification_BertFineTuning.ipynb --- notebooks/Classification_BertFineTuning.ipynb | 635 +----------------- 1 file changed, 25 insertions(+), 610 deletions(-) diff --git a/notebooks/Classification_BertFineTuning.ipynb b/notebooks/Classification_BertFineTuning.ipynb index 6e533e7..784f96c 100644 --- a/notebooks/Classification_BertFineTuning.ipynb +++ b/notebooks/Classification_BertFineTuning.ipynb @@ -67,7 +67,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -75,15 +75,7 @@ "id": "dPOU-Efhf4ui", "outputId": "121dd21e-f98c-483d-d6d1-2838f732a4e2" }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "We will use the GPU\n" - ] - } - ], + "outputs": [], "source": [ "import torch\n", "\n", @@ -139,7 +131,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": { "id": "SkErnwgMMbRj" }, @@ -181,7 +173,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": { "id": "WkIVcabUgxIl" }, @@ -246,7 +238,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": { "id": "7JEnKknRoClH" }, @@ -258,7 +250,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -267,170 +259,7 @@ "id": "5u1acjunhoxe", "outputId": "3038048d-6506-473d-85c9-2d3ebdcc6a72" }, - "outputs": [ - { - "data": { - "text/html": [ - "<div>\n", - "<style scoped>\n", - " .dataframe tbody tr th:only-of-type {\n", - " vertical-align: middle;\n", - " }\n", - "\n", - " .dataframe tbody tr th {\n", - " vertical-align: top;\n", - " }\n", - "\n", - " .dataframe thead th {\n", - " text-align: right;\n", - " }\n", - "</style>\n", - "<table border=\"1\" class=\"dataframe\">\n", - " <thead>\n", - " <tr style=\"text-align: right;\">\n", - " <th></th>\n", - " <th>volume</th>\n", - " <th>numero</th>\n", - " <th>head</th>\n", - " <th>normClass</th>\n", - " <th>classEDdA</th>\n", - " <th>author</th>\n", - " <th>id_enccre</th>\n", - " <th>domaine_enccre</th>\n", - " <th>ensemble_domaine_enccre</th>\n", - " <th>content</th>\n", - " <th>contentWithoutClass</th>\n", - " <th>firstParagraph</th>\n", - " <th>nb_words</th>\n", - " </tr>\n", - " </thead>\n", - " <tbody>\n", - " <tr>\n", - " <th>0</th>\n", - " <td>15</td>\n", - " <td>5</td>\n", - " <td>SENACULE</td>\n", - " <td>Antiquité romaine</td>\n", - " <td>Antiq. rom.</td>\n", - " <td>Jaucourt</td>\n", - " <td>v15-4-0</td>\n", - " <td>antiquité</td>\n", - " <td>Antiquité</td>\n", - " <td>SENACULE, s. m. (Antiq. rom.) senaculum:\\nlieu...</td>\n", - " <td>senacule s. m. senaculum \\n lieu où tenoit s...</td>\n", - " <td>senacule s. m. senaculum \\n lieu où tenoit s...</td>\n", - " <td>91</td>\n", - " </tr>\n", - " <tr>\n", - " <th>1</th>\n", - " <td>8</td>\n", - " <td>3509</td>\n", - " <td>Investir</td>\n", - " <td>Marine</td>\n", - " <td>Marine.</td>\n", - " <td>Le Blond</td>\n", - " <td>v8-2689-1</td>\n", - " <td>marine</td>\n", - " <td>Marine</td>\n", - " <td>Investir, (Marine.) se dit parmi les matelots\\...</td>\n", - " <td>investir parmi matelot \\n méditerranée échou...</td>\n", - " <td>investir parmi matelot \\n méditerranée échou...</td>\n", - " <td>30</td>\n", - " </tr>\n", - " <tr>\n", - " <th>2</th>\n", - " <td>2</td>\n", - " <td>3428</td>\n", - " <td>BOYARDS, ou BOJARES, ou BOJARDS</td>\n", - " <td>Histoire moderne</td>\n", - " <td>Hist. mod.</td>\n", - " <td>Mallet</td>\n", - " <td>v2-2041-0</td>\n", - " <td>histoire</td>\n", - " <td>Histoire</td>\n", - " <td>BOYARDS, ou BOJARES, ou BOJARDS, s. m.\\npl. (H...</td>\n", - " <td>boyard bojares bojards s. m. \\n pl nom donne...</td>\n", - " <td>boyard bojares bojards s. m. \\n pl nom donne...</td>\n", - " <td>218</td>\n", - " </tr>\n", - " <tr>\n", - " <th>3</th>\n", - " <td>2</td>\n", - " <td>6532</td>\n", - " <td>Cerceau</td>\n", - " <td>Tonnelier</td>\n", - " <td>Tonneliers</td>\n", - " <td>unsigned</td>\n", - " <td>v2-4266-3</td>\n", - " <td>tonnelier</td>\n", - " <td>Métiers</td>\n", - " <td>Cerceau, c'est un lien de bois qui se plie fac...</td>\n", - " <td>cerceau lien bois plie facilement \\n servent...</td>\n", - " <td>cerceau lien bois plie facilement \\n servent...</td>\n", - " <td>229</td>\n", - " </tr>\n", - " <tr>\n", - " <th>4</th>\n", - " <td>8</td>\n", - " <td>1827</td>\n", - " <td>HIERONYMITES, ou HERMITES DE S. JEROME</td>\n", - " <td>unclassified</td>\n", - " <td>unclassified</td>\n", - " <td>unsigned</td>\n", - " <td>v8-1404-0</td>\n", - " <td>histoireecclésiastique</td>\n", - " <td>Religion</td>\n", - " <td>HIERONYMITES, ou HERMITES DE S. JEROME, Voyez ...</td>\n", - " <td>hieronymites hermites s. jerome jeronymites he...</td>\n", - " <td>hieronymites hermites s. jerome jeronymites he...</td>\n", - " <td>34</td>\n", - " </tr>\n", - " </tbody>\n", - "</table>\n", - "</div>" - ], - "text/plain": [ - " volume numero head normClass \\\n", - "0 15 5 SENACULE Antiquité romaine \n", - "1 8 3509 Investir Marine \n", - "2 2 3428 BOYARDS, ou BOJARES, ou BOJARDS Histoire moderne \n", - "3 2 6532 Cerceau Tonnelier \n", - "4 8 1827 HIERONYMITES, ou HERMITES DE S. JEROME unclassified \n", - "\n", - " classEDdA author id_enccre domaine_enccre \\\n", - "0 Antiq. rom. Jaucourt v15-4-0 antiquité \n", - "1 Marine. Le Blond v8-2689-1 marine \n", - "2 Hist. mod. Mallet v2-2041-0 histoire \n", - "3 Tonneliers unsigned v2-4266-3 tonnelier \n", - "4 unclassified unsigned v8-1404-0 histoireecclésiastique \n", - "\n", - " ensemble_domaine_enccre content \\\n", - "0 Antiquité SENACULE, s. m. (Antiq. rom.) senaculum:\\nlieu... \n", - "1 Marine Investir, (Marine.) se dit parmi les matelots\\... \n", - "2 Histoire BOYARDS, ou BOJARES, ou BOJARDS, s. m.\\npl. (H... \n", - "3 Métiers Cerceau, c'est un lien de bois qui se plie fac... \n", - "4 Religion HIERONYMITES, ou HERMITES DE S. JEROME, Voyez ... \n", - "\n", - " contentWithoutClass \\\n", - "0 senacule s. m. senaculum \\n lieu où tenoit s... \n", - "1 investir parmi matelot \\n méditerranée échou... \n", - "2 boyard bojares bojards s. m. \\n pl nom donne... \n", - "3 cerceau lien bois plie facilement \\n servent... \n", - "4 hieronymites hermites s. jerome jeronymites he... \n", - "\n", - " firstParagraph nb_words \n", - "0 senacule s. m. senaculum \\n lieu où tenoit s... 91 \n", - "1 investir parmi matelot \\n méditerranée échou... 30 \n", - "2 boyard bojares bojards s. m. \\n pl nom donne... 218 \n", - "3 cerceau lien bois plie facilement \\n servent... 229 \n", - "4 hieronymites hermites s. jerome jeronymites he... 34 " - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "df_train = pd.read_csv(train_path, sep=\"\\t\")\n", "df_train.head()" @@ -438,7 +267,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -446,15 +275,7 @@ "id": "zj3JDoJNfx1f", "outputId": "f1ec1fcf-b287-460a-8110-dbb00091c203" }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "(46807, 13)\n" - ] - } - ], + "outputs": [], "source": [ "print(df_train.shape)" ] @@ -470,7 +291,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": null, "metadata": { "id": "I0OrfFsBn4Io" }, @@ -515,7 +336,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": null, "metadata": { "id": "zrjZvs2dhzAy" }, @@ -543,7 +364,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": null, "metadata": { "id": "Xt_PhH_6h1_3" }, @@ -555,7 +376,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -563,25 +384,7 @@ "id": "Dq_KF5WAsbpC", "outputId": "7925ce5a-4b9e-4147-fdc1-f2916d0e6600" }, - "outputs": [ - { - "data": { - "text/plain": [ - "array(['senacule s. m. senaculum \\n lieu où tenoit sénat rome \\n sénacules endroit où corps illustre assembloit \\n capitole forum \\n porte capène troisieme près temple \\n bellone cirque flaminien empereur héliogabale bâtir lieu assemblée dame \\n lieu appellé senaculum matronarum d. j.',\n", - " 'investir parmi matelot \\n méditerranée échouer toucher \\n côte banc sable q',\n", - " 'boyard bojares bojards s. m. \\n pl nom donne grand seigneur \\n moscovie becman boyard \\n chez russiens chose haute noblesse \\n pays auteur ajoûte \\n acte public czar nomme boyard \\n avant waivodes waivodes \\n oléarius voyage moscovie \\n grand principal membre conseil état \\n moscou magnifiques hôtel \\n obligé suivre prince voyage \\n jour cérémonie vêtus \\n tunique brocard enrichie perle couvert \\n grand bonnet fourré renard noir \\n président tribunal justice depuis \\n czar pierre ier a tiré russie grossiereté \\n où plongée a laissé boyard \\n titre noblesse joüissent grand \\n considération paroît ayent grand \\n part gouvernement gramme',\n", - " ...,\n", - " 'salta géog mod ville ouverte amérique méridional tueman petit riviere \\n midi s. salvador 15 lieue estreco quoique ville petit commerce \\n beaucoup avantageusement pérou blé \\n farine bétail vin chair salée c. latit \\n méridional 24 56 d. j.',\n", - " 'hydrochoos s. m. constellation \\n nomme latin aquarius françois \\n verseau signe zodiaque \\n composé trente étoile soleil \\n moi janvier tire nom grec latin \\n ordinairement pluvieux grece \\n italie nom françois répond \\n idée verseau d.',\n", - " \"bois chauffage bois chauffage \\n flotté marchand bois \\n embarquent port rivieres navigables \\n bois amenés charroi empilent \\n ensuite théatre voit \\n port place ville pari a accordé \\n usage chantier sorte marchand \\n font guere tiers provision \\n ville c. \\n marchand bois flotté font \\n venir bois province éloignées \\n jettent abord bois perdu ruisseau entrent \\n rivieres lesquelles commerce \\n établi ensuite même rivieres amenent \\n même encore bois perdu endroit \\n où possible mettre train conduire \\n pari néanmoins avoir rétirés \\n eau avant flotter train avoir \\n sécher suffisamment sans quoi bois iroit fond \\n marchand font tiers provision \\n a quelques siecles appréhension \\n pari manquât jour bois chauffage forêt environs détruisoient \\n prévoyoit jour faudroit transporter bois \\n province éloignées rendroit marchandise \\n utile usage général prix \\n exorbitant occasionné coût charroi \\n demandé alors plûpart sentent \\n moins aujourd'hui mérite invention \\n flottage bois comment pourroit remédier \\n terrible inconvénient menacé \\n auroient crois bien embarrassés accroissement \\n entretien forêt \\n apparence unique ressource effet \\n moyen long coûteux pénibles réduisit \\n alors prudence gouvernement capitale \\n\\n\\n point devenir beaucoup moins \\n habitée chéreté bois nommé jean \\n rouvet bourgeois pari imagina 1549 rassembler \\n eau plusieurs ruisseau rivieres \\n non navigables jetter bois coupé \\n forêt éloignées descendre \\n grand rivieres là former train \\n amener flot sans bateau pari ose assûrer invention utile \\n royaume plusieurs bataille gagnées méritoit \\n honneur autant moins aucune beau action \\n jean rouvet premier essai morvant \\n rassembla ruisseau contrée \\n couper bois abandonna hardiment courant \\n eau réussit projet traité folie \\n avant exécution traversé succès \\n coûtume porté perfection \\n reçut étendue susceptible \\n 1566 rené arnoul article \\n train maniere construire voyent \\n arriver pari longue masse bois effrayés \\n conduisent approche \\n pont a guere remontent \\n étendue vûes intrépidité premier inventeur \\n osa rassembler eau grand frai \\n jetter ensuite reste fortune \\n marchand bois flotté un \\n bourgeois forain a beaucoup \\n bourgeois forain fassent commerce \\n bois vient pays amont contraire \\n a beaucoup forain bourgeois \\n fassent commerce pays aval \\n concerne bois chauffage réduit \\n façon tems tirer vente voiture \\n déchargeage diligence voiture \\n arrivée vente chantier \\n officier veillent \\n façon enjoint donner bois \\n brûler piés demi longueur bois \\n moule dix-huit pouce tour bois corde \\n quartier traverse autant bois quartier \\n traverse fendu a dix-huit pouce tour \\n mesure moule a va \\n bois corde membrure bois taillis \\n avoir pouce tour bois andelle a \\n grosseur court a piés \\n demi environ \\n sortie vente marchand tenu \\n couper sortir bois vente tems \\n fixé égard lieux \\n qualité arpens \\n voiture permis voiturer depuis forêt \\n rivieres travers terre \\n avertissant jour auparavant publication \\n prône jetter bois rivieres \\n pousser ruisseau étang fossé château \\n c. sans puissent empêchés \\n \\n diligence défendu séjourner chemin \\n sans nécessité décharger ailleurs pari \\n vente enjoint mettre chantier \\n peuvent vendu ailleurs \\n officier ville commet personne \\n veiller distribution différence \\n a bois chauffage flotte \\n tire taille voiture mesure \\n relativement taille distribue gros bois \\n menu bois voiture bois bois \\n flotté mesure bois moule compte \\n bois corde \\n gros bois compris sou nom générique \\n bûche chaque bûche bois \\n avoir déjà \\n piés demi long \\n grosse bûche nommées bois moule moulure compte mesurent \\n moule anneau anneau \\n doivent avoir dix-huit pouce tour \\n bois traverse suit immédiatement grosseur \\n bois compte moule avoir dix-sept \\n pouce tour a comprennent sou \\n dénomination bois blanc \\n appelle bois taillis a \\n pouce tour \\n bois corde avoir moins dix-sept pouce \\n appellé bois corde bucherons plantent corde pieu quarré \\n côté a piés chaque pieu a piés \\n haut -là mesure corde contient \\n voit foi 64 256 piés cube bois \\n méthode mesurer bois a duré \\n 1641 ordonné servir membrure \\n charpente retint nom corde \\n corde membrure \\n menu bois coteret fagot bourrée \\n a coterets bois taillis fendu coterets bois rond \\n ci viennent yonne doivent \\n avoir un piés long dixsept \\n dix-huit pouce tour \\n fagot fait branche arbre menues \\n doivent avoir piés demi long dixsept \\n dix-huit pouce tour \\n bourrée espece fagot faite \\n brossailles épine ronce c. \\n voici encore quelques dénomination donne \\n bois chauffage\"],\n", - " dtype=object)" - ] - }, - "execution_count": 27, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "sentences_train" ] @@ -598,7 +401,7 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": null, "metadata": { "id": "YZ5PhEYZiCEA" }, @@ -614,7 +417,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -658,15 +461,7 @@ "id": "C4bigx_3ibuN", "outputId": "ebcca5ee-85d8-4525-c9ad-9fc3b5c1741d" }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Loading BERT tokenizer...\n" - ] - } - ], + "outputs": [], "source": [ "# Load the BERT tokenizer.\n", "if model_chosen == \"bert\":\n", @@ -679,7 +474,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -687,15 +482,7 @@ "id": "5hNod5X9jDZN", "outputId": "bca0db0e-7463-40cd-8052-1712965c7a95" }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Token indices sequence length is longer than the specified maximum sequence length for this model (667 > 512). Running this sequence through the model will result in indexing errors\n" - ] - } - ], + "outputs": [], "source": [ " # Tokenize all of the sentences and map the tokens to thier word IDs.\n", "input_ids_train = []\n", @@ -733,15 +520,7 @@ "id": "W9EWv5JvjGH3", "outputId": "dde87708-7bcb-47c7-af71-2ec2b2e0c2db" }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Max sentence length train: 45443\n" - ] - } - ], + "outputs": [], "source": [ "print('Max sentence length train: ', max([len(sen) for sen in input_ids_train]))" ] @@ -876,337 +655,7 @@ "id": "C7M2Er1ajsTf", "outputId": "151034cd-9a77-413e-a61e-561c97b4072e" }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Some weights of the model checkpoint at bert-base-multilingual-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.seq_relationship.bias']\n", - "- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", - "- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n", - "Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']\n", - "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n" - ] - }, - { - "data": { - "text/plain": [ - "BertForSequenceClassification(\n", - " (bert): BertModel(\n", - " (embeddings): BertEmbeddings(\n", - " (word_embeddings): Embedding(119547, 768, padding_idx=0)\n", - " (position_embeddings): Embedding(512, 768)\n", - " (token_type_embeddings): Embedding(2, 768)\n", - " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n", - " (dropout): Dropout(p=0.1, inplace=False)\n", - " )\n", - " (encoder): BertEncoder(\n", - " (layer): ModuleList(\n", - " (0): BertLayer(\n", - " (attention): BertAttention(\n", - " (self): BertSelfAttention(\n", - " (query): Linear(in_features=768, out_features=768, bias=True)\n", - " (key): Linear(in_features=768, out_features=768, bias=True)\n", - " (value): Linear(in_features=768, out_features=768, bias=True)\n", - " (dropout): Dropout(p=0.1, inplace=False)\n", - " )\n", - " (output): BertSelfOutput(\n", - " (dense): Linear(in_features=768, out_features=768, bias=True)\n", - " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n", - " (dropout): Dropout(p=0.1, inplace=False)\n", - " )\n", - " )\n", - " (intermediate): BertIntermediate(\n", - " (dense): Linear(in_features=768, out_features=3072, bias=True)\n", - " (intermediate_act_fn): GELUActivation()\n", - " )\n", - " (output): BertOutput(\n", - " (dense): Linear(in_features=3072, out_features=768, bias=True)\n", - " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n", - " (dropout): Dropout(p=0.1, inplace=False)\n", - " )\n", - " )\n", - " (1): BertLayer(\n", - " (attention): BertAttention(\n", - " (self): BertSelfAttention(\n", - " (query): Linear(in_features=768, out_features=768, bias=True)\n", - " (key): Linear(in_features=768, out_features=768, bias=True)\n", - " (value): Linear(in_features=768, out_features=768, bias=True)\n", - " (dropout): Dropout(p=0.1, inplace=False)\n", - " )\n", - " (output): BertSelfOutput(\n", - " (dense): Linear(in_features=768, out_features=768, bias=True)\n", - " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n", - " (dropout): Dropout(p=0.1, inplace=False)\n", - " )\n", - " )\n", - " (intermediate): BertIntermediate(\n", - " (dense): Linear(in_features=768, out_features=3072, bias=True)\n", - " (intermediate_act_fn): GELUActivation()\n", - " )\n", - " (output): BertOutput(\n", - " (dense): Linear(in_features=3072, out_features=768, bias=True)\n", - " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n", - " (dropout): Dropout(p=0.1, inplace=False)\n", - " )\n", - " )\n", - " (2): BertLayer(\n", - " (attention): BertAttention(\n", - " (self): BertSelfAttention(\n", - " (query): Linear(in_features=768, out_features=768, bias=True)\n", - " (key): Linear(in_features=768, out_features=768, bias=True)\n", - " (value): Linear(in_features=768, out_features=768, bias=True)\n", - " (dropout): Dropout(p=0.1, inplace=False)\n", - " )\n", - " (output): BertSelfOutput(\n", - " (dense): Linear(in_features=768, out_features=768, bias=True)\n", - " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n", - " (dropout): Dropout(p=0.1, inplace=False)\n", - " )\n", - " )\n", - " (intermediate): BertIntermediate(\n", - " (dense): Linear(in_features=768, out_features=3072, bias=True)\n", - " (intermediate_act_fn): GELUActivation()\n", - " )\n", - " (output): BertOutput(\n", - " (dense): Linear(in_features=3072, out_features=768, bias=True)\n", - " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n", - " (dropout): Dropout(p=0.1, inplace=False)\n", - " )\n", - " )\n", - " (3): BertLayer(\n", - " (attention): BertAttention(\n", - " (self): BertSelfAttention(\n", - " (query): Linear(in_features=768, out_features=768, bias=True)\n", - " (key): Linear(in_features=768, out_features=768, bias=True)\n", - " (value): Linear(in_features=768, out_features=768, bias=True)\n", - " (dropout): Dropout(p=0.1, inplace=False)\n", - " )\n", - " (output): BertSelfOutput(\n", - " (dense): Linear(in_features=768, out_features=768, bias=True)\n", - " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n", - " (dropout): Dropout(p=0.1, inplace=False)\n", - " )\n", - " )\n", - " (intermediate): BertIntermediate(\n", - " (dense): Linear(in_features=768, out_features=3072, bias=True)\n", - " (intermediate_act_fn): GELUActivation()\n", - " )\n", - " (output): BertOutput(\n", - " (dense): Linear(in_features=3072, out_features=768, bias=True)\n", - " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n", - " (dropout): Dropout(p=0.1, inplace=False)\n", - " )\n", - " )\n", - " (4): BertLayer(\n", - " (attention): BertAttention(\n", - " (self): BertSelfAttention(\n", - " (query): Linear(in_features=768, out_features=768, bias=True)\n", - " (key): Linear(in_features=768, out_features=768, bias=True)\n", - " (value): Linear(in_features=768, out_features=768, bias=True)\n", - " (dropout): Dropout(p=0.1, inplace=False)\n", - " )\n", - " (output): BertSelfOutput(\n", - " (dense): Linear(in_features=768, out_features=768, bias=True)\n", - " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n", - " (dropout): Dropout(p=0.1, inplace=False)\n", - " )\n", - " )\n", - " (intermediate): BertIntermediate(\n", - " (dense): Linear(in_features=768, out_features=3072, bias=True)\n", - " (intermediate_act_fn): GELUActivation()\n", - " )\n", - " (output): BertOutput(\n", - " (dense): Linear(in_features=3072, out_features=768, bias=True)\n", - " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n", - " (dropout): Dropout(p=0.1, inplace=False)\n", - " )\n", - " )\n", - " (5): BertLayer(\n", - " (attention): BertAttention(\n", - " (self): BertSelfAttention(\n", - " (query): Linear(in_features=768, out_features=768, bias=True)\n", - " (key): Linear(in_features=768, out_features=768, bias=True)\n", - " (value): Linear(in_features=768, out_features=768, bias=True)\n", - " (dropout): Dropout(p=0.1, inplace=False)\n", - " )\n", - " (output): BertSelfOutput(\n", - " (dense): Linear(in_features=768, out_features=768, bias=True)\n", - " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n", - " (dropout): Dropout(p=0.1, inplace=False)\n", - " )\n", - " )\n", - " (intermediate): BertIntermediate(\n", - " (dense): Linear(in_features=768, out_features=3072, bias=True)\n", - " (intermediate_act_fn): GELUActivation()\n", - " )\n", - " (output): BertOutput(\n", - " (dense): Linear(in_features=3072, out_features=768, bias=True)\n", - " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n", - " (dropout): Dropout(p=0.1, inplace=False)\n", - " )\n", - " )\n", - " (6): BertLayer(\n", - " (attention): BertAttention(\n", - " (self): BertSelfAttention(\n", - " (query): Linear(in_features=768, out_features=768, bias=True)\n", - " (key): Linear(in_features=768, out_features=768, bias=True)\n", - " (value): Linear(in_features=768, out_features=768, bias=True)\n", - " (dropout): Dropout(p=0.1, inplace=False)\n", - " )\n", - " (output): BertSelfOutput(\n", - " (dense): Linear(in_features=768, out_features=768, bias=True)\n", - " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n", - " (dropout): Dropout(p=0.1, inplace=False)\n", - " )\n", - " )\n", - " (intermediate): BertIntermediate(\n", - " (dense): Linear(in_features=768, out_features=3072, bias=True)\n", - " (intermediate_act_fn): GELUActivation()\n", - " )\n", - " (output): BertOutput(\n", - " (dense): Linear(in_features=3072, out_features=768, bias=True)\n", - " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n", - " (dropout): Dropout(p=0.1, inplace=False)\n", - " )\n", - " )\n", - " (7): BertLayer(\n", - " (attention): BertAttention(\n", - " (self): BertSelfAttention(\n", - " (query): Linear(in_features=768, out_features=768, bias=True)\n", - " (key): Linear(in_features=768, out_features=768, bias=True)\n", - " (value): Linear(in_features=768, out_features=768, bias=True)\n", - " (dropout): Dropout(p=0.1, inplace=False)\n", - " )\n", - " (output): BertSelfOutput(\n", - " (dense): Linear(in_features=768, out_features=768, bias=True)\n", - " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n", - " (dropout): Dropout(p=0.1, inplace=False)\n", - " )\n", - " )\n", - " (intermediate): BertIntermediate(\n", - " (dense): Linear(in_features=768, out_features=3072, bias=True)\n", - " (intermediate_act_fn): GELUActivation()\n", - " )\n", - " (output): BertOutput(\n", - " (dense): Linear(in_features=3072, out_features=768, bias=True)\n", - " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n", - " (dropout): Dropout(p=0.1, inplace=False)\n", - " )\n", - " )\n", - " (8): BertLayer(\n", - " (attention): BertAttention(\n", - " (self): BertSelfAttention(\n", - " (query): Linear(in_features=768, out_features=768, bias=True)\n", - " (key): Linear(in_features=768, out_features=768, bias=True)\n", - " (value): Linear(in_features=768, out_features=768, bias=True)\n", - " (dropout): Dropout(p=0.1, inplace=False)\n", - " )\n", - " (output): BertSelfOutput(\n", - " (dense): Linear(in_features=768, out_features=768, bias=True)\n", - " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n", - " (dropout): Dropout(p=0.1, inplace=False)\n", - " )\n", - " )\n", - " (intermediate): BertIntermediate(\n", - " (dense): Linear(in_features=768, out_features=3072, bias=True)\n", - " (intermediate_act_fn): GELUActivation()\n", - " )\n", - " (output): BertOutput(\n", - " (dense): Linear(in_features=3072, out_features=768, bias=True)\n", - " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n", - " (dropout): Dropout(p=0.1, inplace=False)\n", - " )\n", - " )\n", - " (9): BertLayer(\n", - " (attention): BertAttention(\n", - " (self): BertSelfAttention(\n", - " (query): Linear(in_features=768, out_features=768, bias=True)\n", - " (key): Linear(in_features=768, out_features=768, bias=True)\n", - " (value): Linear(in_features=768, out_features=768, bias=True)\n", - " (dropout): Dropout(p=0.1, inplace=False)\n", - " )\n", - " (output): BertSelfOutput(\n", - " (dense): Linear(in_features=768, out_features=768, bias=True)\n", - " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n", - " (dropout): Dropout(p=0.1, inplace=False)\n", - " )\n", - " )\n", - " (intermediate): BertIntermediate(\n", - " (dense): Linear(in_features=768, out_features=3072, bias=True)\n", - " (intermediate_act_fn): GELUActivation()\n", - " )\n", - " (output): BertOutput(\n", - " (dense): Linear(in_features=3072, out_features=768, bias=True)\n", - " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n", - " (dropout): Dropout(p=0.1, inplace=False)\n", - " )\n", - " )\n", - " (10): BertLayer(\n", - " (attention): BertAttention(\n", - " (self): BertSelfAttention(\n", - " (query): Linear(in_features=768, out_features=768, bias=True)\n", - " (key): Linear(in_features=768, out_features=768, bias=True)\n", - " (value): Linear(in_features=768, out_features=768, bias=True)\n", - " (dropout): Dropout(p=0.1, inplace=False)\n", - " )\n", - " (output): BertSelfOutput(\n", - " (dense): Linear(in_features=768, out_features=768, bias=True)\n", - " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n", - " (dropout): Dropout(p=0.1, inplace=False)\n", - " )\n", - " )\n", - " (intermediate): BertIntermediate(\n", - " (dense): Linear(in_features=768, out_features=3072, bias=True)\n", - " (intermediate_act_fn): GELUActivation()\n", - " )\n", - " (output): BertOutput(\n", - " (dense): Linear(in_features=3072, out_features=768, bias=True)\n", - " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n", - " (dropout): Dropout(p=0.1, inplace=False)\n", - " )\n", - " )\n", - " (11): BertLayer(\n", - " (attention): BertAttention(\n", - " (self): BertSelfAttention(\n", - " (query): Linear(in_features=768, out_features=768, bias=True)\n", - " (key): Linear(in_features=768, out_features=768, bias=True)\n", - " (value): Linear(in_features=768, out_features=768, bias=True)\n", - " (dropout): Dropout(p=0.1, inplace=False)\n", - " )\n", - " (output): BertSelfOutput(\n", - " (dense): Linear(in_features=768, out_features=768, bias=True)\n", - " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n", - " (dropout): Dropout(p=0.1, inplace=False)\n", - " )\n", - " )\n", - " (intermediate): BertIntermediate(\n", - " (dense): Linear(in_features=768, out_features=3072, bias=True)\n", - " (intermediate_act_fn): GELUActivation()\n", - " )\n", - " (output): BertOutput(\n", - " (dense): Linear(in_features=3072, out_features=768, bias=True)\n", - " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n", - " (dropout): Dropout(p=0.1, inplace=False)\n", - " )\n", - " )\n", - " )\n", - " )\n", - " (pooler): BertPooler(\n", - " (dense): Linear(in_features=768, out_features=768, bias=True)\n", - " (activation): Tanh()\n", - " )\n", - " )\n", - " (dropout): Dropout(p=0.1, inplace=False)\n", - " (classifier): Linear(in_features=768, out_features=38, bias=True)\n", - ")" - ] - }, - "execution_count": 20, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# Load BertForSequenceClassification, the pretrained BERT model with a single \n", "# linear classification layer on top.\n", @@ -1240,16 +689,7 @@ "metadata": { "id": "xd_cG-8pj4Iw" }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/opt/homebrew/Caskroom/miniforge/base/envs/geode-classification-py39/lib/python3.9/site-packages/transformers/optimization.py:306: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n", - " warnings.warn(\n" - ] - } - ], + "outputs": [], "source": [ "#Note: AdamW is a class from the huggingface library (as opposed to pytorch) \n", "# I believe the 'W' stands for 'Weight Decay fix\"\n", @@ -1289,31 +729,7 @@ "id": "SbHBbYpwkKaA", "outputId": "4cd1be4a-6014-4804-df56-f38e98039797" }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "======== Epoch 1 / 4 ========\n", - "Training...\n" - ] - }, - { - "ename": "KeyboardInterrupt", - "evalue": "", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn [23], line 92\u001b[0m\n\u001b[1;32m 88\u001b[0m loss\u001b[39m.\u001b[39mbackward()\n\u001b[1;32m 90\u001b[0m \u001b[39m# Clip the norm of the gradients to 1.0.\u001b[39;00m\n\u001b[1;32m 91\u001b[0m \u001b[39m# This is to help prevent the \"exploding gradients\" problem.\u001b[39;00m\n\u001b[0;32m---> 92\u001b[0m torch\u001b[39m.\u001b[39;49mnn\u001b[39m.\u001b[39;49mutils\u001b[39m.\u001b[39;49mclip_grad_norm_(model\u001b[39m.\u001b[39;49mparameters(), \u001b[39m1.0\u001b[39;49m)\n\u001b[1;32m 94\u001b[0m \u001b[39m# Update parameters and take a step using the computed gradient.\u001b[39;00m\n\u001b[1;32m 95\u001b[0m \u001b[39m# The optimizer dictates the \"update rule\"--how the parameters are\u001b[39;00m\n\u001b[1;32m 96\u001b[0m \u001b[39m# modified based on their gradients, the learning rate, etc.\u001b[39;00m\n\u001b[1;32m 97\u001b[0m optimizer\u001b[39m.\u001b[39mstep()\n", - "File \u001b[0;32m/opt/homebrew/Caskroom/miniforge/base/envs/geode-classification-py39/lib/python3.9/site-packages/torch/nn/utils/clip_grad.py:43\u001b[0m, in \u001b[0;36mclip_grad_norm_\u001b[0;34m(parameters, max_norm, norm_type, error_if_nonfinite)\u001b[0m\n\u001b[1;32m 41\u001b[0m total_norm \u001b[39m=\u001b[39m norms[\u001b[39m0\u001b[39m] \u001b[39mif\u001b[39;00m \u001b[39mlen\u001b[39m(norms) \u001b[39m==\u001b[39m \u001b[39m1\u001b[39m \u001b[39melse\u001b[39;00m torch\u001b[39m.\u001b[39mmax(torch\u001b[39m.\u001b[39mstack(norms))\n\u001b[1;32m 42\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[0;32m---> 43\u001b[0m total_norm \u001b[39m=\u001b[39m torch\u001b[39m.\u001b[39mnorm(torch\u001b[39m.\u001b[39mstack([torch\u001b[39m.\u001b[39mnorm(g\u001b[39m.\u001b[39mdetach(), norm_type)\u001b[39m.\u001b[39mto(device) \u001b[39mfor\u001b[39;00m g \u001b[39min\u001b[39;00m grads]), norm_type)\n\u001b[1;32m 44\u001b[0m \u001b[39mif\u001b[39;00m error_if_nonfinite \u001b[39mand\u001b[39;00m torch\u001b[39m.\u001b[39mlogical_or(total_norm\u001b[39m.\u001b[39misnan(), total_norm\u001b[39m.\u001b[39misinf()):\n\u001b[1;32m 45\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mRuntimeError\u001b[39;00m(\n\u001b[1;32m 46\u001b[0m \u001b[39mf\u001b[39m\u001b[39m'\u001b[39m\u001b[39mThe total norm of order \u001b[39m\u001b[39m{\u001b[39;00mnorm_type\u001b[39m}\u001b[39;00m\u001b[39m for gradients from \u001b[39m\u001b[39m'\u001b[39m\n\u001b[1;32m 47\u001b[0m \u001b[39m'\u001b[39m\u001b[39m`parameters` is non-finite, so it cannot be clipped. To disable \u001b[39m\u001b[39m'\u001b[39m\n\u001b[1;32m 48\u001b[0m \u001b[39m'\u001b[39m\u001b[39mthis error and scale the gradients by the non-finite norm anyway, \u001b[39m\u001b[39m'\u001b[39m\n\u001b[1;32m 49\u001b[0m \u001b[39m'\u001b[39m\u001b[39mset `error_if_nonfinite=False`\u001b[39m\u001b[39m'\u001b[39m)\n", - "File \u001b[0;32m/opt/homebrew/Caskroom/miniforge/base/envs/geode-classification-py39/lib/python3.9/site-packages/torch/nn/utils/clip_grad.py:43\u001b[0m, in \u001b[0;36m<listcomp>\u001b[0;34m(.0)\u001b[0m\n\u001b[1;32m 41\u001b[0m total_norm \u001b[39m=\u001b[39m norms[\u001b[39m0\u001b[39m] \u001b[39mif\u001b[39;00m \u001b[39mlen\u001b[39m(norms) \u001b[39m==\u001b[39m \u001b[39m1\u001b[39m \u001b[39melse\u001b[39;00m torch\u001b[39m.\u001b[39mmax(torch\u001b[39m.\u001b[39mstack(norms))\n\u001b[1;32m 42\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[0;32m---> 43\u001b[0m total_norm \u001b[39m=\u001b[39m torch\u001b[39m.\u001b[39mnorm(torch\u001b[39m.\u001b[39mstack([torch\u001b[39m.\u001b[39;49mnorm(g\u001b[39m.\u001b[39;49mdetach(), norm_type)\u001b[39m.\u001b[39mto(device) \u001b[39mfor\u001b[39;00m g \u001b[39min\u001b[39;00m grads]), norm_type)\n\u001b[1;32m 44\u001b[0m \u001b[39mif\u001b[39;00m error_if_nonfinite \u001b[39mand\u001b[39;00m torch\u001b[39m.\u001b[39mlogical_or(total_norm\u001b[39m.\u001b[39misnan(), total_norm\u001b[39m.\u001b[39misinf()):\n\u001b[1;32m 45\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mRuntimeError\u001b[39;00m(\n\u001b[1;32m 46\u001b[0m \u001b[39mf\u001b[39m\u001b[39m'\u001b[39m\u001b[39mThe total norm of order \u001b[39m\u001b[39m{\u001b[39;00mnorm_type\u001b[39m}\u001b[39;00m\u001b[39m for gradients from \u001b[39m\u001b[39m'\u001b[39m\n\u001b[1;32m 47\u001b[0m \u001b[39m'\u001b[39m\u001b[39m`parameters` is non-finite, so it cannot be clipped. To disable \u001b[39m\u001b[39m'\u001b[39m\n\u001b[1;32m 48\u001b[0m \u001b[39m'\u001b[39m\u001b[39mthis error and scale the gradients by the non-finite norm anyway, \u001b[39m\u001b[39m'\u001b[39m\n\u001b[1;32m 49\u001b[0m \u001b[39m'\u001b[39m\u001b[39mset `error_if_nonfinite=False`\u001b[39m\u001b[39m'\u001b[39m)\n", - "File \u001b[0;32m/opt/homebrew/Caskroom/miniforge/base/envs/geode-classification-py39/lib/python3.9/site-packages/torch/functional.py:1485\u001b[0m, in \u001b[0;36mnorm\u001b[0;34m(input, p, dim, keepdim, out, dtype)\u001b[0m\n\u001b[1;32m 1483\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39misinstance\u001b[39m(p, \u001b[39mstr\u001b[39m):\n\u001b[1;32m 1484\u001b[0m _dim \u001b[39m=\u001b[39m [i \u001b[39mfor\u001b[39;00m i \u001b[39min\u001b[39;00m \u001b[39mrange\u001b[39m(ndim)] \u001b[39m# noqa: C416 TODO: rewrite as list(range(m))\u001b[39;00m\n\u001b[0;32m-> 1485\u001b[0m \u001b[39mreturn\u001b[39;00m _VF\u001b[39m.\u001b[39;49mnorm(\u001b[39minput\u001b[39;49m, p, dim\u001b[39m=\u001b[39;49m_dim, keepdim\u001b[39m=\u001b[39;49mkeepdim) \u001b[39m# type: ignore[attr-defined]\u001b[39;00m\n\u001b[1;32m 1487\u001b[0m \u001b[39m# TODO: when https://github.com/pytorch/pytorch/issues/33782 is fixed\u001b[39;00m\n\u001b[1;32m 1488\u001b[0m \u001b[39m# remove the overloads where dim is an int and replace with BraodcastingList1\u001b[39;00m\n\u001b[1;32m 1489\u001b[0m \u001b[39m# and remove next four lines, replace _dim with dim\u001b[39;00m\n\u001b[1;32m 1490\u001b[0m \u001b[39mif\u001b[39;00m dim \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n", - "\u001b[0;31mKeyboardInterrupt\u001b[0m: " - ] - } - ], + "outputs": [], "source": [ "# This training code is based on the `run_glue.py` script here:\n", "# https://github.com/huggingface/transformers/blob/5bfcd0485ece086ebcbed2d008813037968a9e58/examples/run_glue.py#L128\n", @@ -1447,8 +863,8 @@ }, "outputs": [], "source": [ - "name = model_bert+\"_s\"+str(maxOfInstancePerClass)\n", - "model_path = path+\"model_\"+name+\".pt\"" + "name = model_bert + \"_s\" + str(maxOfInstancePerClass)\n", + "model_path = path + \"model_\"+name+\".pt\"" ] }, { @@ -1469,7 +885,6 @@ "outputs": [], "source": [ "model.save_pretrained(model_path)\n", - "tokenizer.save_pretrained(model_path)\n", "#ludo: changement de la façon de sauver le modèle" ] }, -- GitLab