diff --git a/notebooks/Predictions_analysis_bckp.ipynb b/notebooks/Predictions_analysis_bckp.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..a126b03f68b8a1878579afc755cac05848e9799b
--- /dev/null
+++ b/notebooks/Predictions_analysis_bckp.ipynb
@@ -0,0 +1,470 @@
+{
+  "cells": [
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "M-41ZfqIHyi2"
+      },
+      "outputs": [],
+      "source": [
+        "import pandas as pd\n",
+        "import csv"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "gVaa01O5IQke",
+        "outputId": "054b0d9d-148a-4cc6-8616-b9e704eab6ea"
+      },
+      "outputs": [],
+      "source": [
+        "# Download dataset_test_predictions_sgd_tfidf.csv into the current working directory.\n",
+        "# -nc (no-clobber) skips the download when the file is already present, so re-running\n",
+        "# this cell does not leave numbered duplicates (*.csv.1, ...) next to the original.\n",
+        "!wget -nc https://geode.liris.cnrs.fr/EDdA-Classification/predictions/dataset_test_predictions_sgd_tfidf.csv"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "dYVLgduMIQm4",
+        "outputId": "4e35f288-f81a-428b-8b9e-035c3a1d3c7a"
+      },
+      "outputs": [],
+      "source": [
+        "# Load the downloaded CSV into a DataFrame, then display its (rows, columns) shape.\n",
+        "predictions_csv = \"dataset_test_predictions_sgd_tfidf.csv\"\n",
+        "df = pd.read_csv(predictions_csv)\n",
+        "\n",
+        "df.shape"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 479
+        },
+        "id": "Bp50IA0qIQpf",
+        "outputId": "c4efa4c8-4fac-4349-cc12-a331f89850ad"
+      },
+      "outputs": [],
+      "source": [
+        "# Preview the first rows of the DataFrame that was just loaded.\n",
+        "df.head()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 1000
+        },
+        "id": "3Obah84eIQrm",
+        "outputId": "6971b6d4-b7c5-4029-86b9-83393899dd51"
+      },
+      "outputs": [],
+      "source": [
+        "# Articles whose first prediction (predict1) matches the ground truth\n",
+        "truth = df[\"ensemble_domaine_enccre\"]\n",
+        "top1_right = truth.eq(df[\"predict1\"])\n",
+        "df.loc[top1_right]"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 887
+        },
+        "id": "8eadTEGmJ2BK",
+        "outputId": "f9469880-f3d5-4fb2-8ac3-4db50eb7deb1"
+      },
+      "outputs": [],
+      "source": [
+        "# Articles whose second class (predict2) matches the ground truth while the first\n",
+        "# one does not (839 rows according to the author's note)\n",
+        "truth = df[\"ensemble_domaine_enccre\"]\n",
+        "top1_wrong = truth.ne(df[\"predict1\"])\n",
+        "top2_right = truth.eq(df[\"predict2\"])\n",
+        "df.loc[top1_wrong & top2_right]"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 939
+        },
+        "id": "W9PzX5DUKbwO",
+        "outputId": "9b851666-3361-452b-cab3-f8e42f06c7e4"
+      },
+      "outputs": [],
+      "source": [
+        "# Articles for which neither the first nor the second class matches the ground truth\n",
+        "# (740 rows according to the author's note)\n",
+        "truth = df[\"ensemble_domaine_enccre\"]\n",
+        "top1_wrong = truth.ne(df[\"predict1\"])\n",
+        "top2_wrong = truth.ne(df[\"predict2\"])\n",
+        "df.loc[top1_wrong & top2_wrong]"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 1000
+        },
+        "id": "NLcWPZlQK9BM",
+        "outputId": "8baf8f1f-9f36-4779-c12c-47ce96a627da"
+      },
+      "outputs": [],
+      "source": [
+        "# Geography articles whose highest-probability prediction is not Geography\n",
+        "# (only the second prediction is Geography) -> 44 according to the author's note\n",
+        "truth = df[\"ensemble_domaine_enccre\"]\n",
+        "second_only = truth.ne(df[\"predict1\"]) & truth.eq(df[\"predict2\"])\n",
+        "is_geography = truth == \"GÃ©ographie\"\n",
+        "\n",
+        "df.loc[second_only & is_geography]"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "Aq7hmUshMhPh"
+      },
+      "outputs": [],
+      "source": []
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 479
+        },
+        "id": "wRv1Nv5-ztyK",
+        "outputId": "94e55eeb-f7a1-4a75-b674-5347092565f1"
+      },
+      "outputs": [],
+      "source": [
+        "# Show the first rows of df again before the word-frequency analysis below.\n",
+        "df.head()"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "PYH0M0nddL34"
+      },
+      "source": [
+        "## Word frequency"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "RmOViUd-zwe8",
+        "outputId": "e4beff09-2cb2-4cf3-9361-0537e70f484f"
+      },
+      "outputs": [],
+      "source": [
+        "# Distinct ENCCRE domain sets (the classes), in order of first appearance\n",
+        "df[\"ensemble_domaine_enccre\"].unique()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "H9YxH28xxMGf"
+      },
+      "outputs": [],
+      "source": [
+        "# Sorted list of the distinct class labels; the loops and plots below iterate over it.\n",
+        "lst_domaines = df[\"ensemble_domaine_enccre\"].unique().tolist()\n",
+        "lst_domaines.sort()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "26h8-7P-xMI7"
+      },
+      "outputs": [],
+      "source": [
+        "from collections import Counter\n",
+        "\n",
+        "\n",
+        "def wordListToFreqDict(wordlist):\n",
+        "    \"\"\"Return a dict mapping each distinct word of `wordlist` to its number of occurrences.\n",
+        "\n",
+        "    The words are counted in a single pass with `collections.Counter` instead of calling\n",
+        "    `wordlist.count` once per element, which was quadratic in the number of words.\n",
+        "    Keys appear in order of first occurrence, as before.\n",
+        "    \"\"\"\n",
+        "    return dict(Counter(wordlist))\n",
+        "\n",
+        "\n",
+        "def sortFreqDict(freqdict):\n",
+        "    \"\"\"Return the `(frequency, word)` tuples of `freqdict` sorted in descending order.\n",
+        "\n",
+        "    Tuples are compared as a whole, so words with the same frequency are ordered by the\n",
+        "    word itself, also descending.\n",
+        "    \"\"\"\n",
+        "    return sorted(((freq, word) for word, freq in freqdict.items()), reverse=True)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "9aNEjYtExMN5"
+      },
+      "outputs": [],
+      "source": [
+        "# For every domain, split its articles on whitespace and store the resulting\n",
+        "# (frequency, word) list returned by sortFreqDict under the domain name.\n",
+        "d = {}\n",
+        "for domaine in lst_domaines:\n",
+        "    domain_texts = df.loc[df[\"ensemble_domaine_enccre\"] == domaine, \"contentWithoutClass\"]\n",
+        "    tokens = []\n",
+        "    for text in domain_texts.values:\n",
+        "        tokens.extend(text.split())\n",
+        "    print(domaine)  # progress trace: one line per processed domain\n",
+        "    d[domaine] = sortFreqDict(wordListToFreqDict(tokens))"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "1yYHcDjUY9HG",
+        "outputId": "31e2bb56-46ff-4dab-cc2f-bb117d61fb35"
+      },
+      "outputs": [],
+      "source": [
+        "# Inspect the sorted (frequency, word) list stored in d for this domain key.\n",
+        "d['GÃ©ographie']"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "uchRyb2gqnk0"
+      },
+      "outputs": [],
+      "source": [
+        "# Base output directory (relative path) under which the Wordclouds/ files are written below.\n",
+        "# NOTE(review): looks like a mounted Google Drive folder, but no mount step is visible in this notebook - confirm.\n",
+        "path = \"drive/MyDrive/Classification-EDdA/\""
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "oNvSxYxmpqed"
+      },
+      "outputs": [],
+      "source": [
+        "# Write one CSV file per domain listing every word with its frequency.\n",
+        "for domaine, wordFreq in d.items():\n",
+        "    # newline='' is what the csv module expects for files it writes (otherwise extra blank\n",
+        "    # rows can appear on some platforms); the explicit UTF-8 encoding keeps accented words\n",
+        "    # independent of the platform's default encoding.\n",
+        "    with open(path+'Wordclouds/frequency_'+domaine+'.csv', 'w', newline='', encoding='utf-8') as file:\n",
+        "        csv_out = csv.writer(file)\n",
+        "        csv_out.writerow(['frequency','word'])\n",
+        "        csv_out.writerows(wordFreq)  # each row is one (frequency, word) tuple"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "tTz8JcdhdHNw"
+      },
+      "source": [
+        "## Wordclouds"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "C2NM2ayE9jcR"
+      },
+      "outputs": [],
+      "source": [
+        "from wordcloud import WordCloud"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 1000
+        },
+        "id": "EITJqnZ5ecE8",
+        "outputId": "5a0392c9-f9dd-4a6b-b06a-6a2b50aeec27"
+      },
+      "outputs": [],
+      "source": [
+        "# `plt` is used below but is otherwise only imported in a later cell, so a fresh\n",
+        "# Restart & Run All would fail here with a NameError without this import.\n",
+        "import matplotlib.pyplot as plt\n",
+        "\n",
+        "# Draw one word cloud per domain on a shared grid figure and save each cloud as its own PNG.\n",
+        "lst_clouds = []  # WordCloud objects in lst_domaines order; reused by the cells below\n",
+        "n_cols = 4\n",
+        "# Keep the 10-row layout, but add rows if there are more domains than 10 x 4 grid slots\n",
+        "# (plt.subplot raises when the index exceeds n_rows * n_cols).\n",
+        "n_rows = max(10, -(-len(lst_domaines) // n_cols))\n",
+        "\n",
+        "plt.figure(figsize=(30,50))\n",
+        "\n",
+        "for cpt, domaine in enumerate(lst_domaines, start=1):\n",
+        "    plt.subplot(n_rows, n_cols, cpt)\n",
+        "    text = df[df.ensemble_domaine_enccre == domaine].contentWithoutClass.values\n",
+        "    cloud_i = WordCloud(width=1080, height=720, background_color='white',\n",
+        "                        collocations=False, colormap='Set2',\n",
+        "                        max_words = 100, random_state = 42\n",
+        "                       ).generate(\" \".join(text))\n",
+        "\n",
+        "    # Available colormap names: https://matplotlib.org/3.2.1/tutorials/colors/colormaps.html\n",
+        "\n",
+        "    plt.axis('off')\n",
+        "    plt.title(domaine,fontsize=10)\n",
+        "    plt.imshow(cloud_i)\n",
+        "\n",
+        "    # NOTE(review): only the first space-separated token of the domain name goes into the\n",
+        "    # file name, so two domains sharing that token would write to the same PNG - confirm.\n",
+        "    cloud_i.to_file(path+\"/Wordclouds/Wordclouds_\"+domaine.split(\" \")[0]+\".png\")\n",
+        "\n",
+        "    lst_clouds.append(cloud_i)\n",
+        "\n",
+        "plt.savefig('Domaines_wordclouds.pdf', dpi=300, bbox_inches='tight')\n",
+        "plt.show()\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "rVFvp3owZDPq"
+      },
+      "outputs": [],
+      "source": []
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "2EIA9mV_ecH8"
+      },
+      "outputs": [],
+      "source": [
+        "# Count the words shared by every ordered pair of word clouds.\n",
+        "# m[i][j] is the number of words of lst_clouds[i] that also appear in lst_clouds[j].\n",
+        "m = [\n",
+        "    [\n",
+        "        sum(1 for word in d1.words_.keys() if word in d2.words_.keys())\n",
+        "        for d2 in lst_clouds\n",
+        "    ]\n",
+        "    for d1 in lst_clouds\n",
+        "]"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 874
+        },
+        "id": "p-D4rLn1TCMV",
+        "outputId": "6dc3c33a-733d-466b-c134-9060e9261973"
+      },
+      "outputs": [],
+      "source": [
+        "import matplotlib.pyplot as plt\n",
+        "import seaborn as sns\n",
+        "\n",
+        "# Heatmap of the pairwise common-word counts held in m, labelled with the domain names\n",
+        "# on both axes, saved as a PNG file.\n",
+        "fig, ax = plt.subplots(figsize=(16,13))\n",
+        "sns.heatmap(m, xticklabels=lst_domaines, yticklabels=lst_domaines, cmap='Blues', ax=ax)\n",
+        "\n",
+        "fig.savefig('Heatmap_commonWords.png', dpi=300, bbox_inches='tight')\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "ZSnHC2IoTqO5",
+        "outputId": "d6249442-119b-4dbd-a764-422d46fcf0f0"
+      },
+      "outputs": [],
+      "source": [
+        "# Number of words shared by the clouds at positions 4 and 29 of lst_clouds\n",
+        "# (\"Arts et métiers\" and \"Métiers\" according to the author's note).\n",
+        "words_a = lst_clouds[4].words_.keys()\n",
+        "words_b = lst_clouds[29].words_.keys()\n",
+        "\n",
+        "shared_words = [word for word in words_a if word in words_b]\n",
+        "len(shared_words)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "niAw2OF0bWMi",
+        "outputId": "4f267f2d-8586-44d0-bd54-1c73ebf646bf"
+      },
+      "outputs": [],
+      "source": [
+        "# Words of cloud 4 (\"Arts et métiers\" per the author's note) that are absent from the\n",
+        "# words kept for cloud 29 (\"Métiers\"); each cloud keeps at most max_words = 100 words.\n",
+        "words_a = lst_clouds[4].words_.keys()\n",
+        "words_b = lst_clouds[29].words_.keys()\n",
+        "\n",
+        "only_in_a = [word for word in words_a if word not in words_b]\n",
+        "only_in_a"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "oYT479rsyVvq"
+      },
+      "outputs": [],
+      "source": []
+    }
+  ],
+  "metadata": {
+    "colab": {
+      "collapsed_sections": [],
+      "name": "EDdA-Classification_Analyses_predictions_proba.ipynb",
+      "provenance": []
+    },
+    "kernelspec": {
+      "display_name": "Python 3.9.13 ('stanza-lexicoscope-py39')",
+      "language": "python",
+      "name": "python3"
+    },
+    "language_info": {
+      "name": "python",
+      "version": "3.9.13"
+    },
+    "vscode": {
+      "interpreter": {
+        "hash": "68d5f9281eab57a7f4901cb150f4c691b1d08935474a18f188e0e3e8f8f412b7"
+      }
+    }
+  },
+  "nbformat": 4,
+  "nbformat_minor": 0
+}