From 6db02a3d32f8f3335862e0eb88902f86362eff39 Mon Sep 17 00:00:00 2001
From: Ludovic Moncla <moncla.ludovic@gmail.com>
Date: Mon, 28 Nov 2022 15:31:09 +0100
Subject: [PATCH] Update Predict_XAI.ipynb

---
 notebooks/Predict_XAI.ipynb | 480 ++++++++++++++++++++++++++----------
 1 file changed, 355 insertions(+), 125 deletions(-)

diff --git a/notebooks/Predict_XAI.ipynb b/notebooks/Predict_XAI.ipynb
index 1fdf013..517c7a3 100644
--- a/notebooks/Predict_XAI.ipynb
+++ b/notebooks/Predict_XAI.ipynb
@@ -108,8 +108,6 @@
         "import numpy as np\n",
         "import torch\n",
         "from torch.utils.data import TensorDataset, DataLoader, SequentialSampler\n",
-        "from tqdm import tqdm\n",
-        "import os\n",
         "import pandas as pd \n"
       ]
     },
@@ -278,27 +276,7 @@
         "\n",
         "    pred_labels_ += [item for sublist in pred_labels for item in sublist]\n",
         "    return pred_labels_\n",
-        "\n",
-        "\n",
-        "def text_folder_to_dataframe(path):\n",
-        "\n",
-        "  data = []\n",
-        "  # id,tome,filename,nb_words,content,domain\n",
-        "\n",
-        "  for tome in sorted(os.listdir(path)):\n",
-        "    try:\n",
-        "        for article in tqdm(sorted(os.listdir(path + \"/\" + tome))):\n",
-        "            filename = article[:-4]\n",
-        "            id = tome + filename\n",
-        "\n",
-        "            if article[-4:] == \".txt\":\n",
-        "                with open(path + \"/\" + tome + \"/\" + article) as f:\n",
-        "                    content = f.read()\n",
-        "\n",
-        "                    data.append([id, tome, filename, content, len(content.split(' '))])\n",
-        "    except NotADirectoryError:\n",
-        "        pass\n",
-        "  return pd.DataFrame(data, columns=['id', 'tome', 'filename', 'content', 'nb_words'])\n"
+        "\n"
       ]
     },
     {
@@ -307,92 +285,172 @@
         "id": "c5QKcXulhNJ-"
       },
       "source": [
-        "## 2. Load Data\n",
+        "## 3. Load Data\n",
         "\n",
         "\n",
         "!! A modifier: charger le corpus parallele : EDdA et LGE"
       ]
     },
     {
-      "cell_type": "code",
-      "execution_count": 3,
+      "cell_type": "markdown",
       "metadata": {},
-      "outputs": [],
       "source": [
-        "!wget https://api.nakala.fr/data/10.34847/nkl.74eb1xfd/e522413b58b04ab7c283f8fa68642e9cb69ab5c5"
+        "### 3.1 LGE (Nakala)"
       ]
     },
     {
       "cell_type": "code",
-      "execution_count": null,
+      "execution_count": 5,
       "metadata": {},
       "outputs": [],
       "source": [
-        "!unzip e522413b58b04ab7c283f8fa68642e9cb69ab5c5"
+        "lge_path = \"/Users/lmoncla/Nextcloud-LIRIS/GEODE/GEODE - Partage consortium/Corpus/LGE/LGE_dataset_articles.tsv\"\n",
+        "df_LGE = pd.read_csv(lge_path, sep=\"\\t\")"
       ]
     },
     {
       "cell_type": "code",
-      "execution_count": 19,
+      "execution_count": 6,
       "metadata": {},
-      "outputs": [],
+      "outputs": [
+        {
+          "data": {
+            "text/html": [
+              "<div>\n",
+              "<style scoped>\n",
+              "    .dataframe tbody tr th:only-of-type {\n",
+              "        vertical-align: middle;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe tbody tr th {\n",
+              "        vertical-align: top;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe thead th {\n",
+              "        text-align: right;\n",
+              "    }\n",
+              "</style>\n",
+              "<table border=\"1\" class=\"dataframe\">\n",
+              "  <thead>\n",
+              "    <tr style=\"text-align: right;\">\n",
+              "      <th></th>\n",
+              "      <th>id</th>\n",
+              "      <th>tome</th>\n",
+              "      <th>filename</th>\n",
+              "      <th>content</th>\n",
+              "      <th>nb_words</th>\n",
+              "    </tr>\n",
+              "  </thead>\n",
+              "  <tbody>\n",
+              "    <tr>\n",
+              "      <th>0</th>\n",
+              "      <td>T1article_1</td>\n",
+              "      <td>T1</td>\n",
+              "      <td>article_1</td>\n",
+              "      <td>F.-Camille DREYFUS, député de la Seine.\\n</td>\n",
+              "      <td>6</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>1</th>\n",
+              "      <td>T1article_10</td>\n",
+              "      <td>T1</td>\n",
+              "      <td>article_10</td>\n",
+              "      <td>quimarque un mouvement en avant de l’esprit hu...</td>\n",
+              "      <td>212</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>2</th>\n",
+              "      <td>T1article_100</td>\n",
+              "      <td>T1</td>\n",
+              "      <td>article_100</td>\n",
+              "      <td>ABACUS. L’abacus ou abaque était un instrument...</td>\n",
+              "      <td>1345</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>3</th>\n",
+              "      <td>T1article_1000</td>\n",
+              "      <td>T1</td>\n",
+              "      <td>article_1000</td>\n",
+              "      <td>H6SS6)\\n1780-1793 Choiseul-Goufficr\\n1780-1793...</td>\n",
+              "      <td>218</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>4</th>\n",
+              "      <td>T1article_1001</td>\n",
+              "      <td>T1</td>\n",
+              "      <td>article_1001</td>\n",
+              "      <td>1803Le Brun.\\n</td>\n",
+              "      <td>2</td>\n",
+              "    </tr>\n",
+              "  </tbody>\n",
+              "</table>\n",
+              "</div>"
+            ],
+            "text/plain": [
+              "               id tome      filename  \\\n",
+              "0     T1article_1   T1     article_1   \n",
+              "1    T1article_10   T1    article_10   \n",
+              "2   T1article_100   T1   article_100   \n",
+              "3  T1article_1000   T1  article_1000   \n",
+              "4  T1article_1001   T1  article_1001   \n",
+              "\n",
+              "                                             content  nb_words  \n",
+              "0          F.-Camille DREYFUS, député de la Seine.\\n         6  \n",
+              "1  quimarque un mouvement en avant de l’esprit hu...       212  \n",
+              "2  ABACUS. L’abacus ou abaque était un instrument...      1345  \n",
+              "3  H6SS6)\\n1780-1793 Choiseul-Goufficr\\n1780-1793...       218  \n",
+              "4                                     1803Le Brun.\\n         2  "
+            ]
+          },
+          "execution_count": 6,
+          "metadata": {},
+          "output_type": "execute_result"
+        }
+      ],
       "source": [
-        "input_path = \"/Users/lmoncla/Documents/Data/Corpus/LGE/Text\"\n",
-        "#input_path = \"./Text\""
+        "df_LGE.head()"
       ]
     },
     {
       "cell_type": "code",
-      "execution_count": 20,
+      "execution_count": 7,
       "metadata": {},
       "outputs": [
         {
-          "name": "stderr",
-          "output_type": "stream",
-          "text": [
-            "100%|██████████| 5201/5201 [00:00<00:00, 6137.32it/s]\n",
-            "100%|██████████| 5704/5704 [00:00<00:00, 6325.35it/s]\n",
-            "100%|██████████| 5214/5214 [00:00<00:00, 5986.96it/s]\n",
-            "100%|██████████| 5528/5528 [00:00<00:00, 6213.04it/s]\n",
-            "100%|██████████| 6963/6963 [00:01<00:00, 5686.82it/s]\n",
-            "100%|██████████| 5983/5983 [00:00<00:00, 6120.28it/s]\n",
-            "100%|██████████| 13713/13713 [00:01<00:00, 7057.45it/s]\n",
-            "100%|██████████| 9202/9202 [00:01<00:00, 7161.23it/s]\n",
-            "100%|██████████| 10704/10704 [00:01<00:00, 7208.53it/s]\n",
-            "100%|██████████| 6378/6378 [00:00<00:00, 6988.23it/s]\n",
-            "100%|██████████| 8476/8476 [00:01<00:00, 7098.48it/s]\n",
-            "100%|██████████| 6576/6576 [00:00<00:00, 6996.19it/s]\n",
-            "100%|██████████| 7797/7797 [00:01<00:00, 6981.47it/s]\n",
-            "100%|██████████| 9027/9027 [00:01<00:00, 6563.44it/s]\n",
-            "100%|██████████| 8383/8383 [00:01<00:00, 7017.88it/s]\n",
-            "100%|██████████| 7319/7319 [00:01<00:00, 7064.77it/s]\n",
-            "100%|██████████| 10269/10269 [00:01<00:00, 6864.36it/s]\n",
-            "100%|██████████| 7512/7512 [00:01<00:00, 6854.61it/s]\n",
-            "100%|██████████| 6701/6701 [00:01<00:00, 6501.17it/s]\n",
-            "100%|██████████| 7343/7343 [00:01<00:00, 6933.17it/s]\n",
-            "100%|██████████| 7273/7273 [00:01<00:00, 6877.68it/s]\n",
-            "100%|██████████| 10877/10877 [00:01<00:00, 6410.62it/s]\n",
-            "100%|██████████| 4731/4731 [00:00<00:00, 6429.83it/s]\n",
-            "100%|██████████| 8698/8698 [00:01<00:00, 6076.43it/s]\n",
-            "100%|██████████| 9675/9675 [00:01<00:00, 6399.53it/s]\n",
-            "100%|██████████| 5710/5710 [00:00<00:00, 6343.15it/s]\n",
-            "100%|██████████| 5664/5664 [00:00<00:00, 6450.75it/s]\n",
-            "100%|██████████| 5828/5828 [00:00<00:00, 6425.49it/s]\n",
-            "100%|██████████| 5721/5721 [00:00<00:00, 6536.62it/s]\n",
-            "100%|██████████| 6110/6110 [00:00<00:00, 6391.42it/s]\n",
-            "100%|██████████| 5195/5195 [00:00<00:00, 6016.13it/s]\n"
-          ]
+          "data": {
+            "text/plain": [
+              "(229475, 5)"
+            ]
+          },
+          "execution_count": 7,
+          "metadata": {},
+          "output_type": "execute_result"
         }
       ],
       "source": [
-        "df_LGE = text_folder_to_dataframe(input_path)\n",
-        "#df_LGE = pd.read_csv(path + \"data/LGE_withContent.tsv\", sep=\"\\t\")\n",
-        "data_LGE = df_LGE[\"content\"].values"
+        "df_LGE.shape"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "### 3.2 LGE Parallel"
       ]
     },
     {
       "cell_type": "code",
-      "execution_count": 5,
+      "execution_count": 11,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "lge_par_path = \"/Users/lmoncla/Nextcloud-LIRIS/GEODE/GEODE - Partage consortium/Corpus/LGE/LGE_parallel_dataset_articles.tsv\"\n",
+        "df_LGE_par = pd.read_csv(lge_par_path, sep=\"\\t\")"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 12,
       "metadata": {},
       "outputs": [
         {
@@ -418,107 +476,279 @@
               "      <th></th>\n",
               "      <th>id</th>\n",
               "      <th>tome</th>\n",
-              "      <th>rank</th>\n",
-              "      <th>domain</th>\n",
-              "      <th>remark</th>\n",
+              "      <th>filename</th>\n",
               "      <th>content</th>\n",
+              "      <th>nb_words</th>\n",
               "    </tr>\n",
               "  </thead>\n",
               "  <tbody>\n",
               "    <tr>\n",
               "      <th>0</th>\n",
-              "      <td>abrabeses-0</td>\n",
-              "      <td>1</td>\n",
-              "      <td>623</td>\n",
-              "      <td>geography</td>\n",
-              "      <td>NaN</td>\n",
-              "      <td>ABRABESES. Village d’Espagne de la prov. de Za...</td>\n",
+              "      <td>T1aam-0</td>\n",
+              "      <td>T1</td>\n",
+              "      <td>aam-0</td>\n",
+              "      <td>AAM. Mesure de capacité pour les liquides en u...</td>\n",
+              "      <td>38</td>\n",
               "    </tr>\n",
               "    <tr>\n",
               "      <th>1</th>\n",
-              "      <td>accius-0</td>\n",
-              "      <td>1</td>\n",
-              "      <td>1076</td>\n",
-              "      <td>biography</td>\n",
-              "      <td>NaN</td>\n",
-              "      <td>ACCIUS, L. ou L. ATTIUS (170-94 av. J.-C.), po...</td>\n",
+              "      <td>T1abaco-0</td>\n",
+              "      <td>T1</td>\n",
+              "      <td>abaco-0</td>\n",
+              "      <td>ABACO, architecte italien du xvi siècle (V. La...</td>\n",
+              "      <td>8</td>\n",
               "    </tr>\n",
               "    <tr>\n",
               "      <th>2</th>\n",
-              "      <td>achenbach-2</td>\n",
-              "      <td>1</td>\n",
-              "      <td>1357</td>\n",
-              "      <td>biography</td>\n",
-              "      <td>NaN</td>\n",
-              "      <td>ACHENBACH(Henri), administrateur prussien, né ...</td>\n",
+              "      <td>T1abacot-0</td>\n",
+              "      <td>T1</td>\n",
+              "      <td>abacot-0</td>\n",
+              "      <td>ABACOT. Double couronne que portaient autrefoi...</td>\n",
+              "      <td>33</td>\n",
               "    </tr>\n",
               "    <tr>\n",
               "      <th>3</th>\n",
-              "      <td>acireale-0</td>\n",
-              "      <td>1</td>\n",
-              "      <td>1513</td>\n",
-              "      <td>geography</td>\n",
-              "      <td>NaN</td>\n",
-              "      <td>ACIREALE. Yille de Sicile, de la province et d...</td>\n",
+              "      <td>T1abaddon-0</td>\n",
+              "      <td>T1</td>\n",
+              "      <td>abaddon-0</td>\n",
+              "      <td>ABADDONou APOLYON le Destructeur. « Elles\\nava...</td>\n",
+              "      <td>109</td>\n",
               "    </tr>\n",
               "    <tr>\n",
               "      <th>4</th>\n",
-              "      <td>actée-0</td>\n",
-              "      <td>1</td>\n",
-              "      <td>1731</td>\n",
-              "      <td>botany</td>\n",
-              "      <td>NaN</td>\n",
-              "      <td>ACTÉE(Actœa L.). Genre de plantes de la famill...</td>\n",
+              "      <td>T1abandonnement-0</td>\n",
+              "      <td>T1</td>\n",
+              "      <td>abandonnement-0</td>\n",
+              "      <td>ABANDONNEMENT. I. Droit civil. — Ce mot est un...</td>\n",
+              "      <td>76</td>\n",
               "    </tr>\n",
               "  </tbody>\n",
               "</table>\n",
               "</div>"
             ],
             "text/plain": [
-              "            id  tome  rank     domain remark  \\\n",
-              "0  abrabeses-0     1   623  geography    NaN   \n",
-              "1     accius-0     1  1076  biography    NaN   \n",
-              "2  achenbach-2     1  1357  biography    NaN   \n",
-              "3   acireale-0     1  1513  geography    NaN   \n",
-              "4      actée-0     1  1731     botany    NaN   \n",
+              "                  id tome         filename  \\\n",
+              "0            T1aam-0   T1            aam-0   \n",
+              "1          T1abaco-0   T1          abaco-0   \n",
+              "2         T1abacot-0   T1         abacot-0   \n",
+              "3        T1abaddon-0   T1        abaddon-0   \n",
+              "4  T1abandonnement-0   T1  abandonnement-0   \n",
               "\n",
-              "                                             content  \n",
-              "0  ABRABESES. Village d’Espagne de la prov. de Za...  \n",
-              "1  ACCIUS, L. ou L. ATTIUS (170-94 av. J.-C.), po...  \n",
-              "2  ACHENBACH(Henri), administrateur prussien, né ...  \n",
-              "3  ACIREALE. Yille de Sicile, de la province et d...  \n",
-              "4  ACTÉE(Actœa L.). Genre de plantes de la famill...  "
+              "                                             content  nb_words  \n",
+              "0  AAM. Mesure de capacité pour les liquides en u...        38  \n",
+              "1  ABACO, architecte italien du xvi siècle (V. La...         8  \n",
+              "2  ABACOT. Double couronne que portaient autrefoi...        33  \n",
+              "3  ABADDONou APOLYON le Destructeur. « Elles\\nava...       109  \n",
+              "4  ABANDONNEMENT. I. Droit civil. — Ce mot est un...        76  "
             ]
           },
-          "execution_count": 5,
+          "execution_count": 12,
           "metadata": {},
           "output_type": "execute_result"
         }
       ],
       "source": [
-        "df_LGE.head()"
+        "df_LGE_par.head()"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "### 3.3 EDdA (ARTFL)"
       ]
     },
     {
       "cell_type": "code",
-      "execution_count": 6,
+      "execution_count": 8,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "edda_path = \"/Users/lmoncla/Nextcloud-LIRIS/GEODE/GEODE - Partage consortium/Corpus/EDdA/EDdA_dataset_articles.tsv\"\n",
+        "df_EDdA = pd.read_csv(edda_path, sep=\"\\t\")"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 10,
       "metadata": {},
       "outputs": [
         {
           "data": {
+            "text/html": [
+              "<div>\n",
+              "<style scoped>\n",
+              "    .dataframe tbody tr th:only-of-type {\n",
+              "        vertical-align: middle;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe tbody tr th {\n",
+              "        vertical-align: top;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe thead th {\n",
+              "        text-align: right;\n",
+              "    }\n",
+              "</style>\n",
+              "<table border=\"1\" class=\"dataframe\">\n",
+              "  <thead>\n",
+              "    <tr style=\"text-align: right;\">\n",
+              "      <th></th>\n",
+              "      <th>volume</th>\n",
+              "      <th>numero</th>\n",
+              "      <th>head</th>\n",
+              "      <th>author</th>\n",
+              "      <th>edda_class</th>\n",
+              "      <th>enccre_id</th>\n",
+              "      <th>enccre_class</th>\n",
+              "      <th>content</th>\n",
+              "      <th>content_without_designant</th>\n",
+              "      <th>first_paragraph</th>\n",
+              "      <th>nb_words</th>\n",
+              "    </tr>\n",
+              "  </thead>\n",
+              "  <tbody>\n",
+              "    <tr>\n",
+              "      <th>0</th>\n",
+              "      <td>1</td>\n",
+              "      <td>1</td>\n",
+              "      <td>Title Page</td>\n",
+              "      <td>unsigned</td>\n",
+              "      <td>unclassified</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>\\n\\nENCYCLOPÉDIE,\\nDICTIONNAIRE RAISONNÉ\\nDES ...</td>\n",
+              "      <td>\\n\\nENCYCLOPÉDIE,\\nDICTIONNAIRE RAISONNÉ\\nDES ...</td>\n",
+              "      <td>\\n\\nENCYCLOPÉDIE,\\nDICTIONNAIRE RAISONNÉ\\nDES ...</td>\n",
+              "      <td>151</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>1</th>\n",
+              "      <td>1</td>\n",
+              "      <td>2</td>\n",
+              "      <td>A MONSEIGNEUR LE COMTE D'ARGENSON</td>\n",
+              "      <td>Diderot &amp; d'Alembert</td>\n",
+              "      <td>unclassified</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>\\n\\nA MONSEIGNEUR\\nLE COMTE D'ARGENSON,\\nMINIS...</td>\n",
+              "      <td>\\n\\nA MONSEIGNEUR\\nLE COMTE D'ARGENSON,\\nMINIS...</td>\n",
+              "      <td>\\n\\nA MONSEIGNEUR\\nLE COMTE D'ARGENSON,\\nMINIS...</td>\n",
+              "      <td>208</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>2</th>\n",
+              "      <td>1</td>\n",
+              "      <td>3</td>\n",
+              "      <td>DISCOURS PRÉLIMINAIRE DES EDITEURS</td>\n",
+              "      <td>d'Alembert</td>\n",
+              "      <td>unclassified</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>\\n\\nDISCOURS PRÉLIMINAIRE\\nDES EDITEURS.\\n\\n\\n...</td>\n",
+              "      <td>\\n\\nDISCOURS PRÉLIMINAIRE\\nDES EDITEURS.\\n\\n\\n...</td>\n",
+              "      <td>\\n\\nDISCOURS PRÉLIMINAIRE\\nDES EDITEURS.\\n\\n</td>\n",
+              "      <td>44669</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>3</th>\n",
+              "      <td>1</td>\n",
+              "      <td>5</td>\n",
+              "      <td>A, a &amp; a</td>\n",
+              "      <td>Dumarsais5</td>\n",
+              "      <td>Grammaire</td>\n",
+              "      <td>v1-1-0</td>\n",
+              "      <td>Grammaire</td>\n",
+              "      <td>\\nA, a &amp; a s.m. (ordre Encyclopéd.\\nEntend. Sc...</td>\n",
+              "      <td>\\nA, a &amp; a s.m. (ordre Encyclopéd.\\nEntend. Sc...</td>\n",
+              "      <td>\\nA, a &amp; a s.m. (ordre Encyclopéd.\\nEntend. Sc...</td>\n",
+              "      <td>711</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>4</th>\n",
+              "      <td>1</td>\n",
+              "      <td>6</td>\n",
+              "      <td>A</td>\n",
+              "      <td>Dumarsais5</td>\n",
+              "      <td>unclassified</td>\n",
+              "      <td>v1-1-1</td>\n",
+              "      <td>Grammaire</td>\n",
+              "      <td>\\nA, mot, est 1. la troisieme personne du prés...</td>\n",
+              "      <td>\\nA, mot, est 1. la troisieme personne du prés...</td>\n",
+              "      <td>\\nA, mot, est 1. la troisieme personne du prés...</td>\n",
+              "      <td>238</td>\n",
+              "    </tr>\n",
+              "  </tbody>\n",
+              "</table>\n",
+              "</div>"
+            ],
             "text/plain": [
-              "(310, 6)"
+              "   volume  numero                                head                author  \\\n",
+              "0       1       1                          Title Page              unsigned   \n",
+              "1       1       2   A MONSEIGNEUR LE COMTE D'ARGENSON  Diderot & d'Alembert   \n",
+              "2       1       3  DISCOURS PRÉLIMINAIRE DES EDITEURS            d'Alembert   \n",
+              "3       1       5                            A, a & a            Dumarsais5   \n",
+              "4       1       6                                   A            Dumarsais5   \n",
+              "\n",
+              "     edda_class enccre_id enccre_class  \\\n",
+              "0  unclassified       NaN          NaN   \n",
+              "1  unclassified       NaN          NaN   \n",
+              "2  unclassified       NaN          NaN   \n",
+              "3     Grammaire    v1-1-0    Grammaire   \n",
+              "4  unclassified    v1-1-1    Grammaire   \n",
+              "\n",
+              "                                             content  \\\n",
+              "0  \\n\\nENCYCLOPÉDIE,\\nDICTIONNAIRE RAISONNÉ\\nDES ...   \n",
+              "1  \\n\\nA MONSEIGNEUR\\nLE COMTE D'ARGENSON,\\nMINIS...   \n",
+              "2  \\n\\nDISCOURS PRÉLIMINAIRE\\nDES EDITEURS.\\n\\n\\n...   \n",
+              "3  \\nA, a & a s.m. (ordre Encyclopéd.\\nEntend. Sc...   \n",
+              "4  \\nA, mot, est 1. la troisieme personne du prés...   \n",
+              "\n",
+              "                           content_without_designant  \\\n",
+              "0  \\n\\nENCYCLOPÉDIE,\\nDICTIONNAIRE RAISONNÉ\\nDES ...   \n",
+              "1  \\n\\nA MONSEIGNEUR\\nLE COMTE D'ARGENSON,\\nMINIS...   \n",
+              "2  \\n\\nDISCOURS PRÉLIMINAIRE\\nDES EDITEURS.\\n\\n\\n...   \n",
+              "3  \\nA, a & a s.m. (ordre Encyclopéd.\\nEntend. Sc...   \n",
+              "4  \\nA, mot, est 1. la troisieme personne du prés...   \n",
+              "\n",
+              "                                     first_paragraph  nb_words  \n",
+              "0  \\n\\nENCYCLOPÉDIE,\\nDICTIONNAIRE RAISONNÉ\\nDES ...       151  \n",
+              "1  \\n\\nA MONSEIGNEUR\\nLE COMTE D'ARGENSON,\\nMINIS...       208  \n",
+              "2       \\n\\nDISCOURS PRÉLIMINAIRE\\nDES EDITEURS.\\n\\n     44669  \n",
+              "3  \\nA, a & a s.m. (ordre Encyclopéd.\\nEntend. Sc...       711  \n",
+              "4  \\nA, mot, est 1. la troisieme personne du prés...       238  "
             ]
           },
-          "execution_count": 6,
+          "execution_count": 10,
           "metadata": {},
           "output_type": "execute_result"
         }
       ],
       "source": [
-        "df_LGE.shape"
+        "df_EDdA.head()"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "### 3.4 EDdA Parallel"
       ]
     },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": []
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": []
+    },
     {
       "cell_type": "markdown",
       "metadata": {},
-- 
GitLab