From 8da28111e84f67b36327930a57e6375cf5e34b35 Mon Sep 17 00:00:00 2001
From: Alice BRENON <alice.brenon@ens-lyon.fr>
Date: Tue, 29 Mar 2022 12:18:49 +0200
Subject: [PATCH] Adapt notebook computing confusion matrices

---
 notebooks/Confusion Matrices.ipynb | 34 ++++--------------------------
 1 file changed, 4 insertions(+), 30 deletions(-)

diff --git a/notebooks/Confusion Matrices.ipynb b/notebooks/Confusion Matrices.ipynb
index d7a11c5..38cb20a 100644
--- a/notebooks/Confusion Matrices.ipynb	
+++ b/notebooks/Confusion Matrices.ipynb	
@@ -18,6 +18,7 @@
    "outputs": [],
    "source": [
     "from EDdA import data\n",
+    "from EDdA.store import preparePath\n",
     "from EDdA.classification import confusionMatrix, metrics, toPNG, topNGrams\n",
     "import os"
    ]
@@ -32,7 +33,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": null,
    "id": "5ad65685",
    "metadata": {},
    "outputs": [],
@@ -40,39 +41,12 @@
     "source = data.load('training_set')"
    ]
   },
-  {
-   "cell_type": "markdown",
-   "id": "4e958e04",
-   "metadata": {},
-   "source": [
-    "This function rationalises the name of the files containing the confusion matrices to produce."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 10,
-   "id": "545bdb4f",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def preparePath(root, source, n, ranks, metricName):\n",
-    "    path = \"{root}/confusionMatrix/{inputHash}/{n}grams_top{ranks}_{name}.png\".format(\n",
-    "            root=root,\n",
-    "            inputHash=source.hash,\n",
-    "            n=n,\n",
-    "            ranks=ranks,\n",
-    "            name=metricName\n",
-    "        )\n",
-    "    os.makedirs(os.path.dirname(path), exist_ok=True)\n",
-    "    return path"
-   ]
-  },
   {
    "cell_type": "markdown",
    "id": "4079559f",
    "metadata": {},
    "source": [
-    "Then we only have to loop on the n-gram size (`n`), the number of `ranks` to keep when computing the most frequent ones and the comparison method (the metrics' `name`)."
+    "We loop over the n-gram size (`n`), the number of `ranks` to keep when computing the most frequent n-grams, and the comparison method (its `name` in `metrics`)."
    ]
   },
   {
@@ -86,7 +60,7 @@
     "    for ranks in [10, 50, 100]:\n",
     "        vectorizer = topNGrams(source, n, ranks)\n",
     "        for name in ['colinearity', 'keysIntersection']:\n",
-    "            imagePath = preparePath('.', source, n, ranks, name)\n",
+    "            imagePath = preparePath(f\"confusionMatrix/{source.hash}/{n}grams_top{ranks}_{name}.png\")\n",
     "            toPNG(confusionMatrix(vectorizer, metrics[name]), imagePath)"
    ]
   }
-- 
GitLab