From 8da28111e84f67b36327930a57e6375cf5e34b35 Mon Sep 17 00:00:00 2001 From: Alice BRENON <alice.brenon@ens-lyon.fr> Date: Tue, 29 Mar 2022 12:18:49 +0200 Subject: [PATCH] Adapt notebook computing confusion matrices --- notebooks/Confusion Matrices.ipynb | 34 ++++-------------------------- 1 file changed, 4 insertions(+), 30 deletions(-) diff --git a/notebooks/Confusion Matrices.ipynb b/notebooks/Confusion Matrices.ipynb index d7a11c5..38cb20a 100644 --- a/notebooks/Confusion Matrices.ipynb +++ b/notebooks/Confusion Matrices.ipynb @@ -18,6 +18,7 @@ "outputs": [], "source": [ "from EDdA import data\n", + "from EDdA.store import preparePath\n", "from EDdA.classification import confusionMatrix, metrics, toPNG, topNGrams\n", "import os" ] @@ -32,7 +33,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "id": "5ad65685", "metadata": {}, "outputs": [], @@ -40,39 +41,12 @@ "source = data.load('training_set')" ] }, - { - "cell_type": "markdown", - "id": "4e958e04", - "metadata": {}, - "source": [ - "This function rationalises the name of the files containing the confusion matrices to produce." - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "545bdb4f", - "metadata": {}, - "outputs": [], - "source": [ - "def preparePath(root, source, n, ranks, metricName):\n", - " path = \"{root}/confusionMatrix/{inputHash}/{n}grams_top{ranks}_{name}.png\".format(\n", - " root=root,\n", - " inputHash=source.hash,\n", - " n=n,\n", - " ranks=ranks,\n", - " name=metricName\n", - " )\n", - " os.makedirs(os.path.dirname(path), exist_ok=True)\n", - " return path" - ] - }, { "cell_type": "markdown", "id": "4079559f", "metadata": {}, "source": [ - "Then we only have to loop on the n-gram size (`n`), the number of `ranks` to keep when computing the most frequent ones and the comparison method (the metrics' `name`)." 
+ "We loop over the n-gram size (`n`), the number of `ranks` to keep when computing the most frequent ones and the comparison method (the metric's `name`)." ] }, { @@ -86,7 +60,7 @@ " for ranks in [10, 50, 100]:\n", " vectorizer = topNGrams(source, n, ranks)\n", " for name in ['colinearity', 'keysIntersection']:\n", - " imagePath = preparePath('.', source, n, ranks, name)\n", + " imagePath = preparePath(f\"confusionMatrix/{source.hash}/{n}grams_top{ranks}_{name}.png\")\n", " toPNG(confusionMatrix(vectorizer, metrics[name]), imagePath)" ] } -- GitLab