Skip to content
Snippets Groups Projects
Commit 5edc22a5 authored by Alice Brenon's avatar Alice Brenon
Browse files

Add notebook to visualize the graph of domain adjacencies according to their confusion matrices

parent 8da28111
No related branches found
No related tags found
No related merge requests found
......@@ -2,6 +2,7 @@
((gnu packages python-xyz) #:select (python-matplotlib
python-nltk
python-seaborn))
((gnu packages graphviz) #:select (graphviz python-graphviz))
(guix gexp)
(guix git-download)
((guix licenses) #:select (lgpl3+))
......@@ -19,14 +20,17 @@
#:select? (git-predicate %source-dir)))
(build-system python-build-system)
(propagated-inputs
(list python-matplotlib
(list graphviz
python-graphviz
python-matplotlib
python-nltk
python-pandas
python-seaborn))
python-seaborn
))
(home-page "https://gitlab.liris.cnrs.fr/geode/pyedda")
(synopsis "A set of tools to explore the EDdA")
(description
"PyEDdA provides a python library to expose the data from the Encyclopédie
by Diderot & d'Alembert, as well as several subpackages for the various
approach tested in the course of project GÉODE.")
"PyEDdA provides a python library to expose the data from the Encyclopédie
by Diderot & d'Alembert, as well as several subpackages for the various
approach tested in the course of project GÉODE.")
(license lgpl3+)))
%% Cell type:code id:fc7a6e69 tags:
``` /gnu/store/2rpsj69fzmcnafz4rml0blrynfayxqzr-python-wrapper-3.9.9/bin/python
from EDdA import data
from EDdA.store import preparePath
from EDdA.classification import confusionMatrix, metrics, toPNG, topNGrams
from IPython.display import Image
import graphviz
import os
```
%% Cell type:code id:f49c39b5 tags:
``` /gnu/store/2rpsj69fzmcnafz4rml0blrynfayxqzr-python-wrapper-3.9.9/bin/python
source = data.load('training_set')
```
%% Cell type:code id:3a37bfa1 tags:
``` /gnu/store/2rpsj69fzmcnafz4rml0blrynfayxqzr-python-wrapper-3.9.9/bin/python
def nearestAdjacency(matrix):
    """Return, for each row of a matrix, the columns holding its strongest link.

    For every row index ``i`` the largest off-diagonal value ``matrix[i][j]``
    (``j != i``) is looked up and every column ``j`` reaching that value is
    kept, so ties yield several neighbours.

    Args:
        matrix: object supporting ``len()`` and ``matrix[i][j]`` indexing. It
            is treated as square: ``len(matrix)`` bounds both the row and the
            column indices.

    Returns:
        A list with one entry per row: the column indices (in increasing
        order) whose value equals the row's off-diagonal maximum, or an empty
        list when that maximum is 0 or when the row has no off-diagonal cell
        (1x1 matrix).
    """
    dimension = len(matrix)
    adjacency = []
    for i in range(dimension):
        others = [j for j in range(dimension) if j != i]
        # default=0 covers the 1x1 matrix: it has no off-diagonal cell and a
        # bare max() on an empty sequence would raise ValueError.
        link = max((matrix[i][j] for j in others), default=0)
        if link == 0:
            # A null maximum is interpreted as "no neighbour at all".
            adjacency.append([])
        else:
            adjacency.append([j for j in others if matrix[i][j] == link])
    return adjacency
```
%% Cell type:code id:b9c92861 tags:
``` /gnu/store/2rpsj69fzmcnafz4rml0blrynfayxqzr-python-wrapper-3.9.9/bin/python
def listToMatrix(adjacencyList):
    """Convert an adjacency list into a square 0/1 adjacency matrix.

    Args:
        adjacencyList: sequence whose ``i``-th item lists the indices that
            node ``i`` points to.

    Returns:
        A list of ``len(adjacencyList)`` rows, each of that same length,
        where cell ``[i][j]`` is 1 if ``j`` appears in ``adjacencyList[i]``
        and 0 otherwise.
    """
    size = len(adjacencyList)
    # Build each row separately so that rows do not share the same list object.
    matrix = [[0] * size for _ in range(size)]
    for row, targets in zip(matrix, adjacencyList):
        for target in targets:
            row[target] = 1
    return matrix
```
%% Cell type:code id:69d494ab tags:
``` /gnu/store/2rpsj69fzmcnafz4rml0blrynfayxqzr-python-wrapper-3.9.9/bin/python
def showGraph(n, ranks, metricsName):
    """Render the nearest-adjacency graph between domains and return it as an image.

    The confusion matrix computed from ``topNGrams(source, n, ranks)`` with
    the metric ``metrics[metricsName]`` is reduced by ``nearestAdjacency`` to
    an adjacency list, which is drawn as a left-to-right directed graph whose
    nodes are labelled with ``data.domains``.

    Args:
        n: forwarded to ``topNGrams`` (presumably the n-gram size) and used in
            the output file name.
        ranks: forwarded to ``topNGrams`` (presumably how many top n-grams are
            kept) and used in the output file name.
        metricsName: key selecting an entry of ``metrics``; also used in the
            output file name.

    Returns:
        An ``IPython.display.Image`` built from the file path returned by
        ``Digraph.render`` for the PNG output.
    """
    ngrams = topNGrams(source, n, ranks)
    confusion = confusionMatrix(ngrams, metrics[metricsName])
    neighbours = nearestAdjacency(confusion)

    graph = graphviz.Digraph(graph_attr={'rankdir': 'LR'})
    # Declare every domain before any edge so that domains without any link
    # still show up in the drawing.
    for index in range(len(neighbours)):
        graph.node(data.domains[index])
    for index, targets in enumerate(neighbours):
        for target in targets:
            graph.edge(data.domains[index], data.domains[target])

    # NOTE(review): preparePath presumably makes sure the target directory
    # exists and returns the path — confirm against EDdA.store.
    gvPath = preparePath(f'../graph/{source.hash}/{n}grams_top{ranks}_{metricsName}.gv')
    return Image(filename=graph.render(gvPath, format='png'))
```
%% Cell type:code id:3d0f3709 tags:
``` /gnu/store/2rpsj69fzmcnafz4rml0blrynfayxqzr-python-wrapper-3.9.9/bin/python
# Render one graph for every combination of the first showGraph argument
# (1 to 3), the second one (10, 50 or 100) and each key of `metrics`.
# The Image objects returned by showGraph are not used here.
for n in (1, 2, 3):
    for ranks in (10, 50, 100):
        for name in metrics:
            showGraph(n, ranks, name)
```
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment