From 85986ba965387021dea0473ae691a73541fb68a6 Mon Sep 17 00:00:00 2001 From: Alice BRENON <alice.brenon@ens-lyon.fr> Date: Tue, 19 Apr 2022 19:10:24 +0200 Subject: [PATCH] Add function to EDdA.data module to keep only the first word of multi-word domain labels longer than 20 characters --- EDdA/classification/classSimilarities.py | 7 ++++--- EDdA/data.py | 7 +++++++ 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/EDdA/classification/classSimilarities.py b/EDdA/classification/classSimilarities.py index bcc7c05..e36e390 100644 --- a/EDdA/classification/classSimilarities.py +++ b/EDdA/classification/classSimilarities.py @@ -33,10 +33,11 @@ def confusionMatrix(vectorizer, metric, domains=data.domains): m[a][b] = metric(vectorizer(domains[a]), vectorizer(domains[b])) return m -def toPNG(matrix, filePath, domains=data.domains): +def toPNG(matrix, filePath, domains=list(map(data.shortDomain, data.domains)), **kwargs): plot.figure(figsize=(16,13)) + if 'cmap' not in kwargs: + kwargs['cmap'] = 'Blues' ax = seaborn.heatmap( - matrix, xticklabels=domains, yticklabels=domains, cmap='Blues' + matrix, xticklabels=domains, yticklabels=domains, **kwargs ) plot.savefig(filePath, dpi=300, bbox_inches='tight') - diff --git a/EDdA/data.py b/EDdA/data.py index 2243c1c..49ae599 100644 --- a/EDdA/data.py +++ b/EDdA/data.py @@ -32,5 +32,12 @@ domains = [ domainId = dict([(domains[k], k) for k in range(0, len(domains))]) +def shortDomain(name, maxSize=20): + if len(name) > maxSize: + components = name.split(' ') + return components[0] + ' […]' + else: + return name + def domain(articles, name): return articles[articles.ensemble_domaine_enccre == name] -- GitLab