From 85986ba965387021dea0473ae691a73541fb68a6 Mon Sep 17 00:00:00 2001
From: Alice BRENON <alice.brenon@ens-lyon.fr>
Date: Tue, 19 Apr 2022 19:10:24 +0200
Subject: [PATCH] Add function to EDdA.data module to keep only the first word
 of multi-word domain labels longer than 20 characters

---
 EDdA/classification/classSimilarities.py | 7 ++++---
 EDdA/data.py                             | 7 +++++++
 2 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/EDdA/classification/classSimilarities.py b/EDdA/classification/classSimilarities.py
index bcc7c05..e36e390 100644
--- a/EDdA/classification/classSimilarities.py
+++ b/EDdA/classification/classSimilarities.py
@@ -33,10 +33,11 @@ def confusionMatrix(vectorizer, metric, domains=data.domains):
             m[a][b] = metric(vectorizer(domains[a]), vectorizer(domains[b]))
     return m
 
-def toPNG(matrix, filePath, domains=data.domains):
+def toPNG(matrix, filePath, domains=list(map(data.shortDomain, data.domains)), **kwargs):
     plot.figure(figsize=(16,13))
+    if 'cmap' not in kwargs:
+        kwargs['cmap'] = 'Blues'
     ax = seaborn.heatmap(
-            matrix, xticklabels=domains, yticklabels=domains, cmap='Blues'
+            matrix, xticklabels=domains, yticklabels=domains, **kwargs
         )
     plot.savefig(filePath, dpi=300, bbox_inches='tight')
-
diff --git a/EDdA/data.py b/EDdA/data.py
index 2243c1c..49ae599 100644
--- a/EDdA/data.py
+++ b/EDdA/data.py
@@ -32,5 +32,12 @@ domains = [
 
 domainId = dict([(domains[k], k) for k in range(0, len(domains))])
 
+def shortDomain(name, maxSize=20):
+    """Abbreviate a multi-word label longer than maxSize to its first word + ' […]'."""
+    if len(name) > maxSize and ' ' in name:
+        return name.split(' ', 1)[0] + ' […]'
+    # Short or single-word labels are kept whole: a suffix would only lengthen them.
+    return name
+
 def domain(articles, name):
     return articles[articles.ensemble_domaine_enccre == name]
-- 
GitLab