From bec657f062b75fe2f7b9aaf05afcfbaea42eca04 Mon Sep 17 00:00:00 2001
From: Alice BRENON <alice.brenon@ens-lyon.fr>
Date: Wed, 19 Mar 2025 17:58:19 +0100
Subject: [PATCH] Improve bivariate specificity graph to work directly on
 measures from TXM

---
 visualisation/BivariateSpecificity.py | 9 ++++++---
 visualisation/LongFormatFromTXM.py    | 6 ++++++
 2 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/visualisation/BivariateSpecificity.py b/visualisation/BivariateSpecificity.py
index a67bd9e..2633998 100755
--- a/visualisation/BivariateSpecificity.py
+++ b/visualisation/BivariateSpecificity.py
@@ -1,16 +1,19 @@
 #!/usr/bin/env python3
 
 from GEODE import tabular
+from LongFormatFromTXM import extractBivariate, toLongFormat
 from color import qualitative
 import matplotlib.pyplot as plot
 import seaborn
 import sys
 
 def bivariateSpecificity(inputTSV, outputPNG):
-    specificities = tabular(inputTSV)
+    specificities = toLongFormat(tabular(inputTSV))
+    bivariate = extractBivariate(specificities, '_', ['domaine', 'auteur'])
     ax = seaborn.catplot(
-            data=specificities, x='auteur', y='Spécificité', col='domaine',
-            hue='lemme', kind='bar', zorder=3, palette=qualitative)
+            data=bivariate.reset_index(), x='auteur', y='Spécificité',
+            col='domaine', hue='lemme', kind='bar', zorder=3, height=3.5,
+            aspect=1.6, palette=qualitative)
     ax.despine(left=True)
     ax.set_xlabels('')
     ax.set_titles('{col_name}')
diff --git a/visualisation/LongFormatFromTXM.py b/visualisation/LongFormatFromTXM.py
index 903bae2..39bc75e 100755
--- a/visualisation/LongFormatFromTXM.py
+++ b/visualisation/LongFormatFromTXM.py
@@ -12,6 +12,12 @@ def toLongFormat(txmData):
     specificities.name = 'Spécificité'
     return pandas.DataFrame(specificities)
 
+def extractBivariate(specificities, sep, into):  # split index level 0 on `sep` into the index levels named in `into`
+    index = specificities.index.names  # names of the current index levels
+    columns = specificities.columns  # taken before the `into` columns are added, so only these are returned
+    specificities[into] = [*specificities.index.map(lambda r: r[0].split(sep))]  # NOTE(review): writes the `into` columns onto the frame passed in; assumes each index entry is a tuple whose first item splits into len(into) parts - confirm
+    return specificities.reset_index().set_index(into + index[1:])[columns]  # new index: `into`, then the original levels after the first
+
 def convert(inputTSV, outputTSV):
     toLongFormat(tabular(inputTSV)).to_csv(outputTSV, sep='\t')
 
-- 
GitLab