def extractBivariate(specificities, sep, into):
    """Split the first index level into several named index levels.

    The first component of every index entry is split on `sep`; the
    resulting pieces become new index levels named after `into`. They
    replace the first level of the original index, while the remaining
    original levels are kept after them, in their original order.

    Unlike a naive in-place implementation, the input frame is left
    untouched: the work is done on a copy, so callers can keep using
    `specificities` afterwards without finding extra columns in it.

    Parameters:
        specificities: DataFrame whose index entries are tuples (i.e. a
            MultiIndex) and whose first component is a string containing
            `sep`. (Presumably the output of `toLongFormat` -- confirm
            against the callers.)
        sep: separator passed to `str.split` on the first index component.
        into: names of the new index levels, one per piece produced by the
            split. Every entry must split into exactly `len(into)` pieces,
            otherwise pandas rejects the column assignment.

    Returns:
        A new DataFrame with the same columns as the input, indexed by
        `into` followed by every original index level but the first.
    """
    newLevels = list(into)
    keptLevels = list(specificities.index.names)[1:]
    columns = specificities.columns
    # One list of pieces per row, in row order.
    pieces = [key[0].split(sep) for key in specificities.index]
    # Work on a copy so the caller's frame does not gain the `into` columns.
    widened = specificities.copy()
    widened[newLevels] = pieces
    return widened.reset_index().set_index(newLevels + keptLevels)[columns]
def convert(inputTSV, outputTSV):
    """Rewrite the table read from `inputTSV` in long format to `outputTSV`.

    The input is loaded through `tabular`, reshaped by `toLongFormat`, and
    the result is written as tab-separated values.
    """
    longTable = toLongFormat(tabular(inputTSV))
    longTable.to_csv(outputTSV, sep='\t')