From 8d9f69bfc4d80e216dc5eb40551ca2a7e8df4f2c Mon Sep 17 00:00:00 2001
From: Alice BRENON <alice.brenon@ens-lyon.fr>
Date: Wed, 12 Mar 2025 17:30:45 +0100
Subject: [PATCH] New script to convert measures from TXM into the format useful with seaborn's barplot

---
 Makefile                           |  3 +++
 visualisation/LongFormatFromTXM.py | 38 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 41 insertions(+)
 create mode 100755 visualisation/LongFormatFromTXM.py

diff --git a/Makefile b/Makefile
index 0d640f3..5e61750 100644
--- a/Makefile
+++ b/Makefile
@@ -109,6 +109,9 @@ data/corpus/domainGroup_frequencies.tsv: data/corpus/metadata.tsv
 figure/histogram/%.png: data/%.tsv
 	./visualisation/BarPlot.py $< $@
 
+%.tsv: %_TXM.tsv
+	./visualisation/LongFormatFromTXM.py $< $@
+
 %/results.tsv: data/corpus/metadata.tsv %/predictions.tsv
 	sed '1 s/domain/truth/' $(METADATA) > $@
 
diff --git a/visualisation/LongFormatFromTXM.py b/visualisation/LongFormatFromTXM.py
new file mode 100755
index 0000000..a86357f
--- /dev/null
+++ b/visualisation/LongFormatFromTXM.py
@@ -0,0 +1,38 @@
+#!/usr/bin/env python3
+"""Convert TXM measures to the long format used by seaborn's barplot."""
+
+from GEODE import tabular
+import pandas
+import sys
+
+def extractColumn(rows, name):
+    """Return column `name` of `rows` as a long-format table.
+
+    The result has the same index as `rows` and two columns: 'Spécificité'
+    with the values of the column and 'lemme' repeating `name` on each row.
+    `rows` is not modified, so callers don't have to copy it beforehand.
+    """
+    column = rows[[name]].rename(columns={name: 'Spécificité'})
+    return column.assign(lemme=name)
+
+def convert(inputTSV, outputTSV):
+    """Write to `outputTSV` the long format of the TXM table in `inputTSV`.
+
+    The values of the first input column become the columns of the
+    transposed table, which are then stacked on top of each other, each
+    row being tagged in 'lemme' with the label of the column it comes from.
+    """
+    # The calls below need `tabular` to return a pandas DataFrame.
+    txmData = tabular(inputTSV)
+    rows = txmData.set_index(txmData.columns[0]).transpose()
+    # Transposing leaves the header of the first input column as the name
+    # of the column axis: move it to the index so that it gets written as
+    # the header of the first output column.
+    rows.index.name = rows.columns.name
+    rows.columns.name = None
+    longFormat = pandas.concat([extractColumn(rows, c) for c in rows.columns],
+                               axis=0)
+    longFormat.to_csv(outputTSV, sep='\t')
+
+if __name__ == '__main__':
+    convert(*sys.argv[1:])
-- 
GitLab