From 8d9f69bfc4d80e216dc5eb40551ca2a7e8df4f2c Mon Sep 17 00:00:00 2001
From: Alice BRENON <alice.brenon@ens-lyon.fr>
Date: Wed, 12 Mar 2025 17:30:45 +0100
Subject: [PATCH] New script to convert measures from TXM into the format
 useful with seaborn's barplot

---
 Makefile                           |  3 +++
 visualisation/LongFormatFromTXM.py | 22 ++++++++++++++++++++++
 2 files changed, 25 insertions(+)
 create mode 100755 visualisation/LongFormatFromTXM.py

diff --git a/Makefile b/Makefile
index 0d640f3..5e61750 100644
--- a/Makefile
+++ b/Makefile
@@ -109,6 +109,9 @@ data/corpus/domainGroup_frequencies.tsv: data/corpus/metadata.tsv
 figure/histogram/%.png: data/%.tsv
 	./visualisation/BarPlot.py $< $@
 
+%.tsv: %_TXM.tsv # long-format table generated from the matching *_TXM.tsv file
+	./visualisation/LongFormatFromTXM.py $< $@
+
 %/results.tsv: data/corpus/metadata.tsv %/predictions.tsv
 	sed '1 s/domain/truth/' $(METADATA) > $@
 
diff --git a/visualisation/LongFormatFromTXM.py b/visualisation/LongFormatFromTXM.py
new file mode 100755
index 0000000..a86357f
--- /dev/null
+++ b/visualisation/LongFormatFromTXM.py
@@ -0,0 +1,22 @@
+#!/usr/bin/env python3
+
+from GEODE import tabular
+import pandas
+import sys
+
+def extractColumn(rows, name):
+    """Return column `name` as 'Spécificité' plus a constant 'lemme' column (rows untouched)."""
+    selected = rows[[name]].rename(columns={name: 'Spécificité'})
+    return selected.assign(lemme=name)
+
+def convert(inputTSV, outputTSV):
+    """Rewrite the table at inputTSV in long format as tab-separated outputTSV."""
+    source = tabular(inputTSV)
+    rows = source.set_index(source.columns[0]).transpose()
+    # The first column's header names the transposed index; column labels get no name.
+    rows.index.name, rows.columns.name = rows.columns.name, None
+    frames = [extractColumn(rows.copy(), label) for label in rows.columns]
+    pandas.concat(frames, axis=0).to_csv(outputTSV, sep='\t')
+
+if __name__ == '__main__':
+    convert(*sys.argv[1:])  # usage: LongFormatFromTXM.py INPUT_TSV OUTPUT_TSV
-- 
GitLab