#!/usr/bin/env python3
"""Convert a wide TXM table (TSV) into a long-format TSV.

The companion Makefile rule invokes this script as:

    %.tsv: %_TXM.tsv
        ./visualisation/LongFormatFromTXM.py $< $@

Output layout: one line per (original column header, first-column value)
pair, with the cell value stored under 'Spécificité' and the first-column
value stored under 'lemme'.
"""

import sys

import pandas


def extractColumn(rows, name):
    """Return column *name* of *rows* as a two-column long-format frame.

    The result keeps the index of *rows*; 'Spécificité' holds the values of
    the selected column and 'lemme' repeats *name* on every line.

    Unlike the previous implementation, *rows* is left untouched, so callers
    no longer need to hand in a defensive copy of the whole table.
    """
    return rows[name].rename('Spécificité').to_frame().assign(lemme=name)


def toLongFormat(txmData):
    """Reshape a wide TXM frame into long format.

    The first column of *txmData* becomes the 'lemme' label and every other
    column header becomes an index entry of the result. The result's index
    is named after the header of that first column.

    A table with a header but no data rows yields an empty frame with the
    expected columns instead of failing inside pandas.concat.
    """
    rows = txmData.set_index(txmData.columns[0]).transpose()
    # After transposing, the name of the former index sits on the columns
    # axis; move it to the index so it ends up as the first header cell of
    # the written TSV.
    rows.index.name = rows.columns.name
    rows.columns.name = None
    if rows.columns.empty:
        # pandas.concat raises ValueError on an empty list of frames.
        return pandas.DataFrame(columns=['Spécificité', 'lemme'],
                                index=rows.index[:0])
    return pandas.concat([extractColumn(rows, c) for c in rows.columns],
                         axis=0)


def convert(inputTSV, outputTSV):
    """Read the TXM table at *inputTSV* and write it to *outputTSV* as TSV."""
    # Imported here because only this I/O path needs the project loader; the
    # reshaping helpers above then stay importable with pandas alone.
    # NOTE(review): tabular presumably loads a TSV file into a pandas
    # DataFrame -- confirm against the GEODE package.
    from GEODE import tabular

    toLongFormat(tabular(inputTSV)).to_csv(outputTSV, sep='\t')


if __name__ == '__main__':
    if len(sys.argv) != 3:
        sys.exit(f'Usage: {sys.argv[0]} INPUT_TXM.tsv OUTPUT.tsv')
    convert(*sys.argv[1:])