# Post-patch contents of hunk `@@ -4,19 +4,16 @@` of
# visualisation/LongFormatFromTXM.py (a86357f..903bae2).
# The patch drops the per-column `extractColumn` + `pandas.concat` approach
# and reshapes the table with a single `stack()` instead.
import pandas
import sys


def toLongFormat(txmData):
    """Reshape a wide table into a one-column, long-format DataFrame.

    The first column of ``txmData`` is used as the row index, the table is
    transposed, and every cell is then emitted as its own row.

    Args:
        txmData: a pandas DataFrame with at least one column.

    Returns:
        A DataFrame with a single column ``'Spécificité'`` holding the cell
        values, indexed by a two-level MultiIndex:

        * outer level: the headers of the remaining columns of ``txmData``,
          named after the header of its first column;
        * inner level: the values of the first column, named ``'lemme'``.
    """
    lemmas = txmData.set_index(txmData.columns[0]).transpose()
    # After the transpose the first column's header is the name of the
    # *columns* axis; move it to the rows before relabelling the columns,
    # otherwise it would be overwritten by 'lemme'.
    lemmas.index.name = lemmas.columns.name
    lemmas.columns.name = 'lemme'
    return lemmas.stack().rename('Spécificité').to_frame()


def convert(inputTSV, outputTSV):
    """Load ``inputTSV``, convert it to long format and write it as TSV.

    Args:
        inputTSV: path handed to ``GEODE.tabular`` (presumably a TSV export
            from TXM that it loads into a DataFrame; confirm against GEODE).
        outputTSV: destination passed to ``DataFrame.to_csv``; the output is
            tab-separated.
    """
    # Imported here so that `toLongFormat` can be imported and used without
    # GEODE being installed; only the file-based entry point needs it.
    from GEODE import tabular
    toLongFormat(tabular(inputTSV)).to_csv(outputTSV, sep='\t')


if __name__ == '__main__':
    # Fail with a usage line rather than a TypeError from the argument
    # unpacking when the wrong number of paths is given.
    if len(sys.argv) != 3:
        sys.exit('Usage: {} INPUT_TSV OUTPUT_TSV'.format(sys.argv[0]))
    convert(*sys.argv[1:])