Newer
Older
#!/usr/bin/env python3
import pandas
import JSONL
import sys
def tsv_row(annotation):
    """Flatten one annotation record into a flat dict for a TSV row.

    Reads the 'work', 'volume', 'article' and 'paragraph' entries from
    ``annotation['meta']``, the whitespace-stripped ``annotation['text']``
    (stored as 'content') and ``annotation['label']`` (stored as
    'paragraphFunction').

    Raises:
        KeyError: if any of the keys read above is missing.
    """
    meta = annotation['meta']
    # Insertion order here is the column order of the resulting row.
    row = {field: meta[field]
           for field in ('work', 'volume', 'article', 'paragraph')}
    row['content'] = annotation['text'].strip()
    row['paragraphFunction'] = annotation['label']
    return row
def acceptedToTSV(inputJSONL, outputTSV):
    """Write the given annotations to ``outputTSV`` as tab-separated values.

    Each item of ``inputJSONL`` is converted with ``tsv_row``; the resulting
    rows are ordered with ``toKey`` as the sort key and written through a
    pandas DataFrame with a tab separator and without the index column.
    """
    rows = list(map(tsv_row, inputJSONL))
    # NOTE(review): `toKey` is not defined in the visible part of this file;
    # confirm it is in scope, otherwise this line raises NameError.
    rows.sort(key=toKey)
    table = pandas.DataFrame(rows)
    table.to_csv(outputTSV, sep='\t', index=False)
# Script entry point: argv[1] is handed to JSONL.load, argv[2] is the TSV
# destination passed to acceptedToTSV.
if __name__ == '__main__':
    loaded = JSONL.load(sys.argv[1])
    acceptedToTSV(loaded, sys.argv[2])