# prodigyAcceptedJSONLToTSV.py
from GEODE import toKey
import pandas
import JSONL
import sys

def tsv_row(annotation):
    """Flatten one annotation record into a single TSV row.

    The 'work', 'volume', 'article' and 'paragraph' entries are copied from
    ``annotation['meta']``; 'content' is ``annotation['text']`` with
    surrounding whitespace stripped, and 'paragraphFunction' is
    ``annotation['label']``.

    Raises KeyError if any of those keys is missing from the annotation.
    """
    # Key insertion order is the column order of the resulting table.
    row = {field: annotation['meta'][field]
           for field in ('work', 'volume', 'article', 'paragraph')}
    row['content'] = annotation['text'].strip()
    row['paragraphFunction'] = annotation['label']
    return row

def acceptedToTSV(inputJSONL, outputTSV):
    """Write the annotations in *inputJSONL* to *outputTSV* as a TSV table.

    Each annotation is flattened with ``tsv_row``; the rows are then ordered
    using GEODE's ``toKey`` as the sort key and written tab-separated,
    without the DataFrame index column.

    inputJSONL -- iterable of annotation records accepted by ``tsv_row``
    outputTSV  -- destination handed to ``DataFrame.to_csv``
    """
    rows = [tsv_row(entry) for entry in inputJSONL]
    # list.sort is stable, like sorted(): rows with equal keys keep input order.
    rows.sort(key=toKey)
    table = pandas.DataFrame(rows)
    table.to_csv(outputTSV, sep='\t', index=False)

if __name__ == '__main__':
    # Usage: <script> INPUT OUTPUT
    # The first argument is passed to JSONL.load (presumably the path of the
    # annotations file); the second is the destination of the TSV output.
    loaded = JSONL.load(sys.argv[1])
    acceptedToTSV(loaded, sys.argv[2])