diff --git a/scripts/stanza-txt-annotator.py b/scripts/stanza-txt-annotator.py
index c028f95f6ef8289f83c136e461cd3902919d2a7e..93e2d7837d647d1301dae8bb1dbd1f9efccfee61 100755
--- a/scripts/stanza-txt-annotator.py
+++ b/scripts/stanza-txt-annotator.py
@@ -5,6 +5,7 @@ import os.path
 import stanza
 from stanza.utils.conll import CoNLL
 import sys
+from tqdm import tqdm
 
 def oneLine(sentence):
     return sentence.text.replace('\n', ' ').replace('\t', ' ')
@@ -27,7 +28,7 @@ class Annotator:
     def annotate_dir(self, directory):
         source_path = f'{self.source}/{directory}'
         os.makedirs(f'{self.target}/{directory}', exist_ok=True)
-        for name in os.listdir(source_path):
+        for name in tqdm(os.listdir(source_path)):
             path = f'{source_path}/{name}'
             relpath = f'{directory}/{name}'
             if os.path.isdir(path):
@@ -40,13 +41,13 @@ class Annotator:
         if not os.path.isfile(outputFile):
             with open(f'{self.source}/{file}', 'r') as source:
                 document = source.read()
-            print("trying " + outputFile)
             try:
                 with open(outputFile, 'w') as target:
                     for paragraph in document.split('\n\n'):
                         # print(f'# newpar id = {self.newpar()}', file=target)
                         self.annotate_paragraph(paragraph, target)
             except Exception:
+                print("Failed trying " + outputFile)
                 os.unlink(outputFile)
                 sys.exit(1)
 