#!/usr/bin/env python3
"""Write a CSV summary of the ``.tei`` article files under a root directory.

The script reads ``<root>/T<t>/article<rank>.tei`` for each volume number
``t`` and each article file found, and prints one CSV row per article to
standard output with the columns listed in ``header``.
"""

import csv
import os
import re
import sys

from bs4 import BeautifulSoup

header = ["T", "article", "head", "domain"]

# Volume numbers scanned when the caller does not supply its own: T1 .. T17.
DEFAULT_VOLUMES = range(1, 18)

# File names of the form "article<N>.tei"; N has no leading zero so that the
# captured number maps back to exactly the same file name.
_ARTICLE_FILE = re.compile(r"article([1-9]\d*)\.tei")


def getAttribute(article, attribute):
    """Return the ``value`` of the first element whose ``type`` is *attribute*.

    Args:
        article: A parsed BeautifulSoup document (or tag) to search.
        attribute: The ``type`` attribute value to look for.

    Returns:
        The element's ``value`` attribute, ``None`` if the element exists but
        carries no ``value`` attribute, or ``''`` if no element matches.
    """
    result = article.find(type=attribute)
    if result is None:
        return ''
    return result.get('value')


def _articleRanks(path):
    """Return the sorted article numbers of the ``article<N>.tei`` files in *path*."""
    matches = map(_ARTICLE_FILE.fullmatch, os.listdir(path))
    return sorted(int(match.group(1)) for match in matches if match)


def main(rootDirectory, volumes=DEFAULT_VOLUMES):
    """Print the CSV header and one row per article found under *rootDirectory*.

    Args:
        rootDirectory: Directory that contains the ``T<t>`` sub-directories.
        volumes: Iterable of volume numbers ``t`` to scan; defaults to 1..17.

    Raises:
        OSError: If a ``T<t>`` directory or an article file cannot be read.
    """
    output = csv.writer(sys.stdout, lineterminator='\n')
    output.writerow(header)
    for t in volumes:
        path = os.path.join(rootDirectory, f"T{t}")
        # Only files actually named article<N>.tei are visited, so unrelated
        # directory entries or gaps in the numbering do not abort the run.
        for rank in _articleRanks(path):
            with open(os.path.join(path, f"article{rank}.tei")) as source:
                # Name the parser explicitly: without it bs4 picks whichever
                # parser happens to be installed and emits a warning.
                root = BeautifulSoup(source, "html.parser")
            normclass = getAttribute(root, "normclass")
            # Fall back to the generated class when the article is explicitly
            # marked as unclassified.
            if normclass != 'unclassified':
                domain = normclass
            else:
                domain = getAttribute(root, "generatedclass")
            output.writerow([t, rank, getAttribute(root, "head"), domain])


if __name__ == '__main__':
    if len(sys.argv) < 2:
        sys.exit(f"usage: {sys.argv[0]} ROOT_DIRECTORY")
    main(sys.argv[1])