#!/usr/bin/env python3
"""Label the documents of a corpus with a classifier and save the result.

Command-line usage::

    <script> MODEL CORPUS OUTPUT

``MODEL`` is handed to ``Classifier`` and ``CORPUS`` to ``corpus``
(presumably a saved model and a corpus file respectively -- confirm against
those modules). ``OUTPUT`` is the path of the tab-separated file to write.
"""
from sys import argv

import pandas

from BERT import Classifier
from Corpus import corpus


def label(classify, source, name='label') -> pandas.DataFrame:
    """
    Make predictions on a set of documents.

    Positional arguments
    :param classify: an instance of the Classifier class; it is called once
    with everything returned by ``source.get_all('content')``
    :param source: an instance of the Corpus class; must provide
    ``get_all('key')`` and ``get_all('content')``

    Keyword arguments
    :param name: defaults to 'label' — the name of the column to be created,
    that is to say, the name of the category you are predicting with your
    model (if your model labels in "Red", "Green", or "Blue", you may want to
    use `name='color'`).

    :return: a pandas DataFrame built from ``source.get_all('key')`` plus an
    additional column, named after `name`, holding the predictions
    """
    records = pandas.DataFrame(source.get_all('key'))
    # NOTE(review): assumes the classifier returns one prediction per record,
    # in the same order as the keys -- confirm against Classifier.
    records[name] = classify(source.get_all('content'))
    return records


if __name__ == '__main__':
    # Fail with a readable usage message rather than an IndexError traceback
    # when arguments are missing; extra arguments are still ignored.
    if len(argv) < 4:
        raise SystemExit('usage: {} MODEL CORPUS OUTPUT'.format(argv[0]))
    classify = Classifier(argv[1])
    source = corpus(argv[2])
    # Tab-separated output, without the DataFrame index column.
    label(classify, source).to_csv(argv[3], sep='\t', index=False)