#!/usr/bin/env python3
"""Command-line driver that runs ``topNGrams`` over a grid of parameters.

Usage:
    SCRIPT ARTICLES_DATA(.tsv) [NGRAM SIZE] [TOP_RANKS_SIZE] [DOMAIN]

Only the articles file is required. Each optional argument, when given,
restricts the corresponding dimension of the grid to that single value;
otherwise the defaults set in the ``__main__`` section are used.
"""

from EDdA import data
from EDdA.classification import topNGrams
import sys


def __syntax(this):
    """Print the usage message to stderr and exit with status 1.

    Args:
        this: Name under which the script was invoked (``sys.argv[0]``).
    """
    print(
        "Syntax: {this} {required} {optional}".format(
            this=this,
            required="ARTICLES_DATA(.tsv)",
            optional="[NGRAM SIZE] [TOP_RANKS_SIZE] [DOMAIN]"
        ),
        file=sys.stderr
    )
    sys.exit(1)


def __populateCache(articlesSource, ns, ranksToTry, domains):
    """Call ``topNGrams`` for every (n, ranks, domain) combination.

    The values returned by the per-domain calls are discarded; this function
    is run purely for whatever side effects those calls have (presumably
    filling an on-disk or in-memory cache, as the name suggests -- confirm
    against ``EDdA.classification``).

    Args:
        articlesSource: Path handed to ``data.load``.
        ns: Iterable of n-gram sizes.
        ranksToTry: Iterable of "top ranks" sizes.
        domains: Iterable of domains; must be re-iterable, since it is
            walked once per (n, ranks) pair.
    """
    for n in ns:
        for ranks in ranksToTry:
            # NOTE(review): the articles are re-loaded for every (n, ranks)
            # pair. If topNGrams neither mutates nor retains ownership of its
            # input, this load could be hoisted above both loops -- confirm
            # before changing.
            cached = topNGrams(data.load(articlesSource), n, ranks)
            for domain in domains:
                cached(domain)


if __name__ == '__main__':
    argc = len(sys.argv)
    if argc < 2:
        __syntax(sys.argv[0])
    else:
        articlesSource = sys.argv[1]
        try:
            ns = [int(sys.argv[2])] if argc > 2 else range(1, 4)
            ranksToTry = [int(sys.argv[3])] if argc > 3 else [10, 100, 50]
        except ValueError:
            # A non-integer size is a usage error: show the syntax reminder
            # (which exits with status 1) rather than a raw traceback.
            __syntax(sys.argv[0])
        domains = [sys.argv[4]] if argc > 4 else data.domains
        __populateCache(articlesSource, ns, ranksToTry, domains)