#!/usr/bin/env python3
"""Render confusion-matrix heatmaps for n-gram based vectorizers.

Usage: SCRIPT ARTICLES_DATA(.csv) OUTPUT_DIR [NGRAM SIZE] [TOP_RANKS_SIZE] [METRIC_NAME]

One PNG is written per (n-gram size, top-ranks size, metric) combination,
under ``OUTPUT_DIR/confusionMatrix/<source hash>/``.
"""

from EDdA import data
from EDdA.store import preparePath
from EDdA.classification import confusionMatrix, heatmap, metrics, topNGrams
import sys


def __syntax(this):
    """Print the command-line usage on stderr and exit with status 1.

    this -- the program name to show in the usage line (normally sys.argv[0]).
    """
    print(
        "Syntax: {this} {required} {optional}".format(
            this=this,
            required="ARTICLES_DATA(.csv) OUTPUT_DIR",
            optional="[NGRAM SIZE] [TOP_RANKS_SIZE] [METRIC_NAME]"
        ),
        file=sys.stderr
    )
    sys.exit(1)


def __compute(sourcePath, ns, ranksToTry, metricNames, root):
    """Generate one heatmap image per (n, ranks, metric) combination.

    sourcePath  -- path handed to data.load to obtain the articles source.
    ns          -- iterable of n-gram sizes to try.
    ranksToTry  -- iterable of "top ranks" sizes passed to topNGrams.
    metricNames -- iterable of keys into the `metrics` mapping.
    root        -- output directory; images go under
                   {root}/confusionMatrix/{source.hash}/.

    An unknown metric name raises KeyError when looked up in `metrics`.
    """
    source = data.load(sourcePath)
    path = f"{root}/confusionMatrix/{source.hash}"
    for n in ns:
        for ranks in ranksToTry:
            # The vectorizer depends only on (n, ranks): build it once and
            # reuse it for every metric.
            vectorizer = topNGrams(source, n, ranks)
            for name in metricNames:
                # NOTE(review): preparePath presumably creates the missing
                # parent directories and returns the path -- confirm in
                # EDdA.store.
                imagePath = preparePath(f"{path}/{n}grams_top{ranks}_{name}.png")
                heatmap(confusionMatrix(vectorizer, metrics[name]), imagePath)


if __name__ == '__main__':
    argc = len(sys.argv)
    # Both ARTICLES_DATA and OUTPUT_DIR are required, so sys.argv needs at
    # least 3 entries (program name + 2 arguments). Checking `argc < 2` let a
    # call with a single argument through, which crashed on sys.argv[2].
    if argc < 3:
        __syntax(sys.argv[0])
    else:
        sourcePath = sys.argv[1]
        outputDir = sys.argv[2]
        # Each optional argument restricts the run to a single value; when it
        # is omitted the built-in defaults are explored.
        ns = [int(sys.argv[3])] if argc > 3 else range(1, 4)
        ranksToTry = [int(sys.argv[4])] if argc > 4 else [10, 100, 50]
        metricNames = [sys.argv[5]] if argc > 5 else metrics.keys()
        __compute(sourcePath, ns, ranksToTry, metricNames, outputDir)