# [blame view residue] Newer / Older -- Alice Brenon committed
#!/usr/bin/env python3
from EDdA import data
from EDdA.store import preparePath
from EDdA.classification import confusionMatrix, heatmap, metrics, topNGrams
# [blame view residue] Alice Brenon committed
import sys
def __syntax(this):
    """Print the command-line usage line to standard error and exit.

    Args:
        this: Text shown as the program name in the usage line (the
            entry point passes ``sys.argv[0]``).

    Raises:
        SystemExit: Always, with exit status 1.
    """
    template = "Syntax: {this} {required} {optional}"
    positional = "ARTICLES_DATA(.csv) OUTPUT_DIR"
    extras = "[NGRAM SIZE] [TOP_RANKS_SIZE] [METRIC_NAME]"
    usage = template.format(this=this, required=positional, optional=extras)
    print(usage, file=sys.stderr)
    # Same effect as sys.exit(1): a non-zero status signals the usage error.
    raise SystemExit(1)
def __compute(sourcePath, ns, ranksToTry, metricNames, root):
    """Render one confusion-matrix heatmap per (n, ranks, metric) combination.

    The source is loaded once with ``data.load``. For every ``n`` in ``ns``
    and every ``ranks`` in ``ranksToTry`` a vectorizer is built with
    ``topNGrams``; it is then reused for every name in ``metricNames``.

    Images are written under
    ``{root}/confusionMatrix/{source.hash}/{n}grams_top{ranks}_{name}.png``.

    Args:
        sourcePath: Location handed to ``data.load``.
        ns: Iterable of n-gram sizes to try.
        ranksToTry: Iterable of top-rank sizes to try (iterated once per n).
        metricNames: Iterable of keys into ``metrics`` (iterated once per
            (n, ranks) pair).
        root: Base output directory.
    """
    corpus = data.load(sourcePath)
    outputDir = f"{root}/confusionMatrix/{corpus.hash}"
    for n in ns:
        for ranks in ranksToTry:
            # Built once per (n, ranks) and shared by all metrics below.
            vectorizer = topNGrams(corpus, n, ranks)
            for name in metricNames:
                target = f"{outputDir}/{n}grams_top{ranks}_{name}.png"
                # NOTE(review): preparePath presumably creates the parent
                # directories and returns the path -- confirm in EDdA.store.
                imagePath = preparePath(target)
                matrix = confusionMatrix(vectorizer, metrics[name])
                heatmap(matrix, imagePath)
# [blame view residue] Alice Brenon committed
if __name__ == '__main__':
    # Command-line entry point:
    #   ARTICLES_DATA OUTPUT_DIR [NGRAM SIZE] [TOP_RANKS_SIZE] [METRIC_NAME]
    argc = len(sys.argv)
    # sys.argv[0] is the script itself, so both required arguments are only
    # present when argc >= 3. The previous `argc < 2` check let a lone
    # ARTICLES_DATA through and then crashed with IndexError on sys.argv[2]
    # instead of printing the usage message.
    if argc < 3:
        __syntax(sys.argv[0])
    else:
        sourcePath = sys.argv[1]
        outputDir = sys.argv[2]
        # Each optional argument narrows the sweep to a single value; when it
        # is omitted the default collection on the right is used instead.
        ns = [int(sys.argv[3])] if argc > 3 else range(1, 4)
        ranksToTry = [int(sys.argv[4])] if argc > 4 else [10, 100, 50]
        metricNames = [sys.argv[5]] if argc > 5 else metrics.keys()
        __compute(sourcePath, ns, ranksToTry, metricNames, outputDir)