Skip to content
Snippets Groups Projects
Commit 62603b09 authored by Alice Brenon
Browse files

Add a python data-analysis tool to extract statistical metrics from the raw measurements

parent 922f5f43
No related branches found
No related tags found
No related merge requests found
#!/usr/bin/env -S PYTHONPATH=lib/python python3
from GEODE import tabular
from GEODE.Store import prepare
import os
import pandas
import sys
def keyOf(t):
    """Normalise a group key by stripping leading/trailing ':' characters.

    Args:
        t: A group key. Either a single value or a tuple of values (a tuple
            is what a multi-column grouping yields).

    Returns:
        For a tuple (or tuple subclass): a plain tuple with every component
        normalised. For a string: the string without leading or trailing
        colons. Any other value (e.g. a numeric key) is returned unchanged,
        since it has no colons to strip.
    """
    # isinstance rather than an exact type comparison so that tuple
    # subclasses (named tuples, for instance) are treated as tuples too.
    if isinstance(t, tuple):
        return tuple(keyOf(part) for part in t)
    if isinstance(t, str):
        return t.strip(':')
    return t
def getStats(series):
    """Return selected descriptive statistics of *series*.

    Calls ``describe()`` on the argument and keeps only the mean, standard
    deviation, minimum, quartiles and maximum entries, in that order.
    """
    summary = series.describe()
    wanted = ['mean', 'std', 'min', '25%', '50%', '75%', 'max']
    return summary[wanted]
def partitonStats(metric, partition):
    """Build a table of statistics of *metric*, one row per group.

    Args:
        metric: Name of the column to summarise inside each group.
        partition: Iterable of ``(key, frame)`` pairs, such as the result of
            a ``groupby`` call.

    Returns:
        A DataFrame indexed by the normalised group keys. Its columns are the
        statistics returned by ``getStats``, followed by ``total`` (sum of
        the metric in the group) and ``percentage`` (the group's share of
        the overall total, as a string rounded to two decimals and suffixed
        with ``%``).
    """
    # Map each normalised key to the metric column of its group.
    values_by_group = {}
    for entry in partition:
        values_by_group[keyOf(entry[0])] = entry[1][metric]

    summaries = {
        label: getStats(values) for label, values in values_by_group.items()
    }
    # One column per group at construction time; transpose to get one row
    # per group instead.
    stats = pandas.DataFrame(summaries).transpose()

    totals = [values.sum() for values in values_by_group.values()]
    stats['total'] = totals
    overall = stats['total'].sum()
    stats['percentage'] = [f"{round(100*t/overall, 2)}%" for t in totals]
    return stats
def measurePartition(path, key, metric, outputPath):
    """Compute per-group statistics of a metric and write them as TSV.

    Args:
        path: Location of the input measurements, passed to ``tabular``.
            # NOTE(review): ``tabular`` comes from GEODE; presumably it loads
            # the file into a pandas DataFrame — confirm.
        key: Column (or columns) to group the measurements by.
        metric: Name of the column to summarise within each group. The
            synthetic ``count`` column added below may be used here.
        outputPath: Destination of the tab-separated result.
    """
    measures = tabular(path)
    # Constant column of ones: summing it per group yields the group size.
    measures['count'] = 1
    stats = partitonStats(metric, measures.groupby(key))
    stats.to_csv(outputPath, sep='\t')
if __name__ == '__main__':
    # measurePartition takes exactly four positional arguments; report a
    # usage line instead of letting a wrong count surface as a TypeError
    # traceback from the star-call below.
    if len(sys.argv) != 5:
        sys.exit(f"Usage: {sys.argv[0]} PATH KEY METRIC OUTPUT_PATH")
    measurePartition(*sys.argv[1:])
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment