"""Evaluate a baseline geocoding model on a geocoding dataset.

The script loads a fitted model and its vectorizer (both joblib dumps), reads
a tab-separated dataset, maps each row's coordinates to a HEALPix cell, asks
the model to predict a cell from the text ``toponym + " " + toponym_context``
and prints the fraction of rows whose predicted cell equals the expected one.

The dataset must provide the columns ``latitude``, ``longitude``, ``toponym``
and ``toponym_context``.

Usage:
    python evaluate_baseline.py MODEL_FILE VECTORIZER_FILE GEOCODING_DATASET \
        [--healpix-nside NSIDE]
"""
# Provenance: introduced by the commit "Add evaluation script for baselines"
# (263c25379c3ed18e6ce72340b124bbe6b957cc53).
import argparse
import os

from joblib import load
import pandas as pd

from lib.utils_geo import haversine_pd, latlon2healpix

parser = argparse.ArgumentParser()

parser.add_argument("model_file", help="joblib dump of the fitted model")
parser.add_argument("vectorizer_file", help="joblib dump of the fitted vectorizer")
parser.add_argument("geocoding_dataset", help="tab-separated geocoding dataset")
# type=int is required: without it a value given on the command line would be
# forwarded as a string, while the default is an integer.
parser.add_argument(
    "--healpix-nside",
    type=int,
    default=128,
    help="HEALPix nside passed to latlon2healpix (default: 128)",
)
args = parser.parse_args()

MODEL_FN = args.model_file
VECTORIZER_FN = args.vectorizer_file
GEOCODING_DATASET_FN = args.geocoding_dataset
HEALPIX_RES = args.healpix_nside

# Fail early, before any loading starts, if an input path is missing.
for fn in [MODEL_FN, VECTORIZER_FN, GEOCODING_DATASET_FN]:
    if not os.path.exists(fn):
        raise FileNotFoundError("File {0} does not exists!".format(fn))

# Load the model and its vectorizer.
# NOTE(review): joblib.load unpickles arbitrary objects; only use trusted files.
model = load(MODEL_FN)
vectorizer = load(VECTORIZER_FN)

# Load the geocoding dataset.
df = pd.read_csv(GEOCODING_DATASET_FN, sep="\t")

# Convert the coordinates to the HEALPix resolution used to train the model.
df["hp_split"] = df.apply(
    lambda row: latlon2healpix(row.latitude, row.longitude, HEALPIX_RES),
    axis=1,
)

# Build the model input: the toponym followed by its context.
df["input_"] = df["toponym"] + " " + df["toponym_context"]
X_test = vectorizer.transform(df.input_.values)

# Predict the HEALPix cell for each pair in the input.
df["hp_pred"] = model.predict(X_test)

# Print the accuracy: the share of rows whose predicted cell is the expected one.
print((df.hp_pred == df.hp_split).mean())