From 263c25379c3ed18e6ce72340b124bbe6b957cc53 Mon Sep 17 00:00:00 2001
From: Fize Jacques <jacques.fize@cirad.fr>
Date: Fri, 26 Mar 2021 16:27:07 +0100
Subject: [PATCH] Add evaluation script for baselines

---
 evaluate_baseline.py | 40 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 40 insertions(+)
 create mode 100644 evaluate_baseline.py

diff --git a/evaluate_baseline.py b/evaluate_baseline.py
new file mode 100644
index 0000000..2058636
--- /dev/null
+++ b/evaluate_baseline.py
@@ -0,0 +1,40 @@
+from joblib import load
+import pandas as pd
+from lib.utils_geo import haversine_pd,latlon2healpix
+import argparse
+import os
+
+parser = argparse.ArgumentParser(description="Evaluate a baseline model on a geocoding dataset and print its accuracy.")
+# --healpix-nside is parsed with type=int so a command-line value is an int like the default, not a str.
+parser.add_argument("model_file", help="path of the model file (loaded with joblib)")
+parser.add_argument("vectorizer_file", help="path of the vectorizer file (loaded with joblib)")
+parser.add_argument("geocoding_dataset", help="path of the tab-separated geocoding dataset")
+parser.add_argument("--healpix-nside", type=int, default=128, help="Healpix nside passed to latlon2healpix (default: 128)")
+args = parser.parse_args()
+
+# Aliases for the parsed command-line values used by the rest of the script.
+MODEL_FN, VECTORIZER_FN = args.model_file, args.vectorizer_file
+GEOCODING_DATASET_FN = args.geocoding_dataset
+HEALPIX_RES = args.healpix_nside
+# Fail early, reporting the first listed input path that does not exist.
+missing = [p for p in (MODEL_FN, VECTORIZER_FN, GEOCODING_DATASET_FN) if not os.path.exists(p)]
+if missing:
+    raise FileNotFoundError("File {0} does not exists!".format(missing[0]))
+
+# Load the model and the vectorizer from their joblib files (model first).
+# NOTE(review): joblib.load is pickle-based; only pass files from a trusted source.
+model, vectorizer = load(MODEL_FN), load(VECTORIZER_FN)
+
+# Load the tab-separated geocoding dataset; the columns latitude, longitude, toponym and toponym_context are read below.
+df = pd.read_csv(GEOCODING_DATASET_FN, sep="\t")
+# Ground truth: Healpix cell of each row's coordinates at HEALPIX_RES (expected to match the resolution used to train the model).
+df["hp_split"] = [latlon2healpix(lat, lon, HEALPIX_RES) for lat, lon in zip(df.latitude, df.longitude)]
+
+# Model input: toponym and its context joined by one space, built column-wise rather than with a row-by-row apply.
+df["input_"] = df.toponym + " " + df.toponym_context
+X_test = vectorizer.transform(df.input_.values)
+# Predict the Healpix cell for each row of the input.
+df["hp_pred"] = model.predict(X_test)
+
+# Print the accuracy: the fraction of rows whose predicted cell equals the ground-truth cell.
+print((df.hp_pred == df.hp_split).mean())
\ No newline at end of file
-- 
GitLab