Skip to content
Snippets Groups Projects
Commit 263c2537 authored by Fize Jacques's avatar Fize Jacques
Browse files

Add evaluation script for baselines

parent 56a4ce79
No related branches found
No related tags found
No related merge requests found
from joblib import load
import pandas as pd
from lib.utils_geo import haversine_pd,latlon2healpix
import argparse
import os
# Command-line interface: three required input paths plus an optional HEALPix
# resolution.
parser = argparse.ArgumentParser()
parser.add_argument("model_file")
parser.add_argument("vectorizer_file")
parser.add_argument("geocoding_dataset")
# type=int is required: without it a value supplied on the command line would
# arrive as a string while the default stays an int, so the value handed to
# latlon2healpix would change type depending on how the script is invoked.
parser.add_argument(
    "--healpix-nside",
    type=int,
    default=128,
    help="HEALPix nside passed to latlon2healpix (default: 128)",
)
args = parser.parse_args()

MODEL_FN = args.model_file
VECTORIZER_FN = args.vectorizer_file
GEOCODING_DATASET_FN = args.geocoding_dataset
HEALPIX_RES = args.healpix_nside

# Fail early, before anything is loaded, if one of the input paths is missing.
for fn in [MODEL_FN, VECTORIZER_FN, GEOCODING_DATASET_FN]:
    if not os.path.exists(fn):
        raise FileNotFoundError("File {0} does not exists!".format(fn))
# Load the serialized model and its vectorizer.
model = load(MODEL_FN)
vectorizer = load(VECTORIZER_FN)

# Read the tab-separated geocoding dataset.
df = pd.read_csv(GEOCODING_DATASET_FN, sep="\t")


def _row_to_healpix(row):
    """Return the HEALPix cell of a row's coordinates at resolution HEALPIX_RES."""
    return latlon2healpix(row["latitude"], row["longitude"], HEALPIX_RES)


def _row_to_text(row):
    """Return the toponym and its context joined by a single space."""
    return row["toponym"] + " " + row["toponym_context"]


# Ground truth: the cell containing each row's coordinates, computed with the
# resolution given on the command line.
df["hp_split"] = df.apply(_row_to_healpix, axis=1)

# Model input: one text string per row, then vectorized.
df["input_"] = df.apply(_row_to_text, axis=1)
X_test = vectorizer.transform(df["input_"].values)

# Predicted HEALPix cell for each row.
df["hp_pred"] = model.predict(X_test)

# Accuracy: share of rows whose predicted cell equals the ground-truth cell.
n_correct = (df["hp_pred"] == df["hp_split"]).sum()
n_total = len(df)
print(n_correct / n_total)
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment