Skip to content
Snippets Groups Projects
Commit 710ceef6 authored by Jacques Fize's avatar Jacques Fize
Browse files

add disambiguation evaluation script

parent b3a74782
No related branches found
No related tags found
No related merge requests found
...@@ -147,4 +147,5 @@ notes.md ...@@ -147,4 +147,5 @@ notes.md
.idea* .idea*
other/* other/*
test* test*
nohup.out nohup.out
\ No newline at end of file log*
\ No newline at end of file
...@@ -136,7 +136,6 @@ meta_data = MetaDataSerializer( ...@@ -136,7 +136,6 @@ meta_data = MetaDataSerializer(
) )
meta_data.save("outputs/{0}.json".format(PREFIX_OUTPUT_FN)) meta_data.save("outputs/{0}.json".format(PREFIX_OUTPUT_FN))
############################################################################################# #############################################################################################
################################# LOAD DATA ################################################# ################################# LOAD DATA #################################################
############################################################################################# #############################################################################################
......
"""Evaluate toponym-disambiguation geocoder models against a labelled dataset.

Usage: python <script> <eval_dataset.csv> <models_directory>
Every "*.h5" Keras model found in <models_directory> is scored on the dataset.
"""
from glob import glob
import os
# Hide all CUDA devices so TensorFlow falls back to CPU execution.
os.environ['CUDA_VISIBLE_DEVICES'] = '-1' # No need for GPU
import argparse
import logging
import pandas as pd
from predict_toponym_coordinates import Geocoder
from lib.geo import haversine_pd
# Silence TensorFlow / TF-Hub startup and warning noise.
logging.getLogger("tensorflow").setLevel(logging.CRITICAL)
logging.getLogger("tensorflow_hub").setLevel(logging.CRITICAL)
parser = argparse.ArgumentParser()
parser.add_argument("eval_dataset")
parser.add_argument("models_directory")
args = parser.parse_args()
# Path to the evaluation CSV (e.g. "./test_dataset_ambiguity.csv").
EVAL_DATASET_FN= args.eval_dataset#"./test_dataset_ambiguity.csv"
def eval_model(eval_dataset_fn,model_fn,model_index_fn):
    """Score one trained geocoder model on a labelled evaluation dataset.

    Loads the CSV (columns: name1, name2, longitude, latitude), predicts
    coordinates for every (name1, name2) toponym pair, converts them to
    WGS84 degrees, computes the haversine distance to the ground truth,
    and prints the fraction of rows within 100/50/20 km.
    """
    data = pd.read_csv(eval_dataset_fn, index_col=0)
    model = Geocoder(model_fn, model_index_fn)

    # Batch-predict normalised coordinates, then map back to degrees.
    pred_lon, pred_lat = model.get_coords(data.name1.values, data.name2.values)
    pred_lon, pred_lat = model.wgs_coord(pred_lon, pred_lat)
    data["p_longitude"] = pred_lon
    data["p_latitude"] = pred_lat

    # Great-circle error (km) between ground truth and prediction.
    data["dist"] = haversine_pd(
        data.longitude, data.latitude, data.p_longitude, data.p_latitude)

    dataset_name = eval_dataset_fn.split("/")[-1]
    model_name = model_fn.split("/")[-1]
    print("Dataset -- {0} -- Model -- {1}".format(dataset_name, model_name))
    # Accuracy@k-km for the usual thresholds.
    for threshold in (100, 50, 20):
        print("{0}km".format(threshold), (data.dist < threshold).sum() / len(data))
# Each model lives in "<prefix>.h5" with its n-gram index beside it as
# "<prefix>_index".  BUG FIX: the original used x.rstrip(".h5"), but rstrip
# removes any trailing run of the characters '.', 'h', '5' — e.g.
# "model_h.h5" would become "model_" — rather than the ".h5" suffix.
# Slice the suffix off explicitly instead.
prefixes = [x[:-len(".h5")] for x in glob(args.models_directory + "/*.h5")]
for prefix in prefixes:
    eval_model(EVAL_DATASET_FN, prefix + ".h5", prefix + "_index")
\ No newline at end of file
from keras.models import load_model from keras.models import load_model
import os
import tensorflow as tf import tensorflow as tf
import keras.backend as K import keras.backend as K
from utils import NgramIndex from lib.ngram_index import NgramIndex
import numpy as np import numpy as np
from tensorflow.python.keras.backend import set_session from tensorflow.python.keras.backend import set_session
from tensorflow.python.keras.models import load_model from tensorflow.python.keras.models import load_model
sess = None sess = None
graph = None graph = None
...@@ -75,6 +77,16 @@ class Geocoder(object): ...@@ -75,6 +77,16 @@ class Geocoder(object):
# with graph.as_default(): # with graph.as_default():
lon,lat = self.keras_model.predict([[p],[c]]) lon,lat = self.keras_model.predict([[p],[c]])
return lon[0][0],lat[0][0] return lon[0][0],lat[0][0]
def get_coords(self,list_toponym,list_toponym_context):
# Batched variant of get_coord: predicts coordinates for whole lists of
# toponym / toponym-context string pairs in a single forward pass.
# Each string is n-gram-encoded, then zero-padded ("complete") to the
# index's max_len so every sequence shares one fixed shape.
p = [self.ngram_encoder.complete(self.ngram_encoder.encode(toponym),self.ngram_encoder.max_len) for toponym in list_toponym]
c = [self.ngram_encoder.complete(self.ngram_encoder.encode(toponym),self.ngram_encoder.max_len) for toponym in list_toponym_context]
# Stack into 2-D arrays (batch, max_len) for Keras.
p = np.array(p)
c = np.array(c)
# One prediction over the whole batch; returns two arrays of coordinates.
# NOTE(review): outputs appear to be normalised to [0, 1], not WGS84 —
# callers pass them through wgs_coord ((x*360)-180 / (x*180)-90) afterwards.
lon,lat = self.keras_model.predict([p,c])
return lon,lat
def wgs_coord(self,lon,lat): def wgs_coord(self,lon,lat):
return ((lon*360)-180),((lat*180)-90) return ((lon*360)-180),((lat*180)-90)
...@@ -98,11 +110,7 @@ class Geocoder(object): ...@@ -98,11 +110,7 @@ class Geocoder(object):
ax.plot(lon,lat,marker='o', color='red', markersize=5) ax.plot(lon,lat,marker='o', color='red', markersize=5)
plt.show() plt.show()
geocoder = Geocoder("outputs/LSTM_FR.txt_100_4_0.002_None_A_I_C.h5","./outputs/FR.txt_100_4_0.002_None_A_I_C_index")
top,topc = "Paris","Cherbourg"
lon,lat = geocoder.get_coord(top,topc)
lon,lat = geocoder.wgs_coord(lon,lat)
geocoder.plot_coord("{0},{1}".format(top,topc),lat,lon)
if __name__ == "__main__": if __name__ == "__main__":
from flask import Flask, escape, request, render_template from flask import Flask, escape, request, render_template
......
from lib.run import GridSearchModel from lib.run import GridSearchModel
from collections import OrderedDict from collections import OrderedDict
# Build all combination of relations
rels = ["-i","-a","-w --wikipedia-cooc-fn ../data/wikipedia/cooccurrence_US_FR.txt"] rels = ["-i","-a","-w --wikipedia-cooc-fn ../data/wikipedia/cooccurrence_US_FR.txt"]
comb = [] combinations = []
for rel in rels: for rel in rels:
comb.append(rel) combinations.append(rel)
for rel2 in rels: for rel2 in rels:
if not rel == rel2: if not rel == rel2:
if not rel2+ " " + rel in comb: if not rel2+ " " + rel in combinations:
comb.append(rel+ " " + rel2) combinations.append(rel+ " " + rel2)
# Init GridsearchModel
grid = GridSearchModel(\ grid = GridSearchModel(\
"python3 combination_embeddings.py", "python3 combination_embeddings.py",
**OrderedDict({ **OrderedDict({ # necessary because some args have to be given in a certain order
"rel":['-w --wikipedia-cooc-fn ../data/wikipedia/cooccurrence_US_FR.txt','-w --wikipedia-cooc-fn ../data/wikipedia/cooccurrence_US_FR.txt -i', '-w --wikipedia-cooc-fn ../data/wikipedia/cooccurrence_US_FR.txt -a','-w --wikipedia-cooc-fn ../data/wikipedia/cooccurrence_US_FR.txt -a -i'],#[comb], "rel":combinations,
"-n":[4], "-n":[4],
"--ngram-word2vec-iter" :[1], "--ngram-word2vec-iter" :[50],
"-e":[100], "-e":[100],
"geoname_fn":"../data/geonamesData/US_FR.txt".split(), "geoname_fn":"../data/geonamesData/FR.txt".split(),
"hierarchy_fn":"../data/geonamesData/hierarchy.txt".split(), "hierarchy_fn":"../data/geonamesData/hierarchy.txt".split()
"store_true":["rel"]
}.items())) }.items()))
print("########### THE FOLLOWING COMMAND(S) WILL BE EXECUTED ###########" ) print("########### THE FOLLOWING COMMAND(S) WILL BE EXECUTED ###########" )
[print(task.get_command()) for task in grid.tasks] [print(task.get_command()) for task in grid.tasks]
print("#################################################################") print("#################################################################")
grid.run("log_RUN_TEXAS_IDFrance.txt") grid.run("outputs/log_RUN_TEXAS_IDFrance.txt")
\ No newline at end of file
#["-w --wikipedia-cooc-fn ../data/wikipedia/cooccurrence_FR.txt","-w --wikipedia-cooc-fn ../data/wikipedia/cooccurrence_FR.txt -a","-w --wikipedia-cooc-fn ../data/wikipedia/cooccurrence_FR.txt -i"]
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment