diff --git a/.gitignore b/.gitignore
index f3c43d12fee7bb9a5da3fcd97157bedfd7f1dcf7..4d37d1f3c23f68bdece2b704142864c08e4991c7 100644
--- a/.gitignore
+++ b/.gitignore
@@ -147,4 +147,5 @@ notes.md
 .idea*
 other/*
 test*
-nohup.out
\ No newline at end of file
+nohup.out
+log*
\ No newline at end of file
diff --git a/combination_embeddings.py b/combination_embeddings.py
index fbbc40620e9cb9f32ee51752ccdb92b1225d7952..1040f14e5dc8d032dcf7cb9a89e7a7c1219b64d8 100644
--- a/combination_embeddings.py
+++ b/combination_embeddings.py
@@ -136,7 +136,6 @@ meta_data = MetaDataSerializer(
 )
 meta_data.save("outputs/{0}.json".format(PREFIX_OUTPUT_FN))
 
-
 #############################################################################################
 ################################# LOAD DATA #################################################
 #############################################################################################
diff --git a/eval_amb.py b/eval_amb.py
new file mode 100644
index 0000000000000000000000000000000000000000..7544cec68dfeea7595294d83023f08b49785523e
--- /dev/null
+++ b/eval_amb.py
@@ -0,0 +1,46 @@
+from glob import glob
+import os
+os.environ['CUDA_VISIBLE_DEVICES'] = '-1' # No need for GPU
+import argparse
+import logging
+
+import pandas as pd
+
+from predict_toponym_coordinates import Geocoder
+from lib.geo import haversine_pd
+
+logging.getLogger("tensorflow").setLevel(logging.CRITICAL)
+logging.getLogger("tensorflow_hub").setLevel(logging.CRITICAL)
+
+
+parser = argparse.ArgumentParser()
+parser.add_argument("eval_dataset")
+parser.add_argument("models_directory")
+args = parser.parse_args()
+
+
+
+EVAL_DATASET_FN= args.eval_dataset#"./test_dataset_ambiguity.csv"
+
+
+def eval_model(eval_dataset_fn,model_fn,model_index_fn):
+    df = pd.read_csv(eval_dataset_fn,index_col=0)
+    geocoder = Geocoder(model_fn,model_index_fn)
+    lon,lat = geocoder.get_coords(df.name1.values,df.name2.values)
+    lon,lat = geocoder.wgs_coord(lon,lat)
+
+    df["p_longitude"] = lon
+    df["p_latitude"] = lat
+
+    df["dist"] = haversine_pd(df.longitude,df.latitude,df.p_longitude,df.p_latitude)
+
+    print("Dataset -- {0} -- Model -- {1}".format(\
+        eval_dataset_fn.split("/")[-1],
+        model_fn.split("/")[-1]))
+    print("100km",(df.dist<100).sum()/len(df))
+    print("50km",(df.dist<50).sum()/len(df))
+    print("20km",(df.dist<20).sum()/len(df))
+
+prefixes = [x.rstrip(".h5") for x in glob(args.models_directory+"/*.h5")]
+for prefix in prefixes:
+    eval_model(EVAL_DATASET_FN,prefix + ".h5",prefix + "_index")
\ No newline at end of file
diff --git a/predict_toponym_coordinates.py b/predict_toponym_coordinates.py
index 1cf9221ada921077953f8b689fd75bae790b07ce..6fb4930e11e91b5d25d2d8c2316b5b1d133dbce7 100644
--- a/predict_toponym_coordinates.py
+++ b/predict_toponym_coordinates.py
@@ -1,12 +1,14 @@
 from keras.models import load_model
+import os
 import tensorflow as tf
 import keras.backend as K
-from utils import NgramIndex
+from lib.ngram_index import NgramIndex
 import numpy as np
 
 from tensorflow.python.keras.backend import set_session
 from tensorflow.python.keras.models import load_model
 
 
+
 sess = None
 graph = None
@@ -75,6 +77,16 @@ class Geocoder(object):
         # with graph.as_default():
         lon,lat = self.keras_model.predict([[p],[c]])
         return lon[0][0],lat[0][0]
+
+    def get_coords(self,list_toponym,list_toponym_context):
+        p = [self.ngram_encoder.complete(self.ngram_encoder.encode(toponym),self.ngram_encoder.max_len) for toponym in list_toponym]
+        c = [self.ngram_encoder.complete(self.ngram_encoder.encode(toponym),self.ngram_encoder.max_len) for toponym in list_toponym_context]
+
+        p = np.array(p)
+        c = np.array(c)
+
+        lon,lat = self.keras_model.predict([p,c])
+        return lon,lat
 
     def wgs_coord(self,lon,lat):
         return ((lon*360)-180),((lat*180)-90)
@@ -98,11 +110,7 @@ class Geocoder(object):
         ax.plot(lon,lat,marker='o', color='red', markersize=5)
         plt.show()
 
-geocoder = Geocoder("outputs/LSTM_FR.txt_100_4_0.002_None_A_I_C.h5","./outputs/FR.txt_100_4_0.002_None_A_I_C_index")
-top,topc = "Paris","Cherbourg"
-lon,lat = geocoder.get_coord(top,topc)
-lon,lat = geocoder.wgs_coord(lon,lat)
-geocoder.plot_coord("{0},{1}".format(top,topc),lat,lon)
+
 
 if __name__ == "__main__":
     from flask import Flask, escape, request, render_template
diff --git a/run_train.py b/run_train.py
index dd893950219f0c202caf35bcc3d98c8fa0cf5ac4..aa8f82620c6bb934422f2344b668274fe803d6a3 100644
--- a/run_train.py
+++ b/run_train.py
@@ -1,27 +1,31 @@
 from lib.run import GridSearchModel
 from collections import OrderedDict
 
+# Build all combination of relations
 rels = ["-i","-a","-w --wikipedia-cooc-fn ../data/wikipedia/cooccurrence_US_FR.txt"]
-comb = []
+combinations = []
 for rel in rels:
-    comb.append(rel)
+    combinations.append(rel)
     for rel2 in rels:
         if not rel == rel2:
-            if not rel2+ " " + rel in comb:
-                comb.append(rel+ " " + rel2)
+            if not rel2+ " " + rel in combinations:
+                combinations.append(rel+ " " + rel2)
 
+# Init GridsearchModel
 grid = GridSearchModel(\
    "python3 combination_embeddings.py",
-    **OrderedDict({
-    "rel":['-w --wikipedia-cooc-fn ../data/wikipedia/cooccurrence_US_FR.txt','-w --wikipedia-cooc-fn ../data/wikipedia/cooccurrence_US_FR.txt -i', '-w --wikipedia-cooc-fn ../data/wikipedia/cooccurrence_US_FR.txt -a','-w --wikipedia-cooc-fn ../data/wikipedia/cooccurrence_US_FR.txt -a -i'],#[comb],
+    **OrderedDict({ # necessary because some args have to be given in a certain order
+    "rel":combinations,
     "-n":[4],
-    "--ngram-word2vec-iter" :[1],
+    "--ngram-word2vec-iter" :[50],
     "-e":[100],
-    "geoname_fn":"../data/geonamesData/US_FR.txt".split(),
-    "hierarchy_fn":"../data/geonamesData/hierarchy.txt".split(),
-    "store_true":["rel"]
+    "geoname_fn":"../data/geonamesData/FR.txt".split(),
+    "hierarchy_fn":"../data/geonamesData/hierarchy.txt".split()
     }.items()))
+
print("########### THE FOLLOWING COMMAND(S) WILL BE EXECUTED ###########" )
 [print(task.get_command()) for task in grid.tasks]
 print("#################################################################")
-grid.run("log_RUN_TEXAS_IDFrance.txt")
\ No newline at end of file
+grid.run("outputs/log_RUN_TEXAS_IDFrance.txt")
+
+#["-w --wikipedia-cooc-fn ../data/wikipedia/cooccurrence_FR.txt","-w --wikipedia-cooc-fn ../data/wikipedia/cooccurrence_FR.txt -a","-w --wikipedia-cooc-fn ../data/wikipedia/cooccurrence_FR.txt -i"]
\ No newline at end of file
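
The sketch below is a minimal, hypothetical usage example of the batch geocoding API this diff introduces (Geocoder.get_coords, followed by the pre-existing wgs_coord rescaling), mirroring how eval_amb.py drives it. The model prefix outputs/FR_model is a placeholder, not a file produced by this change; any trained <prefix>.h5 / <prefix>_index pair should work the same way.

# Hedged usage sketch -- not part of the committed diff.
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'  # CPU is enough for inference, as in eval_amb.py

import numpy as np

from predict_toponym_coordinates import Geocoder

MODEL_PREFIX = "outputs/FR_model"  # hypothetical placeholder path

# A Geocoder is built from a trained Keras model and its companion n-gram index.
geocoder = Geocoder(MODEL_PREFIX + ".h5", MODEL_PREFIX + "_index")

# get_coords takes two aligned lists: the toponyms to locate and their
# context toponyms (one context per toponym). Example data only.
toponyms = ["Paris", "Lyon"]
contexts = ["Cherbourg", "Grenoble"]

lon, lat = geocoder.get_coords(toponyms, contexts)

# wgs_coord maps the model's [0,1]-scaled outputs to WGS84 degrees
# via ((lon*360)-180, (lat*180)-90), exactly as eval_amb.py does
# before computing haversine distances.
lon, lat = geocoder.wgs_coord(lon, lat)

for name, x, y in zip(toponyms, np.ravel(lon), np.ravel(lat)):
    print("{0}: lon={1:.3f}, lat={2:.3f}".format(name, x, y))

From the command line, the new evaluation script follows the same pattern and takes its two positional arguments from the diff above: python3 eval_amb.py <eval_dataset.csv> <models_directory>.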