From 2abd0e7c8a1e60328cfa08e5cf758e6468c0c9c0 Mon Sep 17 00:00:00 2001 From: Fize Jacques <jacques.fize@cirad.fr> Date: Thu, 19 Nov 2020 17:39:34 +0100 Subject: [PATCH] update code + ADD dropdown menu in geocoding app --- geocoder_app.py | 52 ++++++++++++++++++++++++++-------- lib/geocoder/heuristics.py | 55 ++++++++++++++++++++++++++++++++++++ lib/geocoder/our_geocoder.py | 17 ++--------- templates/skeleton.html | 36 ++++++++++++++++++++++- train_geocoder_v2.py | 2 +- 5 files changed, 134 insertions(+), 28 deletions(-) diff --git a/geocoder_app.py b/geocoder_app.py index 26ab308..81d4a1e 100644 --- a/geocoder_app.py +++ b/geocoder_app.py @@ -1,6 +1,8 @@ -from flask import Flask, escape, request, render_template,jsonify,Markup, redirect -from lib.geocoder import Geocoder,TextGeocoder,heuristic_mean +from flask import Flask, escape, request, render_template,jsonify,Markup, redirect, url_for +from lib.geocoder.our_geocoder import Geocoder,TextGeocoder +from lib.geocoder.heuristics import * +import spacy app = Flask(__name__) @@ -14,31 +16,36 @@ dict_model = { "GB_C":("./outputs/GB_MODEL_2/GB.txt_100_4_100__C.h5","./outputs/GB_MODEL_2/GB.txt_100_4_100__C_index"), "GB_AC":("./outputs/GB_MODEL_2/GB.txt_100_4_100__A_C.h5","./outputs/GB_MODEL_2/GB.txt_100_4_100__A_C_index"), "GB_IC":("./outputs/GB_MODEL_2/GB.txt_100_4_100__I_C.h5","./outputs/GB_MODEL_2/GB.txt_100_4_100__I_C_index") - ,"FR_IGN":("./outputs/IGN_4_100_A_C.h5","./outputs/IGN_4_100_A_C_index") + ,"FR_IGN":("./outputs/IGN/onlyAdjac/IGN_4_100_A_C.h5","./outputs/IGN/onlyAdjac/IGN_4_100_A_C_index") } -MODEL = "FR_IGN" +MODEL = "FR_AC" LANG = "fr" NER = "spacy" +heuristic_func = heuristic_cluster geocoder = Geocoder(*dict_model[MODEL]) -g_t = TextGeocoder(geocoder,NER,LANG,heuristic_mean) +g_t = TextGeocoder(geocoder,NER,LANG,heuristic_func) + @app.route('/') def home(): toponym = request.args.get("top", "") c_toponym = request.args.get("c_top", "") + msg = request.args.get("msg", "") + msg_code = request.args.get("msg_code", "info") if toponym and c_toponym: - lon,lat = geocoder.get_coord(toponym,c_toponym) - lon,lat = geocoder.wgs_coord(lon,lat) - return render_template("pair_topo.html",lat=lat,lon=lon,title="Toponyms Pair Geocoder") + lon,lat = geocoder.get_coords([toponym],[c_toponym]) + lon,lat = lon[0],lat[0] + print(lon,lat) + return render_template("pair_topo.html",lat=lat,lon=lon,title="Toponyms Pair Geocoder",dict_model=dict_model,msg_code=msg_code) else: - return render_template("pair_topo.html",title="Toponyms Pair Geocoder") + return render_template("pair_topo.html",title="Toponyms Pair Geocoder",dict_model=dict_model,msg_code=msg_code) @app.route('/text') def text(): - return render_template("text.html",title="Text Geocoder") + return render_template("text.html",title="Text Geocoder",dict_model=dict_model) @app.route('/geocode', methods=['POST', 'GET']) def geocode(): @@ -58,8 +65,29 @@ def geocode(): if r["text"] in place_coords: continue place_coords[r["text"]]={"lat":float(r["coord"]["lat"]),"lon":float(r["coord"]["lon"])} - return render_template("text.html",title="Text Geocoder",data={"type":"success","output":Markup(html_),"place_coords":place_coords}) + return render_template("text.html",title="Text Geocoder",data={"type":"success","output":Markup(html_),"place_coords":place_coords},dict_model=dict_model) + +@app.route("/loadmodel/<model_id>") +def loadModel(model_id): + global geocoder,g_t,LANG + if not model_id in dict_model: + return redirect(url_for(".home",msg="An error happend when loading the model \"{0}\"!".format(model_id),msg_code="danger")) + else: + geocoder = Geocoder(*dict_model[model_id]) + g_t = TextGeocoder(geocoder,NER,LANG,heuristic_func) + return redirect(url_for(".home",msg="Model \"{0}\" was loaded successfuly!".format(model_id),msg_code="success")) + +@app.route("/loadlang/<lang>") +def loadLang(lang): + global geocoder,g_t,LANG + try: + g_t = TextGeocoder(geocoder,NER,lang,heuristic_func) + LANG = lang + return redirect(url_for(".home",msg="Language is now set to \"{0}\"!".format(LANG),msg_code="success")) + except: + return redirect(url_for(".home",msg="\"{}\" language is not available!".format(lang),msg_code="danger")) + if __name__ == "__main__": - app.run(host="0.0.0.0") \ No newline at end of file + app.run(host="0.0.0.0",debug=True) \ No newline at end of file diff --git a/lib/geocoder/heuristics.py b/lib/geocoder/heuristics.py index e69de29..82d7110 100644 --- a/lib/geocoder/heuristics.py +++ b/lib/geocoder/heuristics.py @@ -0,0 +1,55 @@ +import pandas as pd +import numpy as np + +from haversine import haversine_vector, Unit +from sklearn.cluster import DBSCAN + +def heuristic_mean(geocoder,toponyms): + input_ = np.asarray([[t1,t2] for t2 in toponyms for t1 in toponyms if t2 != t1]) + res_geocode = pd.DataFrame(input_,columns="t tc".split()) + lons,lats = geocoder.get_coords(input_[:,0],input_[:,1]) + res_geocode["lon"] = lons + res_geocode["lat"] = lats + results = {} + for tp in toponyms: + lat = res_geocode[res_geocode.t == tp].lat.mean() + lon = res_geocode[res_geocode.t == tp].lon.mean() + results[tp]={"lat":lat,"lon":lon} + return results + +def heuristic_no_context(geocoder,toponyms): + input_ = np.asarray([[t1,t1] for t2 in toponyms for t1 in toponyms if t2 != t1]) + res_geocode = pd.DataFrame(input_,columns="t tc".split()) + lons,lats = geocoder.get_coords(input_[:,0],input_[:,1]) + res_geocode["lon"] = lons + res_geocode["lat"] = lats + results = {} + for tp in toponyms: + lat = res_geocode[res_geocode.t == tp].lat.mean() + lon = res_geocode[res_geocode.t == tp].lon.mean() + results[tp]={"lat":lat,"lon":lon} + return results + +def heuristic_cluster(geocoder,toponyms,eps=100): + results = {} + input_ = np.asarray([[t1,t2] for t2 in toponyms for t1 in toponyms if t2 != t1]) + res_geocode = pd.DataFrame(input_,columns="t tc".split()) + lons,lats = geocoder.get_coords(input_[:,0],input_[:,1]) + res_geocode["lon"] = lons + res_geocode["lat"] = lats + + clf = DBSCAN(eps=eps) + for t in toponyms: + tp_df = res_geocode[res_geocode.tc == t].copy() + + coords = tp_df["lon lat".split()].values + clf.fit(haversine_vector(coords,coords,unit="km",comb=True)) + + tp_df["cluster"] = clf.labels_ + counts_ = dict(tp_df.cluster.value_counts()) + max_cluster = max(counts_, key=counts_.get) + tp_df = tp_df[tp_df.cluster == max_cluster] + lat = tp_df.lat.median() + lon = tp_df.lon.median() # + results[t]={"lat":lat,"lon":lon} + return results \ No newline at end of file diff --git a/lib/geocoder/our_geocoder.py b/lib/geocoder/our_geocoder.py index cba4d0d..0345ea4 100644 --- a/lib/geocoder/our_geocoder.py +++ b/lib/geocoder/our_geocoder.py @@ -17,6 +17,7 @@ from lib.word_index import WordIndex from lib.ngram_index import NgramIndex from lib.utils_geo import haversine_tf_1circle + import stanza import spacy import os @@ -80,19 +81,7 @@ class Geocoder(object): ax.plot(lon,lat,marker='o', color='red', markersize=5) plt.show() -def heuristic_mean(geocoder,data): - toponyms = data.text.unique() - input_ = np.asarray([[t1,t2] for t2 in toponyms for t1 in toponyms if t2 != t1]) - res_geocode = pd.DataFrame(input_,columns="t tc".split()) - lons,lats = geocoder.wgs_coord(*geocoder.get_coords(input_[:,0],input_[:,1])) - res_geocode["lon"] = lons - res_geocode["lat"] = lats - results = {} - for tp in toponyms: - lat = res_geocode[res_geocode.t == tp].lat.mean() - lon = res_geocode[res_geocode.t == tp].lon.mean() - results[tp]={"lat":lat,"lon":lon} - return results + class TextGeocoder(): def __init__(self,geocoder_model,ner_name,lang,heuristic_func,n_jobs=None): @@ -116,7 +105,7 @@ class TextGeocoder(): def geocode(self,entities): df = pd.DataFrame(entities) - heuristic_results = self.heuristic_func(self.geocoder_model,df) + heuristic_results = self.heuristic_func(self.geocoder_model,df.text.values) for e in range(len(entities)): entities[e]["coord"] = heuristic_results[entities[e]["text"]] return entities diff --git a/templates/skeleton.html b/templates/skeleton.html index a1139fd..8687b4e 100644 --- a/templates/skeleton.html +++ b/templates/skeleton.html @@ -64,11 +64,45 @@ <div class="navbar-nav"> <a class="nav-link" href="/">Toponyms Pair Geocoder</a> <a class="nav-link" href="/text">Text Geocoder</a> + + + </div> + <div class="navbar-nav ml-auto"> + <li class="nav-item dropdown"> + <a class="nav-link dropdown-toggle" href="#" id="navbarModelDropdown" role="button" data-toggle="dropdown" aria-haspopup="true" aria-expanded="false"> + Choose Model + </a> + + <div class="dropdown-menu" aria-labelledby="navbarModelDropdown"> + {% for id_ in dict_model %} + <a class="dropdown-item" href="/loadmodel/{{id_}}">{{id_}}</a> + <br>{% endfor %} + </div> + </li> + <li class="nav-item dropdown"> + <a class="nav-link dropdown-toggle" href="#" id="navbarDropdown" role="button" data-toggle="dropdown" aria-haspopup="true" aria-expanded="false"> + Choose Lang for Spacy + </a> + <div class="dropdown-menu" aria-labelledby="navbarDropdown"> + <a class="dropdown-item" href="/loadlang/fr">fr</a> + <a class="dropdown-item" href="/loadlang/en">en</a> + </div> + </li> </div> </div> </nav> + {% if request.args.get("msg","") != "" %} + <div class="alert alert-{{msg_code}} alert-dismissible fade show" role="alert"> + {{request.args.get("msg") }} + <button type="button" class="close" data-dismiss="alert" aria-label="Close"> + <span aria-hidden="true">×</span> + </button> + </div> + + {% endif %} <h2 class="text-center" style="margin-top: 0.5em;">{{title}}</h2> - {% block content %}{% endblock %} + + <br>{% block content %}{% endblock %} </main> <!-- JS SCRIPTS --> diff --git a/train_geocoder_v2.py b/train_geocoder_v2.py index 665661e..b44fada 100644 --- a/train_geocoder_v2.py +++ b/train_geocoder_v2.py @@ -68,7 +68,7 @@ if args.adjacency: if args.inclusion: PREFIX_OUTPUT_FN += "_I" if args.wikipedia: - PREFIX_OUTPUT_FN += "_C" + PREFIX_OUTPUT_FN += "_P" MODEL_OUTPUT_FN = "outputs/{0}.h5".format(PREFIX_OUTPUT_FN) INDEX_FN = "outputs/{0}_index".format(PREFIX_OUTPUT_FN) -- GitLab