From 2abd0e7c8a1e60328cfa08e5cf758e6468c0c9c0 Mon Sep 17 00:00:00 2001
From: Fize Jacques <jacques.fize@cirad.fr>
Date: Thu, 19 Nov 2020 17:39:34 +0100
Subject: [PATCH] update code + ADD dropdown menu in geocoding app

---
 geocoder_app.py              | 52 ++++++++++++++++++++++++++--------
 lib/geocoder/heuristics.py   | 55 ++++++++++++++++++++++++++++++++++++
 lib/geocoder/our_geocoder.py | 17 ++---------
 templates/skeleton.html      | 36 ++++++++++++++++++++++-
 train_geocoder_v2.py         |  2 +-
 5 files changed, 134 insertions(+), 28 deletions(-)

diff --git a/geocoder_app.py b/geocoder_app.py
index 26ab308..81d4a1e 100644
--- a/geocoder_app.py
+++ b/geocoder_app.py
@@ -1,6 +1,8 @@
-from flask import Flask, escape, request, render_template,jsonify,Markup, redirect
-from lib.geocoder import Geocoder,TextGeocoder,heuristic_mean
+from flask import Flask, escape, request, render_template,jsonify,Markup, redirect, url_for
+from lib.geocoder.our_geocoder import Geocoder,TextGeocoder
+from lib.geocoder.heuristics import *
 
+import spacy
 
 app = Flask(__name__)
 
@@ -14,31 +16,36 @@ dict_model = {
     "GB_C":("./outputs/GB_MODEL_2/GB.txt_100_4_100__C.h5","./outputs/GB_MODEL_2/GB.txt_100_4_100__C_index"),
     "GB_AC":("./outputs/GB_MODEL_2/GB.txt_100_4_100__A_C.h5","./outputs/GB_MODEL_2/GB.txt_100_4_100__A_C_index"),
     "GB_IC":("./outputs/GB_MODEL_2/GB.txt_100_4_100__I_C.h5","./outputs/GB_MODEL_2/GB.txt_100_4_100__I_C_index")
-    ,"FR_IGN":("./outputs/IGN_4_100_A_C.h5","./outputs/IGN_4_100_A_C_index")
+    ,"FR_IGN":("./outputs/IGN/onlyAdjac/IGN_4_100_A_C.h5","./outputs/IGN/onlyAdjac/IGN_4_100_A_C_index")
 }
 
-MODEL = "FR_IGN"
+MODEL = "FR_AC"
 LANG = "fr"
 NER = "spacy"
 
+heuristic_func = heuristic_cluster
 
 geocoder = Geocoder(*dict_model[MODEL])
-g_t = TextGeocoder(geocoder,NER,LANG,heuristic_mean)
+g_t = TextGeocoder(geocoder,NER,LANG,heuristic_func)
+
 
 @app.route('/')
 def home():
     toponym = request.args.get("top", "")
     c_toponym = request.args.get("c_top", "")
+    # "msg" is not read here: skeleton.html takes it straight from request.args.
+    msg_code = request.args.get("msg_code", "info")  # alert style, "info" by default
     if toponym and c_toponym:
-        lon,lat = geocoder.get_coord(toponym,c_toponym)
-        lon,lat = geocoder.wgs_coord(lon,lat)
-        return  render_template("pair_topo.html",lat=lat,lon=lon,title="Toponyms Pair Geocoder")
+        # get_coords works on batches, so wrap the single pair in one-element lists
+        lon,lat = geocoder.get_coords([toponym],[c_toponym])
+        lon,lat = lon[0],lat[0]  # unwrap the only result of the batch
+        return  render_template("pair_topo.html",lat=lat,lon=lon,title="Toponyms Pair Geocoder",dict_model=dict_model,msg_code=msg_code)
     else:
-        return  render_template("pair_topo.html",title="Toponyms Pair Geocoder")
+        return  render_template("pair_topo.html",title="Toponyms Pair Geocoder",dict_model=dict_model,msg_code=msg_code)
 
 @app.route('/text')
 def text():
-    return render_template("text.html",title="Text Geocoder")
+    return render_template("text.html",title="Text Geocoder",dict_model=dict_model)  # dict_model: presumably for the model dropdown of skeleton.html -- confirm text.html extends it
 
 @app.route('/geocode', methods=['POST', 'GET'])
 def geocode():
@@ -58,8 +65,29 @@ def geocode():
             if r["text"] in place_coords:
                 continue
             place_coords[r["text"]]={"lat":float(r["coord"]["lat"]),"lon":float(r["coord"]["lon"])}
-        return render_template("text.html",title="Text Geocoder",data={"type":"success","output":Markup(html_),"place_coords":place_coords})
+        return render_template("text.html",title="Text Geocoder",data={"type":"success","output":Markup(html_),"place_coords":place_coords},dict_model=dict_model)
+
 
+@app.route("/loadmodel/<model_id>")
+def loadModel(model_id):
+    """Swap the served geocoder for the model registered as `model_id`, then redirect home."""
+    global geocoder,g_t
+    if model_id not in dict_model:
+        return redirect(url_for(".home",msg="An error happened when loading the model \"{0}\"!".format(model_id),msg_code="danger"))
+    geocoder = Geocoder(*dict_model[model_id])
+    g_t = TextGeocoder(geocoder,NER,LANG,heuristic_func)  # keep the text geocoder on the new model
+    return redirect(url_for(".home",msg="Model \"{0}\" was loaded successfully!".format(model_id),msg_code="success"))
+
+@app.route("/loadlang/<lang>")
+def loadLang(lang):
+    global g_t,LANG  # the rebuilt text geocoder and its language replace the module-level ones
+    try:
+        g_t = TextGeocoder(geocoder,NER,lang,heuristic_func)
+    except Exception:  # narrowed from a bare except, which also caught SystemExit/KeyboardInterrupt
+        return redirect(url_for(".home",msg="\"{}\" language is not available!".format(lang),msg_code="danger"))
+    LANG = lang  # only recorded once the geocoder was built successfully
+    return redirect(url_for(".home",msg="Language is now set to \"{0}\"!".format(LANG),msg_code="success"))
+        
 
 if __name__ == "__main__":
-    app.run(host="0.0.0.0")
\ No newline at end of file
+    app.run(host="0.0.0.0",debug=False)  # never expose the Werkzeug debugger on all interfaces
\ No newline at end of file
diff --git a/lib/geocoder/heuristics.py b/lib/geocoder/heuristics.py
index e69de29..82d7110 100644
--- a/lib/geocoder/heuristics.py
+++ b/lib/geocoder/heuristics.py
@@ -0,0 +1,55 @@
+import pandas as pd
+import numpy as np
+
+from haversine import haversine_vector, Unit
+from sklearn.cluster import DBSCAN
+
+def heuristic_mean(geocoder,toponyms):
+    # Geocode every toponym against every other toponym used as context, then
+    # average the coordinates obtained for each toponym.
+    pairs = np.asarray([[top,ctx] for ctx in toponyms for top in toponyms if ctx != top])
+    frame = pd.DataFrame(pairs,columns=["t","tc"])
+    lons,lats = geocoder.get_coords(pairs[:,0],pairs[:,1])
+    frame["lon"],frame["lat"] = lons,lats
+    averaged = {}
+    for name in toponyms:
+        rows = frame[frame.t == name]  # all predictions made for this toponym
+        averaged[name] = {"lat":rows.lat.mean(),"lon":rows.lon.mean()}
+    return averaged
+
+def heuristic_no_context(geocoder,toponyms):
+    # Geocode each toponym with itself as context. One (t, t) pair per toponym
+    # is enough: the former nested loop only repeated identical pairs and built
+    # no pair at all (hence crashed) when a single distinct toponym was given.
+    input_ = np.asarray([[tp,tp] for tp in toponyms])
+    res_geocode = pd.DataFrame(input_,columns="t tc".split())
+    res_geocode["lon"],res_geocode["lat"] = geocoder.get_coords(input_[:,0],input_[:,1])
+    results = {}
+    for tp in toponyms:
+        rows = res_geocode[res_geocode.t == tp]  # several rows if tp is repeated
+        results[tp]={"lat":rows.lat.mean(),"lon":rows.lon.mean()}
+    return results
+
+def heuristic_cluster(geocoder,toponyms,eps=100):
+    # For each toponym, cluster its candidate coordinates with DBSCAN (eps in km)
+    # and return the median lat/lon of the most populated cluster.
+    results = {}
+    input_ = np.asarray([[t1,t2] for t2 in toponyms for t1 in toponyms if t2 != t1])
+    res_geocode = pd.DataFrame(input_,columns="t tc".split())
+    lons,lats = geocoder.get_coords(input_[:,0],input_[:,1])
+    res_geocode["lon"] = lons
+    res_geocode["lat"] = lats
+    # fit() receives a pairwise distance matrix, so DBSCAN must be told so.
+    clf = DBSCAN(eps=eps,metric="precomputed")
+    for t in toponyms:
+        # NOTE(review): rows are selected on the context column (tc) whereas
+        # heuristic_mean selects on the toponym column (t) -- confirm the intent.
+        tp_df = res_geocode[res_geocode.tc == t].copy()
+        coords = tp_df["lat lon".split()].values  # haversine expects (lat, lon)
+        clf.fit(haversine_vector(coords,coords,unit="km",comb=True))
+        tp_df["cluster"] = clf.labels_  # -1 marks DBSCAN noise points
+        counts_ = dict(tp_df.cluster.value_counts())
+        max_cluster = max(counts_, key=counts_.get)
+        tp_df = tp_df[tp_df.cluster == max_cluster]
+        results[t]={"lat":tp_df.lat.median(),"lon":tp_df.lon.median()}
+    return results
\ No newline at end of file
diff --git a/lib/geocoder/our_geocoder.py b/lib/geocoder/our_geocoder.py
index cba4d0d..0345ea4 100644
--- a/lib/geocoder/our_geocoder.py
+++ b/lib/geocoder/our_geocoder.py
@@ -17,6 +17,7 @@ from lib.word_index import WordIndex
 from lib.ngram_index import NgramIndex
 from lib.utils_geo import haversine_tf_1circle
 
+
 import stanza
 import spacy
 import os
@@ -80,19 +81,7 @@ class Geocoder(object):
             ax.plot(lon,lat,marker='o', color='red', markersize=5)
             plt.show()
 
-def heuristic_mean(geocoder,data):
-    toponyms = data.text.unique()
-    input_ = np.asarray([[t1,t2] for t2 in toponyms for t1 in toponyms if t2 != t1])
-    res_geocode = pd.DataFrame(input_,columns="t tc".split())
-    lons,lats = geocoder.wgs_coord(*geocoder.get_coords(input_[:,0],input_[:,1]))
-    res_geocode["lon"] = lons
-    res_geocode["lat"] = lats
-    results = {}
-    for tp in toponyms:
-        lat = res_geocode[res_geocode.t == tp].lat.mean()
-        lon = res_geocode[res_geocode.t == tp].lon.mean()
-        results[tp]={"lat":lat,"lon":lon}
-    return results
+
     
 class TextGeocoder():
     def __init__(self,geocoder_model,ner_name,lang,heuristic_func,n_jobs=None):
@@ -116,7 +105,7 @@ class TextGeocoder():
 
     def geocode(self,entities):
         df = pd.DataFrame(entities)
-        heuristic_results = self.heuristic_func(self.geocoder_model,df)
+        heuristic_results = self.heuristic_func(self.geocoder_model,df.text.unique())  # de-duplicate: repeated mentions must not inflate or re-weight the pair set
         for e in range(len(entities)):
             entities[e]["coord"] = heuristic_results[entities[e]["text"]]
         return entities
diff --git a/templates/skeleton.html b/templates/skeleton.html
index a1139fd..8687b4e 100644
--- a/templates/skeleton.html
+++ b/templates/skeleton.html
@@ -64,11 +64,45 @@
                 <div class="navbar-nav">
                     <a class="nav-link" href="/">Toponyms Pair Geocoder</a>
                     <a class="nav-link" href="/text">Text Geocoder</a>
+
+
+                </div>
+                <div class="navbar-nav ml-auto">
+                    <li class="nav-item dropdown">
+                        <a class="nav-link dropdown-toggle" href="#" id="navbarModelDropdown" role="button" data-toggle="dropdown" aria-haspopup="true" aria-expanded="false">
+                          Choose Model
+                        </a>
+
+                        <div class="dropdown-menu" aria-labelledby="navbarModelDropdown">
+                            {% for id_ in dict_model %}
+                            <a class="dropdown-item" href="/loadmodel/{{id_}}">{{id_}}</a>
+                            <br>{% endfor %}
+                        </div>
+                    </li>
+                    <li class="nav-item dropdown">
+                        <a class="nav-link dropdown-toggle" href="#" id="navbarDropdown" role="button" data-toggle="dropdown" aria-haspopup="true" aria-expanded="false">
+                        Choose Lang for Spacy
+                        </a>
+                        <div class="dropdown-menu" aria-labelledby="navbarDropdown">
+                            <a class="dropdown-item" href="/loadlang/fr">fr</a>
+                            <a class="dropdown-item" href="/loadlang/en">en</a>
+                        </div>
+                    </li>
                 </div>
             </div>
         </nav>
+        {% if request.args.get("msg","") != "" %}
+        <div class="alert alert-{{msg_code}} alert-dismissible fade show" role="alert">
+            {{request.args.get("msg") }}
+            <button type="button" class="close" data-dismiss="alert" aria-label="Close">
+              <span aria-hidden="true">&times;</span>
+            </button>
+        </div>
+
+        {% endif %}
         <h2 class="text-center" style="margin-top: 0.5em;">{{title}}</h2>
-        {% block content %}{% endblock %}
+
+        <br>{% block content %}{% endblock %}
     </main>
 
     <!-- JS SCRIPTS -->
diff --git a/train_geocoder_v2.py b/train_geocoder_v2.py
index 665661e..b44fada 100644
--- a/train_geocoder_v2.py
+++ b/train_geocoder_v2.py
@@ -68,7 +68,7 @@ if args.adjacency:
 if args.inclusion:
     PREFIX_OUTPUT_FN += "_I"
 if args.wikipedia:
-    PREFIX_OUTPUT_FN += "_C"
+    PREFIX_OUTPUT_FN += "_P"
 
 MODEL_OUTPUT_FN = "outputs/{0}.h5".format(PREFIX_OUTPUT_FN)
 INDEX_FN = "outputs/{0}_index".format(PREFIX_OUTPUT_FN)
-- 
GitLab