Skip to content
Snippets Groups Projects
Commit be55584e authored by Jacques Fize
Browse files

UPD

parent 0c92ec36
No related branches found
No related tags found
No related merge requests found
...@@ -79,7 +79,7 @@ class Geocoder(object): ...@@ -79,7 +79,7 @@ class Geocoder(object):
ax.plot(lon,lat,marker='o', color='red', markersize=5) ax.plot(lon,lat,marker='o', color='red', markersize=5)
plt.show() plt.show()
def heuritic_mean(geocoder,data): def heuristic_mean(geocoder,data):
toponyms = data.text.unique() toponyms = data.text.unique()
input_ = np.asarray([[t1,t2] for t2 in toponyms for t1 in toponyms if t2 != t1]) input_ = np.asarray([[t1,t2] for t2 in toponyms for t1 in toponyms if t2 != t1])
res_geocode = pd.DataFrame(input_,columns="t tc".split()) res_geocode = pd.DataFrame(input_,columns="t tc".split())
......
# Evaluation set-up: build the geocoder from the FR "A_I_C" model files and
# load the toponym table used by the rest of this script.
import pandas as pd

from lib.geocoder import Geocoder

geocoder = Geocoder(
    "./outputs/FR_MODEL_2/FR.txt_100_4_100__A_I_C.h5",
    "./outputs/FR_MODEL_2/FR.txt_100_4_100__A_I_C_index",
)

df = pd.read_csv("data/rando_toponymes.tsv", sep="\t")

# Keep only the text before the first "¦" of each name
# (presumably "¦" separates alternative spellings; confirm against the data).
df["name"] = df["name"].apply(lambda raw_name: raw_name.split("¦")[0])
def heuristic_mean(toponyms):
    """Geocode each toponym as the mean of its pairwise predictions.

    Every ordered pair ``(t, tc)`` of *different* toponyms is sent to the
    module-level ``geocoder``; the coordinates obtained for all pairs whose
    first element is a given toponym are then averaged.  When no such pair
    exists (a single toponym, or only repeated values), the first toponym is
    paired with itself so that one prediction is still produced.

    Parameters
    ----------
    toponyms : sequence of str
        Toponyms that occur together (list, tuple or 1-D array).

    Returns
    -------
    dict
        ``{toponym: {"lat": mean_lat, "lon": mean_lon}}``.  Empty when
        ``toponyms`` is empty.  A toponym without any matching pair gets NaN.
    """
    # numpy is used here but is not imported at the top of this script.
    import numpy as np

    # Nothing to geocode: avoid the IndexError on toponyms[0] further down.
    # len() is used instead of truthiness because callers pass numpy arrays.
    if len(toponyms) == 0:
        return {}

    input_ = np.asarray(
        [[t1, t2] for t2 in toponyms for t1 in toponyms if t2 != t1]
    )
    if len(input_) < 1:
        # No pair of distinct toponyms: fall back to a single self-pair.
        input_ = np.asarray([[toponyms[0], toponyms[0]]])

    res_geocode = pd.DataFrame(input_, columns="t tc".split())
    # get_coords() output is forwarded to wgs_coord(), which yields the
    # longitudes and latitudes, one per input pair.
    lons, lats = geocoder.wgs_coord(
        *geocoder.get_coords(input_[:, 0], input_[:, 1])
    )
    res_geocode["lon"] = lons
    res_geocode["lat"] = lats

    results = {}
    for tp in toponyms:
        # Select the rows once and reuse them for both coordinates.
        rows = res_geocode[res_geocode.t == tp]
        results[tp] = {"lat": rows.lat.mean(), "lon": rows.lon.mean()}
    return results
def heuristic_one_couple(toponyms):
    """Geocode each toponym using only the pair made of the toponym itself.

    Each distinct toponym ``t`` is sent to the module-level ``geocoder`` as
    the pair ``(t, t)`` and the resulting coordinates are stored for it.

    The pairs are built in a single pass over the distinct toponyms instead
    of a double loop filtered on equality, so repeated toponyms are queried
    once rather than once per repetition squared.  This assumes the geocoder
    returns the same coordinates for identical pairs, in which case the mean
    over the repeated rows equals the single prediction.

    Parameters
    ----------
    toponyms : sequence of str
        Toponyms to geocode (list, tuple or 1-D array); duplicates allowed.

    Returns
    -------
    dict
        ``{toponym: {"lat": lat, "lon": lon}}``.  Empty when ``toponyms`` is
        empty.
    """
    # numpy is used here but is not imported at the top of this script.
    import numpy as np

    # Nothing to geocode: the previous fallback indexed toponyms[0] and
    # raised IndexError on empty input.  len() is used because callers pass
    # numpy arrays, whose truth value is ambiguous.
    if len(toponyms) == 0:
        return {}

    # Distinct toponyms, in order of first appearance.
    unique_toponyms = list(dict.fromkeys(toponyms))
    input_ = np.asarray([[tp, tp] for tp in unique_toponyms])

    res_geocode = pd.DataFrame(input_, columns="t tc".split())
    # get_coords() output is forwarded to wgs_coord(), which yields the
    # longitudes and latitudes, one per input pair.
    lons, lats = geocoder.wgs_coord(
        *geocoder.get_coords(input_[:, 0], input_[:, 1])
    )
    res_geocode["lon"] = lons
    res_geocode["lat"] = lats

    results = {}
    for tp in unique_toponyms:
        rows = res_geocode[res_geocode.t == tp]
        results[tp] = {"lat": rows.lat.mean(), "lon": rows.lon.mean()}
    return results
# Geocode the toponyms of every file and report the mean distance error.
#
# Predictions are computed per "filename" group.  Each group's predictions
# keep the group's own row index so they are joined back to the rows they
# belong to: groupby() yields groups in sorted key order (and skips rows whose
# key is missing), so a plain positional list would be misaligned with df
# whenever df is not already ordered by filename.
results_fin = []
for _, group in df.groupby("filename"):
    res_geocode = heuristic_one_couple(group.name_gazetteer.values)
    results_fin.append(
        pd.DataFrame(
            [res_geocode[tp] for tp in group.name_gazetteer],
            index=group.index,
        )
    )
dd = pd.concat(results_fin).rename(columns={"lat": "lat_pred", "lon": "lon_pred"})
# Column-wise concat aligns on the index, i.e. on the original rows of df.
df2 = pd.concat((df, dd), axis=1)
from lib.geo import haversine_pd
df2["dist_error"] = haversine_pd(df2.longitude, df2.latitude, df2.lon_pred, df2.lat_pred)
print(df2.dist_error.mean())
from flask import Flask, escape, request, render_template,jsonify,Markup from flask import Flask, escape, request, render_template,jsonify,Markup, redirect
from lib.geocoder import Geocoder,TextGeocoder,heuritic_mean from lib.geocoder import Geocoder,TextGeocoder,heuristic_mean
geocoder = Geocoder("./outputs/GB_MODEL_2/GB.txt_100_4_100__A_I_C.h5","./outputs/GB_MODEL_2/GB.txt_100_4_100__A_I_C_index")
app = Flask(__name__) app = Flask(__name__)
# Available models, keyed by "<country>_<feature set>".
# Each value is a (model file, index file) pair of paths.
dict_model = {
    # French models
    "FR_AIC": (
        "./outputs/FR_MODEL_2/FR.txt_100_4_100__A_I_C.h5",
        "./outputs/FR_MODEL_2/FR.txt_100_4_100__A_I_C_index",
    ),
    "FR_C": (
        "./outputs/FR_MODEL_2/FR.txt_100_4_100__C.h5",
        "./outputs/FR_MODEL_2/FR.txt_100_4_100__C_index",
    ),
    "FR_AC": (
        "./outputs/FR_MODEL_2/FR.txt_100_4_100__A_C.h5",
        "./outputs/FR_MODEL_2/FR.txt_100_4_100__A_C_index",
    ),
    "FR_IC": (
        "./outputs/FR_MODEL_2/FR.txt_100_4_100__I_C.h5",
        "./outputs/FR_MODEL_2/FR.txt_100_4_100__I_C_index",
    ),
    # British models
    "GB_AIC": (
        "./outputs/GB_MODEL_2/GB.txt_100_4_100__A_I_C.h5",
        "./outputs/GB_MODEL_2/GB.txt_100_4_100__A_I_C_index",
    ),
    "GB_C": (
        "./outputs/GB_MODEL_2/GB.txt_100_4_100__C.h5",
        "./outputs/GB_MODEL_2/GB.txt_100_4_100__C_index",
    ),
    "GB_AC": (
        "./outputs/GB_MODEL_2/GB.txt_100_4_100__A_C.h5",
        "./outputs/GB_MODEL_2/GB.txt_100_4_100__A_C_index",
    ),
    "GB_IC": (
        "./outputs/GB_MODEL_2/GB.txt_100_4_100__I_C.h5",
        "./outputs/GB_MODEL_2/GB.txt_100_4_100__I_C_index",
    ),
}
# Active configuration: which entry of dict_model to load, and the language
# and NER backend handed to the text geocoder.
MODEL = "FR_AIC"
LANG = "fr"
NER = "spacy"

# Built once at import time and shared by the request handlers.
_model_path, _index_path = dict_model[MODEL]
geocoder = Geocoder(_model_path, _index_path)
g_t = TextGeocoder(geocoder, NER, LANG, heuristic_mean)
@app.route('/') @app.route('/')
def home(): def home():
toponym = request.args.get("top", "") toponym = request.args.get("top", "")
...@@ -23,7 +43,7 @@ def text(): ...@@ -23,7 +43,7 @@ def text():
def geocode(): def geocode():
if request.method == 'POST': if request.method == 'POST':
text = request.form["text"] text = request.form["text"]
g_t = TextGeocoder(geocoder,"spacy","en",heuritic_mean)
results = g_t.geocode(g_t.extract_geo_entities(text)) results = g_t.geocode(g_t.extract_geo_entities(text))
html_, pos_ = "", 0 html_, pos_ = "", 0
...@@ -41,4 +61,4 @@ def geocode(): ...@@ -41,4 +61,4 @@ def geocode():
if __name__ == "__main__": if __name__ == "__main__":
app.run(host="0.0.0.0",debug=True) app.run(host="0.0.0.0")
\ No newline at end of file \ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment