diff --git a/lib/geocoder.py b/lib/geocoder.py
index 1f927526bca39fc237eb864e27ca7ce5826770df..2f43e7c34747480e68626270a0921dfc1c0da5e3 100644
--- a/lib/geocoder.py
+++ b/lib/geocoder.py
@@ -79,7 +79,7 @@ class Geocoder(object):
         ax.plot(lon,lat,marker='o', color='red', markersize=5)
         plt.show()
 
-def heuritic_mean(geocoder,data):
+def heuristic_mean(geocoder,data):
     toponyms = data.text.unique()
     input_ = np.asarray([[t1,t2] for t2 in toponyms for t1 in toponyms if t2 != t1])
     res_geocode = pd.DataFrame(input_,columns="t tc".split())
diff --git a/scripts/randoludo.py b/scripts/randoludo.py
new file mode 100644
index 0000000000000000000000000000000000000000..7428edeaf51ef4c409a0da3a8a994f20e420b3f6
--- /dev/null
+++ b/scripts/randoludo.py
@@ -0,0 +1,59 @@
+"""Evaluate the one-couple geocoding heuristic on data/rando_toponymes.tsv.
+
+Rows are grouped by their ``filename`` column, every toponym of a group is
+geocoded, and the mean of the distance errors returned by ``haversine_pd`` is
+printed.
+"""
+import numpy as np
+import pandas as pd
+
+from lib.geo import haversine_pd
+from lib.geocoder import Geocoder
+
+geocoder = Geocoder("./outputs/FR_MODEL_2/FR.txt_100_4_100__A_I_C.h5","./outputs/FR_MODEL_2/FR.txt_100_4_100__A_I_C_index")
+df = pd.read_csv("data/rando_toponymes.tsv",sep="\t")
+# Keep only the part of each name that precedes the first "¦".
+df["name"]=df.name.apply(lambda x:x.split("¦")[0])
+
+
+def _geocode_pairs(toponyms, pairs):
+    """Geocode pairs of toponyms and average the coordinates per first element.
+
+    Returns a dict mapping each toponym to {"lat": ..., "lon": ...}.
+    """
+    input_ = np.asarray(pairs)
+    if len(input_)<1:
+        # No pair was produced: fall back to the first toponym paired with itself.
+        input_=np.asarray([[toponyms[0],toponyms[0]]])
+    res_geocode = pd.DataFrame(input_,columns="t tc".split())
+    lons,lats = geocoder.wgs_coord(*geocoder.get_coords(input_[:,0],input_[:,1]))
+    res_geocode["lon"] = lons
+    res_geocode["lat"] = lats
+    results = {}
+    for tp in toponyms:
+        rows = res_geocode[res_geocode.t == tp]
+        results[tp]={"lat":rows.lat.mean(),"lon":rows.lon.mean()}
+    return results
+
+
+def heuristic_mean(toponyms):
+    """Pair each toponym with every different toponym and average the results."""
+    return _geocode_pairs(toponyms, [[t1,t2] for t2 in toponyms for t1 in toponyms if t2 != t1])
+
+
+def heuristic_one_couple(toponyms):
+    """Pair each toponym only with itself and average the results."""
+    return _geocode_pairs(toponyms, [[t1,t2] for t2 in toponyms for t1 in toponyms if t2 == t1])
+
+
+# groupby() yields groups in sorted key order, so the predictions are indexed
+# by the original row labels to keep them aligned with ``df`` in the concat.
+predictions = []
+for _,group in df.groupby("filename"):
+    res_geocode = heuristic_one_couple(group.name_gazetteer.values)
+    predictions.append(pd.DataFrame([res_geocode[tp] for tp in group.name_gazetteer],index=group.index))
+dd = pd.concat(predictions).rename(columns={"lat":"lat_pred","lon":"lon_pred"})
+df2 = pd.concat((df,dd),axis=1)
+
+df2["dist_error"] = haversine_pd(df2.longitude,df2.latitude,df2.lon_pred,df2.lat_pred)
+print(df2.dist_error.mean())
diff --git a/server.py b/server.py
index 7209e00993320cf3c93c78155cdae723bd43a803..f15fa93503c6b5d203fd0df32a1719ae931182b9 100644
--- a/server.py
+++ b/server.py
@@ -1,9 +1,29 @@
-from flask import Flask, escape, request, render_template,jsonify,Markup
-from lib.geocoder import Geocoder,TextGeocoder,heuritic_mean
+from flask import Flask, escape, request, render_template,jsonify,Markup, redirect
+from lib.geocoder import Geocoder,TextGeocoder,heuristic_mean
+
 
-geocoder = Geocoder("./outputs/GB_MODEL_2/GB.txt_100_4_100__A_I_C.h5","./outputs/GB_MODEL_2/GB.txt_100_4_100__A_I_C_index")
 app = Flask(__name__)
 
+dict_model = {
+    "FR_AIC":("./outputs/FR_MODEL_2/FR.txt_100_4_100__A_I_C.h5","./outputs/FR_MODEL_2/FR.txt_100_4_100__A_I_C_index"),
+    "FR_C":("./outputs/FR_MODEL_2/FR.txt_100_4_100__C.h5","./outputs/FR_MODEL_2/FR.txt_100_4_100__C_index"),
+    "FR_AC":("./outputs/FR_MODEL_2/FR.txt_100_4_100__A_C.h5","./outputs/FR_MODEL_2/FR.txt_100_4_100__A_C_index"),
+    "FR_IC":("./outputs/FR_MODEL_2/FR.txt_100_4_100__I_C.h5","./outputs/FR_MODEL_2/FR.txt_100_4_100__I_C_index"),
+
+    "GB_AIC":("./outputs/GB_MODEL_2/GB.txt_100_4_100__A_I_C.h5","./outputs/GB_MODEL_2/GB.txt_100_4_100__A_I_C_index"),
+    "GB_C":("./outputs/GB_MODEL_2/GB.txt_100_4_100__C.h5","./outputs/GB_MODEL_2/GB.txt_100_4_100__C_index"),
+    "GB_AC":("./outputs/GB_MODEL_2/GB.txt_100_4_100__A_C.h5","./outputs/GB_MODEL_2/GB.txt_100_4_100__A_C_index"),
+    "GB_IC":("./outputs/GB_MODEL_2/GB.txt_100_4_100__I_C.h5","./outputs/GB_MODEL_2/GB.txt_100_4_100__I_C_index")
+}
+
+MODEL = "FR_AIC"
+LANG = "fr"
+NER = "spacy"
+
+
+geocoder = Geocoder(*dict_model[MODEL])
+g_t = TextGeocoder(geocoder,NER,LANG,heuristic_mean)
+
 @app.route('/')
 def home():
     toponym = request.args.get("top", "")
@@ -23,7 +43,7 @@ def text():
 def geocode():
     if request.method == 'POST':
         text = request.form["text"]
-        g_t = TextGeocoder(geocoder,"spacy","en",heuritic_mean)
+
         results = g_t.geocode(g_t.extract_geo_entities(text))
 
         html_, pos_ = "", 0
@@ -41,4 +61,4 @@ def geocode():
 
 
 if __name__ == "__main__":
-    app.run(host="0.0.0.0",debug=True)
\ No newline at end of file
+    app.run(host="0.0.0.0")
\ No newline at end of file