Skip to content
Snippets Groups Projects
Commit 04d402c0 authored by Jacques Fize's avatar Jacques Fize
Browse files

DEBUG + ADD DEMO APP

parent 70d5b190
No related branches found
No related tags found
No related merge requests found
...@@ -98,7 +98,7 @@ logging.basicConfig( ...@@ -98,7 +98,7 @@ logging.basicConfig(
chrono = Chronometer() chrono = Chronometer()
args = ConfigurationReader("./parser_config/toponym_combination_embedding.json")\ args = ConfigurationReader("./parser_config/toponym_combination_embedding.json")\
.parse_args("-n 4 -t 0.002 -e 20 -a -w -i data/geonamesData/FR.txt data/geonamesData/hierarchy.txt".split()) .parse_args()#("-n 4 -t 0.002 -e 20 -a -w -i data/geonamesData/FR.txt data/geonamesData/hierarchy.txt".split())
# Initialise CONSTANTS # Initialise CONSTANTS
GEONAME_FN = args.geoname_input GEONAME_FN = args.geoname_input
...@@ -221,7 +221,7 @@ geoname2encodedname = dict(filtered["geonameid encode_name".split()].values) #in ...@@ -221,7 +221,7 @@ geoname2encodedname = dict(filtered["geonameid encode_name".split()].values) #in
if args.wikipedia_cooc: if args.wikipedia_cooc:
geoname2encodedname.update(extension) geoname2encodedname.update(extension)
index.save("outputs/index_{0}gram_{1}".format(NGRAM_SIZE,GEONAME_FN.split("/")[-1]))
logging.info("Done !") logging.info("Done !")
#CLEAR RAM #CLEAR RAM
...@@ -291,6 +291,9 @@ if args.inclusion: ...@@ -291,6 +291,9 @@ if args.inclusion:
if args.wikipedia_cooc: if args.wikipedia_cooc:
name += "_C" name += "_C"
index.save("outputs/"+name+"_index")
# NGRAM EMBEDDING # NGRAM EMBEDDING
logging.info("Generating N-GRAM Embedding...") logging.info("Generating N-GRAM Embedding...")
embedding_weights = index.get_embedding_layer(geoname2encodedname.values(),dim= embedding_dim,iter=50) embedding_weights = index.get_embedding_layer(geoname2encodedname.values(),dim= embedding_dim,iter=50)
......
from keras.models import load_model
import tensorflow as tf
import keras.backend as K
from utils import NgramIndex
from flask import Flask
# Largest absolute difference between truth and prediction that still counts as a hit.
ACCURACY_TOLERANCE = 0.002


def accuracy_at_k(y_true, y_pred):
    """
    Tolerance-based accuracy metric for coordinate prediction.

    A plain accuracy metric would require the network to output the exact
    coordinate, which is (quasi) impossible. Instead, a predicted value is
    counted as correct when its absolute difference with the truth is below
    ``ACCURACY_TOLERANCE``.

    Parameters
    ----------
    y_true : tf.Tensor
        truth data
    y_pred : tf.Tensor
        predicted output

    Returns
    -------
    tuple
        Two tensors. Each one is the size of one column (column 0, then
        column 1) of the ``tf.where`` index tensor divided by the size of
        ``y_pred``. Indexing column 1 requires inputs of rank 2 or more.
    """
    within_tolerance = tf.less(tf.abs(y_true - y_pred), ACCURACY_TOLERANCE)
    # tf.where with a single argument returns the indices of the True entries,
    # one row per hit and one column per input dimension.
    hit_indices = tf.where(within_tolerance)
    n_values = K.size(y_pred)
    first_ratio = K.size(hit_indices[:, 0]) / n_values
    second_ratio = K.size(hit_indices[:, 1]) / n_values
    return first_ratio, second_ratio
from tensorflow.python.keras.backend import set_session
from tensorflow.python.keras.models import load_model
# Module-level handles on the most recently created TensorFlow session/graph.
# Kept for backward compatibility; each Geocoder also stores its own pair.
sess = None
graph = None


class Geocoder(object):
    """
    Predict the coordinates of a toponym given a context toponym, using a
    trained Keras model and the n-gram index used to encode its inputs.

    Example::

        geocoder = Geocoder("LSTM_FR.txt_20_4_0.002_None_A_I_C.h5", "index_4gram_FR_backup.txt")
        lon, lat = geocoder.get_coord("Paris", "New-York")
        lon, lat = geocoder.wgs_coord(lon, lat)
        geocoder.plot_coord("Paris,New-York", lat, lon)

    If you want an interactive map using leafletJS, set to True the
    `interactive_map` parameter of `Geocoder.plot_coord()`.
    """

    def __init__(self, keras_model_fn, ngram_index_file):
        """
        Create a TensorFlow session, then load the model and the n-gram index.

        Parameters
        ----------
        keras_model_fn : str
            path of the saved Keras model
        ngram_index_file : str
            path of the saved n-gram index, read with ``NgramIndex.load``
        """
        global sess
        global graph
        # The session and graph are stored on the instance so that creating
        # another Geocoder later does not make this one predict in a session
        # where its weights were never loaded.
        self._sess = tf.Session()
        self._graph = tf.get_default_graph()
        # The session must be set BEFORE the model is loaded.
        set_session(self._sess)
        # Mirror into the module globals for code that still reads them.
        sess = self._sess
        graph = self._graph
        self.keras_model = load_model(keras_model_fn, custom_objects={"accuracy_at_k": accuracy_at_k})
        self.ngram_encoder = NgramIndex.load(ngram_index_file)

    def get_coord(self, toponym, context_toponym):
        """
        Predict the model-space coordinates of ``toponym``.

        Parameters
        ----------
        toponym : str
            place name to locate
        context_toponym : str
            second place name given to the model as context

        Returns
        -------
        tuple
            ``(lon, lat)`` as returned by the model; pass them to
            ``wgs_coord`` to obtain degrees.
        """
        encoder = self.ngram_encoder
        # Encode each name, then bring it to the index's max_len
        # (presumably padding; see NgramIndex.complete).
        p = encoder.complete(encoder.encode(toponym), encoder.max_len)
        c = encoder.complete(encoder.encode(context_toponym), encoder.max_len)
        # Predict inside the session/graph this instance's model was loaded in;
        # required when called from another thread (e.g. a web request).
        with self._sess.as_default():
            with self._graph.as_default():
                lon, lat = self.keras_model.predict([[p], [c]])
        return lon[0][0], lat[0][0]

    def wgs_coord(self, lon, lat):
        """
        Rescale model outputs to degrees.

        Maps ``lon`` linearly so that 0 -> -180 and 1 -> 180, and ``lat`` so
        that 0 -> -90 and 1 -> 90. Assumes the model outputs are normalised
        to [0, 1] -- TODO confirm against the training script.

        Returns
        -------
        tuple
            ``(lon, lat)`` in degrees
        """
        return ((lon * 360) - 180), ((lat * 180) - 90)

    def plot_coord(self, toponym, lat, lon, interactive_map=False, **kwargs):
        """
        Display a point on a world map.

        Parameters
        ----------
        toponym : str
            label used as the marker popup on the interactive map
        lat : float
            latitude of the point
        lon : float
            longitude of the point
        interactive_map : bool, optional
            if True, write a folium (leaflet) map to a temporary HTML file and
            open it in the web browser; otherwise draw a static matplotlib map
        **kwargs
            forwarded to ``matplotlib.pyplot.subplots`` (static map only)
        """
        if interactive_map:
            import folium
            import tempfile
            import webbrowser

            # Only reserve a path here: the handle is closed before folium
            # writes to it (no leaked descriptor), and the .html suffix lets
            # the browser render the file as a page.
            with tempfile.NamedTemporaryFile(suffix=".html", delete=False) as fp:
                map_path = fp.name
            m = folium.Map()
            folium.Marker([lat, lon], popup=toponym).add_to(m)
            m.save(map_path)
            webbrowser.open('file://' + map_path)
        else:
            import matplotlib.pyplot as plt
            import geopandas

            fig, ax = plt.subplots(1, **kwargs)
            world = geopandas.read_file(geopandas.datasets.get_path('naturalearth_lowres'))
            world.plot(color='white', edgecolor='black', ax=ax)
            ax.plot(lon, lat, marker='o', color='red', markersize=5)
            plt.show()
"""geocoder = Geocoder("outputs/LSTM_FR.txt_20_4_0.002_None_A_I_C.h5","outputs/index_4gram_FR_backup.txt")
lon,lat = geocoder.get_coord("Paris","New-York")
lon,lat = geocoder.wgs_coord(lon,lat)
geocoder.plot_coord("Paris,New-York",lat,lon,interactive_map=True)"""
from flask import Flask, escape, request, render_template
app = Flask(__name__)

# IMPORTANT: the Keras model has to be loaded AFTER the TensorFlow session has been set.
# Otherwise, its weights will be unavailable in the threads that use that session afterwards.
# Geocoder.__init__ sets the session before it loads the model.
geocoder = Geocoder("outputs/LSTM_FR.txt_20_4_0.002_None_A_I_C.h5","outputs/index_4gram_FR_backup.txt")
@app.route('/',methods=["GET"])
def display():
    """
    Render the demo page with a marker at the predicted location.

    Reads the toponym from the ``top`` query parameter and the context
    toponym from ``c_top``. Falls back to "Paris" / "Cherbourg" when a
    parameter is missing or empty.
    """
    # A GET form submitted with a blank text field sends an empty string, so a
    # plain ``.get(key, default)`` would pass "" to the model; ``or`` covers
    # both the missing and the empty case.
    toponym = request.args.get("top") or "Paris"
    c_toponym = request.args.get("c_top") or "Cherbourg"
    lon, lat = geocoder.get_coord(toponym, c_toponym)
    lon, lat = geocoder.wgs_coord(lon, lat)
    return render_template("skeleton.html", lat=lat, lon=lon)
if __name__ == "__main__":
    # Start the development server only when this file is executed as a
    # script, so importing the module (e.g. from a WSGI server) does not block.
    # host='0.0.0.0' makes the server listen on all network interfaces.
    app.run(host='0.0.0.0')
\ No newline at end of file
...@@ -15,4 +15,6 @@ keras ...@@ -15,4 +15,6 @@ keras
ngram ngram
shapely shapely
sqlitedict sqlitedict
nltk nltk
\ No newline at end of file folium
flask
# Evaluation process
import gensim
import glob
import re
import gensim
import random
from helpers import *
from scipy.spatial.distance import cosine
from shapely.geometry import Point
from scipy.stats.stats import pearsonr
import pandas as pd
import geopandas as gpd
from tqdm import tqdm
# Number of random place pairs sampled for each embedding model in get_pearsons().
NPAIR = 100000
# Paths of the embedding files to evaluate.
fns = glob.glob("data/embeddings/*.bin")
def get_data(fn):
    """
    Extract the embedding hyper-parameters encoded as integers in a file path.

    Every run of digits found in ``fn`` (the whole path, not only the base
    name) is read in order as: embedding size, walk length, number of walks,
    word2vec window size and, when a fifth integer is present, noise.

    Parameters
    ----------
    fn : str
        path of the embedding file

    Returns
    -------
    dict
        keys ``embedding_size``, ``walk_length``, ``number_of_walks``,
        ``word2vec_window_size``, ``filepath`` and, only when a fifth integer
        exists, ``noise``

    Raises
    ------
    IndexError
        if ``fn`` contains fewer than 4 integers
    """
    data = [int(x) for x in re.findall(r"\d+", fn)]
    if len(data) < 4:
        # Same exception type as the bare indexing would raise, but with a
        # message that names the offending file.
        raise IndexError("filename should have at least 4 integers: {0!r}".format(fn))
    params = {
        "embedding_size": data[0],
        "walk_length": data[1],
        "number_of_walks": data[2],
        "word2vec_window_size": data[3],
        "filepath": fn,
    }
    if len(data) > 4:
        params["noise"] = data[4]
    return params
# Load the French GeoNames file (read_geonames is presumably provided by `from helpers import *` -- verify).
df = read_geonames("./data/geonamesData/FR.txt")
# Build one shapely Point(longitude, latitude) per row.
df["geometry"] = df["latitude longitude".split()].apply(lambda x:Point(x.longitude,x.latitude),axis=1)
# Create GeoDataFrame for faster spatial comparison operations
gdf = gpd.GeoDataFrame(df)
# Select a sample that concerns the departement "La Manche"
manche_gdf = gdf[gdf.admin2_code == "50"].copy()
# NOTE: `df` is rebound here. From this point on it holds one row of parsed
# hyper-parameters per embedding file, not the GeoNames data.
df =pd.DataFrame([get_data(fn) for fn in fns])
def get_pearsons(model):
    """
    Correlate spatial distance with embedding distance for random place pairs.

    Draws ``NPAIR`` random pairs of places from ``manche_gdf`` and, for each
    pair, records the shapely distance between the two centroids and the
    cosine distance between the two embedding vectors.

    Parameters
    ----------
    model
        object exposing the vectors as ``model.wv[<geonameid as str>]``

    Returns
    -------
    tuple
        result of ``scipy`` ``pearsonr``: the correlation and its p-value
    """
    # Writes a "geometry_centroid" column into the module-level manche_gdf.
    manche_gdf.loc[:, "geometry_centroid"] = manche_gdf.centroid
    centroid_by_id = dict(manche_gdf.loc[:, "geonameid geometry_centroid".split()].values)
    place_ids = list(centroid_by_id)

    spatial_distances = []
    embedding_distances = []
    for _ in tqdm(range(NPAIR), disable=True):
        # Both places are drawn independently, so a pair may contain the same place twice.
        first = random.choice(place_ids)
        second = random.choice(place_ids)
        spatial_distances.append(centroid_by_id[first].distance(centroid_by_id[second]))
        first_vector = model.wv[str(first)]
        second_vector = model.wv[str(second)]
        embedding_distances.append(cosine(first_vector, second_vector))

    # Compute Pearson correlation and associated p-value
    return pearsonr(spatial_distances, embedding_distances)
# Score every embedding file: keep only the correlation coefficient (index 0)
# of the value returned by get_pearsons.
df["pearson"] = df.filepath.apply(
    lambda path: get_pearsons(gensim.models.KeyedVectors.load(path))[0]
)
# Missing values (e.g. "noise" for files that do not encode one) become 0.
df.fillna(0, inplace=True)

# One scatter plot per hyper-parameter, coloured by noise.
# NOTE(review): `plt` is not imported explicitly in this script; presumably it
# comes from `from helpers import *` -- verify.
for x_column in ("walk_length", "number_of_walks", "word2vec_window_size"):
    df.plot.scatter(x=x_column, y="pearson", c="noise", cmap='inferno')
    plt.show()
\ No newline at end of file
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<meta http-equiv="X-UA-Compatible" content="ie=edge">
<title>Geocoder Interface</title>
<link rel="stylesheet" href="https://stackpath.bootstrapcdn.com/bootstrap/4.4.1/css/bootstrap.min.css"
integrity="sha384-Vkoo8x4CGsO3+Hhxv8T/Q5PaXtkKtu6ug5TOeNV6gBiFeWPGFN9MuhOf23Q9Ifjh" crossorigin="anonymous">
<!-- Load Leaflet -->
<link rel="stylesheet" href="https://unpkg.com/leaflet@1.3.4/dist/leaflet.css"
integrity="sha512-puBpdR0798OZvTTbP4A8Ix/l+A4dHDD0DGqYW6RQ+9jxkRFclaxxQb/SJAWZfWAkuyeQUytO7+7N4QKrDh+drA=="
crossorigin="" />
<script src="https://unpkg.com/leaflet@1.3.4/dist/leaflet.js"
integrity="sha512-nMMmRyTVoLYqjP9hrbed9S+FzjZHW5gY1TWCHA5ckwXZBadntCNs8kEqAWdrb9O7rxbCaA4lKTIWjDXZxflOcA=="
crossorigin=""></script>
</head>
<body>
<style>
body {
}
#mapid {
height: 400px;
width: 100%;
}
</style>
<main class="container-fluid">
<h1 style="text-align: center;color:white;text-shadow: 1px 1px 2px black;background-color: #999;">Geocoder Demo</h1>
<div id="mapid"></div>
<div class="container" style="background-color: white;padding: 5px;">
<h2>Input</h2>
<form action="/" method="get">
<div class="form-group">
<label for="formGroupExampleInput">Toponym</label>
<input type="text" class="form-control" id="formGroupExampleInput" name="top"
placeholder="Paris">
</div>
<div class="form-group">
<label for="formGroupExampleInput2">Context Toponym</label>
<input type="text" class="form-control" id="formGroupExampleInput2" name="c_top"
placeholder="Cherbourg">
</div>
<button type="submit" class="btn btn-primary">Get Coords !</button>
</form>
</div>
</main>
<!-- JS SCRIPTS -->
<script src="https://code.jquery.com/jquery-3.4.1.slim.min.js"
integrity="sha384-J6qa4849blE2+poT4WnyKhv5vZF5SrPo0iEjwBvKU7imGFAV0wwj1yYfoRSJoZ+n"
crossorigin="anonymous"></script>
<script src="https://cdn.jsdelivr.net/npm/popper.js@1.16.0/dist/umd/popper.min.js"
integrity="sha384-Q6E9RHvbIyZFJoft+2mJbHaEWldlvI9IOYy5n3zV9zzTtmI3UksdQRVvoxMfooAo"
crossorigin="anonymous"></script>
<script src="https://stackpath.bootstrapcdn.com/bootstrap/4.4.1/js/bootstrap.min.js"
integrity="sha384-wfSDF2E50Y2D1uUdj0O3uMBJnjuUD4Ih7YwaYd1iqfktj0Uod8GCExl3Og8ifwB6"
crossorigin="anonymous"></script>
<script>
// Initialize the map
// 'mapid' is the id of the div where the map will appear
// [50, -0.1] is the initial view centre (latitude, longitude)
// 4 is the initial zoom level
var mymap = L
.map('mapid')
.setView([50, -0.1], 4);
// Add a tile layer to the map = a background.
// Tiles are requested from tile.stamen.com (toner style); the attribution credits OpenStreetMap.
L.tileLayer(
'http://tile.stamen.com/toner/{z}/{x}/{y}.png', {
attribution: 'Map data &copy; <a href="https://www.openstreetmap.org/">OpenStreetMap</a>',
maxZoom: 6,
}).addTo(mymap);
// {{lat}} and {{lon}} are template placeholders, replaced with the coordinates when the page is rendered.
var marker = L.marker([{{lat}}, {{lon}}]).addTo(mymap);
</script>
</body>
</html>
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment