diff --git a/.gitattributes b/.gitattributes old mode 100644 new mode 100755 diff --git a/.gitignore b/.gitignore old mode 100644 new mode 100755 diff --git a/README.md b/README.md old mode 100644 new mode 100755 diff --git a/combination_embeddings.py b/combination_embeddings.py old mode 100644 new mode 100755 index d2d13a5979294b0e5a0ef063839be6a584baeb2c..ab4a251956e774cd6f7d7a53ad24268711661af0 --- a/combination_embeddings.py +++ b/combination_embeddings.py @@ -73,21 +73,41 @@ def get_new_ids(cooc_data,id_first_value): topo_id[id_]=interlink return topo_id -def accuracy_at_k(y_true, y_pred): - """ - Metrics use to measure the accuracy of the coordinate prediction. But in comparison to the normal accuracy metrics, we add a tolerance threshold due to the (quasi) impossible - task for neural network to obtain the exact coordinate. - - Parameters - ---------- - y_true : tf.Tensor - truth data - y_pred : tf.Tensor - predicted output - """ - diff = tf.abs(y_true - y_pred) - fit = tf.where(tf.less(diff,ACCURACY_TOLERANCE)) - return K.size(fit[:,0])/K.size(y_pred),K.size(fit[:,1])/K.size(y_pred) +def lat_accuracy(LAT_TOL =1/180.): + def accuracy_at_k_lat(y_true, y_pred): + """ + Metrics use to measure the accuracy of the coordinate prediction. But in comparison to the normal accuracy metrics, we add a tolerance threshold due to the (quasi) impossible + task for neural network to obtain the exact coordinate. + + Parameters + ---------- + y_true : tf.Tensor + truth data + y_pred : tf.Tensor + predicted output + """ + diff = tf.abs(y_true - y_pred) + fit = tf.dtypes.cast(tf.less(diff,LAT_TOL),tf.int64) + return tf.reduce_sum(fit)/tf.size(y_pred,out_type=tf.dtypes.int64) + return accuracy_at_k_lat + +def lon_accuracy(LON_TOL=1/360.): + def accuracy_at_k_lon(y_true, y_pred): + """ + Metrics use to measure the accuracy of the coordinate prediction. But in comparison to the normal accuracy metrics, we add a tolerance threshold due to the (quasi) impossible + task for neural network to obtain the exact coordinate. + + Parameters + ---------- + y_true : tf.Tensor + truth data + y_pred : tf.Tensor + predicted output + """ + diff = tf.abs(y_true - y_pred) + fit = tf.dtypes.cast(tf.less(diff,LON_TOL),tf.int64) + return tf.reduce_sum(fit)/tf.size(y_pred,out_type=tf.dtypes.int64) + return accuracy_at_k_lon # LOGGING CONF logging.basicConfig( @@ -98,7 +118,7 @@ logging.basicConfig( chrono = Chronometer() args = ConfigurationReader("./parser_config/toponym_combination_embedding.json")\ - .parse_args()#("-n 4 -t 0.002 -e 20 -a -w -i data/geonamesData/FR.txt data/geonamesData/hierarchy.txt".split()) + .parse_args("-n 4 -t 0.002 -e 20 -i data/geonamesData/FR.txt data/geonamesData/hierarchy.txt".split()) # Initialisee CONSTANTS GEONAME_FN = args.geoname_input @@ -108,6 +128,7 @@ ACCURACY_TOLERANCE = args.tolerance_value EPOCHS = args.epochs ITER_ADJACENCY = args.adjacency_iteration COOC_SAMPLING_NUMBER = 3 +WORDVEC_ITER = 50 # check for output dir if not os.path.exists("outputs/"): @@ -296,7 +317,7 @@ index.save("outputs/"+name+"_index") # NGRAM EMBDEDDING logging.info("Generating N-GRAM Embedding...") -embedding_weights = index.get_embedding_layer(geoname2encodedname.values(),dim= embedding_dim,iter=50) +embedding_weights = index.get_embedding_layer(geoname2encodedname.values(),dim= embedding_dim,iter=WORDVEC_ITER) logging.info("Embedding generated !") # DEEP MODEL @@ -326,7 +347,7 @@ output_lat = Dense(1,activation="sigmoid",name="Output_LAT")(x2) model = Model(inputs = [input_1,input_2], outputs = [output_lon,output_lat])#input_3 -model.compile(loss=['mean_squared_error','mean_squared_error'], optimizer='adam',metrics=[accuracy_at_k]) +model.compile(loss=['mean_squared_error','mean_squared_error'], optimizer='adam',metrics={"Output_LON":lon_accuracy(),"Output_LAT":lat_accuracy()}) history = model.fit(x=[X_1_train,X_2_train], y=[y_lon_train,y_lat_train], verbose=True, batch_size=100, diff --git a/documentation/imgs/first_approach.png b/documentation/imgs/first_approach.png old mode 100644 new mode 100755 index 297c1a5025d993acfae6e501d88acac24dfc7e59..4b83d1184fc92e154510c934ecea41b4e80455ce Binary files a/documentation/imgs/first_approach.png and b/documentation/imgs/first_approach.png differ diff --git a/documentation/imgs/second_approach.png b/documentation/imgs/second_approach.png old mode 100644 new mode 100755 index e5e693fbaf11113de2673b366d4bf603047239c2..bdff5964c3796980e518eb0f9aa724bd836e0ca6 Binary files a/documentation/imgs/second_approach.png and b/documentation/imgs/second_approach.png differ diff --git a/documentation/imgs/third_approach.png b/documentation/imgs/third_approach.png old mode 100644 new mode 100755 index d96596ad9ee35b8ada81b0a68535e593ed8e1a0e..ea8e6aaa02e19084a61e346ebacff25139cc63cb Binary files a/documentation/imgs/third_approach.png and b/documentation/imgs/third_approach.png differ diff --git a/embeddings_lat_lon_type.py b/embeddings_lat_lon_type.py old mode 100644 new mode 100755 diff --git a/extractDataFromWikidata.py b/extractDataFromWikidata.py old mode 100644 new mode 100755 diff --git a/extractLearningDataset.py b/extractLearningDataset.py old mode 100644 new mode 100755 diff --git a/geonames_embedding.py b/geonames_embedding.py old mode 100644 new mode 100755 diff --git a/helpers.py b/helpers.py old mode 100644 new mode 100755 diff --git a/models.py b/models.py old mode 100644 new mode 100755 diff --git a/parser_config/embeddings_lat_lon.json b/parser_config/embeddings_lat_lon.json old mode 100644 new mode 100755 diff --git a/parser_config/toponym_combination_embedding.json b/parser_config/toponym_combination_embedding.json old mode 100644 new mode 100755 diff --git a/predict_toponym_coordinates.py b/predict_toponym_coordinates.py old mode 100644 new mode 100755 diff --git a/requirements.txt b/requirements.txt old mode 100644 new mode 100755 index 798a0142faf7dba8870eb329f0d2d6731c39784a..4eb9e43ed0fb7aefddd5bdfe87ddf049a489394c --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,6 @@ -pyroutelib3 +#pyroutelib3 node2vec -osrm +#osrm geopandas pandas numpy @@ -18,3 +18,4 @@ sqlitedict nltk folium flask +numba diff --git a/scripts/classificationEmbeddings.py b/scripts/classificationEmbeddings.py old mode 100644 new mode 100755 diff --git a/scripts/evalgeonamesembeddings.py b/scripts/evalgeonamesembeddings.py old mode 100644 new mode 100755 diff --git a/scripts/evaluation-dbpedia-types.py b/scripts/evaluation-dbpedia-types.py old mode 100644 new mode 100755 diff --git a/scripts/evaluation-geonames.py b/scripts/evaluation-geonames.py old mode 100644 new mode 100755 diff --git a/scripts/extractWikidataClasseName.py b/scripts/extractWikidataClasseName.py old mode 100644 new mode 100755 diff --git a/scripts/filterDataWithtopNclasse.py b/scripts/filterDataWithtopNclasse.py old mode 100644 new mode 100755 diff --git a/scripts/getEmbeddingGeonamesPlacenames.py b/scripts/getEmbeddingGeonamesPlacenames.py old mode 100644 new mode 100755 diff --git a/scripts/getWikidataTypesNames.py b/scripts/getWikidataTypesNames.py old mode 100644 new mode 100755 diff --git a/templates/cover.css b/templates/cover.css new file mode 100755 index 0000000000000000000000000000000000000000..7c6d33cdd58d82b8936fd0209c691184883d5e67 --- /dev/null +++ b/templates/cover.css @@ -0,0 +1,106 @@ +/* + * Globals + */ + +/* Links */ +a, +a:focus, +a:hover { + color: #fff; +} + +/* Custom default button */ +.btn-secondary, +.btn-secondary:hover, +.btn-secondary:focus { + color: #333; + text-shadow: none; /* Prevent inheritance from `body` */ + background-color: #fff; + border: .05rem solid #fff; +} + + +/* + * Base structure + */ + +html, +body { + height: 100%; + background-color: #333; +} + +body { + display: -ms-flexbox; + display: flex; + color: #fff; + text-shadow: 0 .05rem .1rem rgba(0, 0, 0, .5); + box-shadow: inset 0 0 5rem rgba(0, 0, 0, .5); +} + +.cover-container { + max-width: 42em; +} + + +/* + * Header + */ +.masthead { + margin-bottom: 2rem; +} + +.masthead-brand { + margin-bottom: 0; +} + +.nav-masthead .nav-link { + padding: .25rem 0; + font-weight: 700; + color: rgba(255, 255, 255, .5); + background-color: transparent; + border-bottom: .25rem solid transparent; +} + +.nav-masthead .nav-link:hover, +.nav-masthead .nav-link:focus { + border-bottom-color: rgba(255, 255, 255, .25); +} + +.nav-masthead .nav-link + .nav-link { + margin-left: 1rem; +} + +.nav-masthead .active { + color: #fff; + border-bottom-color: #fff; +} + +@media (min-width: 48em) { + .masthead-brand { + float: left; + } + .nav-masthead { + float: right; + } +} + + +/* + * Cover + */ +.cover { + padding: 0 1.5rem; +} +.cover .btn-lg { + padding: .75rem 1.25rem; + font-weight: 700; +} + + +/* + * Footer + */ +.mastfoot { + color: rgba(255, 255, 255, .5); +} diff --git a/templates/skeleton.html b/templates/skeleton.html old mode 100644 new mode 100755 diff --git a/train_test_split_cooccurrence_data.py b/train_test_split_cooccurrence_data.py old mode 100644 new mode 100755 diff --git a/train_test_split_geonames.py b/train_test_split_geonames.py old mode 100644 new mode 100755 diff --git a/utils.py b/utils.py old mode 100644 new mode 100755 index bd767edd0f9024730e9b4dc322929b590d26e5d2..db250b77474f8e1a135a373b76461dad485f88c1 --- a/utils.py +++ b/utils.py @@ -524,7 +524,6 @@ class Grid(object): for c1 in range(len(self.inter_cells)): if self.inter_cells[c1].contains(a[1],a[2]): self.inter_cells[c1].add_object(*a) - break def get_adjacent_relationships(self,random_iteration=10): """