From c6cd0e182eb44dc9dc8f193efcacb125b02171c2 Mon Sep 17 00:00:00 2001 From: Fize Jacques <jacques.fize@cirad.fr> Date: Fri, 24 Jan 2020 10:55:01 +0100 Subject: [PATCH] DEBUG --- combination_embeddings.py | 16 ++++++++++++---- evalgeonamesembeddings.py | 10 +++++----- 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/combination_embeddings.py b/combination_embeddings.py index 10b53d4..c459f59 100644 --- a/combination_embeddings.py +++ b/combination_embeddings.py @@ -50,7 +50,7 @@ logging.basicConfig( ) chrono = Chronometer() -args = ConfigurationReader("./parser_config/toponym_combination_embedding.json").parse_args("-i -a -n 2 -t 0.002 -e 5 -m CNN data/geonamesData/FR.txt data/geonamesData/hierarchy.txt".split()) +args = ConfigurationReader("./parser_config/toponym_combination_embedding.json").parse_args()#("-i -a -n 2 -t 0.002 -e 5 -m CNN data/geonamesData/FR.txt data/geonamesData/hierarchy.txt".split()) GEONAME_FN = args.geoname_input GEONAMES_HIERARCHY_FN = args.geoname_hierachy_input @@ -85,7 +85,8 @@ bounds = filtered.dissolve("i").bounds.values[0] rel_dict ={} if args.adjacency: - fn = "{0}_adjacency.json".format(GEONAME_FN.split("/")[-1]) + logging.info("Retrieve inclusion relationships ! ") + fn = "data/geonamesData/{0}_adjacency.json".format(GEONAME_FN.split("/")[-1]) if not os.path.exists(fn): g = Grid(*bounds,[360,180]) g.fit_data(filtered) @@ -93,7 +94,9 @@ if args.adjacency: rel_dict.update(dict([[int(i) for i in r.split("|")] for r in g.get_adjacent_relationships()])) json.dump(rel_dict,open(fn,'w')) else: - rel_dict.update(json.load(open(fn,'w'))) + logging.info("Open and load data from previous computation!") + rel_dict.update({int(k):int(v) for k,v in json.load(open(fn)).items()}) + logging.info("{0} adjacency relationships retrieved ! ".format(len(rel_dict))) if args.inclusion: # RETRIEVE INCLUSION RELATIONSHIPS @@ -176,6 +179,11 @@ def accuracy_at_k(y_true, y_pred): return K.size(fit[:,0])/K.size(y_pred),K.size(fit[:,1])/K.size(y_pred) name = "{0}_{1}_{2}_{3}".format(GEONAME_FN.split("/")[-1],EPOCHS,NGRAM_SIZE,ACCURACY_TOLERANCE) +if args.adjacency: + name+="_A" +if args.inclusion: + name+="_I" + logging.info("Generating N-GRAM Embedding...") embedding_weights = index.get_embedding_layer(geoname2encodedname.values(),dim= embedding_dim) logging.info("Embedding generated !") @@ -212,7 +220,7 @@ if CONV : input_2 = Input(shape=(max_len,)) #input_3 = Input(shape=(1,)) - embedding_layer = Embedding(num_words, embedding_dim,input_length=max_len, trainable=True) + embedding_layer = Embedding(num_words, embedding_dim,input_length=max_len, weights=[embedding_weights],trainable=False) x1 = Conv1D(filters=32, kernel_size=3, activation='relu')(embedding_layer(input_1)) x1 = Dropout(0.5)(x1) diff --git a/evalgeonamesembeddings.py b/evalgeonamesembeddings.py index bf2bc7d..c7d346d 100644 --- a/evalgeonamesembeddings.py +++ b/evalgeonamesembeddings.py @@ -43,7 +43,7 @@ df["geometry"] = df["latitude longitude".split()].apply(lambda x:Point(x.longitu gdf = gpd.GeoDataFrame(df) # Select a sample that concerns the departement "La Manche" -manche_gdf = gdf[gdf.admin2_code == "50"] +manche_gdf = gdf[gdf.admin2_code == "50"].copy() df =pd.DataFrame([get_data(fn) for fn in fns]) @@ -61,10 +61,10 @@ def get_pearsons(model): return pearsonr(geodesic_d , embeddings_d) # Compute Pearson correlation and associated p-value df["pearson"] = df.filepath.apply(lambda x : get_pearsons(gensim.models.KeyedVectors.load(x))[0]) - -df.plot.scatter(x="walk_length", y="pearson") +df.fillna(0,inplace=True) +df.plot.scatter(x="walk_length", y="pearson",c="noise",cmap='inferno') plt.show() -df.plot.scatter(x="number_of_walks", y="pearson") +df.plot.scatter(x="number_of_walks", y="pearson",c="noise",cmap='inferno') plt.show() -df.plot.scatter(x="word2vec_window_size", y="pearson") +df.plot.scatter(x="word2vec_window_size", y="pearson",c="noise",cmap='inferno') plt.show() \ No newline at end of file -- GitLab