diff --git a/README.md b/README.md index f402d3582d2161690e71214d3bfd1af483f09bdd..f5c52880524af61be5a7628a5e0646e81e8514b8 100644 --- a/README.md +++ b/README.md @@ -187,8 +187,7 @@ grid = GridSearchModel(\ grid.run() ``` - -#Â Authors and Acknowledgment +# Authors and Acknowledgment Proposed by **Jacques Fize**, **Ludovic Moncla** and **Bruno Martins** diff --git a/generate_dataset.py b/generate_dataset.py index 75713f0bf799bfbe2c69c44b4cda603258ba0c86..788fe37c40f4822f0745f20f1795837fc74134f9 100644 --- a/generate_dataset.py +++ b/generate_dataset.py @@ -23,6 +23,7 @@ parser.add_argument("--split-method", default="per_pair", type=str, choices="per args = parser.parse_args()#("../data/geonamesData/FR.txt ../data/wikipedia/cooccurrence_FR.txt ../data/geonamesData/hierarchy.txt".split()) PREFIX = args.geonames_dataset.split("/")[-1].split(".")[0] # Ouch ! +PREFIX = PREFIX + "_" + args.split_method # Â LOAD DATA geonames_data = read_geonames(args.geonames_dataset) diff --git a/parser_config/toponym_combination_embedding_v3.json b/parser_config/toponym_combination_embedding_v3.json index 5053a891b52541982cc68f7b156ac3b19e495d37..fac6e68cebb34a0dbe0539318284e39706597756 100644 --- a/parser_config/toponym_combination_embedding_v3.json +++ b/parser_config/toponym_combination_embedding_v3.json @@ -13,6 +13,7 @@ { "long": "--ngram-word2vec-iter", "type": "int", "default": 50 }, { "short": "-t", "long": "--tolerance-value", "type": "float", "default": 100 }, { "short": "-e", "long": "--epochs", "type": "int", "default": 100 }, - { "short": "-d", "long": "--dimension", "type": "int", "default": 256 } + { "short": "-d", "long": "--dimension", "type": "int", "default": 256 }, + { "short": "-l", "long": "--lstm-layer", "type": "int", "default": 2,"choices":[1,2] } ] } \ No newline at end of file diff --git a/train_geocoder_v2.py b/train_geocoder_v2.py index e5138f857ec4783dad182a6dc0ea0d3645c61674..e67f31173f8fd905857c591c11ad9f17d7ac130d 100644 --- a/train_geocoder_v2.py +++ 
b/train_geocoder_v2.py @@ -62,7 +62,7 @@ if args.adjacency: PREFIX_OUTPUT_FN += "_A" if args.inclusion: PREFIX_OUTPUT_FN += "_I" -if args.wikipedia_cooc: +if args.wikipedia: PREFIX_OUTPUT_FN += "_C" MODEL_OUTPUT_FN = "outputs/{0}.h5".format(PREFIX_OUTPUT_FN) @@ -170,9 +170,15 @@ x1 = embedding_layer(input_1) x2 = embedding_layer(input_2) #Â Each LSTM learn on a permutation of the input toponyms -x1 = Bidirectional(LSTM(100))(x1) -x2 = Bidirectional(LSTM(100))(x2) -x = concatenate([x1,x2]) +if args.lstm_layer == 2: + x1 = Bidirectional(LSTM(100))(x1) + x2 = Bidirectional(LSTM(100))(x2) + x = concatenate([x1,x2]) +else: + lstm_unique_layer = Bidirectional(LSTM(100)) + x1 = lstm_unique_layer(x1) + x2 = lstm_unique_layer(x2) + x = concatenate([x1,x2]) x1 = Dense(500,activation="relu")(x) x1 = Dense(500,activation="relu")(x1) @@ -188,6 +194,8 @@ output_coord = concatenate([output_lon,output_lat],name="output_coord") model = Model(inputs = [input_1,input_2], outputs = output_coord)#input_3 model.compile(loss={"output_coord":haversine_tf_1circle}, optimizer='adam',metrics={"output_coord":accuracy_k(ACCURACY_TOLERANCE)}) +print("Neural Network Architecture : ") +model.summary() ############################################################################################# ################################# TRAINING LAUNCH ########################################### #############################################################################################