From 6e42d70f1844150f0327a436bca856e7a527de01 Mon Sep 17 00:00:00 2001 From: Jacques Fize <jacques.fize@insa-lyon.fr> Date: Tue, 3 Nov 2020 11:44:25 +0100 Subject: [PATCH] UPD --- README.md | 3 +-- generate_dataset.py | 1 + .../toponym_combination_embedding_v3.json | 3 ++- train_geocoder_v2.py | 16 ++++++++++++---- 4 files changed, 16 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index f402d35..f5c5288 100644 --- a/README.md +++ b/README.md @@ -187,8 +187,7 @@ grid = GridSearchModel(\ grid.run() ``` - -# Authors and Acknowledgment +# Authors and Acknowledgment Proposed by **Jacques Fize**, **Ludovic Moncla** and **Bruno Martins** diff --git a/generate_dataset.py b/generate_dataset.py index 75713f0..788fe37 100644 --- a/generate_dataset.py +++ b/generate_dataset.py @@ -23,6 +23,7 @@ parser.add_argument("--split-method", default="per_pair", type=str, choices="per args = parser.parse_args()#("../data/geonamesData/FR.txt ../data/wikipedia/cooccurrence_FR.txt ../data/geonamesData/hierarchy.txt".split()) PREFIX = args.geonames_dataset.split("/")[-1].split(".")[0] # Ouch ! 
+PREFIX = PREFIX + "_" + args.split_method #  LOAD DATA geonames_data = read_geonames(args.geonames_dataset) diff --git a/parser_config/toponym_combination_embedding_v3.json b/parser_config/toponym_combination_embedding_v3.json index 5053a89..fac6e68 100644 --- a/parser_config/toponym_combination_embedding_v3.json +++ b/parser_config/toponym_combination_embedding_v3.json @@ -13,6 +13,7 @@ { "long": "--ngram-word2vec-iter", "type": "int", "default": 50 }, { "short": "-t", "long": "--tolerance-value", "type": "float", "default": 100 }, { "short": "-e", "long": "--epochs", "type": "int", "default": 100 }, - { "short": "-d", "long": "--dimension", "type": "int", "default": 256 } + { "short": "-d", "long": "--dimension", "type": "int", "default": 256 }, + { "short": "-l", "long": "--lstm-layer", "type": "int", "default": 2,"choices":[1,2] } ] } \ No newline at end of file diff --git a/train_geocoder_v2.py b/train_geocoder_v2.py index e5138f8..e67f311 100644 --- a/train_geocoder_v2.py +++ b/train_geocoder_v2.py @@ -62,7 +62,7 @@ if args.adjacency: PREFIX_OUTPUT_FN += "_A" if args.inclusion: PREFIX_OUTPUT_FN += "_I" -if args.wikipedia_cooc: +if args.wikipedia: PREFIX_OUTPUT_FN += "_C" MODEL_OUTPUT_FN = "outputs/{0}.h5".format(PREFIX_OUTPUT_FN) @@ -170,9 +170,15 @@ x1 = embedding_layer(input_1) x2 = embedding_layer(input_2) # Each LSTM learn on a permutation of the input toponyms -x1 = Bidirectional(LSTM(100))(x1) -x2 = Bidirectional(LSTM(100))(x2) -x = concatenate([x1,x2]) +if args.lstm_layer == 2: + x1 = Bidirectional(LSTM(100))(x1) + x2 = Bidirectional(LSTM(100))(x2) + x = concatenate([x1,x2]) +else: + lstm_unique_layer = Bidirectional(LSTM(100)) + x1 = lstm_unique_layer(x1) + x2 = lstm_unique_layer(x2) + x = concatenate([x1,x2]) x1 = Dense(500,activation="relu")(x) x1 = Dense(500,activation="relu")(x1) @@ -188,6 +194,8 @@ output_coord = concatenate([output_lon,output_lat],name="output_coord") model = Model(inputs = [input_1,input_2], outputs = output_coord)#input_3 
model.compile(loss={"output_coord":haversine_tf_1circle}, optimizer='adam',metrics={"output_coord":accuracy_k(ACCURACY_TOLERANCE)}) +print("Neural Network Architecture : ") +print(model.summary()) ############################################################################################# ################################# TRAINING LAUNCH ########################################### ############################################################################################# -- GitLab