From c0d73c7576667d673853d90bd0b40e6dc84e5fda Mon Sep 17 00:00:00 2001 From: Fize Jacques <jacques.fize@cirad.fr> Date: Tue, 23 Mar 2021 16:14:52 +0100 Subject: [PATCH] add option for saving the best model each epoch --- ...g_v3.json => argument_train_geocoder.json} | 4 ++-- .../toponym_combination_embedding.json | 20 ------------------- .../toponym_combination_embedding_v2.json | 20 ------------------- train_geocoder.py | 11 +++++----- wikipediageocoding.ipynb | 4 ++-- 5 files changed, 10 insertions(+), 49 deletions(-) rename parser_config/{toponym_combination_embedding_v3.json => argument_train_geocoder.json} (92%) delete mode 100644 parser_config/toponym_combination_embedding.json delete mode 100644 parser_config/toponym_combination_embedding_v2.json diff --git a/parser_config/toponym_combination_embedding_v3.json b/parser_config/argument_train_geocoder.json similarity index 92% rename from parser_config/toponym_combination_embedding_v3.json rename to parser_config/argument_train_geocoder.json index 12bfcef..3fcd22c 100644 --- a/parser_config/toponym_combination_embedding_v3.json +++ b/parser_config/argument_train_geocoder.json @@ -16,7 +16,7 @@ { "short": "-d", "long": "--dimension", "type": "int", "default": 256 }, { "short": "-l", "long": "--lstm-layer", "type": "int", "default": 2, "choices": [1, 2] }, { "long": "--tokenization-method", "type": "str", "default": "char-level", "choices": ["char-level", "word-level", "bert"] }, - { "long": "--previous-state", "type": "str", "help": "If the model was trained before, give the path here" } - + { "long": "--previous-state", "type": "str", "help": "If the model was trained before, give the path here" }, + { "long": "--save-best-model", "action": "store_true" } ] } \ No newline at end of file diff --git a/parser_config/toponym_combination_embedding.json b/parser_config/toponym_combination_embedding.json deleted file mode 100644 index 260d6ec..0000000 --- a/parser_config/toponym_combination_embedding.json +++ /dev/null @@ -1,20 +0,0 @@ -{ - "description": "Toponym Combination", - "args": [ - { "short": "geoname_input", "help": "Filepath of the Geonames file you want to use." }, - { "short": "geoname_hierachy_input", "help": "Filepath of the Geonames file you want to use." }, - { "short": "-v", "long": "--verbose", "action": "store_true" }, - { "short": "-i", "long": "--inclusion", "action": "store_true" }, - { "short": "-a", "long": "--adjacency", "action": "store_true" }, - { "short": "-w", "long": "--wikipedia-cooc", "action": "store_true" }, - { "long": "--wikipedia-cooc-fn","help":"Cooccurrence data filename"}, - { "long": "--cooc-sample-size", "type": "int", "default": 1 }, - {"long": "--adjacency-iteration", "type":"int","default":1}, - { "short": "-n", "long": "--ngram-size", "type": "int", "default": 4 }, - { "long": "--ngram-word2vec-iter", "type": "int", "default": 50 }, - { "short": "-t", "long": "--tolerance-value", "type": "float", "default": 0.002 }, - { "short": "-e", "long": "--epochs", "type": "int", "default": 100 }, - { "short": "-d", "long": "--dimension", "type": "int", "default": 256 }, - { "long": "--admin_code_1", "default": "None" } - ] -} \ No newline at end of file diff --git a/parser_config/toponym_combination_embedding_v2.json b/parser_config/toponym_combination_embedding_v2.json deleted file mode 100644 index 345c1d7..0000000 --- a/parser_config/toponym_combination_embedding_v2.json +++ /dev/null @@ -1,20 +0,0 @@ -{ - "description": "Toponym Combination", - "args": [ - { "short": "geoname_input", "help": "Filepath of the Geonames file you want to use." }, - { "short": "geoname_hierachy_input", "help": "Filepath of the Geonames file you want to use." }, - { "short": "-v", "long": "--verbose", "action": "store_true" }, - { "short": "-i", "long": "--inclusion", "action": "store_true" }, - { "short": "-a", "long": "--adjacency", "action": "store_true" }, - { "short": "-w", "long": "--wikipedia-cooc", "action": "store_true" }, - { "long": "--wikipedia-cooc-fn","help":"Cooccurrence data filename"}, - { "long": "--cooc-sample-size", "type": "int", "default": 1 }, - {"long": "--adjacency-iteration", "type":"int","default":1}, - { "short": "-n", "long": "--ngram-size", "type": "int", "default": 4 }, - { "long": "--ngram-word2vec-iter", "type": "int", "default": 50 }, - { "short": "-t", "long": "--tolerance-value", "type": "float", "default": 100 }, - { "short": "-e", "long": "--epochs", "type": "int", "default": 100 }, - { "short": "-d", "long": "--dimension", "type": "int", "default": 256 }, - { "long": "--admin_code_1", "default": "None" } - ] -} \ No newline at end of file diff --git a/train_geocoder.py b/train_geocoder.py index b83c3d6..eaa5193 100644 --- a/train_geocoder.py +++ b/train_geocoder.py @@ -37,7 +37,7 @@ except: print("NO GPU FOUND...") #Â COMMAND ARGS -args = ConfigurationReader("./parser_config/toponym_combination_embedding_v3.json")\ +args = ConfigurationReader("parser_config/argument_train_geocoder.json")\ .parse_args()#("IGN ../data/IGN/IGN_inclusion.csv ../data/IGN/IGN_adjacent_corrected.csv ../data/IGN/IGN_cooc.csv -i -w -a -n 4 --ngram-word2vec-iter 1".split()) # @@ -50,6 +50,7 @@ ACCURACY_TOLERANCE = args.tolerance_value EPOCHS = args.epochs WORDVEC_ITER = args.ngram_word2vec_iter EMBEDDING_DIM = args.dimension +save_best_only = args.save_best_model ################################################# ########## FILENAME VARIABLE #################### ################################################# @@ -201,8 +202,8 @@ print(model.summary()) ################################# TRAINING LAUNCH ########################################### ############################################################################################# -checkpoint = ModelCheckpoint(MODEL_OUTPUT_FN + ".part", monitor='loss', verbose=1, - save_best_only=True, mode='auto', period=1) +checkpoint = ModelCheckpoint(MODEL_OUTPUT_FN , monitor='loss', verbose=1, + save_best_only=save_best_only, mode='auto', period=1) epoch_timer = EpochTimer(HISTORY_FN) @@ -213,8 +214,8 @@ history = model.fit(training_generator,verbose=True, callbacks=[checkpoint,epoch_timer],epochs=EPOCHS) - -model.save(MODEL_OUTPUT_FN) +if not save_best_only: + model.save(MODEL_OUTPUT_FN) #Â Erase Model Checkpoint file if os.path.exists(MODEL_OUTPUT_FN + ".part"): diff --git a/wikipediageocoding.ipynb b/wikipediageocoding.ipynb index 7dceba3..91dc00b 100644 --- a/wikipediageocoding.ipynb +++ b/wikipediageocoding.ipynb @@ -27,7 +27,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 10, "metadata": {}, "outputs": [], "source": [ @@ -147,7 +147,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 11, "metadata": {}, "outputs": [], "source": [ -- GitLab