From ebb505173fd9cee3a4553c17b96de012f770dbed Mon Sep 17 00:00:00 2001 From: Jacques Fize <jacques.fize@insa-lyon.fr> Date: Tue, 16 Mar 2021 10:26:38 +0100 Subject: [PATCH] Update train_geocoder.py --- train_geocoder.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/train_geocoder.py b/train_geocoder.py index 2ee31cd..b83c3d6 100644 --- a/train_geocoder.py +++ b/train_geocoder.py @@ -112,8 +112,7 @@ if args.tokenization_method == "bert": # Identify all ngram available pairs_of_toponym.toponym.apply(lambda x : index.split_and_add(x)) pairs_of_toponym.toponym_context.apply(lambda x : index.split_and_add(x)) -print(len(index.ngram_index)) -index.filter_top_ngram(10000) + num_words = len(index.index_ngram) # necessary for the embedding matrix -- GitLab