From ebb505173fd9cee3a4553c17b96de012f770dbed Mon Sep 17 00:00:00 2001
From: Jacques Fize <jacques.fize@insa-lyon.fr>
Date: Tue, 16 Mar 2021 10:26:38 +0100
Subject: [PATCH] Update train_geocoder.py

---
 train_geocoder.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/train_geocoder.py b/train_geocoder.py
index 2ee31cd..b83c3d6 100644
--- a/train_geocoder.py
+++ b/train_geocoder.py
@@ -112,8 +112,7 @@ if args.tokenization_method == "bert":
  # Identify all ngram available
 pairs_of_toponym.toponym.apply(lambda x : index.split_and_add(x))
 pairs_of_toponym.toponym_context.apply(lambda x : index.split_and_add(x))
-print(len(index.ngram_index))
-index.filter_top_ngram(10000)
+
 
 num_words = len(index.index_ngram) # necessary for the embedding matrix
 
-- 
GitLab