diff --git a/train_geocoder.py b/train_geocoder.py
index 2ee31cd9271798f7c3e0d144fa4fa39d28a5a980..b83c3d6dbc4770eed7c86fecd43d622235a94bfc 100644
--- a/train_geocoder.py
+++ b/train_geocoder.py
@@ -112,8 +112,7 @@ if args.tokenization_method == "bert":
  # Identify all ngram available
 pairs_of_toponym.toponym.apply(lambda x : index.split_and_add(x))
 pairs_of_toponym.toponym_context.apply(lambda x : index.split_and_add(x))
-print(len(index.ngram_index))
-index.filter_top_ngram(10000)
+
 
 num_words = len(index.index_ngram) # necessary for the embedding matrix