From 44940262a2979b00d610f804ac1ba339b158ff9a Mon Sep 17 00:00:00 2001 From: Ludovic Moncla <ludovic.moncla@insa-lyon.fr> Date: Sun, 13 Jun 2021 09:47:47 +0200 Subject: [PATCH] [FIX] add doc2vec_workers --- experimentsClassicClassifiers.py | 2 +- features_extractor.py | 4 ++-- settings.conf | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/experimentsClassicClassifiers.py b/experimentsClassicClassifiers.py index 3208fb0..530e60d 100644 --- a/experimentsClassicClassifiers.py +++ b/experimentsClassicClassifiers.py @@ -91,7 +91,7 @@ for columnInput in [columnText]: features_techniques = [ ('counter', extractor.count_vect(max_df = vectorization_max_df, min_df = vectorization_min_df, numberOfFeatures = vectorization_numberOfFeatures )), ('tf_idf', extractor.tf_idf(max_df = vectorization_max_df, min_df = vectorization_min_df, numberOfFeatures = vectorization_numberOfFeatures)), - ('doc2vec', extractor.doc2vec(max_epochs, doc2vec_vec_size, doc2vec_min_count , doc2vec_dm))] + ('doc2vec', extractor.doc2vec(max_epochs, doc2vec_vec_size, doc2vec_min_count , doc2vec_dm, doc2vec_workers))] diff --git a/features_extractor.py b/features_extractor.py index e807b08..18bf313 100644 --- a/features_extractor.py +++ b/features_extractor.py @@ -61,7 +61,7 @@ class feature_extractor: - def doc2vec(self, max_epochs, doc2vec_vec_size, doc2vec_min_count , doc2vec_dm): + def doc2vec(self, max_epochs, doc2vec_vec_size, doc2vec_min_count , doc2vec_dm, doc2vec_workers): nlp = spacy.load("fr_core_news_sm") stopWords = set(stopwords.words('french')) @@ -84,7 +84,7 @@ class feature_extractor: #Tag test set tagged_test = [TaggedDocument(words=tokenize_fr_text(_d), tags = [str(i)]) for i, _d in enumerate(self.docs_test)] - model = Doc2Vec(vector_size=doc2vec_vec_size, min_count = doc2vec_min_count, dm = doc2vec_dm) + model = Doc2Vec(vector_size=doc2vec_vec_size, min_count = doc2vec_min_count, dm = doc2vec_dm, workers = doc2vec_workers) model.build_vocab(tagged_tr) model.train(tagged_tr, total_examples=model.corpus_count, epochs = max_epochs) diff --git a/settings.conf b/settings.conf index eebf815..023215e 100644 --- a/settings.conf +++ b/settings.conf @@ -6,5 +6,5 @@ doc2vec_vec_size = 700 max_epochs = 10 doc2vec_min_count = 12 doc2vec_dm = 0 -doc2vec_workers = 4 +doc2vec_workers = 8 min_word_per_article = 25 -- GitLab