Skip to content
Snippets Groups Projects
Commit 44940262 authored by Ludovic Moncla
Browse files

[FIX] add doc2vec_workers

parent 2cae8dba
No related branches found
No related tags found
No related merge requests found
......@@ -91,7 +91,7 @@ for columnInput in [columnText]:
features_techniques = [
('counter', extractor.count_vect(max_df = vectorization_max_df, min_df = vectorization_min_df, numberOfFeatures = vectorization_numberOfFeatures )),
('tf_idf', extractor.tf_idf(max_df = vectorization_max_df, min_df = vectorization_min_df, numberOfFeatures = vectorization_numberOfFeatures)),
('doc2vec', extractor.doc2vec(max_epochs, doc2vec_vec_size, doc2vec_min_count , doc2vec_dm))]
('doc2vec', extractor.doc2vec(max_epochs, doc2vec_vec_size, doc2vec_min_count , doc2vec_dm, doc2vec_workers))]
......
......@@ -61,7 +61,7 @@ class feature_extractor:
def doc2vec(self, max_epochs, doc2vec_vec_size, doc2vec_min_count , doc2vec_dm):
def doc2vec(self, max_epochs, doc2vec_vec_size, doc2vec_min_count , doc2vec_dm, doc2vec_workers):
nlp = spacy.load("fr_core_news_sm")
stopWords = set(stopwords.words('french'))
......@@ -84,7 +84,7 @@ class feature_extractor:
#Tag test set
tagged_test = [TaggedDocument(words=tokenize_fr_text(_d), tags = [str(i)]) for i, _d in enumerate(self.docs_test)]
model = Doc2Vec(vector_size=doc2vec_vec_size, min_count = doc2vec_min_count, dm = doc2vec_dm)
model = Doc2Vec(vector_size=doc2vec_vec_size, min_count = doc2vec_min_count, dm = doc2vec_dm, workers = doc2vec_workers)
model.build_vocab(tagged_tr)
model.train(tagged_tr, total_examples=model.corpus_count, epochs = max_epochs)
......
......@@ -6,5 +6,5 @@ doc2vec_vec_size = 700
max_epochs = 10
doc2vec_min_count = 12
doc2vec_dm = 0
doc2vec_workers = 4
doc2vec_workers = 8
min_word_per_article = 25
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment