Skip to content
Snippets Groups Projects
Commit 44940262 authored by Ludovic Moncla
Browse files

[FIX] add doc2vec_workers

parent 2cae8dba
Branches
No related tags found
No related merge requests found
...@@ -91,7 +91,7 @@ for columnInput in [columnText]: ...@@ -91,7 +91,7 @@ for columnInput in [columnText]:
features_techniques = [ features_techniques = [
('counter', extractor.count_vect(max_df = vectorization_max_df, min_df = vectorization_min_df, numberOfFeatures = vectorization_numberOfFeatures )), ('counter', extractor.count_vect(max_df = vectorization_max_df, min_df = vectorization_min_df, numberOfFeatures = vectorization_numberOfFeatures )),
('tf_idf', extractor.tf_idf(max_df = vectorization_max_df, min_df = vectorization_min_df, numberOfFeatures = vectorization_numberOfFeatures)), ('tf_idf', extractor.tf_idf(max_df = vectorization_max_df, min_df = vectorization_min_df, numberOfFeatures = vectorization_numberOfFeatures)),
('doc2vec', extractor.doc2vec(max_epochs, doc2vec_vec_size, doc2vec_min_count , doc2vec_dm))] ('doc2vec', extractor.doc2vec(max_epochs, doc2vec_vec_size, doc2vec_min_count , doc2vec_dm, doc2vec_workers))]
......
...@@ -61,7 +61,7 @@ class feature_extractor: ...@@ -61,7 +61,7 @@ class feature_extractor:
def doc2vec(self, max_epochs, doc2vec_vec_size, doc2vec_min_count , doc2vec_dm): def doc2vec(self, max_epochs, doc2vec_vec_size, doc2vec_min_count , doc2vec_dm, doc2vec_workers):
nlp = spacy.load("fr_core_news_sm") nlp = spacy.load("fr_core_news_sm")
stopWords = set(stopwords.words('french')) stopWords = set(stopwords.words('french'))
...@@ -84,7 +84,7 @@ class feature_extractor: ...@@ -84,7 +84,7 @@ class feature_extractor:
#Tag test set #Tag test set
tagged_test = [TaggedDocument(words=tokenize_fr_text(_d), tags = [str(i)]) for i, _d in enumerate(self.docs_test)] tagged_test = [TaggedDocument(words=tokenize_fr_text(_d), tags = [str(i)]) for i, _d in enumerate(self.docs_test)]
model = Doc2Vec(vector_size=doc2vec_vec_size, min_count = doc2vec_min_count, dm = doc2vec_dm) model = Doc2Vec(vector_size=doc2vec_vec_size, min_count = doc2vec_min_count, dm = doc2vec_dm, workers = doc2vec_workers)
model.build_vocab(tagged_tr) model.build_vocab(tagged_tr)
model.train(tagged_tr, total_examples=model.corpus_count, epochs = max_epochs) model.train(tagged_tr, total_examples=model.corpus_count, epochs = max_epochs)
......
...@@ -6,5 +6,5 @@ doc2vec_vec_size = 700 ...@@ -6,5 +6,5 @@ doc2vec_vec_size = 700
max_epochs = 10 max_epochs = 10
doc2vec_min_count = 12 doc2vec_min_count = 12
doc2vec_dm = 0 doc2vec_dm = 0
doc2vec_workers = 4 doc2vec_workers = 8
min_word_per_article = 25 min_word_per_article = 25
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment