From 6827bb9569092bba40929bd6c4a441babfc5ef36 Mon Sep 17 00:00:00 2001
From: lmoncla <ludovic.moncla@gmail.com>
Date: Mon, 7 Jun 2021 09:29:50 +0200
Subject: [PATCH] Download NLTK data, add missing requirements, use per-class CSV datasets

---
 experimentsClassicClassifiers.py |  7 +++++++
 requirements.txt                 |  3 +++
 script.txt                       | 23 ++++++++++-------------
 3 files changed, 20 insertions(+), 13 deletions(-)

diff --git a/experimentsClassicClassifiers.py b/experimentsClassicClassifiers.py
index 39f8c22..e1c15c0 100644
--- a/experimentsClassicClassifiers.py
+++ b/experimentsClassicClassifiers.py
@@ -13,6 +13,11 @@ from evaluate_model import evaluate_model
 from sklearn.model_selection import GridSearchCV
 import configparser
 
+import nltk
+nltk.download('stopwords')
+nltk.download('punkt')
+
+
 
 parser = argparse.ArgumentParser()
 parser.add_argument("dataPath", help="Path of the dataframe")
@@ -77,6 +82,8 @@ doc2vec_lr = float(config.get('vectorizers','doc2vec_lr'))
 
 for columnInput in [columnText, 'firstParagraph']:
 
+    print('Process: ' + columnInput)
+
     extractor = feature_extractor(df,columnText, columnClass)
     #extractor_paragraphe = feature_extractor(df,'paragraphe', columnClass)
 
diff --git a/requirements.txt b/requirements.txt
index cb1f87b..ab54835 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,6 @@
+pandas
+matplotlib
+seaborn
 beautifulsoup4
 lxml
 Unidecode
diff --git a/script.txt b/script.txt
index 97eaef6..bcde25b 100644
--- a/script.txt
+++ b/script.txt
@@ -1,15 +1,12 @@
-mkdir -p reports/domaine_enccre
-mkdir -p reports/ensemble_domaine_enccre
-mkdir -p reports/normClass_artfl
 pip install -r requirements.txt
 python tmp_preprocess_data.py 
-python experimentsClassicClassifiers.py data/EDdA_dataframe_withContent.tsv contentWithoutClass ensemble_domaine_enccre 300 1500
-python experimentsClassicClassifiers.py data/EDdA_dataframe_withContent.tsv contentWithoutClass ensemble_domaine_enccre 50 1500 
-python experimentsClassicClassifiers.py data/EDdA_dataframe_withContent.tsv contentWithoutClass ensemble_domaine_enccre 50 800     
-python experimentsClassicClassifiers.py data/EDdA_dataframe_withContent.tsv contentWithoutClass ensemble_domaine_enccre 100 1500   
-python experimentsClassicClassifiers.py data/EDdA_dataframe_withContent.tsv contentWithoutClass domaine_enccre 300 1500
-python experimentsClassicClassifiers.py data/EDdA_dataframe_withContent.tsv contentWithoutClass domaine_enccre 50 1500
-python experimentsClassicClassifiers.py data/EDdA_dataframe_withContent.tsv contentWithoutClass domaine_enccre 300 500            
-python experimentsClassicClassifiers.py data/EDdA_dataframe_withContent.tsv contentWithoutClass normClass_artfl 300 1500
-python experimentsClassicClassifiers.py data/EDdA_dataframe_withContent.tsv contentWithoutClass normClass_artfl 50 2000
-python experimentsClassicClassifiers.py data/EDdA_dataframe_withContent.tsv contentWithoutClass normClass_artfl 50 500
+python experimentsClassicClassifiers.py data/dataframe_with_ensemble_domaine_enccre.csv contentWithoutClass ensemble_domaine_enccre 300 1500
+python experimentsClassicClassifiers.py data/dataframe_with_ensemble_domaine_enccre.csv contentWithoutClass ensemble_domaine_enccre 50 1500
+python experimentsClassicClassifiers.py data/dataframe_with_ensemble_domaine_enccre.csv contentWithoutClass ensemble_domaine_enccre 50 800
+python experimentsClassicClassifiers.py data/dataframe_with_ensemble_domaine_enccre.csv contentWithoutClass ensemble_domaine_enccre 100 1500
+python experimentsClassicClassifiers.py data/dataframe_with_domaine_enccre.csv contentWithoutClass domaine_enccre 300 1500
+python experimentsClassicClassifiers.py data/dataframe_with_domaine_enccre.csv contentWithoutClass domaine_enccre 50 1500
+python experimentsClassicClassifiers.py data/dataframe_with_domaine_enccre.csv contentWithoutClass domaine_enccre 300 500
+python experimentsClassicClassifiers.py data/dataframe_with_normClass.csv contentWithoutClass normClass 300 1500
+python experimentsClassicClassifiers.py data/dataframe_with_normClass.csv contentWithoutClass normClass 50 2000
+python experimentsClassicClassifiers.py data/dataframe_with_normClass.csv contentWithoutClass normClass 50 500
-- 
GitLab