From 6827bb9569092bba40929bd6c4a441babfc5ef36 Mon Sep 17 00:00:00 2001 From: lmoncla <ludovic.moncla@gmail.com> Date: Mon, 7 Jun 2021 09:29:50 +0200 Subject: [PATCH] Update --- experimentsClassicClassifiers.py | 7 +++++++ requirements.txt | 3 +++ script.txt | 23 ++++++++++------------- 3 files changed, 20 insertions(+), 13 deletions(-) diff --git a/experimentsClassicClassifiers.py b/experimentsClassicClassifiers.py index 39f8c22..e1c15c0 100644 --- a/experimentsClassicClassifiers.py +++ b/experimentsClassicClassifiers.py @@ -13,6 +13,11 @@ from evaluate_model import evaluate_model from sklearn.model_selection import GridSearchCV import configparser +import nltk +nltk.download('stopwords') +nltk.download('punkt') + + parser = argparse.ArgumentParser() parser.add_argument("dataPath", help="Path of the dataframe") @@ -77,6 +82,8 @@ doc2vec_lr = float(config.get('vectorizers','doc2vec_lr')) for columnInput in [columnText, 'firstParagraph']: + print('Process: ' + columnInput) + extractor = feature_extractor(df,columnText, columnClass) #extractor_paragraphe = feature_extractor(df,'paragraphe', columnClass) diff --git a/requirements.txt b/requirements.txt index cb1f87b..ab54835 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,6 @@ +pandas +matplotlib +seaborn beautifulsoup4 lxml Unidecode diff --git a/script.txt b/script.txt index 97eaef6..bcde25b 100644 --- a/script.txt +++ b/script.txt @@ -1,15 +1,12 @@ -mkdir -p reports/domaine_enccre -mkdir -p reports/ensemble_domaine_enccre -mkdir -p reports/normClass_artfl pip install -r requirements.txt python tmp_preprocess_data.py -python experimentsClassicClassifiers.py data/EDdA_dataframe_withContent.tsv contentWithoutClass ensemble_domaine_enccre 300 1500 -python experimentsClassicClassifiers.py data/EDdA_dataframe_withContent.tsv contentWithoutClass ensemble_domaine_enccre 50 1500 -python experimentsClassicClassifiers.py data/EDdA_dataframe_withContent.tsv contentWithoutClass ensemble_domaine_enccre 50 800 -python experimentsClassicClassifiers.py data/EDdA_dataframe_withContent.tsv contentWithoutClass ensemble_domaine_enccre 100 1500 -python experimentsClassicClassifiers.py data/EDdA_dataframe_withContent.tsv contentWithoutClass domaine_enccre 300 1500 -python experimentsClassicClassifiers.py data/EDdA_dataframe_withContent.tsv contentWithoutClass domaine_enccre 50 1500 -python experimentsClassicClassifiers.py data/EDdA_dataframe_withContent.tsv contentWithoutClass domaine_enccre 300 500 -python experimentsClassicClassifiers.py data/EDdA_dataframe_withContent.tsv contentWithoutClass normClass_artfl 300 1500 -python experimentsClassicClassifiers.py data/EDdA_dataframe_withContent.tsv contentWithoutClass normClass_artfl 50 2000 -python experimentsClassicClassifiers.py data/EDdA_dataframe_withContent.tsv contentWithoutClass normClass_artfl 50 500 +python experimentsClassicClassifiers.py data/dataframe_with_ensemble_domaine_enccre.csv contentWithoutClass ensemble_domaine_enccre 300 1500 +python experimentsClassicClassifiers.py data/dataframe_with_ensemble_domaine_enccre.csv contentWithoutClass ensemble_domaine_enccre 50 1500 +python experimentsClassicClassifiers.py data/dataframe_with_ensemble_domaine_enccre.csv contentWithoutClass ensemble_domaine_enccre 50 800 +python experimentsClassicClassifiers.py data/dataframe_with_ensemble_domaine_enccre.csv contentWithoutClass ensemble_domaine_enccre 100 1500 +python experimentsClassicClassifiers.py data/dataframe_with_domaine_enccre.csv contentWithoutClass domaine_enccre 300 1500 +python experimentsClassicClassifiers.py data/dataframe_with_domaine_enccre.csv contentWithoutClass domaine_enccre 50 1500 +python experimentsClassicClassifiers.py data/dataframe_with_domaine_enccre.csv contentWithoutClass domaine_enccre 300 500 +python experimentsClassicClassifiers.py data/dataframe_with_normClass.csv contentWithoutClass normClass 300 1500 +python experimentsClassicClassifiers.py data/dataframe_with_normClass.csv contentWithoutClass normClass 50 2000 +python experimentsClassicClassifiers.py data/dataframe_with_normClass.csv contentWithoutClass normClass 50 500 -- GitLab