diff --git a/experimentsClassicClassifiers.py b/experimentsClassicClassifiers.py index 39f8c220fd99466a3c13d6e9eb9d51fe2ec3fccb..e1c15c0284363725d61d819616a70d3f45ffece4 100644 --- a/experimentsClassicClassifiers.py +++ b/experimentsClassicClassifiers.py @@ -13,6 +13,11 @@ from evaluate_model import evaluate_model from sklearn.model_selection import GridSearchCV import configparser +import nltk +nltk.download('stopwords') +nltk.download('punkt') + + parser = argparse.ArgumentParser() parser.add_argument("dataPath", help="Path of the dataframe") @@ -77,6 +82,8 @@ doc2vec_lr = float(config.get('vectorizers','doc2vec_lr')) for columnInput in [columnText, 'firstParagraph']: + print('Process: ' + columnInput) + extractor = feature_extractor(df,columnText, columnClass) #extractor_paragraphe = feature_extractor(df,'paragraphe', columnClass) diff --git a/requirements.txt b/requirements.txt index cb1f87bd29621957ff2ced2a9f3b9f000f6e0388..ab54835a61f9acc00afee644c0bfd94d19a4add5 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,6 @@ +pandas +matplotlib +seaborn beautifulsoup4 lxml Unidecode diff --git a/script.txt b/script.txt index 97eaef6d479680661b718d372831a786a679ae2a..bcde25b89f14f06264647817e97708e92a1a7a41 100644 --- a/script.txt +++ b/script.txt @@ -1,15 +1,12 @@ -mkdir -p reports/domaine_enccre -mkdir -p reports/ensemble_domaine_enccre -mkdir -p reports/normClass_artfl pip install -r requirements.txt python tmp_preprocess_data.py -python experimentsClassicClassifiers.py data/EDdA_dataframe_withContent.tsv contentWithoutClass ensemble_domaine_enccre 300 1500 -python experimentsClassicClassifiers.py data/EDdA_dataframe_withContent.tsv contentWithoutClass ensemble_domaine_enccre 50 1500 -python experimentsClassicClassifiers.py data/EDdA_dataframe_withContent.tsv contentWithoutClass ensemble_domaine_enccre 50 800 -python experimentsClassicClassifiers.py data/EDdA_dataframe_withContent.tsv contentWithoutClass ensemble_domaine_enccre 100 1500 -python experimentsClassicClassifiers.py data/EDdA_dataframe_withContent.tsv contentWithoutClass domaine_enccre 300 1500 -python experimentsClassicClassifiers.py data/EDdA_dataframe_withContent.tsv contentWithoutClass domaine_enccre 50 1500 -python experimentsClassicClassifiers.py data/EDdA_dataframe_withContent.tsv contentWithoutClass domaine_enccre 300 500 -python experimentsClassicClassifiers.py data/EDdA_dataframe_withContent.tsv contentWithoutClass normClass_artfl 300 1500 -python experimentsClassicClassifiers.py data/EDdA_dataframe_withContent.tsv contentWithoutClass normClass_artfl 50 2000 -python experimentsClassicClassifiers.py data/EDdA_dataframe_withContent.tsv contentWithoutClass normClass_artfl 50 500 +python experimentsClassicClassifiers.py data/dataframe_with_ensemble_domaine_enccre.csv contentWithoutClass ensemble_domaine_enccre 300 1500 +python experimentsClassicClassifiers.py data/dataframe_with_ensemble_domaine_enccre.csv contentWithoutClass ensemble_domaine_enccre 50 1500 +python experimentsClassicClassifiers.py data/dataframe_with_ensemble_domaine_enccre.csv contentWithoutClass ensemble_domaine_enccre 50 800 +python experimentsClassicClassifiers.py data/dataframe_with_ensemble_domaine_enccre.csv contentWithoutClass ensemble_domaine_enccre 100 1500 +python experimentsClassicClassifiers.py data/dataframe_with_domaine_enccre.csv contentWithoutClass domaine_enccre 300 1500 +python experimentsClassicClassifiers.py data/dataframe_with_domaine_enccre.csv contentWithoutClass domaine_enccre 50 1500 +python experimentsClassicClassifiers.py data/dataframe_with_domaine_enccre.csv contentWithoutClass domaine_enccre 300 500 +python experimentsClassicClassifiers.py data/dataframe_with_normClass.csv contentWithoutClass normClass 300 1500 +python experimentsClassicClassifiers.py data/dataframe_with_normClass.csv contentWithoutClass normClass 50 2000 +python experimentsClassicClassifiers.py data/dataframe_with_normClass.csv contentWithoutClass normClass 50 500