From 4d425452a9b0fe85e1e496fd6e572d5acbf192a7 Mon Sep 17 00:00:00 2001 From: Khalleud <ledk14@gmail.com> Date: Thu, 3 Jun 2021 22:30:34 +0200 Subject: [PATCH] [UPDATE] main and script --- projet/experimentsClassicClassifiers.py | 6 +++--- projet/script.txt | 2 ++ 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/projet/experimentsClassicClassifiers.py b/projet/experimentsClassicClassifiers.py index c65d3dd..be4fd36 100644 --- a/projet/experimentsClassicClassifiers.py +++ b/projet/experimentsClassicClassifiers.py @@ -43,14 +43,13 @@ if not os.path.exists(os.path.join('reports', columnClass, dir_name_report)): preprocessor = Preprocessor() -df_original = pd.read_csv(dataPath, sep="\t") +df_original = pd.read_csv(dataPath) df = df_original[[columnClass,columnText]].copy() -############ shall we remove articles with less n tokens ####### remove markers preprocessor.remove_null_rows(df, columnText) preprocessor.remove_null_rows(df, columnClass) -df = split_class(df, columnClass) +#df = split_class(df, columnClass) df = remove_weak_classes(df, columnClass, minOfInstancePerClass ) df = resample_classes(df, columnClass, maxOfInstancePerClass) @@ -211,3 +210,4 @@ for feature_technique_name, features in features_techniques_paragraphe: sys.stdout = sys.stdout # Reset the standard output to its original value sys.stdout = sys.__stdout__ + diff --git a/projet/script.txt b/projet/script.txt index ea24631..b5aa44e 100644 --- a/projet/script.txt +++ b/projet/script.txt @@ -1,3 +1,5 @@ +pip install -r requierments.txxt +python tmp_preprocess_data.py python experimentsClassicClassifiers.py data/EDdA_dataframe_withContent.tsv content ensemble_domaine_enccre 300 1500 python experimentsClassicClassifiers.py data/EDdA_dataframe_withContent.tsv content ensemble_domaine_enccre 50 1500 python experimentsClassicClassifiers.py data/EDdA_dataframe_withContent.tsv content ensemble_domaine_enccre 50 800 -- GitLab