From 4d425452a9b0fe85e1e496fd6e572d5acbf192a7 Mon Sep 17 00:00:00 2001
From: Khalleud <ledk14@gmail.com>
Date: Thu, 3 Jun 2021 22:30:34 +0200
Subject: [PATCH] [UPDATE] main and script

---
 projet/experimentsClassicClassifiers.py | 6 +++---
 projet/script.txt                       | 2 ++
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/projet/experimentsClassicClassifiers.py b/projet/experimentsClassicClassifiers.py
index c65d3dd..be4fd36 100644
--- a/projet/experimentsClassicClassifiers.py
+++ b/projet/experimentsClassicClassifiers.py
@@ -43,14 +43,13 @@ if not os.path.exists(os.path.join('reports',  columnClass, dir_name_report)):
 preprocessor = Preprocessor()
 
 
-df_original = pd.read_csv(dataPath, sep="\t")
+df_original = pd.read_csv(dataPath)
 
 
 df = df_original[[columnClass,columnText]].copy()
-############ shall we remove articles with less n tokens ####### remove markers
 preprocessor.remove_null_rows(df, columnText)
 preprocessor.remove_null_rows(df, columnClass)
-df = split_class(df, columnClass)
+#df = split_class(df, columnClass)
 df = remove_weak_classes(df, columnClass, minOfInstancePerClass )
 df = resample_classes(df, columnClass, maxOfInstancePerClass)
 
@@ -211,3 +210,4 @@ for feature_technique_name, features in features_techniques_paragraphe:
             sys.stdout = sys.stdout # Reset the standard output to its original value
 
             sys.stdout = sys.__stdout__
+
diff --git a/projet/script.txt b/projet/script.txt
index ea24631..b5aa44e 100644
--- a/projet/script.txt
+++ b/projet/script.txt
@@ -1,3 +1,5 @@
+pip install -r requirements.txt
+python tmp_preprocess_data.py 
 python experimentsClassicClassifiers.py data/EDdA_dataframe_withContent.tsv content ensemble_domaine_enccre 300 1500
 python experimentsClassicClassifiers.py data/EDdA_dataframe_withContent.tsv content ensemble_domaine_enccre 50 1500 
 python experimentsClassicClassifiers.py data/EDdA_dataframe_withContent.tsv content ensemble_domaine_enccre 50 800     
-- 
GitLab