diff --git a/projet/experimentsClassicClassifiers.py b/projet/experimentsClassicClassifiers.py index c65d3dd534606a17b16919694315d4cffa8552ef..be4fd36b4a88f38aa353b51476b8f5003e45feef 100644 --- a/projet/experimentsClassicClassifiers.py +++ b/projet/experimentsClassicClassifiers.py @@ -43,14 +43,13 @@ if not os.path.exists(os.path.join('reports', columnClass, dir_name_report)): preprocessor = Preprocessor() -df_original = pd.read_csv(dataPath, sep="\t") +df_original = pd.read_csv(dataPath) df = df_original[[columnClass,columnText]].copy() -############ shall we remove articles with less n tokens ####### remove markers preprocessor.remove_null_rows(df, columnText) preprocessor.remove_null_rows(df, columnClass) -df = split_class(df, columnClass) +#df = split_class(df, columnClass) df = remove_weak_classes(df, columnClass, minOfInstancePerClass ) df = resample_classes(df, columnClass, maxOfInstancePerClass) @@ -211,3 +210,4 @@ for feature_technique_name, features in features_techniques_paragraphe: sys.stdout = sys.stdout # Reset the standard output to its original value sys.stdout = sys.__stdout__ + diff --git a/projet/script.txt b/projet/script.txt index ea246316d6d8ab75ef31491e2ccdd2c743085be9..b5aa44e65c6480650b7b4c3e25b404592bb2e079 100644 --- a/projet/script.txt +++ b/projet/script.txt @@ -1,3 +1,5 @@ +pip install -r requirements.txt +python tmp_preprocess_data.py python experimentsClassicClassifiers.py data/EDdA_dataframe_withContent.tsv content ensemble_domaine_enccre 300 1500 python experimentsClassicClassifiers.py data/EDdA_dataframe_withContent.tsv content ensemble_domaine_enccre 50 1500 python experimentsClassicClassifiers.py data/EDdA_dataframe_withContent.tsv content ensemble_domaine_enccre 50 800