Skip to content
Snippets Groups Projects
Commit 4d425452 authored by Khalleud
Browse files

[UPDATE] main and script

parent b772854d
No related branches found
No related tags found
1 merge request: !1 "Branch v1"
......@@ -43,14 +43,13 @@ if not os.path.exists(os.path.join('reports', columnClass, dir_name_report)):
preprocessor = Preprocessor()
df_original = pd.read_csv(dataPath, sep="\t")
df_original = pd.read_csv(dataPath)
df = df_original[[columnClass,columnText]].copy()
############ shall we remove articles with fewer than n tokens? ####### remove markers
preprocessor.remove_null_rows(df, columnText)
preprocessor.remove_null_rows(df, columnClass)
df = split_class(df, columnClass)
#df = split_class(df, columnClass)
df = remove_weak_classes(df, columnClass, minOfInstancePerClass )
df = resample_classes(df, columnClass, maxOfInstancePerClass)
......@@ -211,3 +210,4 @@ for feature_technique_name, features in features_techniques_paragraphe:
sys.stdout = sys.stdout # Reset the standard output to its original value
sys.stdout = sys.__stdout__
pip install -r requierments.txxt
python tmp_preprocess_data.py
python experimentsClassicClassifiers.py data/EDdA_dataframe_withContent.tsv content ensemble_domaine_enccre 300 1500
python experimentsClassicClassifiers.py data/EDdA_dataframe_withContent.tsv content ensemble_domaine_enccre 50 1500
python experimentsClassicClassifiers.py data/EDdA_dataframe_withContent.tsv content ensemble_domaine_enccre 50 800
......
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment