Skip to content
Snippets Groups Projects
Commit 4d425452 authored by Khalleud's avatar Khalleud
Browse files

[UPDATE] main and script

parent b772854d
No related branches found
No related tags found
1 merge request: !1 "Branch v1"
...@@ -43,14 +43,13 @@ if not os.path.exists(os.path.join('reports', columnClass, dir_name_report)): ...@@ -43,14 +43,13 @@ if not os.path.exists(os.path.join('reports', columnClass, dir_name_report)):
preprocessor = Preprocessor() preprocessor = Preprocessor()
df_original = pd.read_csv(dataPath, sep="\t") df_original = pd.read_csv(dataPath)
df = df_original[[columnClass,columnText]].copy() df = df_original[[columnClass,columnText]].copy()
############ shall we remove articles with less n tokens ####### remove markers
preprocessor.remove_null_rows(df, columnText) preprocessor.remove_null_rows(df, columnText)
preprocessor.remove_null_rows(df, columnClass) preprocessor.remove_null_rows(df, columnClass)
df = split_class(df, columnClass) #df = split_class(df, columnClass)
df = remove_weak_classes(df, columnClass, minOfInstancePerClass ) df = remove_weak_classes(df, columnClass, minOfInstancePerClass )
df = resample_classes(df, columnClass, maxOfInstancePerClass) df = resample_classes(df, columnClass, maxOfInstancePerClass)
...@@ -211,3 +210,4 @@ for feature_technique_name, features in features_techniques_paragraphe: ...@@ -211,3 +210,4 @@ for feature_technique_name, features in features_techniques_paragraphe:
sys.stdout = sys.stdout # Reset the standard output to its original value sys.stdout = sys.stdout # Reset the standard output to its original value
sys.stdout = sys.__stdout__ sys.stdout = sys.__stdout__
pip install -r requierments.txxt
python tmp_preprocess_data.py
python experimentsClassicClassifiers.py data/EDdA_dataframe_withContent.tsv content ensemble_domaine_enccre 300 1500 python experimentsClassicClassifiers.py data/EDdA_dataframe_withContent.tsv content ensemble_domaine_enccre 300 1500
python experimentsClassicClassifiers.py data/EDdA_dataframe_withContent.tsv content ensemble_domaine_enccre 50 1500 python experimentsClassicClassifiers.py data/EDdA_dataframe_withContent.tsv content ensemble_domaine_enccre 50 1500
python experimentsClassicClassifiers.py data/EDdA_dataframe_withContent.tsv content ensemble_domaine_enccre 50 800 python experimentsClassicClassifiers.py data/EDdA_dataframe_withContent.tsv content ensemble_domaine_enccre 50 800
......
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment