diff --git a/predihood/classes/Data.py b/predihood/classes/Data.py index b42b89231b06a3de8c7f2da102eac62854494245..4057c473cc4429ca8c11ca1be48c2892056feed9 100644 --- a/predihood/classes/Data.py +++ b/predihood/classes/Data.py @@ -17,7 +17,7 @@ warnings.filterwarnings("ignore", category=RuntimeWarning) class Data: - def __init__(self, normalization="density", filtering=True): + def __init__(self, normalization="density", filtering=True, add_assessment=False): """ Constructor of the Data class. Initialize attributes. Args: @@ -31,6 +31,7 @@ class Data: self.indicators = None self.normalization = normalization self.filtering = filtering + self.add_assessment = add_assessment # retrieve indicators self.get_indicators() diff --git a/predihood/classes/Dataset.py b/predihood/classes/Dataset.py index 9e3d24bd7264fda657ac7848b9dae3ce2601369d..366e019b3590f7155fe5295538080ee69795c229 100644 --- a/predihood/classes/Dataset.py +++ b/predihood/classes/Dataset.py @@ -13,7 +13,7 @@ class Dataset: """ This class represents assessed IRIS with their indicators ans EV values. There are options, such as removing outliers or rural IRIS. """ - def __init__(self, data, env, _type, selected_indicators=None, indicators_to_remove=None, train_size=TRAIN_SIZE, test_size=TEST_SIZE, outliers=False): + def __init__(self, data, env, _type, selected_indicators=None, indicators_to_remove=None, train_size=TRAIN_SIZE, test_size=TEST_SIZE, outliers=False, remove_rural=False): """ Constructor of the Dataset class. Initialize attributes. @@ -43,6 +43,7 @@ class Dataset: else: self.env = "building_type" self.train_size, self.test_size = check_train_test_percentages(train_size, test_size) self.outliers = outliers + self.remove_rural = remove_rural def init_all_in_one(self): """ @@ -50,8 +51,8 @@ class Dataset: When the type is "unsupervised", split data into X and Y is not relevant (as there is no train/test sets). """ if self.type == "supervised": - if self.outliers: - self.remove_outliers() + if self.outliers: self.remove_outliers() + if self.remove_rural: self.remove_rural_iris() self.init_X() self.init_Y() self.train_test() diff --git a/predihood/predict.py b/predihood/predict.py index d0c2e278bb567294f81101430daf590fdb11bf96..c97e79659985fcf5639fcfa8f6901161099e0a50 100644 --- a/predihood/predict.py +++ b/predihood/predict.py @@ -61,9 +61,9 @@ def compute_all_accuracies(data, clf, train_size, test_size, remove_outliers=Fal results[env] = OrderedDict() log.debug("--- %s ---", env) - dataset = Dataset(data_not_filtered, env, selected_indicators=data_not_filtered.indicators, train_size=train_size, test_size=test_size, outliers=remove_outliers, _type='supervised') + dataset = Dataset(data_not_filtered, env, selected_indicators=data_not_filtered.indicators, train_size=train_size, test_size=test_size, outliers=remove_outliers, remove_rural=remove_rural, _type='supervised') dataset.init_all_in_one() - if remove_rural: dataset.remove_rural_iris() + # if remove_rural: dataset.remove_rural_iris() mean_classifier = 0.0 algo = MethodPrediction(name="", dataset=dataset, classifier=clf) @@ -78,9 +78,9 @@ def compute_all_accuracies(data, clf, train_size, test_size, remove_outliers=Fal log.info("accuracy for %s without filtering: %f", env, algo.accuracy) for top_k, lst in lists.items(): - dataset = Dataset(data, env, selected_indicators=lst[env], train_size=train_size, test_size=test_size, outliers=remove_outliers, _type='supervised') + dataset = Dataset(data, env, selected_indicators=lst[env], train_size=train_size, test_size=test_size, outliers=remove_outliers, remove_rural=remove_rural, _type='supervised') dataset.init_all_in_one() - if remove_rural: dataset.remove_rural_iris() + # if remove_rural: dataset.remove_rural_iris() algo2 = MethodPrediction(name='', dataset=dataset, classifier=clf) algo2.fit() algo2.compute_performance()