diff --git a/README.md b/README.md
index 86a26b9aafa2f11fad2226b141702d3be3509280..34293fd964c82f04e2e8aa328b35d507b7e4939c 100755
--- a/README.md
+++ b/README.md
@@ -12,6 +12,7 @@ Finding a real estate in a new city is still a challenge. We often arrive in a c
 
 - Python, version >=3.8
 - [MongoDB](https://www.mongodb.com/), version >=4 for importing the database about neighbourhoods.
+- We recommend running Predihood on Firefox or Chrome (Safari has some issues).
 
 ### Installation
 
@@ -28,7 +29,7 @@ This command install dependencies, including [mongiris](https://gitlab.liris.cnr
 
 Next, to install the database, run the MongoDB server and execute this command (from the MongoDB's executables directory if needed):
 ```
-./mongorestore --archive=/path/to/dump-iris.bin
+./mongorestore --archive=/path/to/dump-dbinsee.bin
 ```
 where `/path/to/` is the path to the dump file of the IRIS collection (provided with the package mongiris in `mongiris/data/dump-iris.bin`).
diff --git a/predihood/classes/Dataset.py b/predihood/classes/Dataset.py
index 366e019b3590f7155fe5295538080ee69795c229..f7ef563d1e0f4a40809e309d7f69df0e0a51cf2e 100644
--- a/predihood/classes/Dataset.py
+++ b/predihood/classes/Dataset.py
@@ -41,7 +41,7 @@ class Dataset:
         self.Y_test = None
         if env in ENVIRONMENT_VARIABLES: self.env = env
         else: self.env = "building_type"
-        self.train_size, self.test_size = check_train_test_percentages(train_size, test_size)
+        self.train_size, self.test_size, _ = check_train_test_percentages(train_size, test_size)
         self.outliers = outliers
         self.remove_rural = remove_rural
 
@@ -51,11 +51,15 @@ class Dataset:
         When the type is "unsupervised", split data into X and Y is not relevant (as there is no train/test sets).
         """
         if self.type == "supervised":
-            if self.outliers: self.remove_outliers()
             if self.remove_rural: self.remove_rural_iris()
             self.init_X()
             self.init_Y()
             self.train_test()
+            if self.outliers:
+                self.remove_outliers()  # after train_test() because Isolation Forest needs X_train and X_test
+                self.init_X()
+                self.init_Y()
+                self.train_test()  # recompute the train/test split to update X and Y after dropping outliers
 
     def init_X(self):
         """
@@ -100,10 +104,9 @@ class Dataset:
         isolation_forest.fit(self.X_train)
         predictions = isolation_forest.predict(self.X_test)
         for i in range(len(predictions)):
-            if predictions[i] == -1:
-                code = self.data.loc[i, "CODE"]
-                log.debug(code, "has been removed since it is an outlier.")
-                self.data = self.data.drop(i, axis=0)  # delete IRIS that are detected as outliers
+            if predictions[i] == -1:  # this IRIS is an outlier according to the Isolation Forest algorithm
+                # delete IRIS detected as outliers; ignore if the IRIS is not present in the dataset (e.g. because it has already been removed as a rural IRIS)
+                self.data = self.data.drop(i, axis=0, errors="ignore")
 
     def remove_rural_iris(self):
         """
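The outlier-removal hunk above pairs scikit-learn's Isolation Forest with pandas' `drop(..., errors="ignore")`. A minimal, self-contained sketch of that pattern on made-up data (not the project's IRIS indicators or its `Dataset` class), just to show why `errors="ignore"` makes the second pass safe:

```python
import numpy as np
import pandas as pd
from sklearn.ensemble import IsolationForest

# Toy stand-in for the IRIS indicators: 100 ordinary rows plus 2 extreme ones.
rng = np.random.RandomState(0)
data = pd.DataFrame(rng.normal(size=(100, 3)), columns=["a", "b", "c"])
data.loc[100] = [50.0, 50.0, 50.0]   # obvious outliers
data.loc[101] = [-60.0, 40.0, 55.0]

train, test = data.iloc[:80], data.iloc[80:]

forest = IsolationForest(random_state=0)
forest.fit(train)
predictions = forest.predict(test)  # -1 flags an outlier, 1 an inlier

for i in range(len(predictions)):
    if predictions[i] == -1:
        # Drop by label; errors="ignore" keeps this safe if the row was
        # already removed by an earlier filtering step (e.g. rural IRIS).
        data = data.drop(test.index[i], axis=0, errors="ignore")

print(len(data))  # the two extreme rows (at least) are gone
```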
diff --git a/predihood/main.py b/predihood/main.py
index 86f70cfde4f23d12f772fbeb61ccfbfb009d0e74..35757523f496a05a7c7c566345068a3c1aeaff0e 100755
--- a/predihood/main.py
+++ b/predihood/main.py
@@ -17,7 +17,7 @@ from predihood.classes.Data import Data
 from predihood.classifiers_list import AVAILABLE_CLASSIFIERS
 from predihood.config import FILE_MANUAL_ASSESSMENT_EXPERTS
 from predihood.predict import compute_all_accuracies, predict_one_iris
-from predihood.utility_functions import signature, get_classifier, set_classifier, add_assessment_to_file
+from predihood.utility_functions import signature_of_algorithm, get_classifier, set_classifier, add_assessment_to_file
 from sklearn.utils._testing import ignore_warnings
 
 log = logging.getLogger(__name__)
@@ -90,7 +90,11 @@ def get_classifiers():
     Returns:
         A list containing the names of the available classifiers.
     """
-    return {"classifiers": list(AVAILABLE_CLASSIFIERS.keys())}
+    available_classifiers = list(AVAILABLE_CLASSIFIERS.keys())
+    if available_classifiers is not None:
+        return {"classifiers": available_classifiers}
+    else:
+        return {"error": "The list of classifiers cannot be retrieved. You may need to reload the page."}
 
 
 @app.route('/getParameters', methods=["GET"])
@@ -103,10 +107,14 @@ def get_parameters():
     """
     if 'name' in request.args:
         name = request.args['name']
-        parameters = signature(name)
-        return parameters
+        parameters, message = signature_of_algorithm(name)
+        if parameters != {} and message == "OK":
+            return parameters
+        else:
+            if message == "no_documentation": return "There is no documentation for the selected algorithm, so its parameters cannot be retrieved."
+            elif message == "no_parameters": return "There was a problem while retrieving the algorithm's parameters. Check the Python console for details."
     else:
-        return None
+        return "There are no parameters to get. Be sure to select an algorithm in the list."
 
 
@@ -134,8 +142,8 @@ def run_algorithm():
     # 3. run experiment on data to get accuracies for each EV and each list of selected indicators
     data = Data(normalization="density", filtering=True, add_assessment=add_manual_assessment)
     data.init_all_in_one()
-    accuracies = compute_all_accuracies(data, clf, train_size, test_size, remove_outliers, remove_rural)
-    return {"results": accuracies, "tops_k": predihood.config.TOPS_K}
+    accuracies, message = compute_all_accuracies(data, clf, train_size, test_size, remove_outliers, remove_rural)
+    return {"results": accuracies, "tops_k": predihood.config.TOPS_K, "message": message}
 
 
 @app.route('/predict_iris', methods=["GET"])
@@ -152,8 +160,8 @@ def predict_iris():
     data = Data(normalization="density", filtering=True)
     data.init_all_in_one()
 
-    predictions = predict_one_iris(iris_code_to_predict, data, clf, 0.8, 0.2, False)  # clf
-    return {"predictions": predictions}
+    predictions, message = predict_one_iris(iris_code_to_predict, data, clf, 0.8, 0.2, False)  # clf
+    return {"predictions": predictions, "message": message}
 
 
 @app.route('/getIrisPolygon', methods=["GET"])
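The `/getParameters` route above now consumes the renamed `signature_of_algorithm` helper (reworked at the end of this diff), whose Python >= 3.8 branch collects parameter names and defaults with `inspect.signature` before parsing the NumPy-style docstring for types and descriptions. A standalone sketch of just that introspection step, using `KNeighborsClassifier` as in `tests.py`; this is illustrative toy code, not the project's function:

```python
import inspect

from sklearn.neighbors import KNeighborsClassifier

model = KNeighborsClassifier()
sig = inspect.signature(model.__init__)  # bound method, so "self" is not listed

params, defaults = [], []
for param in sig.parameters.values():
    params.append(param.name)
    defaults.append(param.default)  # inspect.Parameter.empty when there is no default

print(dict(zip(params, defaults)))
# {'n_neighbors': 5, 'weights': 'uniform', 'algorithm': 'auto', ...}
```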
diff --git a/predihood/predict.py b/predihood/predict.py
index dbab57cb097dbd5828de4da9037e6579772f10d3..e86577580ebea326165d483b97fdcaebefb0820d 100644
--- a/predihood/predict.py
+++ b/predihood/predict.py
@@ -47,7 +47,7 @@ def compute_all_accuracies(data, clf, train_size, test_size, remove_outliers=Fal
         a dictionary of results for each EV and each list of selected indicators
     """
     log.info("... Computing accuracies ...")
-    train_size, test_size = check_train_test_percentages(train_size, test_size)
+    train_size, test_size, message = check_train_test_percentages(train_size, test_size)
 
     data_not_filtered = Data(normalization="density", filtering=False, add_assessment=data.add_assessment)
     data_not_filtered.init_all_in_one()
@@ -98,7 +98,7 @@ def compute_all_accuracies(data, clf, train_size, test_size, remove_outliers=Fal
     log.info("mean for classifier: %f", mean_classifier)
     results = OrderedDict(results)
     log.info(results)
-    return results
+    return results, message
 
 
 def predict_one_iris(iris_code, data, clf, train_size, test_size, remove_outliers=False):
@@ -116,7 +116,7 @@ def predict_one_iris(iris_code, data, clf, train_size, test_size, remove_outlier
     Returns:
         A dictionary containing predictions for each EV.
     """
-    train_size, test_size = check_train_test_percentages(train_size, test_size)
+    train_size, test_size, message = check_train_test_percentages(train_size, test_size)
     lists = retrieve_lists()
     predictions = {}
 
@@ -139,7 +139,7 @@
         else:
            predictions[env] = get_most_frequent(predictions_lst)  # get the most frequent value and the number of occurrences
     print(predictions)  # {'building_type': {'most_frequent': 'Towers', 'count_frequent': 7}, 'building_usage': {'most_frequent': 'Housing', 'count_frequent': 4}, ... }
-    return predictions
+    return predictions, message
 
 
 if __name__ == '__main__':
diff --git a/predihood/static/js/algorithms.js b/predihood/static/js/algorithms.js
index ccbb4d2b8a2c25aa49f5fab0415754354d50f425..149f6c065d9a272a06fce4d2d83f31cb4974a3d9 100644
--- a/predihood/static/js/algorithms.js
+++ b/predihood/static/js/algorithms.js
@@ -58,7 +58,8 @@ $("#testPercentage")
 // run the classifier with specified parameters and display results in the results section.
 $("#runBtn").click("on", function () {
     // $("body").css("cursor", "progress");
-    document.body.classList.add('busy-cursor');
+    // $("html,body").css("cursor", "progress");
+    document.body.style.cursor = 'wait';
     $(".wrapperTable input[type='checkbox']:not(:checked)").each(function () {
         $(this).parent().parent().empty(); // remove tables that are not checked in the interface
     });
@@ -97,7 +98,7 @@ $("#runBtn").click("on", function () {
     request_run = $.ajax({
         "type": "GET",
         "url": "/run",
-        "async": false,
+        //"async": false,
         data: {
             "clf": chosen_clf,
             "parameters": JSON.stringify(userParameters)
@@ -108,6 +109,7 @@
             // - the results table, highlighted cells are best means for each EV
             // - the list of parameters associated to the results
             // - the mean for this classifier (all EV combined)
+            if (result["message"] === "KO") { alert("Your train_size or test_size parameter was set incorrectly. The algorithm has been run with the default parameters, which are 80% for train_size and 20% for test_size."); }
             let keep = $("<label class='h5'><input type='checkbox' style='margin-right: 1rem;'/>" + chosen_clf + "</label>");
             let table = $("<table id='tableToExport'>")
                 .addClass("table table-hover table-responsive")
@@ -195,17 +197,15 @@ $("#runBtn").click("on", function () {
 
             // append all to HTML
             $("#resultsDiv").append(containing_table);
-            // $("body").css("cursor", "default");
-            document.body.classList.remove('busy-cursor');
+            document.body.style.cursor = 'default';
         },
         error: function (result, textStatus, errorThrown) {
             console.log(errorThrown);
-            alert("something went wrong while training. Please check your parameters<br>" + textStatus);
-            // $("body").css("cursor", "default");
-            document.body.classList.remove('busy-cursor');
+            alert("Something went wrong while training. Please check your parameters. " + textStatus);
+            document.body.style.cursor = 'default';
+        }
     });
-    return false; // do not reload
 });
 
 
@@ -366,15 +366,19 @@ function getParameters(name) {
         },
         contentType: 'application/json;charset=UTF-8',
         success: function (result) {
-            current_parameters = result; // store current parameters (for the chosen classifier) in a global variable
-            let parameters = result;
-            console.log(parameters)
-            if (Object.keys(parameters).length > 0) {
-                // at least one key
-                addParameters(parameters);
+            if (typeof result === "string") {
+                alert(result)
             } else {
-                // nothing to append, empty the div (no selection)
-                $("#tuningSection").css("visibility", "hidden");
+                current_parameters = result; // store current parameters (for the chosen classifier) in a global variable
+                let parameters = result;
+                console.log(parameters)
+                if (Object.keys(parameters).length > 0) {
+                    // at least one key
+                    addParameters(parameters);
+                } else {
+                    // nothing to append, empty the div (no selection)
+                    $("#tuningSection").css("visibility", "hidden");
+                }
             }
         },
         error: function (result, textStatus, errorThrown) {
diff --git a/predihood/static/js/carto.js b/predihood/static/js/carto.js
index df7e98ae0edf57705b12806ce1370f44e605686f..af46adf57e06e7fbe93feb8764814e71a92afdda 100644
--- a/predihood/static/js/carto.js
+++ b/predihood/static/js/carto.js
@@ -78,32 +78,37 @@ function displayPopup(e) {
     // common part of the popup
     let divInformation = $("<div>");
+    let download;
+
     if(preferred_language_carto === "french") {
+        download = $("<i class='fas fa-download' style='margin-left: 1rem;' title='Exporter cette table comme un fichier Excel.'></i>");
         divInformation
-            .append("CODE IRIS : " + layer.feature.properties.CODE_IRIS).append($("<br>"))
+            .append("CODE IRIS : " + code_iris).append($("<br>"))
             .append("IRIS : " + layer.feature.properties.NOM_IRIS).append($("<br>"))
             .append("COMMUNE : " + layer.feature.properties.NOM_COM).append($("<br>"))
         let moreInfosLink = $("<a>");
         moreInfosLink
-            .prop("href", "details-iris.html?code_iris="+layer.feature.properties.CODE_IRIS)
+            .prop("href", "details-iris.html?code_iris="+code_iris)
             .prop("target", "_blank")
             .text("Plus de détails")
             .append($("<br>"));
-        divInformation.append(moreInfosLink);
+        divInformation.append(moreInfosLink).append(download);
     } else {
+        download = $("<i class='fas fa-download' style='margin-left: 1rem;' title='Export this table as an Excel file.'></i>");
         divInformation
-            .append("IRIS CODE: " + layer.feature.properties.CODE_IRIS).append($("<br>"))
+            .append("IRIS CODE: " + code_iris).append($("<br>"))
             .append("IRIS: " + layer.feature.properties.NOM_IRIS).append($("<br>"))
-            .append("TOWNSHIP: " + layer.feature.properties.NOM_COM).append($("<br>"))
+            .append("TOWNSHIP: " + layer.feature.properties.NOM_COM).append($("<br>"));
         let moreInfosLink = $("<a>");
         moreInfosLink
-            .prop("href", "details-iris.html?code_iris="+layer.feature.properties.CODE_IRIS)
+            .prop("href", "details-iris.html?code_iris="+code_iris)
             .prop("target", "_blank")
             .text("More details")
             .append($("<br>"));
-        divInformation.append(moreInfosLink);
+        divInformation.append(moreInfosLink).append(download);
     }
 
+
     if (selected_algorithm !== "undefined" && selected_algorithm !== undefined) {
         predictions = predict(code_iris, selected_algorithm)
         console.log(predictions)
@@ -119,11 +124,36 @@ function displayPopup(e) {
     }
     previously_selected_algorithm = selected_algorithm;
 
-    let divPredictions = $("<div>").prop("id", "divPredictions")
-    if(predictions !== undefined) {
-        for(let key in predictions) { divPredictions.append(capitalizeFirstLetter(key.split("_").join(" "))+': ' + predictions[key]["most_frequent"] + " (" + predictions[key]["count_frequent"] + "/7)").append($('<br>')); }
+    let divPredictions = $("<div>").prop("id", "divPredictions");
+    let tablePredictions = $("<table>").prop("id", "tablePredictions");
+    tablePredictions.append("<tr><th>Environment value</th><th>Prediction</th><th>Frequency</th></tr>")
+    let row, colEnv, colValue, colFrequency;
+    if (predictions !== undefined) {
+        for (let key in predictions) {
+            divPredictions.append(
+                capitalizeFirstLetter(key.split("_").join(" ")) + ': ' +
+                predictions[key]["most_frequent"] +
+                " (" + predictions[key]["count_frequent"] + "/7)"
+            ).append($('<br>'));
+            row = $("<tr>");
+            colEnv = $("<td>").html(capitalizeFirstLetter(key.split("_").join(" ")));
+            colValue = $("<td>").html(predictions[key]["most_frequent"]);
+            colFrequency = $("<td>").html(predictions[key]["count_frequent"] + "/7");
+            row.append(colEnv, colValue, colFrequency)
+            tablePredictions.append(row);
+        }
+        console.log(tablePredictions);
+        download.on("click", function (e) {
+            e.preventDefault();
+            tablePredictions.table2excel({
+                type: 'xls',
+                filename: 'predictions'+code_iris+'.xls',
+                preserveColors: true
+            });
+        });
     }
+
     let messageTooltip = divInformation[0].outerHTML + selectAlgorithm[0].outerHTML + divPredictions[0].outerHTML;
     console.log(messageTooltip)
     layer.bindPopup(messageTooltip)
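The `prediction.js` hunk below (like `algorithms.js` above) alerts the user whenever the backend reports a "KO" status. That status originates in the reworked `check_train_test_percentages` (last file of this diff); a rough usage sketch of its new three-value return, assuming the `predihood` package is importable:

```python
from predihood.utility_functions import check_train_test_percentages

# Valid fractions are passed through with an "OK" status.
print(check_train_test_percentages(0.8, 0.2))  # -> (0.8, 0.2, 'OK')

# Percentages summing to 100 are converted to fractions.
print(check_train_test_percentages(80, 20))    # -> (0.8, 0.2, 'OK')

# Inconsistent sizes fall back to the defaults and report "KO",
# which the routes surface to the interface as an alert.
print(check_train_test_percentages(0.7, 0.2))  # -> (0.8, 0.2, 'KO')
```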
diff --git a/predihood/static/js/prediction.js b/predihood/static/js/prediction.js
index 882ea7468d2f14f37c9a989f4410e65c1b4a9c65..786798b33b346179c1ab8757e93a42b77ee18524 100644
--- a/predihood/static/js/prediction.js
+++ b/predihood/static/js/prediction.js
@@ -17,13 +17,15 @@ function predict(iris_code, algorithm_name) {
         "async": false,
         contentType: 'application/json;charset=UTF-8',
         success: function(result) {
-            console.log(result)
-            console.log(result['predictions'])
+            console.log(result);
+            console.log(result['predictions']);
             predictions = result['predictions'];
+            if (result['message'] === "KO") { alert("Your train_size or test_size parameter was set incorrectly. The algorithm has been run with the default parameters, which are 80% for train_size and 20% for test_size."); }
             $("body").css("cursor", "default");
         },
         error: function(result, textStatus, errorThrown) {
             console.log(errorThrown);
+            alert("There was an issue while predicting the environment of this IRIS. Check the console for more details. " + result + textStatus + errorThrown);
             $("body").css("cursor", "default");
         }
     });
diff --git a/predihood/tests.py b/predihood/tests.py
index d445c260b8588caf9c8ed0ae29b84f39f6cfb8ad..483616ba616669cbf2a3025c9bfa75c863b67afd 100644
--- a/predihood/tests.py
+++ b/predihood/tests.py
@@ -10,7 +10,7 @@ import unittest
 from predihood.config import FOLDER_DATASETS, ENVIRONMENT_VALUES
 from predihood.utility_functions import check_train_test_percentages, intersection, union, similarity, \
     get_most_frequent, address_to_code, address_to_city, indicator_full_to_short_label, \
-    indicator_short_to_full_label, get_classifier, set_classifier, signature, add_assessment_to_file
+    indicator_short_to_full_label, get_classifier, set_classifier, signature_of_algorithm, add_assessment_to_file
 from sklearn.neighbors import KNeighborsClassifier
 
 
@@ -69,7 +69,7 @@ class TestCase(unittest.TestCase):
     def test_signature(self):
         # test if the signature function is able to pick correct parameters
         classifier_name = "KNeighbors Classifier"
-        classifier_signature = signature(classifier_name)
+        classifier_signature, message = signature_of_algorithm(classifier_name)  # unpack the (parameters, message) tuple
         assert "n_neighbors" in classifier_signature
         assert classifier_signature["n_neighbors"]["types"] == ["int"]
         assert classifier_signature["n_neighbors"]["default"] == "5"
diff --git a/predihood/utility_functions.py b/predihood/utility_functions.py
index 1eb5b1ab82ef091440ba2349b1b586eda3493034..249563157597c7f624364b1ad79ef04ed6faf19f 100644
--- a/predihood/utility_functions.py
+++ b/predihood/utility_functions.py
@@ -2,6 +2,8 @@ import ast
 import inspect
 import json
 import logging
+import sys
+
 import numpy as np
 import pandas as pd
 import re
@@ -9,6 +11,7 @@ import requests
 import stringdist
 
 from area import area
+from inspect import signature
 from predihood import model
 from predihood.classifiers_list import AVAILABLE_CLASSIFIERS
 from predihood.config import TRAIN_SIZE, TEST_SIZE, ENVIRONMENT_VARIABLES, TRANSLATION, OLD_PREFIX, NEW_PREFIX, FILE_MANUAL_ASSESSMENT
@@ -203,7 +206,7 @@ def set_classifier(classifier, parameters):
     return classifier
 
 
-def signature(chosen_algorithm):
+def signature_of_algorithm(chosen_algorithm):
     """
     Get the signature of an algorithm, i.e. its parameters, the default values and the type of each parameter. The documentation of the algorithm must be in NumPy style.
 
@@ -216,11 +219,26 @@
         - the default value
         - a description of the parameter (e.g. "The train_size parameter aims at tuning the size of the sample during the learning step.")
     """
-    # special case for no selection
-    if chosen_algorithm == "Algorithm": return json.dumps({})
+    if chosen_algorithm == "Algorithm": return json.dumps({}), "no_selection"  # special case for no selection
+
+    # getting parameters and default values
+    parameters = {}
+    model = get_classifier(chosen_algorithm)
+    if sys.version_info.major < 3 or (sys.version_info.major == 3 and sys.version_info.minor < 8):
+        params = inspect.getfullargspec(model.__init__).args[1:]  # get parameters' names -- [1:] to remove the self parameter
+        defaults = inspect.getfullargspec(model.__init__).defaults  # get default values
+    else:
+        # getfullargspec is deprecated since Python 3.8, use inspect.signature instead
+        sig = inspect.signature(model.__init__)
+        params, defaults = [], []
+        for param in sig.parameters.values():
+            params.append(param.name)
+            defaults.append(param.default)
+    if params is None or defaults is None: return json.dumps({}), "no_parameters"
+    assert len(params) == len(defaults)
+
+    # getting the definition of parameters
     try:
-        # model = eval(_chosen_algorithm) # never use eval on untrusted strings
-        model = get_classifier(chosen_algorithm)
         doc = model.__doc__
         param_section = "Parameters"
         dashes = "-" * len(param_section)  # -------
@@ -229,11 +247,7 @@
         # sub_doc is the param section of the docs (i.e. without attributes and some text)
         sub_doc = doc[doc.find(param_section) + len(param_section) + number_spaces + len(dashes) + len("\n"):doc.find(attribute_section)]
     except:
-        raise Exception("This algorithm does not have a documentation...")
-    params = inspect.getfullargspec(model.__init__).args[1:] # get parameter' names -- [1:] to remove self parameter
-    defaults = inspect.getfullargspec(model.__init__).defaults # get default values
-    assert len(params) == len(defaults)
-    parameters = {}
+        return json.dumps({}), "no_documentation"
     for i in range(len(params)):
         param_name = str(params[i]) + " : "
         index_param = sub_doc.find(param_name)
@@ -284,7 +298,7 @@
         if len(types) == 0: types.append(type_of_default)  # fill missing types
         types[:] = [x for x in types if "None" not in x and "NoneType" not in x]  # remove None type
         parameters[param_name[:-3]] = {"types": types, "default": default, "description": first_sentence}  # -3 to remove " : "
-    return parameters
+    return parameters, "OK"
 
 
 def check_train_test_percentages(train_size, test_size):
@@ -299,17 +313,21 @@
 
         the train and test sizes
     """
     if 0 < train_size < 1 and 0 < test_size < 1 and train_size + test_size == 1:
-        return train_size, test_size  # default case
+        message = "OK"  # default case - everything is correct
     elif 0 < train_size < 1 and 0 < test_size < 1:
+        message = "KO"  # train_size or test_size is not correct, so we fall back to the default parameters
         train_size = TRAIN_SIZE  # 0.8
         test_size = TEST_SIZE  # 0.2
     if 1 <= train_size < 100 and 1 <= test_size < 100 and train_size + test_size == 100:
-        return train_size / 100, test_size / 100  # default case
+        message = "OK"  # default case - everything is correct
+        train_size = train_size / 100
+        test_size = test_size / 100
     elif 1 <= train_size < 100 and 1 <= test_size < 100:
+        message = "KO"  # train_size or test_size is not correct, so we fall back to the default parameters
        train_size = TRAIN_SIZE  # 0.8
        test_size = TEST_SIZE  # 0.2
-    return train_size, test_size
+    return train_size, test_size, message
 
 
 # 2. list functions