Commit 2da36af2 authored by Nelly Barret

[M] bug fixed: signature, loading sign, outliers+rural

parent 446f0fcc
......@@ -12,6 +12,7 @@ Finding real estate in a new city is still a challenge. We often arrive in a c
- Python, version >=3.8
- [MongoDB](https://www.mongodb.com/), version >=4 for importing the neighbourhood database.
- We recommend running Predihood in Firefox or Chrome (Safari has some issues)
### Installation
......@@ -28,7 +29,7 @@ This command installs dependencies, including [mongiris](https://gitlab.liris.cnr
Next, to install the database, run the MongoDB server and execute these commands (from the MongoDB executables directory if needed):
```
./mongorestore --archive=/path/to/dump-iris.bin
./mongorestore --archive=/path/to/dump-dbinsee.bin
```
where `/path/to/` is the path to the dump file of the IRIS collection (provided with the package mongiris in `mongiris/data/dump-iris.bin`).
......
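As a quick sanity check after the restore, something like the following can confirm that the collections are present; the database name `dbinsee` is an assumption derived from the dump file name, not stated in the README.

```python
# Hypothetical post-restore check; the database name "dbinsee" is assumed from dump-dbinsee.bin.
from pymongo import MongoClient

client = MongoClient("mongodb://localhost:27017/")
print(client.list_database_names())               # the restored database(s) should be listed here
print(client["dbinsee"].list_collection_names())  # collections imported by mongorestore
```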
......@@ -41,7 +41,7 @@ class Dataset:
self.Y_test = None
if env in ENVIRONMENT_VARIABLES: self.env = env
else: self.env = "building_type"
self.train_size, self.test_size = check_train_test_percentages(train_size, test_size)
self.train_size, self.test_size, _ = check_train_test_percentages(train_size, test_size)
self.outliers = outliers
self.remove_rural = remove_rural
......@@ -51,11 +51,15 @@ class Dataset:
When the type is "unsupervised", splitting data into X and Y is not relevant (as there are no train/test sets).
"""
if self.type == "supervised":
if self.outliers: self.remove_outliers()
if self.remove_rural: self.remove_rural_iris()
self.init_X()
self.init_Y()
self.train_test()
if self.outliers:
self.remove_outliers() # after train_test() because Isolation Forest needs X_train and X_test
self.init_X()
self.init_Y()
self.train_test() # re-run train_test() to update X and Y after dropping the outliers
def init_X(self):
"""
......@@ -100,10 +104,9 @@ class Dataset:
isolation_forest.fit(self.X_train)
predictions = isolation_forest.predict(self.X_test)
for i in range(len(predictions)):
if predictions[i] == -1:
code = self.data.loc[i, "CODE"]
log.debug(code, "has been removed since it is an outlier.")
self.data = self.data.drop(i, axis=0) # delete IRIS that are detected as outliers
if predictions[i] == -1: # this IRIS is an outlier according to isolation forest algorithm
# delete IRIS that are detected as outliers; ignore if the IRIS is not present in the dataset (e.g. because it has been removed as a rural IRIS)
self.data = self.data.drop(i, axis=0, errors="ignore")
def remove_rural_iris(self):
"""
......
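The reworked outlier removal above fits scikit-learn's Isolation Forest on the training split and drops rows predicted as -1, using `errors="ignore"` so that IRIS already removed (e.g. as rural) do not raise. A minimal, self-contained sketch of that pattern, not the actual Dataset class:

```python
# Minimal sketch of the outlier-removal pattern used in Dataset.remove_outliers
# (assumption: position i in the predictions maps to a row label i in data).
import pandas as pd
from sklearn.ensemble import IsolationForest

def drop_outliers(data: pd.DataFrame, X_train: pd.DataFrame, X_test: pd.DataFrame) -> pd.DataFrame:
    forest = IsolationForest(random_state=0)
    forest.fit(X_train)                   # needs X_train, hence the call after train_test()
    predictions = forest.predict(X_test)  # -1 marks an outlier, 1 an inlier
    for i, prediction in enumerate(predictions):
        if prediction == -1:
            # ignore rows already dropped, e.g. removed earlier as rural IRIS
            data = data.drop(i, axis=0, errors="ignore")
    return data
```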
......@@ -17,7 +17,7 @@ from predihood.classes.Data import Data
from predihood.classifiers_list import AVAILABLE_CLASSIFIERS
from predihood.config import FILE_MANUAL_ASSESSMENT_EXPERTS
from predihood.predict import compute_all_accuracies, predict_one_iris
from predihood.utility_functions import signature, get_classifier, set_classifier, add_assessment_to_file
from predihood.utility_functions import signature_of_algorithm, get_classifier, set_classifier, add_assessment_to_file
from sklearn.utils._testing import ignore_warnings
log = logging.getLogger(__name__)
......@@ -90,7 +90,11 @@ def get_classifiers():
Returns:
A list containing the names of the available classifiers.
"""
return {"classifiers": list(AVAILABLE_CLASSIFIERS.keys())}
available_classifiers = list(AVAILABLE_CLASSIFIERS.keys())
if available_classifiers is not None:
return {"classifiers": available_classifiers}
else:
return {"error": "The list of classifiers can not be retrieved. You might reload the page."}
@app.route('/getParameters', methods=["GET"])
......@@ -103,10 +107,14 @@ def get_parameters():
"""
if 'name' in request.args:
name = request.args['name']
parameters = signature(name)
return parameters
parameters, message = signature_of_algorithm(name)
if parameters != {} and message == "OK":
return parameters
else:
if message == "no_documentation": return "There is no documentation for the selected algorithm, therefore its parameters cannot be retrieved."
elif message == "no_parameters": return "There was a problem while retrieving the algorithm's parameters. You can check the Python console."
else:
return None
return "There is no parameters to get. Be sure to select an algorithm in the list."
@app.route('/run', methods=["GET"])
......@@ -134,8 +142,8 @@ def run_algorithm():
# 3. run experiment on data to get accuracies for each EV and each list of selected indicators
data = Data(normalization="density", filtering=True, add_assessment=add_manual_assessment)
data.init_all_in_one()
accuracies = compute_all_accuracies(data, clf, train_size, test_size, remove_outliers, remove_rural)
return {"results": accuracies, "tops_k": predihood.config.TOPS_K}
accuracies, message = compute_all_accuracies(data, clf, train_size, test_size, remove_outliers, remove_rural)
return {"results": accuracies, "tops_k": predihood.config.TOPS_K, "message": message}
@app.route('/predict_iris', methods=["GET"])
......@@ -152,8 +160,8 @@ def predict_iris():
data = Data(normalization="density", filtering=True)
data.init_all_in_one()
predictions = predict_one_iris(iris_code_to_predict, data, clf, 0.8, 0.2, False) # clf
return {"predictions": predictions}
predictions, message = predict_one_iris(iris_code_to_predict, data, clf, 0.8, 0.2, False) # clf
return {"predictions": predictions, "message": message}
@app.route('/getIrisPolygon', methods=["GET"])
......
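For illustration, here is how a client might consume the updated routes; the host and port are assumptions, and only the parameters visible in the diffs (`name`, `clf`, `parameters`) are passed, so the real routes may expect more.

```python
# Hypothetical client calls against the Flask routes above (host/port assumed).
import json
import requests

BASE = "http://localhost:5000"

# /getParameters now returns either a parameters dict or a plain explanatory string
resp = requests.get(f"{BASE}/getParameters", params={"name": "KNeighbors Classifier"})
try:
    print(resp.json())  # {"n_neighbors": {"types": [...], "default": ..., "description": ...}, ...}
except ValueError:
    print(resp.text)    # e.g. the "no documentation" message

# /run now returns a "message" key ("OK" or "KO") alongside the accuracy results
resp = requests.get(f"{BASE}/run", params={"clf": "KNeighbors Classifier",
                                           "parameters": json.dumps({})})
payload = resp.json()
if payload["message"] == "KO":
    print("train/test sizes were invalid; defaults (0.8/0.2) were used instead")
```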
......@@ -47,7 +47,7 @@ def compute_all_accuracies(data, clf, train_size, test_size, remove_outliers=Fal
a dictionary of results for each EV and each list of selected indicators
"""
log.info("... Computing accuracies ...")
train_size, test_size = check_train_test_percentages(train_size, test_size)
train_size, test_size, message = check_train_test_percentages(train_size, test_size)
data_not_filtered = Data(normalization="density", filtering=False, add_assessment=data.add_assessment)
data_not_filtered.init_all_in_one()
......@@ -98,7 +98,7 @@ def compute_all_accuracies(data, clf, train_size, test_size, remove_outliers=Fal
log.info("mean for classifier: %f", mean_classifier)
results = OrderedDict(results)
log.info(results)
return results
return results, message
def predict_one_iris(iris_code, data, clf, train_size, test_size, remove_outliers=False):
......@@ -116,7 +116,7 @@ def predict_one_iris(iris_code, data, clf, train_size, test_size, remove_outlier
Returns:
A dictionary containing predictions for each EV.
"""
train_size, test_size = check_train_test_percentages(train_size, test_size)
train_size, test_size, message = check_train_test_percentages(train_size, test_size)
lists = retrieve_lists()
predictions = {}
......@@ -139,7 +139,7 @@ def predict_one_iris(iris_code, data, clf, train_size, test_size, remove_outlier
else:
predictions[env] = get_most_frequent(predictions_lst) # get the most frequent value and the number of occurrences
print(predictions) # {'building_type': {'most_frequent': 'Towers', 'count_frequent': 7}, 'building_usage': {'most_frequent': 'Housing', 'count_frequent': 4}, ... }
return predictions
return predictions, message
if __name__ == '__main__':
......
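`predict_one_iris` aggregates the per-list predictions through `get_most_frequent`, which returns the majority value and its count over the 7 indicator lists (see the printed example above). A sketch of that voting step; the real helper in `utility_functions` may differ:

```python
# Sketch of the majority vote behind get_most_frequent; the actual helper may differ.
from collections import Counter

def most_frequent(predictions_lst):
    value, count = Counter(predictions_lst).most_common(1)[0]
    return {"most_frequent": value, "count_frequent": count}

print(most_frequent(["Towers", "Towers", "Housing", "Towers"]))
# {'most_frequent': 'Towers', 'count_frequent': 3}
```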
......@@ -58,7 +58,8 @@ $("#testPercentage")
// run the classifier with specified parameters and display results in the results section.
$("#runBtn").click("on", function () {
// $("body").css("cursor", "progress");
document.body.classList.add('busy-cursor');
// $("html,body").css("cursor", "progress");
document.body.style.cursor = 'wait';
$(".wrapperTable input[type='checkbox']:not(:checked)").each(function () {
$(this).parent().parent().empty(); // remove tables that are not checked in the interface
});
......@@ -97,7 +98,7 @@ $("#runBtn").click("on", function () {
request_run = $.ajax({
"type": "GET",
"url": "/run",
"async": false,
//"async": false,
data: {
"clf": chosen_clf,
"parameters": JSON.stringify(userParameters)
......@@ -108,6 +109,7 @@ $("#runBtn").click("on", function () {
// - the results table, highlighted cells are best means for each EV
// - the list of parameters associated to the results
// - the mean for this classifier (all EV combined)
if(result["message"] === "KO") { alert("Your train_size or test_size parameter was set incorrectly. The algorithm has been run with the default parameters: 80% for train_size and 20% for test_size.") }
let keep = $("<label class='h5'><input type='checkbox' style='margin-right: 1rem;'/>" + chosen_clf + "</label>");
let table = $("<table id='tableToExport'>")
.addClass("table table-hover table-responsive")
......@@ -195,17 +197,15 @@ $("#runBtn").click("on", function () {
// append all to HTML
$("#resultsDiv").append(containing_table);
// $("body").css("cursor", "default");
document.body.classList.remove('busy-cursor');
document.body.style.cursor = 'default';
},
error: function (result, textStatus, errorThrown) {
console.log(errorThrown);
alert("something went wrong while training. Please check your parameters<br>" + textStatus);
// $("body").css("cursor", "default");
document.body.classList.remove('busy-cursor');
alert("Something went wrong while training. Please check your parameters." + textStatus);
document.body.style.cursor = 'default';
}
});
return false; // do not reload
});
......@@ -366,15 +366,19 @@ function getParameters(name) {
},
contentType: 'application/json;charset=UTF-8',
success: function (result) {
current_parameters = result; // store current parameters (for the chosen classifier) in a global variable
let parameters = result;
console.log(parameters)
if (Object.keys(parameters).length > 0) {
// at least one key
addParameters(parameters);
if(typeof result === "string") {
alert(result)
} else {
// nothing to append, empty the div (no selection)
$("#tuningSection").css("visibility", "hidden");
current_parameters = result; // store current parameters (for the chosen classifier) in a global variable
let parameters = result;
console.log(parameters)
if (Object.keys(parameters).length > 0) {
// at least one key
addParameters(parameters);
} else {
// nothing to append, empty the div (no selection)
$("#tuningSection").css("visibility", "hidden");
}
}
},
error: function (result, textStatus, errorThrown) {
......
......@@ -78,32 +78,37 @@ function displayPopup(e) {
// common part of the popup
let divInformation = $("<div>");
let download;
if(preferred_language_carto === "french") {
download = $("<i class='fas fa-download' style='margin-left: 1rem;' title='Exporter cette table comme un fichier Excel.'></i>");
divInformation
.append("CODE IRIS : " + layer.feature.properties.CODE_IRIS).append($("<br>"))
.append("CODE IRIS : " + code_iris).append($("<br>"))
.append("IRIS : " + layer.feature.properties.NOM_IRIS).append($("<br>"))
.append("COMMUNE : " + layer.feature.properties.NOM_COM).append($("<br>"))
let moreInfosLink = $("<a>");
moreInfosLink
.prop("href", "details-iris.html?code_iris="+layer.feature.properties.CODE_IRIS)
.prop("href", "details-iris.html?code_iris="+code_iris)
.prop("target", "_blank")
.text("Plus de détails")
.append($("<br>"));
divInformation.append(moreInfosLink);
divInformation.append(moreInfosLink).append(download);
} else {
download = $("<i class='fas fa-download' style='margin-left: 1rem;' title='Export this table as an Excel file.'></i>");
divInformation
.append("IRIS CODE: " + layer.feature.properties.CODE_IRIS).append($("<br>"))
.append("IRIS CODE: " + code_iris).append($("<br>"))
.append("IRIS: " + layer.feature.properties.NOM_IRIS).append($("<br>"))
.append("TOWNSHIP: " + layer.feature.properties.NOM_COM).append($("<br>"))
.append("TOWNSHIP: " + layer.feature.properties.NOM_COM).append($("<br>"));
let moreInfosLink = $("<a>");
moreInfosLink
.prop("href", "details-iris.html?code_iris="+layer.feature.properties.CODE_IRIS)
.prop("href", "details-iris.html?code_iris="+code_iris)
.prop("target", "_blank")
.text("More details")
.append($("<br>"));
divInformation.append(moreInfosLink);
divInformation.append(moreInfosLink).append(download);
}
if (selected_algorithm !== "undefined" && selected_algorithm !== undefined) {
predictions = predict(code_iris, selected_algorithm)
console.log(predictions)
......@@ -119,11 +124,36 @@ function displayPopup(e) {
}
previously_selected_algorithm = selected_algorithm;
let divPredictions = $("<div>").prop("id", "divPredictions")
if(predictions !== undefined) {
for(let key in predictions) { divPredictions.append(capitalizeFirstLetter(key.split("_").join(" "))+': ' + predictions[key]["most_frequent"] + " (" + predictions[key]["count_frequent"] + "/7)").append($('<br>')); }
let divPredictions = $("<div>").prop("id", "divPredictions");
let tablePredictions = $("<table>").prop("id", "tablePredictions");
tablePredictions.append("<tr><th>Environment value</th><th>Prediction</th><th>Frequency</th></tr>");
let row, colEnv, colValue, colFrequency;
if (predictions !== undefined) {
for (let key in predictions) {
divPredictions.append(
capitalizeFirstLetter(key.split("_").join(" ")) + ': '
+ predictions[key]["most_frequent"]
+ " (" + predictions[key]["count_frequent"] + "/7)"
).append($('<br>'));
row = $("<tr>");
colEnv = $("<td>").html(capitalizeFirstLetter(key.split("_").join(" ")));
colValue = $("<td>").html(predictions[key]["most_frequent"]);
colFrequency = $("<td>").html(predictions[key]["count_frequent"] + "/7");
row.append(colEnv, colValue, colFrequency)
tablePredictions.append(row);
}
console.log(tablePredictions);
download.on("click", function (e) {
e.preventDefault();
tablePredictions.table2excel({
type: 'xls',
filename: 'predictions'+code_iris+'.xls',
preserveColors: true
});
});
}
let messageTooltip = divInformation[0].outerHTML + selectAlgorithm[0].outerHTML + divPredictions[0].outerHTML;
console.log(messageTooltip)
layer.bindPopup(messageTooltip)
......
......@@ -17,13 +17,15 @@ function predict(iris_code, algorithm_name) {
"async": false,
contentType: 'application/json;charset=UTF-8',
success: function(result) {
console.log(result)
console.log(result['predictions'])
console.log(result);
console.log(result['predictions']);
predictions = result['predictions'];
if(result['message'] === "KO") { alert("Your train_size or test_size parameter was set incorrectly. The algorithm has been run with the default parameters: 80% for train_size and 20% for test_size."); }
$("body").css("cursor", "default");
},
error: function(result, textStatus, errorThrown) {
console.log(errorThrown);
alert("There is an issue to predict environment of this IRIS. You can check the console to have more details. " + result + textStatus + errorThrown);
$("body").css("cursor", "default");
}
});
......
......@@ -10,7 +10,7 @@ import unittest
from predihood.config import FOLDER_DATASETS, ENVIRONMENT_VALUES
from predihood.utility_functions import check_train_test_percentages, intersection, union, similarity, \
get_most_frequent, address_to_code, address_to_city, indicator_full_to_short_label, \
indicator_short_to_full_label, get_classifier, set_classifier, signature, add_assessment_to_file
indicator_short_to_full_label, get_classifier, set_classifier, signature_of_algorithm, add_assessment_to_file
from sklearn.neighbors import KNeighborsClassifier
......@@ -69,7 +69,7 @@ class TestCase(unittest.TestCase):
def test_signature(self):
# test if signature_of_algorithm is able to pick the correct parameters
classifier_name = "KNeighbors Classifier"
classifier_signature = signature(classifier_name)
classifier_signature, message = signature_of_algorithm(classifier_name)  # the function now returns a (parameters, message) tuple
assert "n_neighbors" in classifier_signature
assert classifier_signature["n_neighbors"]["types"] == ["int"]
assert classifier_signature["n_neighbors"]["default"] == "5"
......
......@@ -2,6 +2,8 @@ import ast
import inspect
import json
import logging
import sys
import numpy as np
import pandas as pd
import re
......@@ -9,6 +11,7 @@ import requests
import stringdist
from area import area
from inspect import signature
from predihood import model
from predihood.classifiers_list import AVAILABLE_CLASSIFIERS
from predihood.config import TRAIN_SIZE, TEST_SIZE, ENVIRONMENT_VARIABLES, TRANSLATION, OLD_PREFIX, NEW_PREFIX, FILE_MANUAL_ASSESSMENT
......@@ -203,7 +206,7 @@ def set_classifier(classifier, parameters):
return classifier
def signature(chosen_algorithm):
def signature_of_algorithm(chosen_algorithm):
"""
Get the signature of an algorithm, i.e. its parameters, the default values and the type of each parameter. The documentation of the algorithm must be in NumPy style.
......@@ -216,11 +219,26 @@ def signature(chosen_algorithm):
- the default value
- a description of the parameter (e.g. "The train_size parameter aims at tuning the size of the sample during the learning step.")
"""
# special case for no selection
if chosen_algorithm == "Algorithm": return json.dumps({})
if chosen_algorithm == "Algorithm": return json.dumps({}), "no_selection" # special case for no selection
# getting parameters and default values
parameters = {}
model = get_classifier(chosen_algorithm)
if sys.version_info.major < 3 or (sys.version_info.major == 3 and sys.version_info.minor < 8):
params = inspect.getfullargspec(model.__init__).args[1:] # get parameters' names -- [1:] to remove the self parameter
defaults = inspect.getfullargspec(model.__init__).defaults # get default values
else:
# getfullargspec is deprecated since Python 3.8, need to use inspect.signature instead
sig = inspect.signature(model.__init__)
params, defaults = [], []
for param in sig.parameters.values():
params.append(param.name)
defaults.append(param.default)
if params is None or defaults is None: return json.dumps({}), "no_parameters"
assert len(params) == len(defaults)
# getting the definition of parameters
try:
# model = eval(_chosen_algorithm) # never use eval on untrusted strings
model = get_classifier(chosen_algorithm)
doc = model.__doc__
param_section = "Parameters"
dashes = "-" * len(param_section) # -------
......@@ -229,11 +247,7 @@ def signature(chosen_algorithm):
# sub_doc is the param section of the docs (i.e. without attributes and some text)
sub_doc = doc[doc.find(param_section) + len(param_section) + number_spaces + len(dashes) + len("\n"):doc.find(attribute_section)]
except:
raise Exception("This algorithm does not have a documentation...")
params = inspect.getfullargspec(model.__init__).args[1:] # get parameter' names -- [1:] to remove self parameter
defaults = inspect.getfullargspec(model.__init__).defaults # get default values
assert len(params) == len(defaults)
parameters = {}
return json.dumps({}), "no_documentation"
for i in range(len(params)):
param_name = str(params[i]) + " : "
index_param = sub_doc.find(param_name)
......@@ -284,7 +298,7 @@ def signature(chosen_algorithm):
if len(types) == 0: types.append(type_of_default) # fill missing types
types[:] = [x for x in types if "None" not in x and "NoneType" not in x] # remove None type
parameters[param_name[:-3]] = {"types": types, "default": default, "description": first_sentence} # -3 to remove " : "
return parameters
return parameters, "OK"
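The version check added above switches from `getfullargspec` to `inspect.signature` on Python >= 3.8. A standalone sketch of how an estimator's constructor parameters and defaults are extracted:

```python
# Standalone sketch of the parameter extraction, mirroring the version check above.
import inspect
import sys
from sklearn.neighbors import KNeighborsClassifier

model = KNeighborsClassifier()
if sys.version_info < (3, 8):
    spec = inspect.getfullargspec(model.__init__)
    params = spec.args[1:]                     # drop self
    defaults = list(spec.defaults)
else:
    sig = inspect.signature(model.__init__)    # self is already excluded for a bound method
    params = [p.name for p in sig.parameters.values()]
    defaults = [p.default for p in sig.parameters.values()]

print(dict(zip(params, defaults)))             # e.g. {'n_neighbors': 5, 'weights': 'uniform', ...}
```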
def check_train_test_percentages(train_size, test_size):
......@@ -299,17 +313,21 @@ def check_train_test_percentages(train_size, test_size):
the train and test sizes, plus a message indicating whether the given values were valid ("OK") or replaced by the defaults ("KO")
"""
if 0 < train_size < 1 and 0 < test_size < 1 and train_size + test_size == 1:
return train_size, test_size # default case
message = "OK" # default case - everything is correct
elif 0 < train_size < 1 and 0 < test_size < 1:
message = "KO" # train_size or test_size is not correct so we set default parameters
train_size = TRAIN_SIZE # 0.8
test_size = TEST_SIZE # 0.2
if 1 <= train_size < 100 and 1 <= test_size < 100 and train_size + test_size == 100:
return train_size / 100, test_size / 100 # default case
message = "OK" # default case - everything is correct
train_size = train_size/100
test_size = test_size/100
elif 1 <= train_size < 100 and 1 <= test_size < 100:
message = "KO" # train_size or test_size is not correct so we set default parameters
train_size = TRAIN_SIZE # 0.8
test_size = TEST_SIZE # 0.2
return train_size, test_size
return train_size, test_size, message
# 2. list functions
......
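For reference, the validation above accepts either fractions summing to 1 or percentages summing to 100, and otherwise falls back to the defaults with a "KO" message. A condensed sketch with an explicit final fallback (which the function above leaves implicit):

```python
# Condensed sketch of the train/test size validation (defaults 0.8/0.2, as in config).
TRAIN_SIZE, TEST_SIZE = 0.8, 0.2

def check_sizes(train_size, test_size):
    if 0 < train_size < 1 and 0 < test_size < 1:
        if train_size + test_size == 1:
            return train_size, test_size, "OK"
        return TRAIN_SIZE, TEST_SIZE, "KO"                  # fractions that do not sum to 1
    if 1 <= train_size < 100 and 1 <= test_size < 100:
        if train_size + test_size == 100:
            return train_size / 100, test_size / 100, "OK"  # percentages, e.g. 80 and 20
        return TRAIN_SIZE, TEST_SIZE, "KO"                  # percentages that do not sum to 100
    return TRAIN_SIZE, TEST_SIZE, "KO"                      # anything else falls back to the defaults

print(check_sizes(80, 20))    # (0.8, 0.2, 'OK')
print(check_sizes(0.7, 0.5))  # (0.8, 0.2, 'KO')
```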