From 46cffd863c8aa8fcc4fe0b6d607397f6c21c5520 Mon Sep 17 00:00:00 2001 From: Nelly Barret <nelly.barret@etu.univ-lyon1.fr> Date: Mon, 20 Jul 2020 19:13:13 +0200 Subject: [PATCH] [AM] documentation is finished and generated --- doc/Data.html | 968 +++++++++++++ doc/Dataset.html | 515 +++++++ doc/Method.html | 178 +++ doc/MethodCleaning.html | 705 +++++++++ doc/MethodPrediction.html | 282 ++++ doc/MethodSelection.html | 272 ++++ doc/__init__.html | 57 + doc/cleaning.html | 275 ++++ doc/config.html | 153 ++ doc/main.html | 683 +++++++++ doc/model.html | 490 +++++++ doc/predict.html | 345 +++++ doc/selection.html | 406 ++++++ doc/utility_functions.html | 1264 +++++++++++++++++ predihood/classes/Dataset.py | 63 +- predihood/classes/Method.py | 16 +- predihood/classes/MethodCleaning.py | 55 +- predihood/classes/MethodPrediction.py | 7 +- predihood/classes/MethodSelection.py | 33 +- predihood/cleaning.py | 18 +- .../distribution-plots/distribution_geo.png | Bin 28979 -> 28987 bytes predihood/main.py | 60 +- predihood/model.py | 54 +- predihood/predict.py | 71 +- predihood/selection.py | 129 +- predihood/utility_functions.py | 395 +++--- 26 files changed, 7070 insertions(+), 424 deletions(-) create mode 100644 doc/Data.html create mode 100644 doc/Dataset.html create mode 100644 doc/Method.html create mode 100644 doc/MethodCleaning.html create mode 100644 doc/MethodPrediction.html create mode 100644 doc/MethodSelection.html create mode 100644 doc/__init__.html create mode 100644 doc/cleaning.html create mode 100644 doc/config.html create mode 100644 doc/main.html create mode 100644 doc/model.html create mode 100644 doc/predict.html create mode 100644 doc/selection.html create mode 100644 doc/utility_functions.html diff --git a/doc/Data.html b/doc/Data.html new file mode 100644 index 00000000..86a0e887 --- /dev/null +++ b/doc/Data.html @@ -0,0 +1,968 @@ +<!doctype html> +<html lang="en"> +<head> +<meta charset="utf-8"> +<meta name="viewport" content="width=device-width, initial-scale=1, minimum-scale=1" /> +<meta name="generator" content="pdoc 0.8.1" /> +<title>Data API documentation</title> +<meta name="description" content="" /> +<link href='https://cdnjs.cloudflare.com/ajax/libs/normalize/8.0.0/normalize.min.css' rel='stylesheet'> +<link href='https://cdnjs.cloudflare.com/ajax/libs/10up-sanitize.css/8.0.0/sanitize.min.css' rel='stylesheet'> +<link href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/styles/github.min.css" rel="stylesheet"> +<style>.flex{display:flex !important}body{line-height:1.5em}#content{padding:20px}#sidebar{padding:30px;overflow:hidden}#sidebar > *:last-child{margin-bottom:2cm}.http-server-breadcrumbs{font-size:130%;margin:0 0 15px 0}#footer{font-size:.75em;padding:5px 30px;border-top:1px solid #ddd;text-align:right}#footer p{margin:0 0 0 1em;display:inline-block}#footer p:last-child{margin-right:30px}h1,h2,h3,h4,h5{font-weight:300}h1{font-size:2.5em;line-height:1.1em}h2{font-size:1.75em;margin:1em 0 .50em 0}h3{font-size:1.4em;margin:25px 0 10px 0}h4{margin:0;font-size:105%}a{color:#058;text-decoration:none;transition:color .3s ease-in-out}a:hover{color:#e82}.title code{font-weight:bold}h2[id^="header-"]{margin-top:2em}.ident{color:#900}pre code{background:#f8f8f8;font-size:.8em;line-height:1.4em}code{background:#f2f2f1;padding:1px 4px;overflow-wrap:break-word}h1 code{background:transparent}pre{background:#f8f8f8;border:0;border-top:1px solid #ccc;border-bottom:1px solid #ccc;margin:1em 
0;padding:1ex}#http-server-module-list{display:flex;flex-flow:column}#http-server-module-list div{display:flex}#http-server-module-list dt{min-width:10%}#http-server-module-list p{margin-top:0}.toc ul,#index{list-style-type:none;margin:0;padding:0}#index code{background:transparent}#index h3{border-bottom:1px solid #ddd}#index ul{padding:0}#index h4{margin-top:.6em;font-weight:bold}@media (min-width:200ex){#index .two-column{column-count:2}}@media (min-width:300ex){#index .two-column{column-count:3}}dl{margin-bottom:2em}dl dl:last-child{margin-bottom:4em}dd{margin:0 0 1em 3em}#header-classes + dl > dd{margin-bottom:3em}dd dd{margin-left:2em}dd p{margin:10px 0}.name{background:#eee;font-weight:bold;font-size:.85em;padding:5px 10px;display:inline-block;min-width:40%}.name:hover{background:#e0e0e0}.name > span:first-child{white-space:nowrap}.name.class > span:nth-child(2){margin-left:.4em}.inherited{color:#999;border-left:5px solid #eee;padding-left:1em}.inheritance em{font-style:normal;font-weight:bold}.desc h2{font-weight:400;font-size:1.25em}.desc h3{font-size:1em}.desc dt code{background:inherit}.source summary,.git-link-div{color:#666;text-align:right;font-weight:400;font-size:.8em;text-transform:uppercase}.source summary > *{white-space:nowrap;cursor:pointer}.git-link{color:inherit;margin-left:1em}.source pre{max-height:500px;overflow:auto;margin:0}.source pre code{font-size:12px;overflow:visible}.hlist{list-style:none}.hlist li{display:inline}.hlist li:after{content:',\2002'}.hlist li:last-child:after{content:none}.hlist .hlist{display:inline;padding-left:1em}img{max-width:100%}.admonition{padding:.1em .5em;margin-bottom:1em}.admonition-title{font-weight:bold}.admonition.note,.admonition.info,.admonition.important{background:#aef}.admonition.todo,.admonition.versionadded,.admonition.tip,.admonition.hint{background:#dfd}.admonition.warning,.admonition.versionchanged,.admonition.deprecated{background:#fd4}.admonition.error,.admonition.danger,.admonition.caution{background:lightpink}</style> +<style media="screen and (min-width: 700px)">@media screen and (min-width:700px){#sidebar{width:30%;height:100vh;overflow:auto;position:sticky;top:0}#content{width:70%;max-width:100ch;padding:3em 4em;border-left:1px solid #ddd}pre code{font-size:1em}.item .name{font-size:1em}main{display:flex;flex-direction:row-reverse;justify-content:flex-end}.toc ul ul,#index ul{padding-left:1.5em}.toc > ul > li{margin-top:.5em}}</style> +<style media="print">@media print{#sidebar h1{page-break-before:always}.source{display:none}}@media print{*{background:transparent !important;color:#000 !important;box-shadow:none !important;text-shadow:none !important}a[href]:after{content:" (" attr(href) ")";font-size:90%}a[href][title]:after{content:none}abbr[title]:after{content:" (" attr(title) ")"}.ir a:after,a[href^="javascript:"]:after,a[href^="#"]:after{content:""}pre,blockquote{border:1px solid #999;page-break-inside:avoid}thead{display:table-header-group}tr,img{page-break-inside:avoid}img{max-width:100% !important}@page{margin:0.5cm}p,h2,h3{orphans:3;widows:3}h1,h2,h3,h4,h5,h6{page-break-after:avoid}}</style> +</head> +<body> +<main> +<article id="content"> +<header> +<h1 class="title">Module <code>Data</code></h1> +</header> +<section id="section-intro"> +<details class="source"> +<summary> +<span>Expand source code</span> +</summary> +<pre><code class="python">import logging +import sys + +import numpy as np +import os +import pandas as pd +import warnings +from area import area + +from predihood import model +from 
predihood.cleaning import clean +from predihood.config import FILE_CLEANED_DATA, FOLDER_DATASETS, ENVIRONMENT_VARIABLES, FILE_GROUPING, NEW_PREFIX, OLD_PREFIX +from predihood.utility_functions import address_to_code, append_indicator, append_target + +log = logging.getLogger(__name__) +warnings.filterwarnings("ignore", category=RuntimeWarning) + + +class Data: + def __init__(self, normalization="density", filtering=True): + """ + Constructor of the Data class. Initialize attributes. + Args: + normalization: A string to indicate the choice for normalization ("density" for density, "population" for population and None to do not normalize) + filtering: True or False to indicate if useless indicators will be removed or not + """ + self.old_path = FILE_CLEANED_DATA + self.dataset_path = os.path.join(FOLDER_DATASETS, "data_"+str(normalization)+".csv") + self.filtered_path = os.path.join(FOLDER_DATASETS, "data_"+str(normalization)+"_filtered.csv") + self.data = None + self.indicators = None + self.normalization = normalization + self.filtering = filtering + + # retrieve indicators + self.get_indicators() + log.debug("Starting with %d indicators", len(self.indicators)) + # define indicators that are not relevant for the prediction and remove them + indicators_to_remove = ["IRIS", "REG", "DEP", "UU2010", "COM", "LIBCOM", "TRIRIS", "GRD_QUART", "LIBIRIS", "TYP_IRIS", "MODIF_IRIS", "LAB_IRIS", "LIB_IRIS", "LIB_COM", "CODGEO", "LIBGEO"] + for indicator in indicators_to_remove: + if indicator in self.indicators: + self.indicators.remove(indicator) + log.debug("Descriptive indicators: %d. It remains %d indicators", len(indicators_to_remove), len(self.indicators)) + + def get_indicators(self): + """ + Get indicators from the dataset if it exists, else from the database. + """ + if self.data is not None: + self.indicators = self.data.columns.tolist() + # self.data.columns gets all columns, so to get indicators, we remove "CODE" (since it is not relevant for prediction). + # We keep "AREA" and "DENSITY" since they are use as features in prediction + if "CODE" in self.indicators: self.indicators.remove("CODE") + for env in ENVIRONMENT_VARIABLES: + if env in self.indicators: + self.indicators.remove(env) + else: + self.indicators = model.get_indicators_list() + + def init_all_in_one(self): + """ + Create or read the dataset. It applies normalization and/or filtering if needed. + """ + if not os.path.exists(self.dataset_path): + self.create() + self.apply_normalization() + if self.filtering: self.apply_filtering() + elif not os.path.exists(self.filtered_path): + if self.filtering: self.apply_filtering() + else: + self.read() + + def read(self): + """ + Read the dataset stored in the CSV file (and get indicators from this dataset). + """ + if self.filtering: + self.data = pd.read_csv(self.filtered_path) + else: + self.data = pd.read_csv(self.dataset_path) + self.get_indicators() + + def create(self): + """ + Construct the dataset from HomeInLove data, i.e. ungroup addresses, retrieve information about IRIS and construct a CSV file. + """ + log.info("... construction of dataset is in progress ...") + + # 1. read data to transform + if not os.path.exists(self.old_path): clean() + data = pd.read_csv(self.old_path) + + # 2. 
define some variables + raw_data = [] + append_col = True + null_iris = 0 # count IRIS that can't be retrieve from its code + foreigners = 0 # count addresses that are not located in France + problems = 0 # count IRIS that encounter problems and that will be not added to the dataset + cols = [] + columns = ["address", "country"] + columns.extend(ENVIRONMENT_VARIABLES) + cols_departure = [OLD_PREFIX+env for env in columns] + cols_arrival = [NEW_PREFIX+env for env in columns] + + # 3. get data for departure and arrival IRIS + departures = data[cols_departure] + arrivals = data[cols_arrival] + # remove "old_address", "old_country", "new_address", "new_country" + cols_departure.pop(0) + cols_departure.pop(0) + cols_arrival.pop(0) + cols_arrival.pop(0) + cols.append("CODE") + cols.append("AREA") + cols.append("DENSITY") + + # 4. for each departure and arrival IRIS (one line in cleanedData.csv), get its INSEE indicators and its EV. + # for each IRIS store its code, compute and store its area and its density, store all its INSEE indicators and its 6 EV. + for (index1, row1), (index2, row2) in zip(departures.iterrows(), arrivals.iterrows()): + dep_row, arr_row = [], [] + # convert address to IRIS code and append it to data only if the address is in France + if row1[OLD_PREFIX + "country"] == "France": + dep_code = address_to_code(row1[OLD_PREFIX + "address"]) + dep_row.append(dep_code) + dep_iris = model.get_iris_from_code(dep_code) # get IRIS information + coords_dep = model.get_coords_from_code(dep_code) + area_dep = area(coords_dep) / 1000000 if coords_dep is not None else None # convert area from m^2 to km^2 + dep_row.append(area_dep) # add area as a feature + density = dep_iris["properties"]["raw_indicators"]["P14_POP"] / area_dep if area_dep is not None and area_dep > 0 else None + dep_row.append(density) # add density as a feature + + if dep_iris is not None: + # a. append INSEE indicators + for raw_indicator in self.indicators: + dep_row, cols = append_indicator(raw_indicator, dep_iris, dep_row, append_col, cols) + + # b. append EV + for target in cols_departure: + dep_row = append_target(row1, target, dep_row) + append_col = False + if len(dep_row) > 0: raw_data.append(dep_row) + else: problems += 1 + else: null_iris += 1 + elif row1[OLD_PREFIX+"country"] == "" or row1[OLD_PREFIX+"country"] is None or row1[OLD_PREFIX+"country"] is np.nan: null_iris += 1 + else: foreigners += 1 + if row2[NEW_PREFIX + "country"] == "France": + arr_code = address_to_code(row2[NEW_PREFIX + "address"]) + arr_row.append(arr_code) + arr_iris = model.get_iris_from_code(arr_code) + coords_arr = model.get_coords_from_code(arr_code) + area_arr = area(coords_arr) / 1000000 if coords_arr is not None else None # convert area from m^2 to km^2 + arr_row.append(area_arr) # add area as a feature + density = arr_iris["properties"]["raw_indicators"]["P14_POP"] / area_arr if area_arr is not None and area_arr > 0 else None + arr_row.append(density) # add density as a feature + + if arr_iris is not None: + # a. append INSEE indicators + for raw_indicator in self.indicators: + arr_row, cols = append_indicator(raw_indicator, arr_iris, arr_row, append_col, cols) + + # b. 
append targets + for target in cols_arrival: + arr_row = append_target(row2, target, arr_row) + append_col = False + if len(arr_row) > 0: raw_data.append(arr_row) + else: problems += 1 + else: null_iris += 1 + elif row2[NEW_PREFIX + "country"] == "" or row2[NEW_PREFIX + "country"] is None or row2[NEW_PREFIX + "country"] is np.nan: null_iris += 1 + else: foreigners += 1 + + sys.stdout.write("\r%.2f%%" % ((index1 * 100) / len(departures))) # update progress percentage + sys.stdout.flush() + print() + + cols.extend(ENVIRONMENT_VARIABLES) + log.info("%d addresses are not located in France.", foreigners) + log.info("%d null IRIS have been removed from the dataset.", null_iris) + log.info("%d IRIS have encountered problems.", problems) + + # 5. convert IRIS data into a DataFrame, fill missing values and remove fully empty columns. + self.data = pd.DataFrame(raw_data, columns=cols) + self.data.sort_values("CODE", inplace=True) + self.fill_missing_values("median") + nan_columns = self.data.columns[self.data.isna().all()].tolist() + log.debug("There are %d NaN columns", len(nan_columns)) + self.data.drop(nan_columns, axis=1, inplace=True) + self.indicators = [indicator for indicator in self.indicators if indicator not in nan_columns] # remove names of NaN columns in the list of indicators + + def fill_missing_values(self, method="median"): + """ + Fill NaN values given the method. + Args: + method: A string corresponding tto the method for filling NaN values ("zero", "mean" or "median"). Default is median. + """ + assert method in ["zero", "mean", "median"] + cols = ["CODE", "AREA", "DENSITY"] + cols.extend(ENVIRONMENT_VARIABLES) + for col in self.data.iteritems(): + value = 0 + col_name = col[0] + if col_name not in cols: + # fill missing INSEE indicators + if method == "zero": + value = 0 + elif method == "mean": + value = self.data[col_name].mean() + elif method == "median": + value = self.data[col_name].median() + else: + value = np.mean(self.data[col_name]) + elif col_name not in ["CODE", "AREA", "DENSITY"]: + # fill missing EV + if method == "zero": + env_values = {"building_type": "Housing estates", + "building_usage": "Other activities", + "landscape": "Urban", + "morphological_position": "Central", + "geographical_position": "Centre", + "social_class": "Lower"} + else: # method == "mean" or method == "median" + env_values = {"building_type": "Mixed", + "building_usage": "Other activities", + "landscape": "Green areas", + "morphological_position": "Urban", + "geographical_position": "Centre", + "social_class": "Middle"} + value = env_values[col_name] + # else: -> column is CODE or AREA or DENSITY -> do nothing + self.data[col_name].replace([np.nan], [value], inplace=True) + + def filter_too_detailed_indicators(self): + """ + Remove too detailed indicators from the dataset. The list of indicators is given by the file `regrouping.csv`. 
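+        Concretely, indicators whose STATUS is 1 in `regrouping.csv` (i.e. indicators that belong to a finer-grained subset) are dropped from both the data and the indicator list.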
+ """ + regrouping = pd.read_csv(FILE_GROUPING, sep="\t") + status = regrouping.index[regrouping["STATUS"] == 1].tolist() # indicators that are an element of a subset + col_names = [regrouping.iloc[status]["INDICATOR"] for status in status] + # self.filtered_data = self.data[:] # copy by value, not by reference + counter1, counter2 = 0, 0 + for column in col_names: + if column in self.data and column in self.indicators and column: + del self.data[column] + self.indicators.remove(column) + counter1 += 1 + log.debug("%d indicators have been using regrouping.", counter1) + + def apply_filtering(self): + """ + Remove (filter) too detailed indicators and constant columns from the dataset. + """ + if self.data is None: + self.data = pd.read_csv(self.dataset_path) + self.get_indicators() + + # 1. remove indicators that have been defined as useless + self.filter_too_detailed_indicators() + + # 2. remove constant columns, i.e. with a null variance + constant_columns = self.data.columns[self.data.nunique() == 1].tolist() + self.data.drop(constant_columns, axis=1, inplace=True) + self.indicators = [indicator for indicator in self.indicators if indicator not in constant_columns] + + self.data.to_csv(self.filtered_path, index=None) # index=None: avoid line numbering + + def apply_normalization(self): + """ + Normalize the dataset with the given method, i.e. None, "population" or "density". Default is "density". + """ + assert self.normalization in [None, "population", "density"] + do_not_normalize = ["CODE", "AREA", "DENSITY", "P14_POP"] + do_not_normalize.extend(ENVIRONMENT_VARIABLES) # extend does not return a list + for index, row in self.data.iterrows(): + for column in row.iteritems(): + col_name = column[0] + if col_name not in do_not_normalize: + if self.normalization == "population": + self.data.at[index, col_name] = row[col_name] / row["P14_POP"] + elif self.normalization == "density": + density = row[self.data.columns.get_loc("DENSITY")] + self.data.at[index, col_name] = row[col_name] / density if density > 0 else 0 + elif self.normalization is None: + self.data.at[index, col_name] = row[col_name] + self.data.to_csv(self.dataset_path, index=None) # index=None: avoid line numbering</code></pre> +</details> +</section> +<section> +</section> +<section> +</section> +<section> +</section> +<section> +<h2 class="section-title" id="header-classes">Classes</h2> +<dl> +<dt id="Data.Data"><code class="flex name class"> +<span>class <span class="ident">Data</span></span> +<span>(</span><span>normalization='density', filtering=True)</span> +</code></dt> +<dd> +<div class="desc"><p>Constructor of the Data class. Initialize attributes.</p> +<h2 id="args">Args</h2> +<dl> +<dt><strong><code>normalization</code></strong></dt> +<dd>A string to indicate the choice for normalization ("density" for density, "population" for population and None to do not normalize)</dd> +<dt><strong><code>filtering</code></strong></dt> +<dd>True or False to indicate if useless indicators will be removed or not</dd> +</dl></div> +<details class="source"> +<summary> +<span>Expand source code</span> +</summary> +<pre><code class="python">class Data: + def __init__(self, normalization="density", filtering=True): + """ + Constructor of the Data class. Initialize attributes. 
+ Args: + normalization: A string to indicate the choice for normalization ("density" for density, "population" for population and None to do not normalize) + filtering: True or False to indicate if useless indicators will be removed or not + """ + self.old_path = FILE_CLEANED_DATA + self.dataset_path = os.path.join(FOLDER_DATASETS, "data_"+str(normalization)+".csv") + self.filtered_path = os.path.join(FOLDER_DATASETS, "data_"+str(normalization)+"_filtered.csv") + self.data = None + self.indicators = None + self.normalization = normalization + self.filtering = filtering + + # retrieve indicators + self.get_indicators() + log.debug("Starting with %d indicators", len(self.indicators)) + # define indicators that are not relevant for the prediction and remove them + indicators_to_remove = ["IRIS", "REG", "DEP", "UU2010", "COM", "LIBCOM", "TRIRIS", "GRD_QUART", "LIBIRIS", "TYP_IRIS", "MODIF_IRIS", "LAB_IRIS", "LIB_IRIS", "LIB_COM", "CODGEO", "LIBGEO"] + for indicator in indicators_to_remove: + if indicator in self.indicators: + self.indicators.remove(indicator) + log.debug("Descriptive indicators: %d. It remains %d indicators", len(indicators_to_remove), len(self.indicators)) + + def get_indicators(self): + """ + Get indicators from the dataset if it exists, else from the database. + """ + if self.data is not None: + self.indicators = self.data.columns.tolist() + # self.data.columns gets all columns, so to get indicators, we remove "CODE" (since it is not relevant for prediction). + # We keep "AREA" and "DENSITY" since they are use as features in prediction + if "CODE" in self.indicators: self.indicators.remove("CODE") + for env in ENVIRONMENT_VARIABLES: + if env in self.indicators: + self.indicators.remove(env) + else: + self.indicators = model.get_indicators_list() + + def init_all_in_one(self): + """ + Create or read the dataset. It applies normalization and/or filtering if needed. + """ + if not os.path.exists(self.dataset_path): + self.create() + self.apply_normalization() + if self.filtering: self.apply_filtering() + elif not os.path.exists(self.filtered_path): + if self.filtering: self.apply_filtering() + else: + self.read() + + def read(self): + """ + Read the dataset stored in the CSV file (and get indicators from this dataset). + """ + if self.filtering: + self.data = pd.read_csv(self.filtered_path) + else: + self.data = pd.read_csv(self.dataset_path) + self.get_indicators() + + def create(self): + """ + Construct the dataset from HomeInLove data, i.e. ungroup addresses, retrieve information about IRIS and construct a CSV file. + """ + log.info("... construction of dataset is in progress ...") + + # 1. read data to transform + if not os.path.exists(self.old_path): clean() + data = pd.read_csv(self.old_path) + + # 2. define some variables + raw_data = [] + append_col = True + null_iris = 0 # count IRIS that can't be retrieve from its code + foreigners = 0 # count addresses that are not located in France + problems = 0 # count IRIS that encounter problems and that will be not added to the dataset + cols = [] + columns = ["address", "country"] + columns.extend(ENVIRONMENT_VARIABLES) + cols_departure = [OLD_PREFIX+env for env in columns] + cols_arrival = [NEW_PREFIX+env for env in columns] + + # 3. 
get data for departure and arrival IRIS + departures = data[cols_departure] + arrivals = data[cols_arrival] + # remove "old_address", "old_country", "new_address", "new_country" + cols_departure.pop(0) + cols_departure.pop(0) + cols_arrival.pop(0) + cols_arrival.pop(0) + cols.append("CODE") + cols.append("AREA") + cols.append("DENSITY") + + # 4. for each departure and arrival IRIS (one line in cleanedData.csv), get its INSEE indicators and its EV. + # for each IRIS store its code, compute and store its area and its density, store all its INSEE indicators and its 6 EV. + for (index1, row1), (index2, row2) in zip(departures.iterrows(), arrivals.iterrows()): + dep_row, arr_row = [], [] + # convert address to IRIS code and append it to data only if the address is in France + if row1[OLD_PREFIX + "country"] == "France": + dep_code = address_to_code(row1[OLD_PREFIX + "address"]) + dep_row.append(dep_code) + dep_iris = model.get_iris_from_code(dep_code) # get IRIS information + coords_dep = model.get_coords_from_code(dep_code) + area_dep = area(coords_dep) / 1000000 if coords_dep is not None else None # convert area from m^2 to km^2 + dep_row.append(area_dep) # add area as a feature + density = dep_iris["properties"]["raw_indicators"]["P14_POP"] / area_dep if area_dep is not None and area_dep > 0 else None + dep_row.append(density) # add density as a feature + + if dep_iris is not None: + # a. append INSEE indicators + for raw_indicator in self.indicators: + dep_row, cols = append_indicator(raw_indicator, dep_iris, dep_row, append_col, cols) + + # b. append EV + for target in cols_departure: + dep_row = append_target(row1, target, dep_row) + append_col = False + if len(dep_row) > 0: raw_data.append(dep_row) + else: problems += 1 + else: null_iris += 1 + elif row1[OLD_PREFIX+"country"] == "" or row1[OLD_PREFIX+"country"] is None or row1[OLD_PREFIX+"country"] is np.nan: null_iris += 1 + else: foreigners += 1 + if row2[NEW_PREFIX + "country"] == "France": + arr_code = address_to_code(row2[NEW_PREFIX + "address"]) + arr_row.append(arr_code) + arr_iris = model.get_iris_from_code(arr_code) + coords_arr = model.get_coords_from_code(arr_code) + area_arr = area(coords_arr) / 1000000 if coords_arr is not None else None # convert area from m^2 to km^2 + arr_row.append(area_arr) # add area as a feature + density = arr_iris["properties"]["raw_indicators"]["P14_POP"] / area_arr if area_arr is not None and area_arr > 0 else None + arr_row.append(density) # add density as a feature + + if arr_iris is not None: + # a. append INSEE indicators + for raw_indicator in self.indicators: + arr_row, cols = append_indicator(raw_indicator, arr_iris, arr_row, append_col, cols) + + # b. append targets + for target in cols_arrival: + arr_row = append_target(row2, target, arr_row) + append_col = False + if len(arr_row) > 0: raw_data.append(arr_row) + else: problems += 1 + else: null_iris += 1 + elif row2[NEW_PREFIX + "country"] == "" or row2[NEW_PREFIX + "country"] is None or row2[NEW_PREFIX + "country"] is np.nan: null_iris += 1 + else: foreigners += 1 + + sys.stdout.write("\r%.2f%%" % ((index1 * 100) / len(departures))) # update progress percentage + sys.stdout.flush() + print() + + cols.extend(ENVIRONMENT_VARIABLES) + log.info("%d addresses are not located in France.", foreigners) + log.info("%d null IRIS have been removed from the dataset.", null_iris) + log.info("%d IRIS have encountered problems.", problems) + + # 5. convert IRIS data into a DataFrame, fill missing values and remove fully empty columns. 
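+        # note: columns left entirely NaN by the median filling (their median is undefined) are detected and dropped just below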
+ self.data = pd.DataFrame(raw_data, columns=cols) + self.data.sort_values("CODE", inplace=True) + self.fill_missing_values("median") + nan_columns = self.data.columns[self.data.isna().all()].tolist() + log.debug("There are %d NaN columns", len(nan_columns)) + self.data.drop(nan_columns, axis=1, inplace=True) + self.indicators = [indicator for indicator in self.indicators if indicator not in nan_columns] # remove names of NaN columns in the list of indicators + + def fill_missing_values(self, method="median"): + """ + Fill NaN values given the method. + Args: + method: A string corresponding tto the method for filling NaN values ("zero", "mean" or "median"). Default is median. + """ + assert method in ["zero", "mean", "median"] + cols = ["CODE", "AREA", "DENSITY"] + cols.extend(ENVIRONMENT_VARIABLES) + for col in self.data.iteritems(): + value = 0 + col_name = col[0] + if col_name not in cols: + # fill missing INSEE indicators + if method == "zero": + value = 0 + elif method == "mean": + value = self.data[col_name].mean() + elif method == "median": + value = self.data[col_name].median() + else: + value = np.mean(self.data[col_name]) + elif col_name not in ["CODE", "AREA", "DENSITY"]: + # fill missing EV + if method == "zero": + env_values = {"building_type": "Housing estates", + "building_usage": "Other activities", + "landscape": "Urban", + "morphological_position": "Central", + "geographical_position": "Centre", + "social_class": "Lower"} + else: # method == "mean" or method == "median" + env_values = {"building_type": "Mixed", + "building_usage": "Other activities", + "landscape": "Green areas", + "morphological_position": "Urban", + "geographical_position": "Centre", + "social_class": "Middle"} + value = env_values[col_name] + # else: -> column is CODE or AREA or DENSITY -> do nothing + self.data[col_name].replace([np.nan], [value], inplace=True) + + def filter_too_detailed_indicators(self): + """ + Remove too detailed indicators from the dataset. The list of indicators is given by the file `regrouping.csv`. + """ + regrouping = pd.read_csv(FILE_GROUPING, sep="\t") + status = regrouping.index[regrouping["STATUS"] == 1].tolist() # indicators that are an element of a subset + col_names = [regrouping.iloc[status]["INDICATOR"] for status in status] + # self.filtered_data = self.data[:] # copy by value, not by reference + counter1, counter2 = 0, 0 + for column in col_names: + if column in self.data and column in self.indicators and column: + del self.data[column] + self.indicators.remove(column) + counter1 += 1 + log.debug("%d indicators have been using regrouping.", counter1) + + def apply_filtering(self): + """ + Remove (filter) too detailed indicators and constant columns from the dataset. + """ + if self.data is None: + self.data = pd.read_csv(self.dataset_path) + self.get_indicators() + + # 1. remove indicators that have been defined as useless + self.filter_too_detailed_indicators() + + # 2. remove constant columns, i.e. with a null variance + constant_columns = self.data.columns[self.data.nunique() == 1].tolist() + self.data.drop(constant_columns, axis=1, inplace=True) + self.indicators = [indicator for indicator in self.indicators if indicator not in constant_columns] + + self.data.to_csv(self.filtered_path, index=None) # index=None: avoid line numbering + + def apply_normalization(self): + """ + Normalize the dataset with the given method, i.e. None, "population" or "density". Default is "density". 
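+        For example, with "density" each indicator value v of an IRIS becomes v / DENSITY (and v / P14_POP with "population"); "CODE", "AREA", "DENSITY", "P14_POP" and the EV columns are never normalized.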
+ """ + assert self.normalization in [None, "population", "density"] + do_not_normalize = ["CODE", "AREA", "DENSITY", "P14_POP"] + do_not_normalize.extend(ENVIRONMENT_VARIABLES) # extend does not return a list + for index, row in self.data.iterrows(): + for column in row.iteritems(): + col_name = column[0] + if col_name not in do_not_normalize: + if self.normalization == "population": + self.data.at[index, col_name] = row[col_name] / row["P14_POP"] + elif self.normalization == "density": + density = row[self.data.columns.get_loc("DENSITY")] + self.data.at[index, col_name] = row[col_name] / density if density > 0 else 0 + elif self.normalization is None: + self.data.at[index, col_name] = row[col_name] + self.data.to_csv(self.dataset_path, index=None) # index=None: avoid line numbering</code></pre> +</details> +<h3>Methods</h3> +<dl> +<dt id="Data.Data.apply_filtering"><code class="name flex"> +<span>def <span class="ident">apply_filtering</span></span>(<span>self)</span> +</code></dt> +<dd> +<div class="desc"><p>Remove (filter) too detailed indicators and constant columns from the dataset.</p></div> +<details class="source"> +<summary> +<span>Expand source code</span> +</summary> +<pre><code class="python">def apply_filtering(self): + """ + Remove (filter) too detailed indicators and constant columns from the dataset. + """ + if self.data is None: + self.data = pd.read_csv(self.dataset_path) + self.get_indicators() + + # 1. remove indicators that have been defined as useless + self.filter_too_detailed_indicators() + + # 2. remove constant columns, i.e. with a null variance + constant_columns = self.data.columns[self.data.nunique() == 1].tolist() + self.data.drop(constant_columns, axis=1, inplace=True) + self.indicators = [indicator for indicator in self.indicators if indicator not in constant_columns] + + self.data.to_csv(self.filtered_path, index=None) # index=None: avoid line numbering</code></pre> +</details> +</dd> +<dt id="Data.Data.apply_normalization"><code class="name flex"> +<span>def <span class="ident">apply_normalization</span></span>(<span>self)</span> +</code></dt> +<dd> +<div class="desc"><p>Normalize the dataset with the given method, i.e. None, "population" or "density". Default is "density".</p></div> +<details class="source"> +<summary> +<span>Expand source code</span> +</summary> +<pre><code class="python">def apply_normalization(self): + """ + Normalize the dataset with the given method, i.e. None, "population" or "density". Default is "density". + """ + assert self.normalization in [None, "population", "density"] + do_not_normalize = ["CODE", "AREA", "DENSITY", "P14_POP"] + do_not_normalize.extend(ENVIRONMENT_VARIABLES) # extend does not return a list + for index, row in self.data.iterrows(): + for column in row.iteritems(): + col_name = column[0] + if col_name not in do_not_normalize: + if self.normalization == "population": + self.data.at[index, col_name] = row[col_name] / row["P14_POP"] + elif self.normalization == "density": + density = row[self.data.columns.get_loc("DENSITY")] + self.data.at[index, col_name] = row[col_name] / density if density > 0 else 0 + elif self.normalization is None: + self.data.at[index, col_name] = row[col_name] + self.data.to_csv(self.dataset_path, index=None) # index=None: avoid line numbering</code></pre> +</details> +</dd> +<dt id="Data.Data.create"><code class="name flex"> +<span>def <span class="ident">create</span></span>(<span>self)</span> +</code></dt> +<dd> +<div class="desc"><p>Construct the dataset from HomeInLove data, i.e. 
ungroup addresses, retrieve information about IRIS and construct a CSV file.</p></div> +<details class="source"> +<summary> +<span>Expand source code</span> +</summary> +<pre><code class="python">def create(self): + """ + Construct the dataset from HomeInLove data, i.e. ungroup addresses, retrieve information about IRIS and construct a CSV file. + """ + log.info("... construction of dataset is in progress ...") + + # 1. read data to transform + if not os.path.exists(self.old_path): clean() + data = pd.read_csv(self.old_path) + + # 2. define some variables + raw_data = [] + append_col = True + null_iris = 0 # count IRIS that can't be retrieve from its code + foreigners = 0 # count addresses that are not located in France + problems = 0 # count IRIS that encounter problems and that will be not added to the dataset + cols = [] + columns = ["address", "country"] + columns.extend(ENVIRONMENT_VARIABLES) + cols_departure = [OLD_PREFIX+env for env in columns] + cols_arrival = [NEW_PREFIX+env for env in columns] + + # 3. get data for departure and arrival IRIS + departures = data[cols_departure] + arrivals = data[cols_arrival] + # remove "old_address", "old_country", "new_address", "new_country" + cols_departure.pop(0) + cols_departure.pop(0) + cols_arrival.pop(0) + cols_arrival.pop(0) + cols.append("CODE") + cols.append("AREA") + cols.append("DENSITY") + + # 4. for each departure and arrival IRIS (one line in cleanedData.csv), get its INSEE indicators and its EV. + # for each IRIS store its code, compute and store its area and its density, store all its INSEE indicators and its 6 EV. + for (index1, row1), (index2, row2) in zip(departures.iterrows(), arrivals.iterrows()): + dep_row, arr_row = [], [] + # convert address to IRIS code and append it to data only if the address is in France + if row1[OLD_PREFIX + "country"] == "France": + dep_code = address_to_code(row1[OLD_PREFIX + "address"]) + dep_row.append(dep_code) + dep_iris = model.get_iris_from_code(dep_code) # get IRIS information + coords_dep = model.get_coords_from_code(dep_code) + area_dep = area(coords_dep) / 1000000 if coords_dep is not None else None # convert area from m^2 to km^2 + dep_row.append(area_dep) # add area as a feature + density = dep_iris["properties"]["raw_indicators"]["P14_POP"] / area_dep if area_dep is not None and area_dep > 0 else None + dep_row.append(density) # add density as a feature + + if dep_iris is not None: + # a. append INSEE indicators + for raw_indicator in self.indicators: + dep_row, cols = append_indicator(raw_indicator, dep_iris, dep_row, append_col, cols) + + # b. 
append EV + for target in cols_departure: + dep_row = append_target(row1, target, dep_row) + append_col = False + if len(dep_row) > 0: raw_data.append(dep_row) + else: problems += 1 + else: null_iris += 1 + elif row1[OLD_PREFIX+"country"] == "" or row1[OLD_PREFIX+"country"] is None or row1[OLD_PREFIX+"country"] is np.nan: null_iris += 1 + else: foreigners += 1 + if row2[NEW_PREFIX + "country"] == "France": + arr_code = address_to_code(row2[NEW_PREFIX + "address"]) + arr_row.append(arr_code) + arr_iris = model.get_iris_from_code(arr_code) + coords_arr = model.get_coords_from_code(arr_code) + area_arr = area(coords_arr) / 1000000 if coords_arr is not None else None # convert area from m^2 to km^2 + arr_row.append(area_arr) # add area as a feature + density = arr_iris["properties"]["raw_indicators"]["P14_POP"] / area_arr if area_arr is not None and area_arr > 0 else None + arr_row.append(density) # add density as a feature + + if arr_iris is not None: + # a. append INSEE indicators + for raw_indicator in self.indicators: + arr_row, cols = append_indicator(raw_indicator, arr_iris, arr_row, append_col, cols) + + # b. append targets + for target in cols_arrival: + arr_row = append_target(row2, target, arr_row) + append_col = False + if len(arr_row) > 0: raw_data.append(arr_row) + else: problems += 1 + else: null_iris += 1 + elif row2[NEW_PREFIX + "country"] == "" or row2[NEW_PREFIX + "country"] is None or row2[NEW_PREFIX + "country"] is np.nan: null_iris += 1 + else: foreigners += 1 + + sys.stdout.write("\r%.2f%%" % ((index1 * 100) / len(departures))) # update progress percentage + sys.stdout.flush() + print() + + cols.extend(ENVIRONMENT_VARIABLES) + log.info("%d addresses are not located in France.", foreigners) + log.info("%d null IRIS have been removed from the dataset.", null_iris) + log.info("%d IRIS have encountered problems.", problems) + + # 5. convert IRIS data into a DataFrame, fill missing values and remove fully empty columns. + self.data = pd.DataFrame(raw_data, columns=cols) + self.data.sort_values("CODE", inplace=True) + self.fill_missing_values("median") + nan_columns = self.data.columns[self.data.isna().all()].tolist() + log.debug("There are %d NaN columns", len(nan_columns)) + self.data.drop(nan_columns, axis=1, inplace=True) + self.indicators = [indicator for indicator in self.indicators if indicator not in nan_columns] # remove names of NaN columns in the list of indicators</code></pre> +</details> +</dd> +<dt id="Data.Data.fill_missing_values"><code class="name flex"> +<span>def <span class="ident">fill_missing_values</span></span>(<span>self, method='median')</span> +</code></dt> +<dd> +<div class="desc"><p>Fill NaN values given the method.</p> +<h2 id="args">Args</h2> +<dl> +<dt><strong><code>method</code></strong></dt> +<dd>A string corresponding tto the method for filling NaN values ("zero", "mean" or "median"). Default is median.</dd> +</dl></div> +<details class="source"> +<summary> +<span>Expand source code</span> +</summary> +<pre><code class="python">def fill_missing_values(self, method="median"): + """ + Fill NaN values given the method. + Args: + method: A string corresponding tto the method for filling NaN values ("zero", "mean" or "median"). Default is median. 
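+        Example (sketch): fill_missing_values("zero") sets missing indicator values to 0, while "mean"/"median" use the column statistic; missing EV labels are replaced by fixed default categories in all cases.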
+ """ + assert method in ["zero", "mean", "median"] + cols = ["CODE", "AREA", "DENSITY"] + cols.extend(ENVIRONMENT_VARIABLES) + for col in self.data.iteritems(): + value = 0 + col_name = col[0] + if col_name not in cols: + # fill missing INSEE indicators + if method == "zero": + value = 0 + elif method == "mean": + value = self.data[col_name].mean() + elif method == "median": + value = self.data[col_name].median() + else: + value = np.mean(self.data[col_name]) + elif col_name not in ["CODE", "AREA", "DENSITY"]: + # fill missing EV + if method == "zero": + env_values = {"building_type": "Housing estates", + "building_usage": "Other activities", + "landscape": "Urban", + "morphological_position": "Central", + "geographical_position": "Centre", + "social_class": "Lower"} + else: # method == "mean" or method == "median" + env_values = {"building_type": "Mixed", + "building_usage": "Other activities", + "landscape": "Green areas", + "morphological_position": "Urban", + "geographical_position": "Centre", + "social_class": "Middle"} + value = env_values[col_name] + # else: -> column is CODE or AREA or DENSITY -> do nothing + self.data[col_name].replace([np.nan], [value], inplace=True)</code></pre> +</details> +</dd> +<dt id="Data.Data.filter_too_detailed_indicators"><code class="name flex"> +<span>def <span class="ident">filter_too_detailed_indicators</span></span>(<span>self)</span> +</code></dt> +<dd> +<div class="desc"><p>Remove too detailed indicators from the dataset. The list of indicators is given by the file <code>regrouping.csv</code>.</p></div> +<details class="source"> +<summary> +<span>Expand source code</span> +</summary> +<pre><code class="python">def filter_too_detailed_indicators(self): + """ + Remove too detailed indicators from the dataset. The list of indicators is given by the file `regrouping.csv`. + """ + regrouping = pd.read_csv(FILE_GROUPING, sep="\t") + status = regrouping.index[regrouping["STATUS"] == 1].tolist() # indicators that are an element of a subset + col_names = [regrouping.iloc[status]["INDICATOR"] for status in status] + # self.filtered_data = self.data[:] # copy by value, not by reference + counter1, counter2 = 0, 0 + for column in col_names: + if column in self.data and column in self.indicators and column: + del self.data[column] + self.indicators.remove(column) + counter1 += 1 + log.debug("%d indicators have been using regrouping.", counter1)</code></pre> +</details> +</dd> +<dt id="Data.Data.get_indicators"><code class="name flex"> +<span>def <span class="ident">get_indicators</span></span>(<span>self)</span> +</code></dt> +<dd> +<div class="desc"><p>Get indicators from the dataset if it exists, else from the database.</p></div> +<details class="source"> +<summary> +<span>Expand source code</span> +</summary> +<pre><code class="python">def get_indicators(self): + """ + Get indicators from the dataset if it exists, else from the database. + """ + if self.data is not None: + self.indicators = self.data.columns.tolist() + # self.data.columns gets all columns, so to get indicators, we remove "CODE" (since it is not relevant for prediction). 
+ # We keep "AREA" and "DENSITY" since they are use as features in prediction + if "CODE" in self.indicators: self.indicators.remove("CODE") + for env in ENVIRONMENT_VARIABLES: + if env in self.indicators: + self.indicators.remove(env) + else: + self.indicators = model.get_indicators_list()</code></pre> +</details> +</dd> +<dt id="Data.Data.init_all_in_one"><code class="name flex"> +<span>def <span class="ident">init_all_in_one</span></span>(<span>self)</span> +</code></dt> +<dd> +<div class="desc"><p>Create or read the dataset. It applies normalization and/or filtering if needed.</p></div> +<details class="source"> +<summary> +<span>Expand source code</span> +</summary> +<pre><code class="python">def init_all_in_one(self): + """ + Create or read the dataset. It applies normalization and/or filtering if needed. + """ + if not os.path.exists(self.dataset_path): + self.create() + self.apply_normalization() + if self.filtering: self.apply_filtering() + elif not os.path.exists(self.filtered_path): + if self.filtering: self.apply_filtering() + else: + self.read()</code></pre> +</details> +</dd> +<dt id="Data.Data.read"><code class="name flex"> +<span>def <span class="ident">read</span></span>(<span>self)</span> +</code></dt> +<dd> +<div class="desc"><p>Read the dataset stored in the CSV file (and get indicators from this dataset).</p></div> +<details class="source"> +<summary> +<span>Expand source code</span> +</summary> +<pre><code class="python">def read(self): + """ + Read the dataset stored in the CSV file (and get indicators from this dataset). + """ + if self.filtering: + self.data = pd.read_csv(self.filtered_path) + else: + self.data = pd.read_csv(self.dataset_path) + self.get_indicators()</code></pre> +</details> +</dd> +</dl> +</dd> +</dl> +</section> +</article> +<nav id="sidebar"> +<h1>Index</h1> +<div class="toc"> +<ul></ul> +</div> +<ul id="index"> +<li><h3><a href="#header-classes">Classes</a></h3> +<ul> +<li> +<h4><code><a title="Data.Data" href="#Data.Data">Data</a></code></h4> +<ul class=""> +<li><code><a title="Data.Data.apply_filtering" href="#Data.Data.apply_filtering">apply_filtering</a></code></li> +<li><code><a title="Data.Data.apply_normalization" href="#Data.Data.apply_normalization">apply_normalization</a></code></li> +<li><code><a title="Data.Data.create" href="#Data.Data.create">create</a></code></li> +<li><code><a title="Data.Data.fill_missing_values" href="#Data.Data.fill_missing_values">fill_missing_values</a></code></li> +<li><code><a title="Data.Data.filter_too_detailed_indicators" href="#Data.Data.filter_too_detailed_indicators">filter_too_detailed_indicators</a></code></li> +<li><code><a title="Data.Data.get_indicators" href="#Data.Data.get_indicators">get_indicators</a></code></li> +<li><code><a title="Data.Data.init_all_in_one" href="#Data.Data.init_all_in_one">init_all_in_one</a></code></li> +<li><code><a title="Data.Data.read" href="#Data.Data.read">read</a></code></li> +</ul> +</li> +</ul> +</li> +</ul> +</nav> +</main> +<footer id="footer"> +<p>Generated by <a href="https://pdoc3.github.io/pdoc"><cite>pdoc</cite> 0.8.1</a>.</p> +</footer> +<script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/highlight.min.js"></script> +<script>hljs.initHighlightingOnLoad()</script> +</body> +</html> \ No newline at end of file diff --git a/doc/Dataset.html b/doc/Dataset.html new file mode 100644 index 00000000..bfedcc2b --- /dev/null +++ b/doc/Dataset.html @@ -0,0 +1,515 @@ +<!doctype html> +<html lang="en"> +<head> +<meta charset="utf-8"> +<meta 
name="viewport" content="width=device-width, initial-scale=1, minimum-scale=1" /> +<meta name="generator" content="pdoc 0.8.1" /> +<title>Dataset API documentation</title> +<meta name="description" content="" /> +<link href='https://cdnjs.cloudflare.com/ajax/libs/normalize/8.0.0/normalize.min.css' rel='stylesheet'> +<link href='https://cdnjs.cloudflare.com/ajax/libs/10up-sanitize.css/8.0.0/sanitize.min.css' rel='stylesheet'> +<link href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/styles/github.min.css" rel="stylesheet"> +<style>.flex{display:flex !important}body{line-height:1.5em}#content{padding:20px}#sidebar{padding:30px;overflow:hidden}#sidebar > *:last-child{margin-bottom:2cm}.http-server-breadcrumbs{font-size:130%;margin:0 0 15px 0}#footer{font-size:.75em;padding:5px 30px;border-top:1px solid #ddd;text-align:right}#footer p{margin:0 0 0 1em;display:inline-block}#footer p:last-child{margin-right:30px}h1,h2,h3,h4,h5{font-weight:300}h1{font-size:2.5em;line-height:1.1em}h2{font-size:1.75em;margin:1em 0 .50em 0}h3{font-size:1.4em;margin:25px 0 10px 0}h4{margin:0;font-size:105%}a{color:#058;text-decoration:none;transition:color .3s ease-in-out}a:hover{color:#e82}.title code{font-weight:bold}h2[id^="header-"]{margin-top:2em}.ident{color:#900}pre code{background:#f8f8f8;font-size:.8em;line-height:1.4em}code{background:#f2f2f1;padding:1px 4px;overflow-wrap:break-word}h1 code{background:transparent}pre{background:#f8f8f8;border:0;border-top:1px solid #ccc;border-bottom:1px solid #ccc;margin:1em 0;padding:1ex}#http-server-module-list{display:flex;flex-flow:column}#http-server-module-list div{display:flex}#http-server-module-list dt{min-width:10%}#http-server-module-list p{margin-top:0}.toc ul,#index{list-style-type:none;margin:0;padding:0}#index code{background:transparent}#index h3{border-bottom:1px solid #ddd}#index ul{padding:0}#index h4{margin-top:.6em;font-weight:bold}@media (min-width:200ex){#index .two-column{column-count:2}}@media (min-width:300ex){#index .two-column{column-count:3}}dl{margin-bottom:2em}dl dl:last-child{margin-bottom:4em}dd{margin:0 0 1em 3em}#header-classes + dl > dd{margin-bottom:3em}dd dd{margin-left:2em}dd p{margin:10px 0}.name{background:#eee;font-weight:bold;font-size:.85em;padding:5px 10px;display:inline-block;min-width:40%}.name:hover{background:#e0e0e0}.name > span:first-child{white-space:nowrap}.name.class > span:nth-child(2){margin-left:.4em}.inherited{color:#999;border-left:5px solid #eee;padding-left:1em}.inheritance em{font-style:normal;font-weight:bold}.desc h2{font-weight:400;font-size:1.25em}.desc h3{font-size:1em}.desc dt code{background:inherit}.source summary,.git-link-div{color:#666;text-align:right;font-weight:400;font-size:.8em;text-transform:uppercase}.source summary > *{white-space:nowrap;cursor:pointer}.git-link{color:inherit;margin-left:1em}.source pre{max-height:500px;overflow:auto;margin:0}.source pre code{font-size:12px;overflow:visible}.hlist{list-style:none}.hlist li{display:inline}.hlist li:after{content:',\2002'}.hlist li:last-child:after{content:none}.hlist .hlist{display:inline;padding-left:1em}img{max-width:100%}.admonition{padding:.1em 
.5em;margin-bottom:1em}.admonition-title{font-weight:bold}.admonition.note,.admonition.info,.admonition.important{background:#aef}.admonition.todo,.admonition.versionadded,.admonition.tip,.admonition.hint{background:#dfd}.admonition.warning,.admonition.versionchanged,.admonition.deprecated{background:#fd4}.admonition.error,.admonition.danger,.admonition.caution{background:lightpink}</style> +<style media="screen and (min-width: 700px)">@media screen and (min-width:700px){#sidebar{width:30%;height:100vh;overflow:auto;position:sticky;top:0}#content{width:70%;max-width:100ch;padding:3em 4em;border-left:1px solid #ddd}pre code{font-size:1em}.item .name{font-size:1em}main{display:flex;flex-direction:row-reverse;justify-content:flex-end}.toc ul ul,#index ul{padding-left:1.5em}.toc > ul > li{margin-top:.5em}}</style> +<style media="print">@media print{#sidebar h1{page-break-before:always}.source{display:none}}@media print{*{background:transparent !important;color:#000 !important;box-shadow:none !important;text-shadow:none !important}a[href]:after{content:" (" attr(href) ")";font-size:90%}a[href][title]:after{content:none}abbr[title]:after{content:" (" attr(title) ")"}.ir a:after,a[href^="javascript:"]:after,a[href^="#"]:after{content:""}pre,blockquote{border:1px solid #999;page-break-inside:avoid}thead{display:table-header-group}tr,img{page-break-inside:avoid}img{max-width:100% !important}@page{margin:0.5cm}p,h2,h3{orphans:3;widows:3}h1,h2,h3,h4,h5,h6{page-break-after:avoid}}</style> +</head> +<body> +<main> +<article id="content"> +<header> +<h1 class="title">Module <code>Dataset</code></h1> +</header> +<section id="section-intro"> +<details class="source"> +<summary> +<span>Expand source code</span> +</summary> +<pre><code class="python">import logging +import pandas as pd + +from predihood.config import TRAIN_SIZE, TEST_SIZE, ENVIRONMENT_VARIABLES, RANDOM_STATE, FILE_ENV +from predihood.utility_functions import check_dataset_size +from sklearn.ensemble import IsolationForest +from sklearn.model_selection import train_test_split + +log = logging.getLogger(__name__) + + +class Dataset: + """ + This class represents assessed IRIS with their indicators ans EV values. There are options, such as removing outliers or rural IRIS. + """ + def __init__(self, data, env, _type, selected_indicators=None, indicators_to_remove=None, train_size=TRAIN_SIZE, test_size=TEST_SIZE, outliers=False): + """ + Constructor of the Dataset class. Initialize attributes. + + Args: + data: an instance of Data class. Don"t forget to initialize data after created it with "init_all_in_one()" method + env: a string representing the EV, i.e. 
a value in ["building_type", "building_usage", "landscape", "morphological_position", "geographical_position", "social_class"] + selected_indicators: a list containing the indicators to keep in the dataset + indicators_to_remove: a list containing the indicators to remove in the dataset + train_size: a integer or a float corresponding to the size of the dataset used for training + test_size: a integer or a float corresponding to the size of the dataset used for test + outliers: True or False to remove outliers from dataset (detected with IsolationForest algorithm) + """ + self.data = data.data[:] # data must be a Data object + self.indicators = data.indicators[:] + self.filtering = data.filtering + self.normalization = data.normalization + self.selected_indicators = selected_indicators[:] if selected_indicators is not None else None + self.indicators_to_remove = indicators_to_remove[:] if indicators_to_remove is not None else None + self.type = _type + self.X = None + self.Y = None + self.X_train = None + self.Y_train = None + self.X_test = None + self.Y_test = None + if env in ENVIRONMENT_VARIABLES: self.env = env + else: self.env = "building_type" + self.train_size, self.test_size = check_dataset_size(train_size, test_size) + self.outliers = outliers + + def init_all_in_one(self): + """ + Initialize the dataset by initializing X and Y ; generating X_train, Y_train, X_test, Y_test ; removing outliers if needed. + When the type is "unsupervised", split data into X and Y is not relevant (as there is no train/test sets). + """ + if self.type == "supervised": + if self.outliers: + self.remove_outliers() + self.init_X() + self.init_Y() + self.train_test() + + def init_X(self): + """ + Initialize self.X by getting indicators in dataset. + """ + assert self.data is not None + if self.selected_indicators is not None: # select given indicators + self.X = self.data.loc[:, self.selected_indicators] + if self.indicators_to_remove: # remove given indicators + for indicator in self.indicators_to_remove: + if indicator in self.selected_indicators and indicator in self.X.columns: + self.selected_indicators.remove(indicator) + self.X = self.X.drop([indicator], axis=1) + else: + self.X = self.data.loc[:, self.indicators] + if self.indicators_to_remove: # remove given indicators + for indicator in self.indicators_to_remove: + if indicator in self.indicators and indicator in self.X.columns: + self.indicators.remove(indicator) + self.X = self.X.drop([indicator], axis=1) + + def init_Y(self): + """ + Initialize self.Y by getting EV in dataset. + """ + assert self.data is not None + self.Y = self.data[self.env].values + + def train_test(self): + """ + Create X_train, Y_train, X_test, Y_test with train_test_split method + """ + if len(self.X) <= 0: self.init_X() + if len(self.Y) <= 0: self.init_Y() + self.X_train, self.X_test, self.Y_train, self.Y_test = train_test_split(self.X, self.Y, train_size=self.train_size, test_size=self.test_size, random_state=RANDOM_STATE) # , stratify=self.Y) + + def remove_outliers(self): + """ + Detect and remove IRIS that are outliers from dataset. 
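+        Outliers are detected with scikit-learn's IsolationForest, fitted on X_train; rows of X_test predicted as -1 are dropped, so X and Y must be initialized beforehand.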
+ """ + isolation_forest = IsolationForest(random_state=RANDOM_STATE, n_estimators=100) # IsolationForest is used to detect outliers + isolation_forest.fit(self.X_train) + predictions = isolation_forest.predict(self.X_test) + for i in range(len(predictions)): + if predictions[i] == -1: + code = self.data.loc[i, "CODE"] + log.debug(code, "has been removed since it is an outlier.") + self.data = self.data.drop(i, axis=0) # delete IRIS that are detected as outliers + + def remove_rural_iris(self): + """ + Remove from dataset IRIS that are assessed as rural (because they bias the prediction). + """ + self.data = self.data[self.data.morphological_position != "Rural"] + + def get_environment_variable(self): + """ + Get values for a given EV for each assessed IRIS and store it in a CSV file. + """ + assert self.env in ENVIRONMENT_VARIABLES + data_env = pd.DataFrame(self.data[['CODE', self.env]]) + data_env.to_csv(FILE_ENV) + + def get_all_environmental_variable(self): + """ + Get EV for each assessed IRIS and store it in a CSV file. + """ + columns = ['CODE'] + columns.extend(ENVIRONMENT_VARIABLES) + data_env = pd.DataFrame(self.data[columns]) + data_env.to_csv(FILE_ENV)</code></pre> +</details> +</section> +<section> +</section> +<section> +</section> +<section> +</section> +<section> +<h2 class="section-title" id="header-classes">Classes</h2> +<dl> +<dt id="Dataset.Dataset"><code class="flex name class"> +<span>class <span class="ident">Dataset</span></span> +<span>(</span><span>data, env, _type, selected_indicators=None, indicators_to_remove=None, train_size=0.8, test_size=0.2, outliers=False)</span> +</code></dt> +<dd> +<div class="desc"><p>This class represents assessed IRIS with their indicators ans EV values. There are options, such as removing outliers or rural IRIS.</p> +<p>Constructor of the Dataset class. Initialize attributes.</p> +<h2 id="args">Args</h2> +<dl> +<dt><strong><code>data</code></strong></dt> +<dd>an instance of Data class. Don"t forget to initialize data after created it with "init_all_in_one()" method</dd> +<dt><strong><code>env</code></strong></dt> +<dd>a string representing the EV, i.e. a value in ["building_type", "building_usage", "landscape", "morphological_position", "geographical_position", "social_class"]</dd> +<dt><strong><code>selected_indicators</code></strong></dt> +<dd>a list containing the indicators to keep in the dataset</dd> +<dt><strong><code>indicators_to_remove</code></strong></dt> +<dd>a list containing the indicators to remove in the dataset</dd> +<dt><strong><code>train_size</code></strong></dt> +<dd>a integer or a float corresponding to the size of the dataset used for training</dd> +<dt><strong><code>test_size</code></strong></dt> +<dd>a integer or a float corresponding to the size of the dataset used for test</dd> +<dt><strong><code>outliers</code></strong></dt> +<dd>True or False to remove outliers from dataset (detected with IsolationForest algorithm)</dd> +</dl></div> +<details class="source"> +<summary> +<span>Expand source code</span> +</summary> +<pre><code class="python">class Dataset: + """ + This class represents assessed IRIS with their indicators ans EV values. There are options, such as removing outliers or rural IRIS. + """ + def __init__(self, data, env, _type, selected_indicators=None, indicators_to_remove=None, train_size=TRAIN_SIZE, test_size=TEST_SIZE, outliers=False): + """ + Constructor of the Dataset class. Initialize attributes. + + Args: + data: an instance of Data class. 
Don"t forget to initialize data after created it with "init_all_in_one()" method + env: a string representing the EV, i.e. a value in ["building_type", "building_usage", "landscape", "morphological_position", "geographical_position", "social_class"] + selected_indicators: a list containing the indicators to keep in the dataset + indicators_to_remove: a list containing the indicators to remove in the dataset + train_size: a integer or a float corresponding to the size of the dataset used for training + test_size: a integer or a float corresponding to the size of the dataset used for test + outliers: True or False to remove outliers from dataset (detected with IsolationForest algorithm) + """ + self.data = data.data[:] # data must be a Data object + self.indicators = data.indicators[:] + self.filtering = data.filtering + self.normalization = data.normalization + self.selected_indicators = selected_indicators[:] if selected_indicators is not None else None + self.indicators_to_remove = indicators_to_remove[:] if indicators_to_remove is not None else None + self.type = _type + self.X = None + self.Y = None + self.X_train = None + self.Y_train = None + self.X_test = None + self.Y_test = None + if env in ENVIRONMENT_VARIABLES: self.env = env + else: self.env = "building_type" + self.train_size, self.test_size = check_dataset_size(train_size, test_size) + self.outliers = outliers + + def init_all_in_one(self): + """ + Initialize the dataset by initializing X and Y ; generating X_train, Y_train, X_test, Y_test ; removing outliers if needed. + When the type is "unsupervised", split data into X and Y is not relevant (as there is no train/test sets). + """ + if self.type == "supervised": + if self.outliers: + self.remove_outliers() + self.init_X() + self.init_Y() + self.train_test() + + def init_X(self): + """ + Initialize self.X by getting indicators in dataset. + """ + assert self.data is not None + if self.selected_indicators is not None: # select given indicators + self.X = self.data.loc[:, self.selected_indicators] + if self.indicators_to_remove: # remove given indicators + for indicator in self.indicators_to_remove: + if indicator in self.selected_indicators and indicator in self.X.columns: + self.selected_indicators.remove(indicator) + self.X = self.X.drop([indicator], axis=1) + else: + self.X = self.data.loc[:, self.indicators] + if self.indicators_to_remove: # remove given indicators + for indicator in self.indicators_to_remove: + if indicator in self.indicators and indicator in self.X.columns: + self.indicators.remove(indicator) + self.X = self.X.drop([indicator], axis=1) + + def init_Y(self): + """ + Initialize self.Y by getting EV in dataset. + """ + assert self.data is not None + self.Y = self.data[self.env].values + + def train_test(self): + """ + Create X_train, Y_train, X_test, Y_test with train_test_split method + """ + if len(self.X) <= 0: self.init_X() + if len(self.Y) <= 0: self.init_Y() + self.X_train, self.X_test, self.Y_train, self.Y_test = train_test_split(self.X, self.Y, train_size=self.train_size, test_size=self.test_size, random_state=RANDOM_STATE) # , stratify=self.Y) + + def remove_outliers(self): + """ + Detect and remove IRIS that are outliers from dataset. 
+ """ + isolation_forest = IsolationForest(random_state=RANDOM_STATE, n_estimators=100) # IsolationForest is used to detect outliers + isolation_forest.fit(self.X_train) + predictions = isolation_forest.predict(self.X_test) + for i in range(len(predictions)): + if predictions[i] == -1: + code = self.data.loc[i, "CODE"] + log.debug(code, "has been removed since it is an outlier.") + self.data = self.data.drop(i, axis=0) # delete IRIS that are detected as outliers + + def remove_rural_iris(self): + """ + Remove from dataset IRIS that are assessed as rural (because they bias the prediction). + """ + self.data = self.data[self.data.morphological_position != "Rural"] + + def get_environment_variable(self): + """ + Get values for a given EV for each assessed IRIS and store it in a CSV file. + """ + assert self.env in ENVIRONMENT_VARIABLES + data_env = pd.DataFrame(self.data[['CODE', self.env]]) + data_env.to_csv(FILE_ENV) + + def get_all_environmental_variable(self): + """ + Get EV for each assessed IRIS and store it in a CSV file. + """ + columns = ['CODE'] + columns.extend(ENVIRONMENT_VARIABLES) + data_env = pd.DataFrame(self.data[columns]) + data_env.to_csv(FILE_ENV)</code></pre> +</details> +<h3>Methods</h3> +<dl> +<dt id="Dataset.Dataset.get_all_environmental_variable"><code class="name flex"> +<span>def <span class="ident">get_all_environmental_variable</span></span>(<span>self)</span> +</code></dt> +<dd> +<div class="desc"><p>Get EV for each assessed IRIS and store it in a CSV file.</p></div> +<details class="source"> +<summary> +<span>Expand source code</span> +</summary> +<pre><code class="python">def get_all_environmental_variable(self): + """ + Get EV for each assessed IRIS and store it in a CSV file. + """ + columns = ['CODE'] + columns.extend(ENVIRONMENT_VARIABLES) + data_env = pd.DataFrame(self.data[columns]) + data_env.to_csv(FILE_ENV)</code></pre> +</details> +</dd> +<dt id="Dataset.Dataset.get_environment_variable"><code class="name flex"> +<span>def <span class="ident">get_environment_variable</span></span>(<span>self)</span> +</code></dt> +<dd> +<div class="desc"><p>Get values for a given EV for each assessed IRIS and store it in a CSV file.</p></div> +<details class="source"> +<summary> +<span>Expand source code</span> +</summary> +<pre><code class="python">def get_environment_variable(self): + """ + Get values for a given EV for each assessed IRIS and store it in a CSV file. + """ + assert self.env in ENVIRONMENT_VARIABLES + data_env = pd.DataFrame(self.data[['CODE', self.env]]) + data_env.to_csv(FILE_ENV)</code></pre> +</details> +</dd> +<dt id="Dataset.Dataset.init_X"><code class="name flex"> +<span>def <span class="ident">init_X</span></span>(<span>self)</span> +</code></dt> +<dd> +<div class="desc"><p>Initialize self.X by getting indicators in dataset.</p></div> +<details class="source"> +<summary> +<span>Expand source code</span> +</summary> +<pre><code class="python">def init_X(self): + """ + Initialize self.X by getting indicators in dataset. 
+ """ + assert self.data is not None + if self.selected_indicators is not None: # select given indicators + self.X = self.data.loc[:, self.selected_indicators] + if self.indicators_to_remove: # remove given indicators + for indicator in self.indicators_to_remove: + if indicator in self.selected_indicators and indicator in self.X.columns: + self.selected_indicators.remove(indicator) + self.X = self.X.drop([indicator], axis=1) + else: + self.X = self.data.loc[:, self.indicators] + if self.indicators_to_remove: # remove given indicators + for indicator in self.indicators_to_remove: + if indicator in self.indicators and indicator in self.X.columns: + self.indicators.remove(indicator) + self.X = self.X.drop([indicator], axis=1)</code></pre> +</details> +</dd> +<dt id="Dataset.Dataset.init_Y"><code class="name flex"> +<span>def <span class="ident">init_Y</span></span>(<span>self)</span> +</code></dt> +<dd> +<div class="desc"><p>Initialize self.Y by getting EV in dataset.</p></div> +<details class="source"> +<summary> +<span>Expand source code</span> +</summary> +<pre><code class="python">def init_Y(self): + """ + Initialize self.Y by getting EV in dataset. + """ + assert self.data is not None + self.Y = self.data[self.env].values</code></pre> +</details> +</dd> +<dt id="Dataset.Dataset.init_all_in_one"><code class="name flex"> +<span>def <span class="ident">init_all_in_one</span></span>(<span>self)</span> +</code></dt> +<dd> +<div class="desc"><p>Initialize the dataset by initializing X and Y ; generating X_train, Y_train, X_test, Y_test ; removing outliers if needed. +When the type is "unsupervised", split data into X and Y is not relevant (as there is no train/test sets).</p></div> +<details class="source"> +<summary> +<span>Expand source code</span> +</summary> +<pre><code class="python">def init_all_in_one(self): + """ + Initialize the dataset by initializing X and Y ; generating X_train, Y_train, X_test, Y_test ; removing outliers if needed. + When the type is "unsupervised", split data into X and Y is not relevant (as there is no train/test sets). + """ + if self.type == "supervised": + if self.outliers: + self.remove_outliers() + self.init_X() + self.init_Y() + self.train_test()</code></pre> +</details> +</dd> +<dt id="Dataset.Dataset.remove_outliers"><code class="name flex"> +<span>def <span class="ident">remove_outliers</span></span>(<span>self)</span> +</code></dt> +<dd> +<div class="desc"><p>Detect and remove IRIS that are outliers from dataset.</p></div> +<details class="source"> +<summary> +<span>Expand source code</span> +</summary> +<pre><code class="python">def remove_outliers(self): + """ + Detect and remove IRIS that are outliers from dataset. 
+ """ + isolation_forest = IsolationForest(random_state=RANDOM_STATE, n_estimators=100) # IsolationForest is used to detect outliers + isolation_forest.fit(self.X_train) + predictions = isolation_forest.predict(self.X_test) + for i in range(len(predictions)): + if predictions[i] == -1: + code = self.data.loc[i, "CODE"] + log.debug(code, "has been removed since it is an outlier.") + self.data = self.data.drop(i, axis=0) # delete IRIS that are detected as outliers</code></pre> +</details> +</dd> +<dt id="Dataset.Dataset.remove_rural_iris"><code class="name flex"> +<span>def <span class="ident">remove_rural_iris</span></span>(<span>self)</span> +</code></dt> +<dd> +<div class="desc"><p>Remove from dataset IRIS that are assessed as rural (because they bias the prediction).</p></div> +<details class="source"> +<summary> +<span>Expand source code</span> +</summary> +<pre><code class="python">def remove_rural_iris(self): + """ + Remove from dataset IRIS that are assessed as rural (because they bias the prediction). + """ + self.data = self.data[self.data.morphological_position != "Rural"]</code></pre> +</details> +</dd> +<dt id="Dataset.Dataset.train_test"><code class="name flex"> +<span>def <span class="ident">train_test</span></span>(<span>self)</span> +</code></dt> +<dd> +<div class="desc"><p>Create X_train, Y_train, X_test, Y_test with train_test_split method</p></div> +<details class="source"> +<summary> +<span>Expand source code</span> +</summary> +<pre><code class="python">def train_test(self): + """ + Create X_train, Y_train, X_test, Y_test with train_test_split method + """ + if len(self.X) <= 0: self.init_X() + if len(self.Y) <= 0: self.init_Y() + self.X_train, self.X_test, self.Y_train, self.Y_test = train_test_split(self.X, self.Y, train_size=self.train_size, test_size=self.test_size, random_state=RANDOM_STATE) # , stratify=self.Y)</code></pre> +</details> +</dd> +</dl> +</dd> +</dl> +</section> +</article> +<nav id="sidebar"> +<h1>Index</h1> +<div class="toc"> +<ul></ul> +</div> +<ul id="index"> +<li><h3><a href="#header-classes">Classes</a></h3> +<ul> +<li> +<h4><code><a title="Dataset.Dataset" href="#Dataset.Dataset">Dataset</a></code></h4> +<ul class=""> +<li><code><a title="Dataset.Dataset.get_all_environmental_variable" href="#Dataset.Dataset.get_all_environmental_variable">get_all_environmental_variable</a></code></li> +<li><code><a title="Dataset.Dataset.get_environment_variable" href="#Dataset.Dataset.get_environment_variable">get_environment_variable</a></code></li> +<li><code><a title="Dataset.Dataset.init_X" href="#Dataset.Dataset.init_X">init_X</a></code></li> +<li><code><a title="Dataset.Dataset.init_Y" href="#Dataset.Dataset.init_Y">init_Y</a></code></li> +<li><code><a title="Dataset.Dataset.init_all_in_one" href="#Dataset.Dataset.init_all_in_one">init_all_in_one</a></code></li> +<li><code><a title="Dataset.Dataset.remove_outliers" href="#Dataset.Dataset.remove_outliers">remove_outliers</a></code></li> +<li><code><a title="Dataset.Dataset.remove_rural_iris" href="#Dataset.Dataset.remove_rural_iris">remove_rural_iris</a></code></li> +<li><code><a title="Dataset.Dataset.train_test" href="#Dataset.Dataset.train_test">train_test</a></code></li> +</ul> +</li> +</ul> +</li> +</ul> +</nav> +</main> +<footer id="footer"> +<p>Generated by <a href="https://pdoc3.github.io/pdoc"><cite>pdoc</cite> 0.8.1</a>.</p> +</footer> +<script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/highlight.min.js"></script> +<script>hljs.initHighlightingOnLoad()</script> +</body> 
+</html> \ No newline at end of file diff --git a/doc/Method.html b/doc/Method.html new file mode 100644 index 00000000..bf7918a9 --- /dev/null +++ b/doc/Method.html @@ -0,0 +1,178 @@ +<!doctype html> +<html lang="en"> +<head> +<meta charset="utf-8"> +<meta name="viewport" content="width=device-width, initial-scale=1, minimum-scale=1" /> +<meta name="generator" content="pdoc 0.8.1" /> +<title>Method API documentation</title> +<meta name="description" content="" /> +<link href='https://cdnjs.cloudflare.com/ajax/libs/normalize/8.0.0/normalize.min.css' rel='stylesheet'> +<link href='https://cdnjs.cloudflare.com/ajax/libs/10up-sanitize.css/8.0.0/sanitize.min.css' rel='stylesheet'> +<link href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/styles/github.min.css" rel="stylesheet"> +<style>.flex{display:flex !important}body{line-height:1.5em}#content{padding:20px}#sidebar{padding:30px;overflow:hidden}#sidebar > *:last-child{margin-bottom:2cm}.http-server-breadcrumbs{font-size:130%;margin:0 0 15px 0}#footer{font-size:.75em;padding:5px 30px;border-top:1px solid #ddd;text-align:right}#footer p{margin:0 0 0 1em;display:inline-block}#footer p:last-child{margin-right:30px}h1,h2,h3,h4,h5{font-weight:300}h1{font-size:2.5em;line-height:1.1em}h2{font-size:1.75em;margin:1em 0 .50em 0}h3{font-size:1.4em;margin:25px 0 10px 0}h4{margin:0;font-size:105%}a{color:#058;text-decoration:none;transition:color .3s ease-in-out}a:hover{color:#e82}.title code{font-weight:bold}h2[id^="header-"]{margin-top:2em}.ident{color:#900}pre code{background:#f8f8f8;font-size:.8em;line-height:1.4em}code{background:#f2f2f1;padding:1px 4px;overflow-wrap:break-word}h1 code{background:transparent}pre{background:#f8f8f8;border:0;border-top:1px solid #ccc;border-bottom:1px solid #ccc;margin:1em 0;padding:1ex}#http-server-module-list{display:flex;flex-flow:column}#http-server-module-list div{display:flex}#http-server-module-list dt{min-width:10%}#http-server-module-list p{margin-top:0}.toc ul,#index{list-style-type:none;margin:0;padding:0}#index code{background:transparent}#index h3{border-bottom:1px solid #ddd}#index ul{padding:0}#index h4{margin-top:.6em;font-weight:bold}@media (min-width:200ex){#index .two-column{column-count:2}}@media (min-width:300ex){#index .two-column{column-count:3}}dl{margin-bottom:2em}dl dl:last-child{margin-bottom:4em}dd{margin:0 0 1em 3em}#header-classes + dl > dd{margin-bottom:3em}dd dd{margin-left:2em}dd p{margin:10px 0}.name{background:#eee;font-weight:bold;font-size:.85em;padding:5px 10px;display:inline-block;min-width:40%}.name:hover{background:#e0e0e0}.name > span:first-child{white-space:nowrap}.name.class > span:nth-child(2){margin-left:.4em}.inherited{color:#999;border-left:5px solid #eee;padding-left:1em}.inheritance em{font-style:normal;font-weight:bold}.desc h2{font-weight:400;font-size:1.25em}.desc h3{font-size:1em}.desc dt code{background:inherit}.source summary,.git-link-div{color:#666;text-align:right;font-weight:400;font-size:.8em;text-transform:uppercase}.source summary > *{white-space:nowrap;cursor:pointer}.git-link{color:inherit;margin-left:1em}.source pre{max-height:500px;overflow:auto;margin:0}.source pre code{font-size:12px;overflow:visible}.hlist{list-style:none}.hlist li{display:inline}.hlist li:after{content:',\2002'}.hlist li:last-child:after{content:none}.hlist .hlist{display:inline;padding-left:1em}img{max-width:100%}.admonition{padding:.1em 
.5em;margin-bottom:1em}.admonition-title{font-weight:bold}.admonition.note,.admonition.info,.admonition.important{background:#aef}.admonition.todo,.admonition.versionadded,.admonition.tip,.admonition.hint{background:#dfd}.admonition.warning,.admonition.versionchanged,.admonition.deprecated{background:#fd4}.admonition.error,.admonition.danger,.admonition.caution{background:lightpink}</style> +<style media="screen and (min-width: 700px)">@media screen and (min-width:700px){#sidebar{width:30%;height:100vh;overflow:auto;position:sticky;top:0}#content{width:70%;max-width:100ch;padding:3em 4em;border-left:1px solid #ddd}pre code{font-size:1em}.item .name{font-size:1em}main{display:flex;flex-direction:row-reverse;justify-content:flex-end}.toc ul ul,#index ul{padding-left:1.5em}.toc > ul > li{margin-top:.5em}}</style> +<style media="print">@media print{#sidebar h1{page-break-before:always}.source{display:none}}@media print{*{background:transparent !important;color:#000 !important;box-shadow:none !important;text-shadow:none !important}a[href]:after{content:" (" attr(href) ")";font-size:90%}a[href][title]:after{content:none}abbr[title]:after{content:" (" attr(title) ")"}.ir a:after,a[href^="javascript:"]:after,a[href^="#"]:after{content:""}pre,blockquote{border:1px solid #999;page-break-inside:avoid}thead{display:table-header-group}tr,img{page-break-inside:avoid}img{max-width:100% !important}@page{margin:0.5cm}p,h2,h3{orphans:3;widows:3}h1,h2,h3,h4,h5,h6{page-break-after:avoid}}</style> +</head> +<body> +<main> +<article id="content"> +<header> +<h1 class="title">Module <code>Method</code></h1> +</header> +<section id="section-intro"> +<details class="source"> +<summary> +<span>Expand source code</span> +</summary> +<pre><code class="python">from predihood.config import RANDOM_STATE + + +class Method: + """ + This class represents the general concept of a Method that is applied on data. There are two specific concepts of Method: MethodSelection and MethodPrediction. + """ + def __init__(self, name, dataset=None, classifier=None): + """ + Constructor of the Method class. Initialize attributes. + Args: + name: a string that represents the name of the method, e.g. "feature selection" or "correlation matrix" + dataset: a Dataset object on which the method will be applied + classifier: an object that can be used (e.g. fit) on data + """ + self.name = name + self.dataset = dataset + self.classifier = classifier + self.parameters = None # same as return + + def fit(self): + """ + Fit the classifier on dataset. + """ + self.classifier.random_state = RANDOM_STATE # defined random_state to have reproducibility + if self.dataset.type == "supervised": + self.classifier.fit(X=self.dataset.X_train, y=self.dataset.Y_train) + else: + # unsupervised learning does not need split between X and Y + if self.dataset.selected_indicators is not None: + self.classifier.fit(self.dataset.data[self.dataset.selected_indicators]) + else: + self.classifier.fit(self.dataset.data[self.dataset.indicators])</code></pre> +</details> +</section> +<section> +</section> +<section> +</section> +<section> +</section> +<section> +<h2 class="section-title" id="header-classes">Classes</h2> +<dl> +<dt id="Method.Method"><code class="flex name class"> +<span>class <span class="ident">Method</span></span> +<span>(</span><span>name, dataset=None, classifier=None)</span> +</code></dt> +<dd> +<div class="desc"><p>This class represents the general concept of a Method that is applied on data. 
There are two specific concepts of Method: MethodSelection and MethodPrediction.</p>
+<p>Constructor of the Method class. Initialize attributes.</p>
+<h2 id="args">Args</h2>
+<dl>
+<dt><strong><code>name</code></strong></dt>
+<dd>a string that represents the name of the method, e.g. "feature selection" or "correlation matrix"</dd>
+<dt><strong><code>dataset</code></strong></dt>
+<dd>a Dataset object on which the method will be applied</dd>
+<dt><strong><code>classifier</code></strong></dt>
+<dd>an object that can be applied to data (e.g. via its fit method)</dd>
+</dl></div>
+<details class="source">
+<summary>
+<span>Expand source code</span>
+</summary>
+<pre><code class="python">class Method:
+ """
+ This class represents the general concept of a Method that is applied on data. There are two specific concepts of Method: MethodSelection and MethodPrediction.
+ """
+ def __init__(self, name, dataset=None, classifier=None):
+ """
+ Constructor of the Method class. Initialize attributes.
+ Args:
+ name: a string that represents the name of the method, e.g. "feature selection" or "correlation matrix"
+ dataset: a Dataset object on which the method will be applied
+ classifier: an object that can be applied to data (e.g. via its fit method)
+ """
+ self.name = name
+ self.dataset = dataset
+ self.classifier = classifier
+ self.parameters = None # parameters computed by the method (same content as its return value)
+
+ def fit(self):
+ """
+ Fit the classifier on the dataset.
+ """
+ self.classifier.random_state = RANDOM_STATE # set random_state to ensure reproducibility
+ if self.dataset.type == "supervised":
+ self.classifier.fit(X=self.dataset.X_train, y=self.dataset.Y_train)
+ else:
+ # unsupervised learning does not need a split between X and Y
+ if self.dataset.selected_indicators is not None:
+ self.classifier.fit(self.dataset.data[self.dataset.selected_indicators])
+ else:
+ self.classifier.fit(self.dataset.data[self.dataset.indicators])</code></pre>
+</details>
+<h3>Methods</h3>
+<dl>
+<dt id="Method.Method.fit"><code class="name flex">
+<span>def <span class="ident">fit</span></span>(<span>self)</span>
+</code></dt>
+<dd>
+<div class="desc"><p>Fit the classifier on the dataset.</p></div>
+<details class="source">
+<summary>
+<span>Expand source code</span>
+</summary>
+<pre><code class="python">def fit(self):
+ """
+ Fit the classifier on the dataset.
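+
+ Example (illustrative sketch added for documentation, not from the original
+ source; assumes `ds` is an initialized supervised Dataset):
+     >>> from sklearn.ensemble import RandomForestClassifier
+     >>> m = Method("example", dataset=ds, classifier=RandomForestClassifier())
+     >>> m.fit()  # fits on ds.X_train / ds.Y_train with a fixed random_state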
+ """ + self.classifier.random_state = RANDOM_STATE # defined random_state to have reproducibility + if self.dataset.type == "supervised": + self.classifier.fit(X=self.dataset.X_train, y=self.dataset.Y_train) + else: + # unsupervised learning does not need split between X and Y + if self.dataset.selected_indicators is not None: + self.classifier.fit(self.dataset.data[self.dataset.selected_indicators]) + else: + self.classifier.fit(self.dataset.data[self.dataset.indicators])</code></pre> +</details> +</dd> +</dl> +</dd> +</dl> +</section> +</article> +<nav id="sidebar"> +<h1>Index</h1> +<div class="toc"> +<ul></ul> +</div> +<ul id="index"> +<li><h3><a href="#header-classes">Classes</a></h3> +<ul> +<li> +<h4><code><a title="Method.Method" href="#Method.Method">Method</a></code></h4> +<ul class=""> +<li><code><a title="Method.Method.fit" href="#Method.Method.fit">fit</a></code></li> +</ul> +</li> +</ul> +</li> +</ul> +</nav> +</main> +<footer id="footer"> +<p>Generated by <a href="https://pdoc3.github.io/pdoc"><cite>pdoc</cite> 0.8.1</a>.</p> +</footer> +<script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/highlight.min.js"></script> +<script>hljs.initHighlightingOnLoad()</script> +</body> +</html> \ No newline at end of file diff --git a/doc/MethodCleaning.html b/doc/MethodCleaning.html new file mode 100644 index 00000000..c46676df --- /dev/null +++ b/doc/MethodCleaning.html @@ -0,0 +1,705 @@ +<!doctype html> +<html lang="en"> +<head> +<meta charset="utf-8"> +<meta name="viewport" content="width=device-width, initial-scale=1, minimum-scale=1" /> +<meta name="generator" content="pdoc 0.8.1" /> +<title>MethodCleaning API documentation</title> +<meta name="description" content="" /> +<link href='https://cdnjs.cloudflare.com/ajax/libs/normalize/8.0.0/normalize.min.css' rel='stylesheet'> +<link href='https://cdnjs.cloudflare.com/ajax/libs/10up-sanitize.css/8.0.0/sanitize.min.css' rel='stylesheet'> +<link href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/styles/github.min.css" rel="stylesheet"> +<style>.flex{display:flex !important}body{line-height:1.5em}#content{padding:20px}#sidebar{padding:30px;overflow:hidden}#sidebar > *:last-child{margin-bottom:2cm}.http-server-breadcrumbs{font-size:130%;margin:0 0 15px 0}#footer{font-size:.75em;padding:5px 30px;border-top:1px solid #ddd;text-align:right}#footer p{margin:0 0 0 1em;display:inline-block}#footer p:last-child{margin-right:30px}h1,h2,h3,h4,h5{font-weight:300}h1{font-size:2.5em;line-height:1.1em}h2{font-size:1.75em;margin:1em 0 .50em 0}h3{font-size:1.4em;margin:25px 0 10px 0}h4{margin:0;font-size:105%}a{color:#058;text-decoration:none;transition:color .3s ease-in-out}a:hover{color:#e82}.title code{font-weight:bold}h2[id^="header-"]{margin-top:2em}.ident{color:#900}pre code{background:#f8f8f8;font-size:.8em;line-height:1.4em}code{background:#f2f2f1;padding:1px 4px;overflow-wrap:break-word}h1 code{background:transparent}pre{background:#f8f8f8;border:0;border-top:1px solid #ccc;border-bottom:1px solid #ccc;margin:1em 0;padding:1ex}#http-server-module-list{display:flex;flex-flow:column}#http-server-module-list div{display:flex}#http-server-module-list dt{min-width:10%}#http-server-module-list p{margin-top:0}.toc ul,#index{list-style-type:none;margin:0;padding:0}#index code{background:transparent}#index h3{border-bottom:1px solid #ddd}#index ul{padding:0}#index h4{margin-top:.6em;font-weight:bold}@media (min-width:200ex){#index .two-column{column-count:2}}@media (min-width:300ex){#index 
.two-column{column-count:3}}dl{margin-bottom:2em}dl dl:last-child{margin-bottom:4em}dd{margin:0 0 1em 3em}#header-classes + dl > dd{margin-bottom:3em}dd dd{margin-left:2em}dd p{margin:10px 0}.name{background:#eee;font-weight:bold;font-size:.85em;padding:5px 10px;display:inline-block;min-width:40%}.name:hover{background:#e0e0e0}.name > span:first-child{white-space:nowrap}.name.class > span:nth-child(2){margin-left:.4em}.inherited{color:#999;border-left:5px solid #eee;padding-left:1em}.inheritance em{font-style:normal;font-weight:bold}.desc h2{font-weight:400;font-size:1.25em}.desc h3{font-size:1em}.desc dt code{background:inherit}.source summary,.git-link-div{color:#666;text-align:right;font-weight:400;font-size:.8em;text-transform:uppercase}.source summary > *{white-space:nowrap;cursor:pointer}.git-link{color:inherit;margin-left:1em}.source pre{max-height:500px;overflow:auto;margin:0}.source pre code{font-size:12px;overflow:visible}.hlist{list-style:none}.hlist li{display:inline}.hlist li:after{content:',\2002'}.hlist li:last-child:after{content:none}.hlist .hlist{display:inline;padding-left:1em}img{max-width:100%}.admonition{padding:.1em .5em;margin-bottom:1em}.admonition-title{font-weight:bold}.admonition.note,.admonition.info,.admonition.important{background:#aef}.admonition.todo,.admonition.versionadded,.admonition.tip,.admonition.hint{background:#dfd}.admonition.warning,.admonition.versionchanged,.admonition.deprecated{background:#fd4}.admonition.error,.admonition.danger,.admonition.caution{background:lightpink}</style> +<style media="screen and (min-width: 700px)">@media screen and (min-width:700px){#sidebar{width:30%;height:100vh;overflow:auto;position:sticky;top:0}#content{width:70%;max-width:100ch;padding:3em 4em;border-left:1px solid #ddd}pre code{font-size:1em}.item .name{font-size:1em}main{display:flex;flex-direction:row-reverse;justify-content:flex-end}.toc ul ul,#index ul{padding-left:1.5em}.toc > ul > li{margin-top:.5em}}</style> +<style media="print">@media print{#sidebar h1{page-break-before:always}.source{display:none}}@media print{*{background:transparent !important;color:#000 !important;box-shadow:none !important;text-shadow:none !important}a[href]:after{content:" (" attr(href) ")";font-size:90%}a[href][title]:after{content:none}abbr[title]:after{content:" (" attr(title) ")"}.ir a:after,a[href^="javascript:"]:after,a[href^="#"]:after{content:""}pre,blockquote{border:1px solid #999;page-break-inside:avoid}thead{display:table-header-group}tr,img{page-break-inside:avoid}img{max-width:100% !important}@page{margin:0.5cm}p,h2,h3{orphans:3;widows:3}h1,h2,h3,h4,h5,h6{page-break-after:avoid}}</style> +</head> +<body> +<main> +<article id="content"> +<header> +<h1 class="title">Module <code>MethodCleaning</code></h1> +</header> +<section id="section-intro"> +<details class="source"> +<summary> +<span>Expand source code</span> +</summary> +<pre><code class="python">import logging +import os +import warnings +import logging +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd + +from predihood.classes.Method import Method +from predihood.config import ENVIRONMENT_VARIABLES, FILE_CLEANED_DATA, FOLDER_DISTRIBUTION, OLD_PREFIX, NEW_PREFIX +from predihood.utility_functions import similarity, auto_label + +log = logging.getLogger(__name__) +warnings.simplefilter(action='ignore', category=FutureWarning) + + +class MethodCleaning(Method): + """ + This class represents the method for cleaning data given by the French company. 
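+
+ Example (illustrative sketch added for documentation, not from the original
+ source; assumes `df` is the raw DataFrame with old_*/new_* columns):
+     >>> cleaner = MethodCleaning("cleaning", dataset=df)
+     >>> cleaner.clean()  # interactively asks for the canonical spelling of each value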
+ """ + def __init__(self, name, dataset): + """ + Constructor of the MethodCleansing class. Initialize attributes. + """ + Method.__init__(self, name, dataset) + self.values_by_env = {} # dictionary to store values for each EV, e.g. [House, Houses] + self.columns_dep = [] # departure columns, e.g. old_building_type, old_building_usage,... + self.columns_arr = [] # arrival columns, e.g. new_building_type, new_building_usage, ... + self.columns = { + "occupation": { + "name_dep": "old_occupation", + "data_dep": self.dataset.old_occupation, + "name_arr": "new_occupation", + "data_arr": self.dataset.new_occupation + } + } + + for env in ENVIRONMENT_VARIABLES: + temp = { + "name_dep": OLD_PREFIX + env, + "name_arr": NEW_PREFIX + env, + "data_dep": self.dataset[OLD_PREFIX + str(env)], + "data_arr": self.dataset[NEW_PREFIX + str(env)] + } + self.columns[env] = temp + + for env in self.columns: self.columns_dep.append(self.columns[env]["data_dep"]) + for env in self.columns: self.columns_arr.append(self.columns[env]["data_arr"]) + + # define outliers (to be removed) + self.outliers = ['Oui', 'Moyenne-sup', 'Location'] + + # plot variables + self.before = {} # departure values for each EV + self.after = {} # arrival values for each EV + self.labels = {} # labels for EV, e.g. urban, green areas, forest and country-side for the landscape variable + + def clean(self): + """ + Clean data from bad naming conventions. The idea of this function is to create a dictionary with all spellings for each value of each EV, e.g. [["Houses", "House"], ["Upper middle", "upper middle", "upper midddle"], ["Green areas"], ["Countryside"]]. + This dictionary is constructed by computing similarities between each values and store each spelling and finally let the user choose the best one. + """ + # + log.info("The data needs to be cleaned. For each list, write the correct word. For each EV, you will get its number of corrections and its error rate.") + # 1. getting wrong values in a dictionary ordered by env variable + self.values_by_env = {} + for col_dep, col_arr in zip(self.columns_dep, self.columns_arr): + col_name = col_dep.name[len(OLD_PREFIX):] + self.values_by_env[col_name] = [] + for val in col_dep.unique(): # get possible values for the current column + index = similarity(val, self.values_by_env[col_name]) + # if the value is similar to another, add it, else create an new array with it + if index >= 0: + self.values_by_env[col_name][index].append(val) + elif index == -1: + self.values_by_env[col_name].append([val]) + for val in col_arr.unique(): + index = similarity(val, self.values_by_env[col_name]) + if index >= 0: + self.values_by_env[col_name][index].append(val) + elif index == -1: + self.values_by_env[col_name].append([val]) + + # 2. 
renaming these wrong values in data + for key, value in self.values_by_env.items(): + col_name_old = OLD_PREFIX + key + col_name_new = NEW_PREFIX + key + nb_replacement_dep = 0 + nb_replacement_arr = 0 + for i in range(len(value)): + if len(value[i]) > 1: + arr_without_duplicates = list(dict.fromkeys(value[i])) + chosen_label = input(str(arr_without_duplicates) + ": ") + for label in value[i]: + if label != chosen_label: # if label == chosen_label: skip it because no replacement is needed + nb_replacement_dep += pd.Series(self.dataset[col_name_old] == label).sum() + nb_replacement_arr += pd.Series(self.dataset[col_name_new] == label).sum() + self.dataset.loc[self.dataset[col_name_old] == label, col_name_old] = chosen_label + self.dataset.loc[self.dataset[col_name_new] == label, col_name_new] = chosen_label + size = int(self.dataset.count()[OLD_PREFIX + key]) + int(self.dataset.count()[NEW_PREFIX + key]) + mean_error = ((nb_replacement_dep + nb_replacement_arr) / size) * 100 + log.debug("%d IRIS have been corrected for the EV %s, corresponding to an error rate of %.0f %%", (nb_replacement_dep + nb_replacement_arr), key, mean_error) + + # 3. removing outliers from data + count = 0 + for outlier in self.outliers: + self.dataset.drop(self.dataset[self.dataset.eq(outlier).any(1)].index, inplace=True) + count += 1 + log.debug("%d incorrect values removed", count) + + # 4. save data + self.dataset.to_csv(FILE_CLEANED_DATA, index=False, encoding='utf-8') + log.info("Cleaned data is in %s", FILE_CLEANED_DATA) + + def create_before_after_labels(self, name_dep, name_arr): + """ + Creates the lists 'before', 'after' and 'labels' from data. + + Args: + name_dep: a string containing the name of the departure column, e.g. old_building_type, old_building_usage... + name_arr: a string containing the name of the arrival column, e.g. new_building_type, new_building_usage... + """ + all_repartition = {} + self.before = {} + self.after = {} + + for status, value in self.dataset[name_dep].value_counts().items(): + if name_dep == OLD_PREFIX+"geographical_position": # if geo, get only the geo position (South, East, ..) and not the city + status = status.split(" ")[0] + if status in self.before: + self.before[status] += value + else: + self.before[status] = value + else: + self.before[status] = value # self.dataset[values_before].value_counts()[status] + + for status, value in self.dataset[name_arr].value_counts().items(): + if name_arr == NEW_PREFIX+"geographical_position": # if geo, get only the geo position (South, East, ..) and not the city + status = status.split(" ")[0] + if status in self.after: + self.after[status] += value + else: + self.after[status] = value + else: + self.after[status] = value # self.dataset[values_after].value_counts()[status] + + # 2. merge before and after data in the same dictionary + for status in self.before: + all_repartition[status] = [self.before[status], 0] + for status in self.after: + if status not in all_repartition: + all_repartition[status] = [0, self.after[status]] + else: + all_repartition[status][1] = self.after[status] + + # 3. convert dictionary in 3 arrays + self.before = [] + self.after = [] + self.labels = [] + for key in all_repartition: + if not isinstance(key, float): # to remove nan values + self.before.append(all_repartition[key][0]) + self.after.append(all_repartition[key][1]) + self.labels.append(key) + + def create_bar_chart(self, name, title): + """ + Plot before/after charts. + + Args: + name: a string containing the name of the EV to plot, e.g. 
building_type, building_usage, landscape, ...
+ title: a string containing the title of the plot
+ """
+ x = np.arange(len(self.labels)) # the label locations
+ width = 0.35
+
+ fig, ax = plt.subplots()
+
+ ax.bar(x - width / 2, [154 for _ in range(len(self.labels))], width=width, color="#DCDCDC") # grey bar
+ bef = ax.bar(x - width / 2, self.before, width=width, label='Avant') # before data
+ ax.bar(x + width / 2, [154 for _ in range(len(self.labels))], width=width, color="#DCDCDC") # grey bar
+ aft = ax.bar(x + width / 2, self.after, width=width, label='Après') # after data
+
+ ax.set_ylabel('Nombre de personnes')
+ plt.xticks(x, self.labels, rotation='vertical')
+ auto_label(bef, ax)
+ auto_label(aft, ax)
+ plt.tight_layout()
+ ax.legend()
+ filename = os.path.join(FOLDER_DISTRIBUTION, "distribution_" + name + ".png")
+ fig.savefig(filename)
+ ax.set_title(title)
+ plt.show()
+
+ def to_chart(self, env, name, title):
+ """
+ Create before/after data and plot it.
+ Args:
+ env: a string containing the EV to plot, e.g. building_type, building_usage, landscape...
+ name: a string containing the name to save the file
+ title: a string containing the title of the plot
+ """
+ self.create_before_after_labels(self.columns[env]["name_dep"], self.columns[env]["name_arr"])
+ self.create_bar_chart(name, title)</code></pre>
+</details>
+</section>
+<section>
+</section>
+<section>
+</section>
+<section>
+</section>
+<section>
+<h2 class="section-title" id="header-classes">Classes</h2>
+<dl>
+<dt id="MethodCleaning.MethodCleaning"><code class="flex name class">
+<span>class <span class="ident">MethodCleaning</span></span>
+<span>(</span><span>name, dataset)</span>
+</code></dt>
+<dd>
+<div class="desc"><p>This class represents the method for cleaning data given by the French company.</p>
+<p>Constructor of the MethodCleaning class. Initialize attributes.</p></div>
+<details class="source">
+<summary>
+<span>Expand source code</span>
+</summary>
+<pre><code class="python">class MethodCleaning(Method):
+ """
+ This class represents the method for cleaning data given by the French company.
+ """
+ def __init__(self, name, dataset):
+ """
+ Constructor of the MethodCleaning class. Initialize attributes.
+ """
+ Method.__init__(self, name, dataset)
+ self.values_by_env = {} # dictionary to store values for each EV, e.g. [House, Houses]
+ self.columns_dep = [] # departure columns, e.g. old_building_type, old_building_usage,...
+ self.columns_arr = [] # arrival columns, e.g. new_building_type, new_building_usage, ...
+ self.columns = {
+ "occupation": {
+ "name_dep": "old_occupation",
+ "data_dep": self.dataset.old_occupation,
+ "name_arr": "new_occupation",
+ "data_arr": self.dataset.new_occupation
+ }
+ }
+
+ for env in ENVIRONMENT_VARIABLES:
+ temp = {
+ "name_dep": OLD_PREFIX + env,
+ "name_arr": NEW_PREFIX + env,
+ "data_dep": self.dataset[OLD_PREFIX + str(env)],
+ "data_arr": self.dataset[NEW_PREFIX + str(env)]
+ }
+ self.columns[env] = temp
+
+ for env in self.columns: self.columns_dep.append(self.columns[env]["data_dep"])
+ for env in self.columns: self.columns_arr.append(self.columns[env]["data_arr"])
+
+ # define outliers (to be removed)
+ self.outliers = ['Oui', 'Moyenne-sup', 'Location']
+
+ # plot variables
+ self.before = {} # departure values for each EV
+ self.after = {} # arrival values for each EV
+ self.labels = {} # labels for EV, e.g.
urban, green areas, forest and country-side for the landscape variable
+
+ def clean(self):
+ """
+ Clean data from bad naming conventions. The idea of this function is to create a dictionary with all spellings for each value of each EV, e.g. [["Houses", "House"], ["Upper middle", "upper middle", "upper midddle"], ["Green areas"], ["Countryside"]].
+ This dictionary is constructed by computing similarities between values, storing each spelling, and finally letting the user choose the best one.
+ """
+ log.info("The data needs to be cleaned. For each list, write the correct word. For each EV, you will get its number of corrections and its error rate.")
+ # 1. getting wrong values in a dictionary ordered by env variable
+ self.values_by_env = {}
+ for col_dep, col_arr in zip(self.columns_dep, self.columns_arr):
+ col_name = col_dep.name[len(OLD_PREFIX):]
+ self.values_by_env[col_name] = []
+ for val in col_dep.unique(): # get possible values for the current column
+ index = similarity(val, self.values_by_env[col_name])
+ # if the value is similar to another, add it, otherwise create a new array with it
+ if index >= 0:
+ self.values_by_env[col_name][index].append(val)
+ elif index == -1:
+ self.values_by_env[col_name].append([val])
+ for val in col_arr.unique():
+ index = similarity(val, self.values_by_env[col_name])
+ if index >= 0:
+ self.values_by_env[col_name][index].append(val)
+ elif index == -1:
+ self.values_by_env[col_name].append([val])
+
+ # 2. renaming these wrong values in data
+ for key, value in self.values_by_env.items():
+ col_name_old = OLD_PREFIX + key
+ col_name_new = NEW_PREFIX + key
+ nb_replacement_dep = 0
+ nb_replacement_arr = 0
+ for i in range(len(value)):
+ if len(value[i]) > 1:
+ arr_without_duplicates = list(dict.fromkeys(value[i]))
+ chosen_label = input(str(arr_without_duplicates) + ": ")
+ for label in value[i]:
+ if label != chosen_label: # if label == chosen_label: skip it because no replacement is needed
+ nb_replacement_dep += pd.Series(self.dataset[col_name_old] == label).sum()
+ nb_replacement_arr += pd.Series(self.dataset[col_name_new] == label).sum()
+ self.dataset.loc[self.dataset[col_name_old] == label, col_name_old] = chosen_label
+ self.dataset.loc[self.dataset[col_name_new] == label, col_name_new] = chosen_label
+ size = int(self.dataset.count()[OLD_PREFIX + key]) + int(self.dataset.count()[NEW_PREFIX + key])
+ mean_error = ((nb_replacement_dep + nb_replacement_arr) / size) * 100
+ log.debug("%d IRIS have been corrected for the EV %s, corresponding to an error rate of %.0f %%", (nb_replacement_dep + nb_replacement_arr), key, mean_error)
+
+ # 3. removing outliers from data
+ count = 0
+ for outlier in self.outliers:
+ self.dataset.drop(self.dataset[self.dataset.eq(outlier).any(1)].index, inplace=True)
+ count += 1
+ log.debug("%d incorrect values removed", count)
+
+ # 4. save data
+ self.dataset.to_csv(FILE_CLEANED_DATA, index=False, encoding='utf-8')
+ log.info("Cleaned data is in %s", FILE_CLEANED_DATA)
+
+ def create_before_after_labels(self, name_dep, name_arr):
+ """
+ Creates the lists 'before', 'after' and 'labels' from data.
+
+ Args:
+ name_dep: a string containing the name of the departure column, e.g. old_building_type, old_building_usage...
+ name_arr: a string containing the name of the arrival column, e.g. new_building_type, new_building_usage...
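+
+ Example (illustrative sketch added for documentation, not from the original source):
+     >>> cleaner.create_before_after_labels("old_building_type", "new_building_type")
+     >>> cleaner.labels  # e.g. ['Houses', 'Towers', ...] (hypothetical values)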
+ """ + all_repartition = {} + self.before = {} + self.after = {} + + for status, value in self.dataset[name_dep].value_counts().items(): + if name_dep == OLD_PREFIX+"geographical_position": # if geo, get only the geo position (South, East, ..) and not the city + status = status.split(" ")[0] + if status in self.before: + self.before[status] += value + else: + self.before[status] = value + else: + self.before[status] = value # self.dataset[values_before].value_counts()[status] + + for status, value in self.dataset[name_arr].value_counts().items(): + if name_arr == NEW_PREFIX+"geographical_position": # if geo, get only the geo position (South, East, ..) and not the city + status = status.split(" ")[0] + if status in self.after: + self.after[status] += value + else: + self.after[status] = value + else: + self.after[status] = value # self.dataset[values_after].value_counts()[status] + + # 2. merge before and after data in the same dictionary + for status in self.before: + all_repartition[status] = [self.before[status], 0] + for status in self.after: + if status not in all_repartition: + all_repartition[status] = [0, self.after[status]] + else: + all_repartition[status][1] = self.after[status] + + # 3. convert dictionary in 3 arrays + self.before = [] + self.after = [] + self.labels = [] + for key in all_repartition: + if not isinstance(key, float): # to remove nan values + self.before.append(all_repartition[key][0]) + self.after.append(all_repartition[key][1]) + self.labels.append(key) + + def create_bar_chart(self, name, title): + """ + Plot before/after charts. + + Args: + name: a string containing the name of the EV to plot, e.g. building_type, building_usage, landscape, ... + title: a string containing the title of the plot + """ + x = np.arange(len(self.labels)) # the label locations + width = 0.35 + + fig, ax = plt.subplots() + + ax.bar(x - width / 2, [154 for _ in range(len(self.labels))], width=width, color="#DCDCDC") # grey bar + bef = ax.bar(x - width / 2, self.before, width=width, label='Avant') # before data + ax.bar(x + width / 2, [154 for _ in range(len(self.labels))], width=width, color="#DCDCDC") # grey bar + aft = ax.bar(x + width / 2, self.after, width=width, label='Après') # after data + + ax.set_ylabel('Nombre de personnes') + plt.xticks(x, self.labels, rotation='vertical') + auto_label(bef, ax) + auto_label(aft, ax) + plt.tight_layout() + ax.legend() + filename = os.path.join(FOLDER_DISTRIBUTION, "distribution_" + name + ".png") + fig.savefig(filename) + ax.set_title(title) + plt.show() + + def to_chart(self, env, name, title): + """ + Create before/after data and plot it. + :param env: + :param name: + :param title: + Args: + env: a string containing the EV to plot, e.g. building_type, building_usage, landscape... + name: a string containing the name to save the file + title: a string containing the title of the plot + """ + self.create_before_after_labels(self.columns[env]["name_dep"], self.columns[env]["name_arr"]) + self.create_bar_chart(name, title)</code></pre> +</details> +<h3>Ancestors</h3> +<ul class="hlist"> +<li>predihood.classes.Method.Method</li> +</ul> +<h3>Methods</h3> +<dl> +<dt id="MethodCleaning.MethodCleaning.clean"><code class="name flex"> +<span>def <span class="ident">clean</span></span>(<span>self)</span> +</code></dt> +<dd> +<div class="desc"><p>Clean data from bad naming conventions. The idea of this function is to create a dictionary with all spellings for each value of each EV, e.g. 
[["Houses", "House"], ["Upper middle", "upper middle", "upper midddle"], ["Green areas"], ["Countryside"]]. +This dictionary is constructed by computing similarities between each values and store each spelling and finally let the user choose the best one.</p></div> +<details class="source"> +<summary> +<span>Expand source code</span> +</summary> +<pre><code class="python">def clean(self): + """ + Clean data from bad naming conventions. The idea of this function is to create a dictionary with all spellings for each value of each EV, e.g. [["Houses", "House"], ["Upper middle", "upper middle", "upper midddle"], ["Green areas"], ["Countryside"]]. + This dictionary is constructed by computing similarities between each values and store each spelling and finally let the user choose the best one. + """ + # + log.info("The data needs to be cleaned. For each list, write the correct word. For each EV, you will get its number of corrections and its error rate.") + # 1. getting wrong values in a dictionary ordered by env variable + self.values_by_env = {} + for col_dep, col_arr in zip(self.columns_dep, self.columns_arr): + col_name = col_dep.name[len(OLD_PREFIX):] + self.values_by_env[col_name] = [] + for val in col_dep.unique(): # get possible values for the current column + index = similarity(val, self.values_by_env[col_name]) + # if the value is similar to another, add it, else create an new array with it + if index >= 0: + self.values_by_env[col_name][index].append(val) + elif index == -1: + self.values_by_env[col_name].append([val]) + for val in col_arr.unique(): + index = similarity(val, self.values_by_env[col_name]) + if index >= 0: + self.values_by_env[col_name][index].append(val) + elif index == -1: + self.values_by_env[col_name].append([val]) + + # 2. renaming these wrong values in data + for key, value in self.values_by_env.items(): + col_name_old = OLD_PREFIX + key + col_name_new = NEW_PREFIX + key + nb_replacement_dep = 0 + nb_replacement_arr = 0 + for i in range(len(value)): + if len(value[i]) > 1: + arr_without_duplicates = list(dict.fromkeys(value[i])) + chosen_label = input(str(arr_without_duplicates) + ": ") + for label in value[i]: + if label != chosen_label: # if label == chosen_label: skip it because no replacement is needed + nb_replacement_dep += pd.Series(self.dataset[col_name_old] == label).sum() + nb_replacement_arr += pd.Series(self.dataset[col_name_new] == label).sum() + self.dataset.loc[self.dataset[col_name_old] == label, col_name_old] = chosen_label + self.dataset.loc[self.dataset[col_name_new] == label, col_name_new] = chosen_label + size = int(self.dataset.count()[OLD_PREFIX + key]) + int(self.dataset.count()[NEW_PREFIX + key]) + mean_error = ((nb_replacement_dep + nb_replacement_arr) / size) * 100 + log.debug("%d IRIS have been corrected for the EV %s, corresponding to an error rate of %.0f %%", (nb_replacement_dep + nb_replacement_arr), key, mean_error) + + # 3. removing outliers from data + count = 0 + for outlier in self.outliers: + self.dataset.drop(self.dataset[self.dataset.eq(outlier).any(1)].index, inplace=True) + count += 1 + log.debug("%d incorrect values removed", count) + + # 4. 
save data + self.dataset.to_csv(FILE_CLEANED_DATA, index=False, encoding='utf-8') + log.info("Cleaned data is in %s", FILE_CLEANED_DATA)</code></pre> +</details> +</dd> +<dt id="MethodCleaning.MethodCleaning.create_bar_chart"><code class="name flex"> +<span>def <span class="ident">create_bar_chart</span></span>(<span>self, name, title)</span> +</code></dt> +<dd> +<div class="desc"><p>Plot before/after charts.</p> +<h2 id="args">Args</h2> +<dl> +<dt><strong><code>name</code></strong></dt> +<dd>a string containing the name of the EV to plot, e.g. building_type, building_usage, landscape, …</dd> +<dt><strong><code>title</code></strong></dt> +<dd>a string containing the title of the plot</dd> +</dl></div> +<details class="source"> +<summary> +<span>Expand source code</span> +</summary> +<pre><code class="python">def create_bar_chart(self, name, title): + """ + Plot before/after charts. + + Args: + name: a string containing the name of the EV to plot, e.g. building_type, building_usage, landscape, ... + title: a string containing the title of the plot + """ + x = np.arange(len(self.labels)) # the label locations + width = 0.35 + + fig, ax = plt.subplots() + + ax.bar(x - width / 2, [154 for _ in range(len(self.labels))], width=width, color="#DCDCDC") # grey bar + bef = ax.bar(x - width / 2, self.before, width=width, label='Avant') # before data + ax.bar(x + width / 2, [154 for _ in range(len(self.labels))], width=width, color="#DCDCDC") # grey bar + aft = ax.bar(x + width / 2, self.after, width=width, label='Après') # after data + + ax.set_ylabel('Nombre de personnes') + plt.xticks(x, self.labels, rotation='vertical') + auto_label(bef, ax) + auto_label(aft, ax) + plt.tight_layout() + ax.legend() + filename = os.path.join(FOLDER_DISTRIBUTION, "distribution_" + name + ".png") + fig.savefig(filename) + ax.set_title(title) + plt.show()</code></pre> +</details> +</dd> +<dt id="MethodCleaning.MethodCleaning.create_before_after_labels"><code class="name flex"> +<span>def <span class="ident">create_before_after_labels</span></span>(<span>self, name_dep, name_arr)</span> +</code></dt> +<dd> +<div class="desc"><p>Creates the lists 'before', 'after' and 'labels' from data.</p> +<h2 id="args">Args</h2> +<dl> +<dt><strong><code>name_dep</code></strong></dt> +<dd>a string containing the name of the departure column, e.g. old_building_type, old_building_usage…</dd> +<dt><strong><code>name_arr</code></strong></dt> +<dd>a string containing the name of the arrival column, e.g. new_building_type, new_building_usage…</dd> +</dl></div> +<details class="source"> +<summary> +<span>Expand source code</span> +</summary> +<pre><code class="python">def create_before_after_labels(self, name_dep, name_arr): + """ + Creates the lists 'before', 'after' and 'labels' from data. + + Args: + name_dep: a string containing the name of the departure column, e.g. old_building_type, old_building_usage... + name_arr: a string containing the name of the arrival column, e.g. new_building_type, new_building_usage... + """ + all_repartition = {} + self.before = {} + self.after = {} + + for status, value in self.dataset[name_dep].value_counts().items(): + if name_dep == OLD_PREFIX+"geographical_position": # if geo, get only the geo position (South, East, ..) 
and not the city
+ status = status.split(" ")[0]
+ if status in self.before:
+ self.before[status] += value
+ else:
+ self.before[status] = value
+ else:
+ self.before[status] = value # self.dataset[values_before].value_counts()[status]
+
+ for status, value in self.dataset[name_arr].value_counts().items():
+ if name_arr == NEW_PREFIX+"geographical_position": # if geo, get only the geo position (South, East, ..) and not the city
+ status = status.split(" ")[0]
+ if status in self.after:
+ self.after[status] += value
+ else:
+ self.after[status] = value
+ else:
+ self.after[status] = value # self.dataset[values_after].value_counts()[status]
+
+ # 2. merge before and after data in the same dictionary
+ for status in self.before:
+ all_repartition[status] = [self.before[status], 0]
+ for status in self.after:
+ if status not in all_repartition:
+ all_repartition[status] = [0, self.after[status]]
+ else:
+ all_repartition[status][1] = self.after[status]
+
+ # 3. convert dictionary in 3 arrays
+ self.before = []
+ self.after = []
+ self.labels = []
+ for key in all_repartition:
+ if not isinstance(key, float): # to remove nan values
+ self.before.append(all_repartition[key][0])
+ self.after.append(all_repartition[key][1])
+ self.labels.append(key)</code></pre>
+</details>
+</dd>
+<dt id="MethodCleaning.MethodCleaning.to_chart"><code class="name flex">
+<span>def <span class="ident">to_chart</span></span>(<span>self, env, name, title)</span>
+</code></dt>
+<dd>
+<div class="desc"><p>Create before/after data and plot it.</p>
+<h2 id="args">Args</h2>
+<dl>
+<dt><strong><code>env</code></strong></dt>
+<dd>a string containing the EV to plot, e.g. building_type, building_usage, landscape…</dd>
+<dt><strong><code>name</code></strong></dt>
+<dd>a string containing the name to save the file</dd>
+<dt><strong><code>title</code></strong></dt>
+<dd>a string containing the title of the plot</dd>
+</dl></div>
+<details class="source">
+<summary>
+<span>Expand source code</span>
+</summary>
+<pre><code class="python">def to_chart(self, env, name, title):
+ """
+ Create before/after data and plot it.
+ Args:
+ env: a string containing the EV to plot, e.g. building_type, building_usage, landscape...
+ name: a string containing the name to save the file + title: a string containing the title of the plot + """ + self.create_before_after_labels(self.columns[env]["name_dep"], self.columns[env]["name_arr"]) + self.create_bar_chart(name, title)</code></pre> +</details> +</dd> +</dl> +</dd> +</dl> +</section> +</article> +<nav id="sidebar"> +<h1>Index</h1> +<div class="toc"> +<ul></ul> +</div> +<ul id="index"> +<li><h3><a href="#header-classes">Classes</a></h3> +<ul> +<li> +<h4><code><a title="MethodCleaning.MethodCleaning" href="#MethodCleaning.MethodCleaning">MethodCleaning</a></code></h4> +<ul class=""> +<li><code><a title="MethodCleaning.MethodCleaning.clean" href="#MethodCleaning.MethodCleaning.clean">clean</a></code></li> +<li><code><a title="MethodCleaning.MethodCleaning.create_bar_chart" href="#MethodCleaning.MethodCleaning.create_bar_chart">create_bar_chart</a></code></li> +<li><code><a title="MethodCleaning.MethodCleaning.create_before_after_labels" href="#MethodCleaning.MethodCleaning.create_before_after_labels">create_before_after_labels</a></code></li> +<li><code><a title="MethodCleaning.MethodCleaning.to_chart" href="#MethodCleaning.MethodCleaning.to_chart">to_chart</a></code></li> +</ul> +</li> +</ul> +</li> +</ul> +</nav> +</main> +<footer id="footer"> +<p>Generated by <a href="https://pdoc3.github.io/pdoc"><cite>pdoc</cite> 0.8.1</a>.</p> +</footer> +<script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/highlight.min.js"></script> +<script>hljs.initHighlightingOnLoad()</script> +</body> +</html> \ No newline at end of file diff --git a/doc/MethodPrediction.html b/doc/MethodPrediction.html new file mode 100644 index 00000000..eebcdaf1 --- /dev/null +++ b/doc/MethodPrediction.html @@ -0,0 +1,282 @@ +<!doctype html> +<html lang="en"> +<head> +<meta charset="utf-8"> +<meta name="viewport" content="width=device-width, initial-scale=1, minimum-scale=1" /> +<meta name="generator" content="pdoc 0.8.1" /> +<title>MethodPrediction API documentation</title> +<meta name="description" content="" /> +<link href='https://cdnjs.cloudflare.com/ajax/libs/normalize/8.0.0/normalize.min.css' rel='stylesheet'> +<link href='https://cdnjs.cloudflare.com/ajax/libs/10up-sanitize.css/8.0.0/sanitize.min.css' rel='stylesheet'> +<link href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/styles/github.min.css" rel="stylesheet"> +<style>.flex{display:flex !important}body{line-height:1.5em}#content{padding:20px}#sidebar{padding:30px;overflow:hidden}#sidebar > *:last-child{margin-bottom:2cm}.http-server-breadcrumbs{font-size:130%;margin:0 0 15px 0}#footer{font-size:.75em;padding:5px 30px;border-top:1px solid #ddd;text-align:right}#footer p{margin:0 0 0 1em;display:inline-block}#footer p:last-child{margin-right:30px}h1,h2,h3,h4,h5{font-weight:300}h1{font-size:2.5em;line-height:1.1em}h2{font-size:1.75em;margin:1em 0 .50em 0}h3{font-size:1.4em;margin:25px 0 10px 0}h4{margin:0;font-size:105%}a{color:#058;text-decoration:none;transition:color .3s ease-in-out}a:hover{color:#e82}.title code{font-weight:bold}h2[id^="header-"]{margin-top:2em}.ident{color:#900}pre code{background:#f8f8f8;font-size:.8em;line-height:1.4em}code{background:#f2f2f1;padding:1px 4px;overflow-wrap:break-word}h1 code{background:transparent}pre{background:#f8f8f8;border:0;border-top:1px solid #ccc;border-bottom:1px solid #ccc;margin:1em 0;padding:1ex}#http-server-module-list{display:flex;flex-flow:column}#http-server-module-list div{display:flex}#http-server-module-list dt{min-width:10%}#http-server-module-list 
p{margin-top:0}.toc ul,#index{list-style-type:none;margin:0;padding:0}#index code{background:transparent}#index h3{border-bottom:1px solid #ddd}#index ul{padding:0}#index h4{margin-top:.6em;font-weight:bold}@media (min-width:200ex){#index .two-column{column-count:2}}@media (min-width:300ex){#index .two-column{column-count:3}}dl{margin-bottom:2em}dl dl:last-child{margin-bottom:4em}dd{margin:0 0 1em 3em}#header-classes + dl > dd{margin-bottom:3em}dd dd{margin-left:2em}dd p{margin:10px 0}.name{background:#eee;font-weight:bold;font-size:.85em;padding:5px 10px;display:inline-block;min-width:40%}.name:hover{background:#e0e0e0}.name > span:first-child{white-space:nowrap}.name.class > span:nth-child(2){margin-left:.4em}.inherited{color:#999;border-left:5px solid #eee;padding-left:1em}.inheritance em{font-style:normal;font-weight:bold}.desc h2{font-weight:400;font-size:1.25em}.desc h3{font-size:1em}.desc dt code{background:inherit}.source summary,.git-link-div{color:#666;text-align:right;font-weight:400;font-size:.8em;text-transform:uppercase}.source summary > *{white-space:nowrap;cursor:pointer}.git-link{color:inherit;margin-left:1em}.source pre{max-height:500px;overflow:auto;margin:0}.source pre code{font-size:12px;overflow:visible}.hlist{list-style:none}.hlist li{display:inline}.hlist li:after{content:',\2002'}.hlist li:last-child:after{content:none}.hlist .hlist{display:inline;padding-left:1em}img{max-width:100%}.admonition{padding:.1em .5em;margin-bottom:1em}.admonition-title{font-weight:bold}.admonition.note,.admonition.info,.admonition.important{background:#aef}.admonition.todo,.admonition.versionadded,.admonition.tip,.admonition.hint{background:#dfd}.admonition.warning,.admonition.versionchanged,.admonition.deprecated{background:#fd4}.admonition.error,.admonition.danger,.admonition.caution{background:lightpink}</style> +<style media="screen and (min-width: 700px)">@media screen and (min-width:700px){#sidebar{width:30%;height:100vh;overflow:auto;position:sticky;top:0}#content{width:70%;max-width:100ch;padding:3em 4em;border-left:1px solid #ddd}pre code{font-size:1em}.item .name{font-size:1em}main{display:flex;flex-direction:row-reverse;justify-content:flex-end}.toc ul ul,#index ul{padding-left:1.5em}.toc > ul > li{margin-top:.5em}}</style> +<style media="print">@media print{#sidebar h1{page-break-before:always}.source{display:none}}@media print{*{background:transparent !important;color:#000 !important;box-shadow:none !important;text-shadow:none !important}a[href]:after{content:" (" attr(href) ")";font-size:90%}a[href][title]:after{content:none}abbr[title]:after{content:" (" attr(title) ")"}.ir a:after,a[href^="javascript:"]:after,a[href^="#"]:after{content:""}pre,blockquote{border:1px solid #999;page-break-inside:avoid}thead{display:table-header-group}tr,img{page-break-inside:avoid}img{max-width:100% !important}@page{margin:0.5cm}p,h2,h3{orphans:3;widows:3}h1,h2,h3,h4,h5,h6{page-break-after:avoid}}</style> +</head> +<body> +<main> +<article id="content"> +<header> +<h1 class="title">Module <code>MethodPrediction</code></h1> +</header> +<section id="section-intro"> +<details class="source"> +<summary> +<span>Expand source code</span> +</summary> +<pre><code class="python">import logging +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd + +from area import area +from predihood import model +from predihood.classes.Method import Method +from sklearn.metrics import auc, roc_curve +from sklearn.model_selection import cross_val_score, GridSearchCV +from sklearn.tree import 
DecisionTreeClassifier + +log = logging.getLogger(__name__) + + +class MethodPrediction(Method): + """ + This class represents a method for predicting EV and computing performance at a national level. + """ + def __init__(self, name, dataset, classifier): + """ + Constructor of the MethodPrediction class. Initialize attributes. + """ + Method.__init__(self, name, dataset, classifier) + self.prediction = None + self.accuracy = 0 # in percentage + self.precision = 0 + self.recall = 0 + self.confusion_matrix = [0, 0, 0, 0] + + def compute_performance(self): + """ + Compute performance metrics, i.e. accuracy, using 5-fold cross-validation. + """ + scores = cross_val_score(self.classifier, self.dataset.X, self.dataset.Y, cv=5) + self.accuracy = scores.mean() * 100 + + def predict(self, iris_code=None): + """ + Predict one EV for the given iris. The EV to predict is stored in the dataset as the "env" variable. + """ + iris_object = model.get_iris_from_code(iris_code) + iris_area = area(model.get_coords_from_code(iris_code)) / 1000000 + iris_population = iris_object["properties"]["raw_indicators"]["P14_POP"] + iris_indicators_values = [] + iris_indicators_names = [] + if self.dataset.normalization == "density": + density = iris_population/iris_area + for indicator in self.dataset.selected_indicators: + if indicator in iris_object["properties"]["raw_indicators"] and density > 0: + iris_indicators_values.append(float(iris_object["properties"]["raw_indicators"][indicator]) / density) + else: + iris_indicators_values.append(0) + iris_indicators_names.append(indicator) + elif self.dataset.normalization == "population": + for indicator in self.dataset.selected_indicators: + # if indicator == "P14_POP": continue # skip this indicator because of normalisation # TODO + if indicator in iris_object["properties"]["raw_indicators"] and iris_population > 0: + iris_indicators_values.append(float(iris_object["properties"]["raw_indicators"][indicator]) / iris_population) + else: + iris_indicators_values.append(0) + iris_indicators_names.append(indicator) + else: + for indicator in self.dataset.selected_indicators: + if indicator in iris_object["properties"]["raw_indicators"]: + iris_indicators_values.append(float(iris_object["properties"]["raw_indicators"][indicator])) + else: + iris_indicators_values.append(0) + iris_indicators_names.append(indicator) + + df = pd.DataFrame([iris_indicators_values], columns=iris_indicators_names) + self.prediction = self.classifier.predict(df)[0] + log.debug("%s", self.prediction)</code></pre> +</details> +</section> +<section> +</section> +<section> +</section> +<section> +</section> +<section> +<h2 class="section-title" id="header-classes">Classes</h2> +<dl> +<dt id="MethodPrediction.MethodPrediction"><code class="flex name class"> +<span>class <span class="ident">MethodPrediction</span></span> +<span>(</span><span>name, dataset, classifier)</span> +</code></dt> +<dd> +<div class="desc"><p>This class represents a method for predicting EV and computing performance at a national level.</p> +<p>Constructor of the MethodPrediction class. Initialize attributes.</p></div> +<details class="source"> +<summary> +<span>Expand source code</span> +</summary> +<pre><code class="python">class MethodPrediction(Method): + """ + This class represents a method for predicting EV and computing performance at a national level. + """ + def __init__(self, name, dataset, classifier): + """ + Constructor of the MethodPrediction class. Initialize attributes. 
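+ + Example (a hypothetical usage sketch, not part of the original code; assumes a prepared Dataset instance and one of the classifiers listed in predihood.config.AVAILABLE_CLASSIFIERS): + method = MethodPrediction("random forest", dataset, RandomForestClassifier()) # "dataset" is assumed to be an already-built Dataset + method.compute_performance() # fills method.accuracy with the 5-fold cross-validated accuracy, in percent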
+ """ + Method.__init__(self, name, dataset, classifier) + self.prediction = None + self.accuracy = 0 # in percentage + self.precision = 0 + self.recall = 0 + self.confusion_matrix = [0, 0, 0, 0] + + def compute_performance(self): + """ + Compute performance metrics, i.e. accuracy. + """ + scores = cross_val_score(self.classifier, self.dataset.X, self.dataset.Y, cv=5) + self.accuracy = scores.mean() * 100 + + def predict(self, iris_code=None): + """ + Predict one EV for the given iris. The EV to predict is stored in the dataset as "env" variable. + """ + iris_object = model.get_iris_from_code(iris_code) + iris_area = area(model.get_coords_from_code(iris_code)) / 1000000 + iris_population = iris_object["properties"]["raw_indicators"]["P14_POP"] + iris_indicators_values = [] + iris_indicators_names = [] + if self.dataset.normalization == "density": + density = iris_population/iris_area + for indicator in self.dataset.selected_indicators: + if indicator in iris_object["properties"]["raw_indicators"] and density > 0: + iris_indicators_values.append(float(iris_object["properties"]["raw_indicators"][indicator]) / density) + else: + iris_indicators_values.append(0) + iris_indicators_names.append(indicator) + elif self.dataset.normalization == "population": + for indicator in self.dataset.selected_indicators: + # if indicator == "P14_POP": continue # skip this indicator because of normalisation # TODO + if indicator in iris_object["properties"]["raw_indicators"] and iris_population > 0: + iris_indicators_values.append(float(iris_object["properties"]["raw_indicators"][indicator]) / iris_population) + else: + iris_indicators_values.append(0) + iris_indicators_names.append(indicator) + else: + for indicator in self.dataset.selected_indicators: + if indicator in iris_object["properties"]["raw_indicators"]: + iris_indicators_values.append(float(iris_object["properties"]["raw_indicators"][indicator])) + else: + iris_indicators_values.append(0) + iris_indicators_names.append(indicator) + + df = pd.DataFrame([iris_indicators_values], columns=iris_indicators_names) + self.prediction = self.classifier.predict(df)[0] + log.debug("%s", self.prediction)</code></pre> +</details> +<h3>Ancestors</h3> +<ul class="hlist"> +<li>predihood.classes.Method.Method</li> +</ul> +<h3>Methods</h3> +<dl> +<dt id="MethodPrediction.MethodPrediction.compute_performance"><code class="name flex"> +<span>def <span class="ident">compute_performance</span></span>(<span>self)</span> +</code></dt> +<dd> +<div class="desc"><p>Compute performance metrics, i.e. accuracy.</p></div> +<details class="source"> +<summary> +<span>Expand source code</span> +</summary> +<pre><code class="python">def compute_performance(self): + """ + Compute performance metrics, i.e. accuracy. + """ + scores = cross_val_score(self.classifier, self.dataset.X, self.dataset.Y, cv=5) + self.accuracy = scores.mean() * 100</code></pre> +</details> +</dd> +<dt id="MethodPrediction.MethodPrediction.predict"><code class="name flex"> +<span>def <span class="ident">predict</span></span>(<span>self, iris_code=None)</span> +</code></dt> +<dd> +<div class="desc"><p>Predict one EV for the given iris. The EV to predict is stored in the dataset as "env" variable.</p></div> +<details class="source"> +<summary> +<span>Expand source code</span> +</summary> +<pre><code class="python">def predict(self, iris_code=None): + """ + Predict one EV for the given iris. The EV to predict is stored in the dataset as "env" variable. 
+ """ + iris_object = model.get_iris_from_code(iris_code) + iris_area = area(model.get_coords_from_code(iris_code)) / 1000000 + iris_population = iris_object["properties"]["raw_indicators"]["P14_POP"] + iris_indicators_values = [] + iris_indicators_names = [] + if self.dataset.normalization == "density": + density = iris_population/iris_area + for indicator in self.dataset.selected_indicators: + if indicator in iris_object["properties"]["raw_indicators"] and density > 0: + iris_indicators_values.append(float(iris_object["properties"]["raw_indicators"][indicator]) / density) + else: + iris_indicators_values.append(0) + iris_indicators_names.append(indicator) + elif self.dataset.normalization == "population": + for indicator in self.dataset.selected_indicators: + # if indicator == "P14_POP": continue # skip this indicator because of normalisation # TODO + if indicator in iris_object["properties"]["raw_indicators"] and iris_population > 0: + iris_indicators_values.append(float(iris_object["properties"]["raw_indicators"][indicator]) / iris_population) + else: + iris_indicators_values.append(0) + iris_indicators_names.append(indicator) + else: + for indicator in self.dataset.selected_indicators: + if indicator in iris_object["properties"]["raw_indicators"]: + iris_indicators_values.append(float(iris_object["properties"]["raw_indicators"][indicator])) + else: + iris_indicators_values.append(0) + iris_indicators_names.append(indicator) + + df = pd.DataFrame([iris_indicators_values], columns=iris_indicators_names) + self.prediction = self.classifier.predict(df)[0] + log.debug("%s", self.prediction)</code></pre> +</details> +</dd> +</dl> +</dd> +</dl> +</section> +</article> +<nav id="sidebar"> +<h1>Index</h1> +<div class="toc"> +<ul></ul> +</div> +<ul id="index"> +<li><h3><a href="#header-classes">Classes</a></h3> +<ul> +<li> +<h4><code><a title="MethodPrediction.MethodPrediction" href="#MethodPrediction.MethodPrediction">MethodPrediction</a></code></h4> +<ul class=""> +<li><code><a title="MethodPrediction.MethodPrediction.compute_performance" href="#MethodPrediction.MethodPrediction.compute_performance">compute_performance</a></code></li> +<li><code><a title="MethodPrediction.MethodPrediction.predict" href="#MethodPrediction.MethodPrediction.predict">predict</a></code></li> +</ul> +</li> +</ul> +</li> +</ul> +</nav> +</main> +<footer id="footer"> +<p>Generated by <a href="https://pdoc3.github.io/pdoc"><cite>pdoc</cite> 0.8.1</a>.</p> +</footer> +<script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/highlight.min.js"></script> +<script>hljs.initHighlightingOnLoad()</script> +</body> +</html> \ No newline at end of file diff --git a/doc/MethodSelection.html b/doc/MethodSelection.html new file mode 100644 index 00000000..3d6db8a3 --- /dev/null +++ b/doc/MethodSelection.html @@ -0,0 +1,272 @@ +<!doctype html> +<html lang="en"> +<head> +<meta charset="utf-8"> +<meta name="viewport" content="width=device-width, initial-scale=1, minimum-scale=1" /> +<meta name="generator" content="pdoc 0.8.1" /> +<title>MethodSelection API documentation</title> +<meta name="description" content="" /> +<link href='https://cdnjs.cloudflare.com/ajax/libs/normalize/8.0.0/normalize.min.css' rel='stylesheet'> +<link href='https://cdnjs.cloudflare.com/ajax/libs/10up-sanitize.css/8.0.0/sanitize.min.css' rel='stylesheet'> +<link href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/styles/github.min.css" rel="stylesheet"> +<style>.flex{display:flex 
!important}body{line-height:1.5em}#content{padding:20px}#sidebar{padding:30px;overflow:hidden}#sidebar > *:last-child{margin-bottom:2cm}.http-server-breadcrumbs{font-size:130%;margin:0 0 15px 0}#footer{font-size:.75em;padding:5px 30px;border-top:1px solid #ddd;text-align:right}#footer p{margin:0 0 0 1em;display:inline-block}#footer p:last-child{margin-right:30px}h1,h2,h3,h4,h5{font-weight:300}h1{font-size:2.5em;line-height:1.1em}h2{font-size:1.75em;margin:1em 0 .50em 0}h3{font-size:1.4em;margin:25px 0 10px 0}h4{margin:0;font-size:105%}a{color:#058;text-decoration:none;transition:color .3s ease-in-out}a:hover{color:#e82}.title code{font-weight:bold}h2[id^="header-"]{margin-top:2em}.ident{color:#900}pre code{background:#f8f8f8;font-size:.8em;line-height:1.4em}code{background:#f2f2f1;padding:1px 4px;overflow-wrap:break-word}h1 code{background:transparent}pre{background:#f8f8f8;border:0;border-top:1px solid #ccc;border-bottom:1px solid #ccc;margin:1em 0;padding:1ex}#http-server-module-list{display:flex;flex-flow:column}#http-server-module-list div{display:flex}#http-server-module-list dt{min-width:10%}#http-server-module-list p{margin-top:0}.toc ul,#index{list-style-type:none;margin:0;padding:0}#index code{background:transparent}#index h3{border-bottom:1px solid #ddd}#index ul{padding:0}#index h4{margin-top:.6em;font-weight:bold}@media (min-width:200ex){#index .two-column{column-count:2}}@media (min-width:300ex){#index .two-column{column-count:3}}dl{margin-bottom:2em}dl dl:last-child{margin-bottom:4em}dd{margin:0 0 1em 3em}#header-classes + dl > dd{margin-bottom:3em}dd dd{margin-left:2em}dd p{margin:10px 0}.name{background:#eee;font-weight:bold;font-size:.85em;padding:5px 10px;display:inline-block;min-width:40%}.name:hover{background:#e0e0e0}.name > span:first-child{white-space:nowrap}.name.class > span:nth-child(2){margin-left:.4em}.inherited{color:#999;border-left:5px solid #eee;padding-left:1em}.inheritance em{font-style:normal;font-weight:bold}.desc h2{font-weight:400;font-size:1.25em}.desc h3{font-size:1em}.desc dt code{background:inherit}.source summary,.git-link-div{color:#666;text-align:right;font-weight:400;font-size:.8em;text-transform:uppercase}.source summary > *{white-space:nowrap;cursor:pointer}.git-link{color:inherit;margin-left:1em}.source pre{max-height:500px;overflow:auto;margin:0}.source pre code{font-size:12px;overflow:visible}.hlist{list-style:none}.hlist li{display:inline}.hlist li:after{content:',\2002'}.hlist li:last-child:after{content:none}.hlist .hlist{display:inline;padding-left:1em}img{max-width:100%}.admonition{padding:.1em .5em;margin-bottom:1em}.admonition-title{font-weight:bold}.admonition.note,.admonition.info,.admonition.important{background:#aef}.admonition.todo,.admonition.versionadded,.admonition.tip,.admonition.hint{background:#dfd}.admonition.warning,.admonition.versionchanged,.admonition.deprecated{background:#fd4}.admonition.error,.admonition.danger,.admonition.caution{background:lightpink}</style> +<style media="screen and (min-width: 700px)">@media screen and (min-width:700px){#sidebar{width:30%;height:100vh;overflow:auto;position:sticky;top:0}#content{width:70%;max-width:100ch;padding:3em 4em;border-left:1px solid #ddd}pre code{font-size:1em}.item .name{font-size:1em}main{display:flex;flex-direction:row-reverse;justify-content:flex-end}.toc ul ul,#index ul{padding-left:1.5em}.toc > ul > li{margin-top:.5em}}</style> +<style media="print">@media print{#sidebar h1{page-break-before:always}.source{display:none}}@media print{*{background:transparent 
!important;color:#000 !important;box-shadow:none !important;text-shadow:none !important}a[href]:after{content:" (" attr(href) ")";font-size:90%}a[href][title]:after{content:none}abbr[title]:after{content:" (" attr(title) ")"}.ir a:after,a[href^="javascript:"]:after,a[href^="#"]:after{content:""}pre,blockquote{border:1px solid #999;page-break-inside:avoid}thead{display:table-header-group}tr,img{page-break-inside:avoid}img{max-width:100% !important}@page{margin:0.5cm}p,h2,h3{orphans:3;widows:3}h1,h2,h3,h4,h5,h6{page-break-after:avoid}}</style> +</head> +<body> +<main> +<article id="content"> +<header> +<h1 class="title">Module <code>MethodSelection</code></h1> +</header> +<section id="section-intro"> +<details class="source"> +<summary> +<span>Expand source code</span> +</summary> +<pre><code class="python">import os + +import matplotlib.pyplot as plt +import numpy as np +import seaborn as sns + +from predihood.classes.Method import Method +from predihood.config import RANDOM_STATE, TITLES + + +class MethodSelection(Method): + """ + This class represents a method for selecting a subset of indicators among all INSEE indicators. + """ + def __init__(self, name, dataset, classifier=None, transform=False, parameters=None): + """ + Constructor of the MethodSelection class. Initialize attributes. + """ + Method.__init__(self, name, dataset, classifier) + self.transform = transform + self.threshold = None + self.best_indicators = None + self.parameters = parameters + + def fit(self): + """ + Fit the classifier on the training data. + """ + if self.classifier is not None: + self.classifier.random_state = RANDOM_STATE + if self.transform: + self.classifier.fit_transform(self.dataset.X_train, self.dataset.Y_train) + else: + self.classifier.fit(self.dataset.X_train, self.dataset.Y_train) + + def compute_selection(self): + """ + Get the selection results of the classifier according to the method's name. + """ + if self.name == "feature importance ET" or self.name == "feature importance RF": + importance = self.classifier.feature_importances_ + indicators_importance = {self.dataset.indicators[i]: importance[i] for i in range(len(importance))} # create a dictionary to associate each indicator with its importance + indicators_importance = {k: v for k, v in sorted(indicators_importance.items(), key=lambda item: item[1], reverse=True)} # order dictionary by value in descending order + k_best = [[key, value] for key, value in indicators_importance.items()][:self.parameters["top_k"]] + self.best_indicators = k_best + elif self.name == "heat map": + fig, ax = plt.subplots() + ax.xaxis.tick_top() + + # get indicators that are fully correlated (i.e. corr=1) + temp_data = self.dataset.data[self.dataset.indicators][:] # get only INSEE indicators (!= CODE, AREA, EV) + corr_matrix = temp_data.corr(method=self.parameters["method"]).abs() + sns.heatmap(corr_matrix) + upper = corr_matrix.where(np.triu(np.ones(corr_matrix.shape), k=1).astype(bool)) + + self.best_indicators = [] + for i in range(len(upper.columns)): + column = upper.columns[i] + for k, value in upper[column].items(): + if value == 1 and column not in self.best_indicators: # and (column, k) not in self.best_indicators and (k, column) not in self.best_indicators: + self.best_indicators.append(column) + + if TITLES: plt.title("Correlation matrix: filtering = " + self.dataset.filtering + ", normalization = " + self.dataset.normalization) + plt.show() + else: + raise Exception("Unknown name. 
Choose among [\"feature importance ET\", \"feature importance RF\", \"heat map\"].")</code></pre> +</details> +</section> +<section> +</section> +<section> +</section> +<section> +</section> +<section> +<h2 class="section-title" id="header-classes">Classes</h2> +<dl> +<dt id="MethodSelection.MethodSelection"><code class="flex name class"> +<span>class <span class="ident">MethodSelection</span></span> +<span>(</span><span>name, dataset, classifier=None, transform=False, parameters=None)</span> +</code></dt> +<dd> +<div class="desc"><p>This class represents a method for selection a subset of indicators among all INSEE indicators.</p> +<p>Constructor of the MethodSelection class. Initialize attributes.</p></div> +<details class="source"> +<summary> +<span>Expand source code</span> +</summary> +<pre><code class="python">class MethodSelection(Method): + """ + This class represents a method for selection a subset of indicators among all INSEE indicators. + """ + def __init__(self, name, dataset, classifier=None, transform=False, parameters=None): + """ + Constructor of the MethodSelection class. Initialize attributes. + """ + Method.__init__(self, name, dataset, classifier) + self.transform = transform + self.threshold = None + self.best_indicators = None + self.parameters = parameters + + def fit(self): + """ + Fit the train data on the classifier. + """ + if self.classifier is not None: + self.classifier.random_state = RANDOM_STATE + if self.transform: + self.classifier.fit_transform(self.dataset.X_train, self.dataset.Y_train) + else: + self.classifier.fit(self.dataset.X_train, self.dataset.Y_train) + + def compute_selection(self): + """ + Get results of the classifier according to its name. + """ + if self.name == "feature importance ET" or self.name == "feature importance RF": + importance = self.classifier.feature_importances_ + indicators_importance = {self.dataset.indicators[i]: importance[i] for i in range(len(importance))} # create a dictionary to associate each indicator with its importance + indicators_importance = {k: v for k, v in sorted(indicators_importance.items(), key=lambda item: item[1], reverse=True)} # order dictionary by value in descending order + k_best = [[key, value] for key, value in indicators_importance.items()][:self.parameters["top_k"]] + self.best_indicators = k_best + elif self.name == "heat map": + fig, ax = plt.subplots() + ax.xaxis.tick_top() + + # get indicators that are fully correlated (i.e. corr=1) + temp_data = self.dataset.data[self.dataset.indicators][:] # get only INSEE indicators (!= CODE, AREA, EV) + corr_matrix = temp_data.corr(method=self.parameters["method"]).abs() + sns.heatmap(corr_matrix) + upper = corr_matrix.where(np.triu(np.ones(corr_matrix.shape), k=1).astype(np.bool)) + + self.best_indicators = [] + for i in range(len(upper.columns)): + column = upper.columns[i] + for k, value in upper[column].items(): + if value == 1 and column not in self.best_indicators: # and (column, k) not in self.best_indicators and (k, column) not in self.best_indicators: + self.best_indicators.append(column) + + if TITLES: plt.title("Correlation matrix: filtering = " + self.dataset.filtering + ", normalization = " + self.dataset.normalization) + plt.show() + else: + raise Exception("Unknown name. 
Choose among [\"feature importance ET\", \"feature importance RF\", \"heat map\"].")</code></pre> +</details> +<h3>Ancestors</h3> +<ul class="hlist"> +<li>predihood.classes.Method.Method</li> +</ul> +<h3>Methods</h3> +<dl> +<dt id="MethodSelection.MethodSelection.compute_selection"><code class="name flex"> +<span>def <span class="ident">compute_selection</span></span>(<span>self)</span> +</code></dt> +<dd> +<div class="desc"><p>Get results of the classifier according to its name.</p></div> +<details class="source"> +<summary> +<span>Expand source code</span> +</summary> +<pre><code class="python">def compute_selection(self): + """ + Get results of the classifier according to its name. + """ + if self.name == "feature importance ET" or self.name == "feature importance RF": + importance = self.classifier.feature_importances_ + indicators_importance = {self.dataset.indicators[i]: importance[i] for i in range(len(importance))} # create a dictionary to associate each indicator with its importance + indicators_importance = {k: v for k, v in sorted(indicators_importance.items(), key=lambda item: item[1], reverse=True)} # order dictionary by value in descending order + k_best = [[key, value] for key, value in indicators_importance.items()][:self.parameters["top_k"]] + self.best_indicators = k_best + elif self.name == "heat map": + fig, ax = plt.subplots() + ax.xaxis.tick_top() + + # get indicators that are fully correlated (i.e. corr=1) + temp_data = self.dataset.data[self.dataset.indicators][:] # get only INSEE indicators (!= CODE, AREA, EV) + corr_matrix = temp_data.corr(method=self.parameters["method"]).abs() + sns.heatmap(corr_matrix) + upper = corr_matrix.where(np.triu(np.ones(corr_matrix.shape), k=1).astype(np.bool)) + + self.best_indicators = [] + for i in range(len(upper.columns)): + column = upper.columns[i] + for k, value in upper[column].items(): + if value == 1 and column not in self.best_indicators: # and (column, k) not in self.best_indicators and (k, column) not in self.best_indicators: + self.best_indicators.append(column) + + if TITLES: plt.title("Correlation matrix: filtering = " + self.dataset.filtering + ", normalization = " + self.dataset.normalization) + plt.show() + else: + raise Exception("Unknown name. Choose among [\"feature importance ET\", \"feature importance RF\", \"heat map\"].")</code></pre> +</details> +</dd> +<dt id="MethodSelection.MethodSelection.fit"><code class="name flex"> +<span>def <span class="ident">fit</span></span>(<span>self)</span> +</code></dt> +<dd> +<div class="desc"><p>Fit the train data on the classifier.</p></div> +<details class="source"> +<summary> +<span>Expand source code</span> +</summary> +<pre><code class="python">def fit(self): + """ + Fit the train data on the classifier. 
+ """ + if self.classifier is not None: + self.classifier.random_state = RANDOM_STATE + if self.transform: + self.classifier.fit_transform(self.dataset.X_train, self.dataset.Y_train) + else: + self.classifier.fit(self.dataset.X_train, self.dataset.Y_train)</code></pre> +</details> +</dd> +</dl> +</dd> +</dl> +</section> +</article> +<nav id="sidebar"> +<h1>Index</h1> +<div class="toc"> +<ul></ul> +</div> +<ul id="index"> +<li><h3><a href="#header-classes">Classes</a></h3> +<ul> +<li> +<h4><code><a title="MethodSelection.MethodSelection" href="#MethodSelection.MethodSelection">MethodSelection</a></code></h4> +<ul class=""> +<li><code><a title="MethodSelection.MethodSelection.compute_selection" href="#MethodSelection.MethodSelection.compute_selection">compute_selection</a></code></li> +<li><code><a title="MethodSelection.MethodSelection.fit" href="#MethodSelection.MethodSelection.fit">fit</a></code></li> +</ul> +</li> +</ul> +</li> +</ul> +</nav> +</main> +<footer id="footer"> +<p>Generated by <a href="https://pdoc3.github.io/pdoc"><cite>pdoc</cite> 0.8.1</a>.</p> +</footer> +<script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/highlight.min.js"></script> +<script>hljs.initHighlightingOnLoad()</script> +</body> +</html> \ No newline at end of file diff --git a/doc/__init__.html b/doc/__init__.html new file mode 100644 index 00000000..22bd492e --- /dev/null +++ b/doc/__init__.html @@ -0,0 +1,57 @@ +<!doctype html> +<html lang="en"> +<head> +<meta charset="utf-8"> +<meta name="viewport" content="width=device-width, initial-scale=1, minimum-scale=1" /> +<meta name="generator" content="pdoc 0.8.1" /> +<title>__init__ API documentation</title> +<meta name="description" content="" /> +<link href='https://cdnjs.cloudflare.com/ajax/libs/normalize/8.0.0/normalize.min.css' rel='stylesheet'> +<link href='https://cdnjs.cloudflare.com/ajax/libs/10up-sanitize.css/8.0.0/sanitize.min.css' rel='stylesheet'> +<link href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/styles/github.min.css" rel="stylesheet"> +<style>.flex{display:flex !important}body{line-height:1.5em}#content{padding:20px}#sidebar{padding:30px;overflow:hidden}#sidebar > *:last-child{margin-bottom:2cm}.http-server-breadcrumbs{font-size:130%;margin:0 0 15px 0}#footer{font-size:.75em;padding:5px 30px;border-top:1px solid #ddd;text-align:right}#footer p{margin:0 0 0 1em;display:inline-block}#footer p:last-child{margin-right:30px}h1,h2,h3,h4,h5{font-weight:300}h1{font-size:2.5em;line-height:1.1em}h2{font-size:1.75em;margin:1em 0 .50em 0}h3{font-size:1.4em;margin:25px 0 10px 0}h4{margin:0;font-size:105%}a{color:#058;text-decoration:none;transition:color .3s ease-in-out}a:hover{color:#e82}.title code{font-weight:bold}h2[id^="header-"]{margin-top:2em}.ident{color:#900}pre code{background:#f8f8f8;font-size:.8em;line-height:1.4em}code{background:#f2f2f1;padding:1px 4px;overflow-wrap:break-word}h1 code{background:transparent}pre{background:#f8f8f8;border:0;border-top:1px solid #ccc;border-bottom:1px solid #ccc;margin:1em 0;padding:1ex}#http-server-module-list{display:flex;flex-flow:column}#http-server-module-list div{display:flex}#http-server-module-list dt{min-width:10%}#http-server-module-list p{margin-top:0}.toc ul,#index{list-style-type:none;margin:0;padding:0}#index code{background:transparent}#index h3{border-bottom:1px solid #ddd}#index ul{padding:0}#index h4{margin-top:.6em;font-weight:bold}@media (min-width:200ex){#index .two-column{column-count:2}}@media (min-width:300ex){#index 
.two-column{column-count:3}}dl{margin-bottom:2em}dl dl:last-child{margin-bottom:4em}dd{margin:0 0 1em 3em}#header-classes + dl > dd{margin-bottom:3em}dd dd{margin-left:2em}dd p{margin:10px 0}.name{background:#eee;font-weight:bold;font-size:.85em;padding:5px 10px;display:inline-block;min-width:40%}.name:hover{background:#e0e0e0}.name > span:first-child{white-space:nowrap}.name.class > span:nth-child(2){margin-left:.4em}.inherited{color:#999;border-left:5px solid #eee;padding-left:1em}.inheritance em{font-style:normal;font-weight:bold}.desc h2{font-weight:400;font-size:1.25em}.desc h3{font-size:1em}.desc dt code{background:inherit}.source summary,.git-link-div{color:#666;text-align:right;font-weight:400;font-size:.8em;text-transform:uppercase}.source summary > *{white-space:nowrap;cursor:pointer}.git-link{color:inherit;margin-left:1em}.source pre{max-height:500px;overflow:auto;margin:0}.source pre code{font-size:12px;overflow:visible}.hlist{list-style:none}.hlist li{display:inline}.hlist li:after{content:',\2002'}.hlist li:last-child:after{content:none}.hlist .hlist{display:inline;padding-left:1em}img{max-width:100%}.admonition{padding:.1em .5em;margin-bottom:1em}.admonition-title{font-weight:bold}.admonition.note,.admonition.info,.admonition.important{background:#aef}.admonition.todo,.admonition.versionadded,.admonition.tip,.admonition.hint{background:#dfd}.admonition.warning,.admonition.versionchanged,.admonition.deprecated{background:#fd4}.admonition.error,.admonition.danger,.admonition.caution{background:lightpink}</style> +<style media="screen and (min-width: 700px)">@media screen and (min-width:700px){#sidebar{width:30%;height:100vh;overflow:auto;position:sticky;top:0}#content{width:70%;max-width:100ch;padding:3em 4em;border-left:1px solid #ddd}pre code{font-size:1em}.item .name{font-size:1em}main{display:flex;flex-direction:row-reverse;justify-content:flex-end}.toc ul ul,#index ul{padding-left:1.5em}.toc > ul > li{margin-top:.5em}}</style> +<style media="print">@media print{#sidebar h1{page-break-before:always}.source{display:none}}@media print{*{background:transparent !important;color:#000 !important;box-shadow:none !important;text-shadow:none !important}a[href]:after{content:" (" attr(href) ")";font-size:90%}a[href][title]:after{content:none}abbr[title]:after{content:" (" attr(title) ")"}.ir a:after,a[href^="javascript:"]:after,a[href^="#"]:after{content:""}pre,blockquote{border:1px solid #999;page-break-inside:avoid}thead{display:table-header-group}tr,img{page-break-inside:avoid}img{max-width:100% !important}@page{margin:0.5cm}p,h2,h3{orphans:3;widows:3}h1,h2,h3,h4,h5,h6{page-break-after:avoid}}</style> +</head> +<body> +<main> +<article id="content"> +<header> +<h1 class="title">Module <code>__init__</code></h1> +</header> +<section id="section-intro"> +<details class="source"> +<summary> +<span>Expand source code</span> +</summary> +<pre><code class="python">import logging +import sys + +logging.basicConfig(level=logging.DEBUG)</code></pre> +</details> +</section> +<section> +</section> +<section> +</section> +<section> +</section> +<section> +</section> +</article> +<nav id="sidebar"> +<h1>Index</h1> +<div class="toc"> +<ul></ul> +</div> +<ul id="index"> +</ul> +</nav> +</main> +<footer id="footer"> +<p>Generated by <a href="https://pdoc3.github.io/pdoc"><cite>pdoc</cite> 0.8.1</a>.</p> +</footer> +<script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/highlight.min.js"></script> +<script>hljs.initHighlightingOnLoad()</script> +</body> +</html> \ No newline at 
end of file diff --git a/doc/cleaning.html b/doc/cleaning.html new file mode 100644 index 00000000..979cab35 --- /dev/null +++ b/doc/cleaning.html @@ -0,0 +1,275 @@ +<!doctype html> +<html lang="en"> +<head> +<meta charset="utf-8"> +<meta name="viewport" content="width=device-width, initial-scale=1, minimum-scale=1" /> +<meta name="generator" content="pdoc 0.8.1" /> +<title>cleaning API documentation</title> +<meta name="description" content="" /> +<link href='https://cdnjs.cloudflare.com/ajax/libs/normalize/8.0.0/normalize.min.css' rel='stylesheet'> +<link href='https://cdnjs.cloudflare.com/ajax/libs/10up-sanitize.css/8.0.0/sanitize.min.css' rel='stylesheet'> +<link href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/styles/github.min.css" rel="stylesheet"> +<style>.flex{display:flex !important}body{line-height:1.5em}#content{padding:20px}#sidebar{padding:30px;overflow:hidden}#sidebar > *:last-child{margin-bottom:2cm}.http-server-breadcrumbs{font-size:130%;margin:0 0 15px 0}#footer{font-size:.75em;padding:5px 30px;border-top:1px solid #ddd;text-align:right}#footer p{margin:0 0 0 1em;display:inline-block}#footer p:last-child{margin-right:30px}h1,h2,h3,h4,h5{font-weight:300}h1{font-size:2.5em;line-height:1.1em}h2{font-size:1.75em;margin:1em 0 .50em 0}h3{font-size:1.4em;margin:25px 0 10px 0}h4{margin:0;font-size:105%}a{color:#058;text-decoration:none;transition:color .3s ease-in-out}a:hover{color:#e82}.title code{font-weight:bold}h2[id^="header-"]{margin-top:2em}.ident{color:#900}pre code{background:#f8f8f8;font-size:.8em;line-height:1.4em}code{background:#f2f2f1;padding:1px 4px;overflow-wrap:break-word}h1 code{background:transparent}pre{background:#f8f8f8;border:0;border-top:1px solid #ccc;border-bottom:1px solid #ccc;margin:1em 0;padding:1ex}#http-server-module-list{display:flex;flex-flow:column}#http-server-module-list div{display:flex}#http-server-module-list dt{min-width:10%}#http-server-module-list p{margin-top:0}.toc ul,#index{list-style-type:none;margin:0;padding:0}#index code{background:transparent}#index h3{border-bottom:1px solid #ddd}#index ul{padding:0}#index h4{margin-top:.6em;font-weight:bold}@media (min-width:200ex){#index .two-column{column-count:2}}@media (min-width:300ex){#index .two-column{column-count:3}}dl{margin-bottom:2em}dl dl:last-child{margin-bottom:4em}dd{margin:0 0 1em 3em}#header-classes + dl > dd{margin-bottom:3em}dd dd{margin-left:2em}dd p{margin:10px 0}.name{background:#eee;font-weight:bold;font-size:.85em;padding:5px 10px;display:inline-block;min-width:40%}.name:hover{background:#e0e0e0}.name > span:first-child{white-space:nowrap}.name.class > span:nth-child(2){margin-left:.4em}.inherited{color:#999;border-left:5px solid #eee;padding-left:1em}.inheritance em{font-style:normal;font-weight:bold}.desc h2{font-weight:400;font-size:1.25em}.desc h3{font-size:1em}.desc dt code{background:inherit}.source summary,.git-link-div{color:#666;text-align:right;font-weight:400;font-size:.8em;text-transform:uppercase}.source summary > *{white-space:nowrap;cursor:pointer}.git-link{color:inherit;margin-left:1em}.source pre{max-height:500px;overflow:auto;margin:0}.source pre code{font-size:12px;overflow:visible}.hlist{list-style:none}.hlist li{display:inline}.hlist li:after{content:',\2002'}.hlist li:last-child:after{content:none}.hlist .hlist{display:inline;padding-left:1em}img{max-width:100%}.admonition{padding:.1em 
.5em;margin-bottom:1em}.admonition-title{font-weight:bold}.admonition.note,.admonition.info,.admonition.important{background:#aef}.admonition.todo,.admonition.versionadded,.admonition.tip,.admonition.hint{background:#dfd}.admonition.warning,.admonition.versionchanged,.admonition.deprecated{background:#fd4}.admonition.error,.admonition.danger,.admonition.caution{background:lightpink}</style> +<style media="screen and (min-width: 700px)">@media screen and (min-width:700px){#sidebar{width:30%;height:100vh;overflow:auto;position:sticky;top:0}#content{width:70%;max-width:100ch;padding:3em 4em;border-left:1px solid #ddd}pre code{font-size:1em}.item .name{font-size:1em}main{display:flex;flex-direction:row-reverse;justify-content:flex-end}.toc ul ul,#index ul{padding-left:1.5em}.toc > ul > li{margin-top:.5em}}</style> +<style media="print">@media print{#sidebar h1{page-break-before:always}.source{display:none}}@media print{*{background:transparent !important;color:#000 !important;box-shadow:none !important;text-shadow:none !important}a[href]:after{content:" (" attr(href) ")";font-size:90%}a[href][title]:after{content:none}abbr[title]:after{content:" (" attr(title) ")"}.ir a:after,a[href^="javascript:"]:after,a[href^="#"]:after{content:""}pre,blockquote{border:1px solid #999;page-break-inside:avoid}thead{display:table-header-group}tr,img{page-break-inside:avoid}img{max-width:100% !important}@page{margin:0.5cm}p,h2,h3{orphans:3;widows:3}h1,h2,h3,h4,h5,h6{page-break-after:avoid}}</style> +</head> +<body> +<main> +<article id="content"> +<header> +<h1 class="title">Module <code>cleaning</code></h1> +</header> +<section id="section-intro"> +<details class="source"> +<summary> +<span>Expand source code</span> +</summary> +<pre><code class="python">import logging +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd + +from predihood.classes.MethodCleaning import MethodCleaning +from predihood.config import FILE_DATA_HIL, OLD_PREFIX, NEW_PREFIX, FOLDER_DISTRIBUTION + +log = logging.getLogger(__name__) + + +def clean(): + """ + Clean Home in Love data and generate some charts to study the distribution of the data. + """ + # 1. read data from Excel file + data = pd.read_excel(FILE_DATA_HIL) + + # 2. rename dataset's columns because of bad naming convention + columns = ["id", "name", "HiL_id", "sex", "age", "nb_child", "income", "monthly_charges", "tax_revenue"] + for elem in ["address", "country", "occupation", "rent"]: columns.append(OLD_PREFIX + elem) + for elem in ["address", "country", "occupation"]: columns.append(NEW_PREFIX + elem) + columns.append("reason") + for elem in ["context", "status", "building_type", "building_usage", "landscape", "morphological_position", "geographical_position", "social_class"]: columns.append(OLD_PREFIX + elem) + for elem in ["context", "building_type", "building_usage", "landscape", "morphological_position", "geographical_position", "social_class"]: columns.append(NEW_PREFIX + elem) + data.columns = columns + data.head() + + # 3. clean data by changing misspelled values + cleaning = MethodCleaning("cleaning", data) + cleaning.clean() + log.info("Many plots have been generated in " + FOLDER_DISTRIBUTION) + + # 4. 
distribution between women and men + women_men = data["sex"].value_counts() + labels = "Women", "Men" + number_men_women = [women_men["Femme"], women_men["Homme"]] # getting number of women and number of men + colors = ["salmon", "lightblue"] + plt.pie(number_men_women, labels=labels, colors=colors, autopct='%i%%', shadow=True, startangle=90) + plt.axis("equal") + plt.title("Distribution of gender") + plt.show() + + # 5. distribution of ages + data_temp = data + data_temp = data_temp.dropna() # remove NaN values + ages_plot = [] + total_plot = [] + min_age, max_age = int(min(data_temp["age"])), int(max(data_temp["age"])) + for counter in range(min_age, max_age + 1): + total_plot.append(data_temp.loc[data_temp.age == float(counter), "age"].count()) + ages_plot.append(counter) + mean = np.average(ages_plot, weights=total_plot) + + # First view: bar chart + plt.bar(ages_plot, total_plot) + plt.axvline(x=mean, color="red") # draw the mean age as a vertical line + plt.xlabel("Age (in years)") + plt.ylabel("Number of people") + plt.title("Distribution of age") + plt.show() + + # Second view: histogram + ages = data_temp["age"] + plt.hist(ages, facecolor="gray", align="mid") + plt.xlabel("Age (in years)") + plt.ylabel("Number of people") + plt.title("Distribution of age") + plt.show() + + # 6. distribution of incomes + incomes = data["income"] + plt.hist(incomes, facecolor="gray", align="mid") + plt.title("Distribution of monthly income") + plt.xlabel("Monthly income (in euros)") + plt.ylabel("Number of people") + plt.show() + + # 7. distribution of reasons for transfer + transfers = data["reason"].value_counts() + labels = transfers.index.tolist() + sizes = [transfers[i] for i in range(len(transfers))] + plt.pie(sizes, labels=labels, autopct="%.2f", shadow=True, startangle=90) + plt.axis("equal") + plt.title("Distribution of the reason of job transfer") + plt.show() + + # 8. distribution of geographic positions + geo = pd.concat([data[OLD_PREFIX + "geographical_position"], data[NEW_PREFIX + "geographical_position"]], ignore_index=True) + split_geo = [geo[i].split()[0] if not isinstance(geo[i], float) else "" for i in range(len(geo))] + set_geo = set(split_geo) + uniques = [split_geo.count(elem) for elem in set_geo] + labels = set_geo + plt.pie(uniques, labels=labels, autopct="%.2f", shadow=True, startangle=90) + plt.title("Distribution of the geographical position") + plt.show() + + # 9. 
draw evolutions before and after job transfer for each EV + cleaning.to_chart("occupation", "status", "Evolution of status before and after job transfer") + cleaning.to_chart("building_type", "building_type", "Evolution of building type before and after job transfer") + cleaning.to_chart("building_usage", "building_usage", "Evolution of building usage before and after job transfer") + cleaning.to_chart("landscape", "landscapes", "Evolution of landscapes before and after job transfer") + cleaning.to_chart("social_class", "social", "Evolution of social classes before and after job transfer") + cleaning.to_chart("morphological_position", "morpho", "Evolution of morphological positions before and after job transfer") + cleaning.to_chart("geographical_position", "geo", "Evolution of geographical positions before and after job transfer") + + +if __name__ == '__main__': + clean()</code></pre> +</details> +</section> +<section> +</section> +<section> +</section> +<section> +<h2 class="section-title" id="header-functions">Functions</h2> +<dl> +<dt id="cleaning.clean"><code class="name flex"> +<span>def <span class="ident">clean</span></span>(<span>)</span> +</code></dt> +<dd> +<div class="desc"><p>Clean Home in Love data and generate some charts to study the distribution of the data.</p></div> +<details class="source"> +<summary> +<span>Expand source code</span> +</summary> +<pre><code class="python">def clean(): + """ + Clean Home in Love data and generate some charts to study the distribution of the data. + """ + # 1. read data from Excel file + data = pd.read_excel(FILE_DATA_HIL) + + # 2. rename dataset's columns because of bad naming convention + columns = ["id", "name", "HiL_id", "sex", "age", "nb_child", "income", "monthly_charges", "tax_revenue"] + for elem in ["address", "country", "occupation", "rent"]: columns.append(OLD_PREFIX + elem) + for elem in ["address", "country", "occupation"]: columns.append(NEW_PREFIX + elem) + columns.append("reason") + for elem in ["context", "status", "building_type", "building_usage", "landscape", "morphological_position", "geographical_position", "social_class"]: columns.append(OLD_PREFIX + elem) + for elem in ["context", "building_type", "building_usage", "landscape", "morphological_position", "geographical_position", "social_class"]: columns.append(NEW_PREFIX + elem) + data.columns = columns + data.head() + + # 3. clean data by changing misspelled values + cleaning = MethodCleaning("cleaning", data) + cleaning.clean() + log.info("Many plots have been generated in " + FOLDER_DISTRIBUTION) + + # 4. distribution between women and men + women_men = data["sex"].value_counts() + labels = "Women", "Men" + number_men_women = [women_men["Femme"], women_men["Homme"]] # getting number of women and number of men + colors = ["salmon", "lightblue"] + plt.pie(number_men_women, labels=labels, colors=colors, autopct='%i%%', shadow=True, startangle=90) + plt.axis("equal") + plt.title("Distribution of gender") + plt.show() + + # 5. 
distribution of ages + data_temp = data + data_temp = data_temp.dropna() # remove NaN values + ages_plot = [] + total_plot = [] + min_age, max_age = int(min(data_temp["age"])), int(max(data_temp["age"])) + for counter in range(min_age, max_age + 1): + total_plot.append(data_temp.loc[data_temp.age == float(counter), "age"].count()) + ages_plot.append(counter) + mean = np.average(ages_plot, weights=total_plot) + + # First view: bar chart + plt.bar(ages_plot, total_plot) + plt.axvline(x=mean, color="red") # draw the mean age as a vertical line + plt.xlabel("Age (in years)") + plt.ylabel("Number of people") + plt.title("Distribution of age") + plt.show() + + # Second view: histogram + ages = data_temp["age"] + plt.hist(ages, facecolor="gray", align="mid") + plt.xlabel("Age (in years)") + plt.ylabel("Number of people") + plt.title("Distribution of age") + plt.show() + + # 6. distribution of incomes + incomes = data["income"] + plt.hist(incomes, facecolor="gray", align="mid") + plt.title("Distribution of monthly income") + plt.xlabel("Monthly income (in euros)") + plt.ylabel("Number of people") + plt.show() + + # 7. distribution of reasons for transfer + transfers = data["reason"].value_counts() + labels = transfers.index.tolist() + sizes = [transfers[i] for i in range(len(transfers))] + plt.pie(sizes, labels=labels, autopct="%.2f", shadow=True, startangle=90) + plt.axis("equal") + plt.title("Distribution of the reason of job transfer") + plt.show() + + # 8. distribution of geographic positions + geo = pd.concat([data[OLD_PREFIX + "geographical_position"], data[NEW_PREFIX + "geographical_position"]], ignore_index=True) + split_geo = [geo[i].split()[0] if not isinstance(geo[i], float) else "" for i in range(len(geo))] + set_geo = set(split_geo) + uniques = [split_geo.count(elem) for elem in set_geo] + labels = set_geo + plt.pie(uniques, labels=labels, autopct="%.2f", shadow=True, startangle=90) + plt.title("Distribution of the geographical position") + plt.show() + + # 9. 
draw evolutions before and after job transfer for each EV + cleaning.to_chart("occupation", "status", "Evolution of status before and after job transfer") + cleaning.to_chart("building_type", "building_type", "Evolution of building type before and after job transfer") + cleaning.to_chart("building_usage", "building_usage", "Evolution of building usage before and after job transfer") + cleaning.to_chart("landscape", "landscapes", "Evolution of landscapes before and after job transfer") + cleaning.to_chart("social_class", "social", "Evolution of social classes before and after job transfer") + cleaning.to_chart("morphological_position", "morpho", "Evolution of morphological positions before and after job transfer") + cleaning.to_chart("geographical_position", "geo", "Evolution of geographical positions before and after job transfer")</code></pre> +</details> +</dd> +</dl> +</section> +<section> +</section> +</article> +<nav id="sidebar"> +<h1>Index</h1> +<div class="toc"> +<ul></ul> +</div> +<ul id="index"> +<li><h3><a href="#header-functions">Functions</a></h3> +<ul class=""> +<li><code><a title="cleaning.clean" href="#cleaning.clean">clean</a></code></li> +</ul> +</li> +</ul> +</nav> +</main> +<footer id="footer"> +<p>Generated by <a href="https://pdoc3.github.io/pdoc"><cite>pdoc</cite> 0.8.1</a>.</p> +</footer> +<script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/highlight.min.js"></script> +<script>hljs.initHighlightingOnLoad()</script> +</body> +</html> \ No newline at end of file diff --git a/doc/config.html b/doc/config.html new file mode 100644 index 00000000..2a0bf795 --- /dev/null +++ b/doc/config.html @@ -0,0 +1,153 @@ +<!doctype html> +<html lang="en"> +<head> +<meta charset="utf-8"> +<meta name="viewport" content="width=device-width, initial-scale=1, minimum-scale=1" /> +<meta name="generator" content="pdoc 0.8.1" /> +<title>config API documentation</title> +<meta name="description" content="" /> +<link href='https://cdnjs.cloudflare.com/ajax/libs/normalize/8.0.0/normalize.min.css' rel='stylesheet'> +<link href='https://cdnjs.cloudflare.com/ajax/libs/10up-sanitize.css/8.0.0/sanitize.min.css' rel='stylesheet'> +<link href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/styles/github.min.css" rel="stylesheet"> +<style>.flex{display:flex !important}body{line-height:1.5em}#content{padding:20px}#sidebar{padding:30px;overflow:hidden}#sidebar > *:last-child{margin-bottom:2cm}.http-server-breadcrumbs{font-size:130%;margin:0 0 15px 0}#footer{font-size:.75em;padding:5px 30px;border-top:1px solid #ddd;text-align:right}#footer p{margin:0 0 0 1em;display:inline-block}#footer p:last-child{margin-right:30px}h1,h2,h3,h4,h5{font-weight:300}h1{font-size:2.5em;line-height:1.1em}h2{font-size:1.75em;margin:1em 0 .50em 0}h3{font-size:1.4em;margin:25px 0 10px 0}h4{margin:0;font-size:105%}a{color:#058;text-decoration:none;transition:color .3s ease-in-out}a:hover{color:#e82}.title code{font-weight:bold}h2[id^="header-"]{margin-top:2em}.ident{color:#900}pre code{background:#f8f8f8;font-size:.8em;line-height:1.4em}code{background:#f2f2f1;padding:1px 4px;overflow-wrap:break-word}h1 code{background:transparent}pre{background:#f8f8f8;border:0;border-top:1px solid #ccc;border-bottom:1px solid #ccc;margin:1em 0;padding:1ex}#http-server-module-list{display:flex;flex-flow:column}#http-server-module-list div{display:flex}#http-server-module-list dt{min-width:10%}#http-server-module-list p{margin-top:0}.toc ul,#index{list-style-type:none;margin:0;padding:0}#index 
code{background:transparent}#index h3{border-bottom:1px solid #ddd}#index ul{padding:0}#index h4{margin-top:.6em;font-weight:bold}@media (min-width:200ex){#index .two-column{column-count:2}}@media (min-width:300ex){#index .two-column{column-count:3}}dl{margin-bottom:2em}dl dl:last-child{margin-bottom:4em}dd{margin:0 0 1em 3em}#header-classes + dl > dd{margin-bottom:3em}dd dd{margin-left:2em}dd p{margin:10px 0}.name{background:#eee;font-weight:bold;font-size:.85em;padding:5px 10px;display:inline-block;min-width:40%}.name:hover{background:#e0e0e0}.name > span:first-child{white-space:nowrap}.name.class > span:nth-child(2){margin-left:.4em}.inherited{color:#999;border-left:5px solid #eee;padding-left:1em}.inheritance em{font-style:normal;font-weight:bold}.desc h2{font-weight:400;font-size:1.25em}.desc h3{font-size:1em}.desc dt code{background:inherit}.source summary,.git-link-div{color:#666;text-align:right;font-weight:400;font-size:.8em;text-transform:uppercase}.source summary > *{white-space:nowrap;cursor:pointer}.git-link{color:inherit;margin-left:1em}.source pre{max-height:500px;overflow:auto;margin:0}.source pre code{font-size:12px;overflow:visible}.hlist{list-style:none}.hlist li{display:inline}.hlist li:after{content:',\2002'}.hlist li:last-child:after{content:none}.hlist .hlist{display:inline;padding-left:1em}img{max-width:100%}.admonition{padding:.1em .5em;margin-bottom:1em}.admonition-title{font-weight:bold}.admonition.note,.admonition.info,.admonition.important{background:#aef}.admonition.todo,.admonition.versionadded,.admonition.tip,.admonition.hint{background:#dfd}.admonition.warning,.admonition.versionchanged,.admonition.deprecated{background:#fd4}.admonition.error,.admonition.danger,.admonition.caution{background:lightpink}</style> +<style media="screen and (min-width: 700px)">@media screen and (min-width:700px){#sidebar{width:30%;height:100vh;overflow:auto;position:sticky;top:0}#content{width:70%;max-width:100ch;padding:3em 4em;border-left:1px solid #ddd}pre code{font-size:1em}.item .name{font-size:1em}main{display:flex;flex-direction:row-reverse;justify-content:flex-end}.toc ul ul,#index ul{padding-left:1.5em}.toc > ul > li{margin-top:.5em}}</style> +<style media="print">@media print{#sidebar h1{page-break-before:always}.source{display:none}}@media print{*{background:transparent !important;color:#000 !important;box-shadow:none !important;text-shadow:none !important}a[href]:after{content:" (" attr(href) ")";font-size:90%}a[href][title]:after{content:none}abbr[title]:after{content:" (" attr(title) ")"}.ir a:after,a[href^="javascript:"]:after,a[href^="#"]:after{content:""}pre,blockquote{border:1px solid #999;page-break-inside:avoid}thead{display:table-header-group}tr,img{page-break-inside:avoid}img{max-width:100% !important}@page{margin:0.5cm}p,h2,h3{orphans:3;widows:3}h1,h2,h3,h4,h5,h6{page-break-after:avoid}}</style> +</head> +<body> +<main> +<article id="content"> +<header> +<h1 class="title">Module <code>config</code></h1> +</header> +<section id="section-intro"> +<details class="source"> +<summary> +<span>Expand source code</span> +</summary> +<pre><code class="python">import os + +from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier +from sklearn.neighbors import KNeighborsClassifier +from sklearn.neural_network import MLPClassifier +from sklearn.svm import SVC +from sklearn.tree import DecisionTreeClassifier + +# 1. 
define files used by Predihood (reading and writing files) +FOLDER_DATA = "generated_files" +FOLDER_CLASSES = "classes" + +if not os.path.exists(FOLDER_DATA): os.mkdir(FOLDER_DATA) +FOLDER_DATASETS = os.path.join(FOLDER_DATA, "datasets") +if not os.path.exists(FOLDER_DATASETS): os.mkdir(FOLDER_DATASETS) +FOLDER_DISTRIBUTION = os.path.join(FOLDER_DATA, "distribution-plots") +if not os.path.exists(FOLDER_DISTRIBUTION): os.mkdir(FOLDER_DISTRIBUTION) +FOLDER_SELECTED_INDICATORS = os.path.join(FOLDER_DATA, "selected-indicators") +if not os.path.exists(FOLDER_SELECTED_INDICATORS): os.mkdir(FOLDER_SELECTED_INDICATORS) + +FILE_HIERARCHY = os.path.join(FOLDER_CLASSES, "hierarchy.csv") +FILE_DATA_HIL = os.path.join(FOLDER_CLASSES, "data.xlsx") +FILE_GROUPING = os.path.join(FOLDER_CLASSES, "regrouping.csv") +FILE_CLEANED_DATA = os.path.join(FOLDER_DATA, "cleaned_data.csv") +FILE_ENV = os.path.join(FOLDER_DATASETS, "data.csv") +FILE_SIMILARITIES = os.path.join(FOLDER_DATA, "similarities.csv") +FILE_LIST_DISTRIBUTION = os.path.join(FOLDER_SELECTED_INDICATORS, "selection-distribution.csv") +FILE_MANUAL_ASSESSMENT = os.path.join(FOLDER_CLASSES, "manual_assessment.csv") + +# 2. define some constants +TITLES = False # set True to display titles on plots +OLD_PREFIX, NEW_PREFIX = "old_", "new_" +RANDOM_STATE = 0 # make classifiers deterministic +TRAIN_SIZE, TEST_SIZE = 0.8, 0.2 +TOPS_K = [10, 20, 30, 40, 50, 75, 100] # define top-k to generate lists of selected indicators + +# 3. define available classifiers for the algorithmic interface +AVAILABLE_CLASSIFIERS = { + "RandomForestClassifier": RandomForestClassifier, + "KNeighborsClassifier": KNeighborsClassifier, + "DecisionTreeClassifier": DecisionTreeClassifier, + "SVC": SVC, + "AdaBoostClassifier": AdaBoostClassifier, + "MLPClassifier": MLPClassifier +} + +# 4. define EV, the possible values for each one and their translation (because data come from a French company). +ENVIRONMENT_VALUES = { + "building_type": { + "Maisons": "Houses", + "Mixte": "Mixed", + "Immeubles": "Towers", + "Grand ensemble": "Housing estates", + "Lotissement": "Housing subdivisions" + }, + "building_usage": { + "Résidentiel": "Housing", + "Commerçant": "Shopping", + "Autres activités": "Other activities", + }, + "landscape": { + "Urbanisé": "Urban", + "Espaces verts": "Green areas", + "Arboré": "Forest", + "Agricole": "Countryside", + }, + "morphological_position": { + "Central": "Central", + "Urbain": "Urban", + "Péri-urbain": "Peri-urban", + "Rural": "Rural" + }, + "geographical_position": { + "Centre": "Centre", + "Nord": "North", + "Sud": "South", + "Est": "East", + "Ouest": "West", + "Nord-Est": "North East", + "Sud-Est": "South East", + "Nord-Ouest": "North West", + "Sud-Ouest": "South West" + }, + "social_class": { + "Popu": "Lower", + "Moyen-inf": "Lower middle", + "Moyen": "Middle", + "Moyen-sup": "Upper middle", + "Sup": "Upper" + } +} + +# names of EV, i.e. ['building_type', 'building_usage', 'landscape', 'morphological_position', 'geographical_position', 'social_class'] +ENVIRONMENT_VARIABLES = list(ENVIRONMENT_VALUES.keys()) + +# translation of each value from French to English, i.e. 
+
+# translation of each value from French to English, i.e. a flat dictionary without the top level of EV names
+TRANSLATION = {}
+for ev in ENVIRONMENT_VALUES:
+    for key in ENVIRONMENT_VALUES[ev]:
+        TRANSLATION[key] = ENVIRONMENT_VALUES[ev][key]</code></pre>
+</details>
+</section>
+<section>
+</section>
+<section>
+</section>
+<section>
+</section>
+<section>
+</section>
+</article>
+<nav id="sidebar">
+<h1>Index</h1>
+<div class="toc">
+<ul></ul>
+</div>
+<ul id="index">
+</ul>
+</nav>
+</main>
+<footer id="footer">
+<p>Generated by <a href="https://pdoc3.github.io/pdoc"><cite>pdoc</cite> 0.8.1</a>.</p>
+</footer>
+<script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/highlight.min.js"></script>
+<script>hljs.initHighlightingOnLoad()</script>
+</body>
+</html>
\ No newline at end of file
diff --git a/doc/main.html b/doc/main.html
new file mode 100644
index 00000000..cd041ff0
--- /dev/null
+++ b/doc/main.html
@@ -0,0 +1,683 @@
+<!doctype html>
+<html lang="en">
+<head>
+<meta charset="utf-8">
+<meta name="viewport" content="width=device-width, initial-scale=1, minimum-scale=1" />
+<meta name="generator" content="pdoc 0.8.1" />
+<title>main API documentation</title>
+<meta name="description" content="" />
+<link href='https://cdnjs.cloudflare.com/ajax/libs/normalize/8.0.0/normalize.min.css' rel='stylesheet'>
+<link href='https://cdnjs.cloudflare.com/ajax/libs/10up-sanitize.css/8.0.0/sanitize.min.css' rel='stylesheet'>
+<link href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/styles/github.min.css" rel="stylesheet">
+<style>.flex{display:flex !important}body{line-height:1.5em}#content{padding:20px}#sidebar{padding:30px;overflow:hidden}#sidebar > *:last-child{margin-bottom:2cm}.http-server-breadcrumbs{font-size:130%;margin:0 0 15px 0}#footer{font-size:.75em;padding:5px 30px;border-top:1px solid #ddd;text-align:right}#footer p{margin:0 0 0 1em;display:inline-block}#footer p:last-child{margin-right:30px}h1,h2,h3,h4,h5{font-weight:300}h1{font-size:2.5em;line-height:1.1em}h2{font-size:1.75em;margin:1em 0 .50em 0}h3{font-size:1.4em;margin:25px 0 10px 0}h4{margin:0;font-size:105%}a{color:#058;text-decoration:none;transition:color .3s ease-in-out}a:hover{color:#e82}.title code{font-weight:bold}h2[id^="header-"]{margin-top:2em}.ident{color:#900}pre code{background:#f8f8f8;font-size:.8em;line-height:1.4em}code{background:#f2f2f1;padding:1px 4px;overflow-wrap:break-word}h1 code{background:transparent}pre{background:#f8f8f8;border:0;border-top:1px solid #ccc;border-bottom:1px solid #ccc;margin:1em 0;padding:1ex}#http-server-module-list{display:flex;flex-flow:column}#http-server-module-list div{display:flex}#http-server-module-list dt{min-width:10%}#http-server-module-list p{margin-top:0}.toc ul,#index{list-style-type:none;margin:0;padding:0}#index code{background:transparent}#index h3{border-bottom:1px solid #ddd}#index ul{padding:0}#index h4{margin-top:.6em;font-weight:bold}@media (min-width:200ex){#index .two-column{column-count:2}}@media (min-width:300ex){#index .two-column{column-count:3}}dl{margin-bottom:2em}dl dl:last-child{margin-bottom:4em}dd{margin:0 0 1em 3em}#header-classes + dl > dd{margin-bottom:3em}dd dd{margin-left:2em}dd p{margin:10px 0}.name{background:#eee;font-weight:bold;font-size:.85em;padding:5px 10px;display:inline-block;min-width:40%}.name:hover{background:#e0e0e0}.name > span:first-child{white-space:nowrap}.name.class > span:nth-child(2){margin-left:.4em}.inherited{color:#999;border-left:5px solid #eee;padding-left:1em}.inheritance em{font-style:normal;font-weight:bold}.desc h2{font-weight:400;font-size:1.25em}.desc 
h3{font-size:1em}.desc dt code{background:inherit}.source summary,.git-link-div{color:#666;text-align:right;font-weight:400;font-size:.8em;text-transform:uppercase}.source summary > *{white-space:nowrap;cursor:pointer}.git-link{color:inherit;margin-left:1em}.source pre{max-height:500px;overflow:auto;margin:0}.source pre code{font-size:12px;overflow:visible}.hlist{list-style:none}.hlist li{display:inline}.hlist li:after{content:',\2002'}.hlist li:last-child:after{content:none}.hlist .hlist{display:inline;padding-left:1em}img{max-width:100%}.admonition{padding:.1em .5em;margin-bottom:1em}.admonition-title{font-weight:bold}.admonition.note,.admonition.info,.admonition.important{background:#aef}.admonition.todo,.admonition.versionadded,.admonition.tip,.admonition.hint{background:#dfd}.admonition.warning,.admonition.versionchanged,.admonition.deprecated{background:#fd4}.admonition.error,.admonition.danger,.admonition.caution{background:lightpink}</style> +<style media="screen and (min-width: 700px)">@media screen and (min-width:700px){#sidebar{width:30%;height:100vh;overflow:auto;position:sticky;top:0}#content{width:70%;max-width:100ch;padding:3em 4em;border-left:1px solid #ddd}pre code{font-size:1em}.item .name{font-size:1em}main{display:flex;flex-direction:row-reverse;justify-content:flex-end}.toc ul ul,#index ul{padding-left:1.5em}.toc > ul > li{margin-top:.5em}}</style> +<style media="print">@media print{#sidebar h1{page-break-before:always}.source{display:none}}@media print{*{background:transparent !important;color:#000 !important;box-shadow:none !important;text-shadow:none !important}a[href]:after{content:" (" attr(href) ")";font-size:90%}a[href][title]:after{content:none}abbr[title]:after{content:" (" attr(title) ")"}.ir a:after,a[href^="javascript:"]:after,a[href^="#"]:after{content:""}pre,blockquote{border:1px solid #999;page-break-inside:avoid}thead{display:table-header-group}tr,img{page-break-inside:avoid}img{max-width:100% !important}@page{margin:0.5cm}p,h2,h3{orphans:3;widows:3}h1,h2,h3,h4,h5,h6{page-break-after:avoid}}</style> +</head> +<body> +<main> +<article id="content"> +<header> +<h1 class="title">Module <code>main</code></h1> +</header> +<section id="section-intro"> +<details class="source"> +<summary> +<span>Expand source code</span> +</summary> +<pre><code class="python">#!/usr/bin/env python +# encoding: utf-8 +# ============================================================================= +# main.py: runs the Flask server (http://flask.pocoo.org/) using routes +# ============================================================================= + +import json +import logging +import os +import webbrowser + +from flask import Flask, flash, render_template, request, send_from_directory +from predihood import model +from predihood.classes.Data import Data +from predihood.config import AVAILABLE_CLASSIFIERS, TOPS_K, ENVIRONMENT_VALUES +from predihood.predict import compute_all_accuracies, predict_one_iris +from predihood.utility_functions import signature, get_classifier, set_classifier, add_assessment_to_file +from sklearn.utils._testing import ignore_warnings + +log = logging.getLogger(__name__) + +app = Flask(__name__) +app.config["JSON_SORT_KEYS"] = False +url = "http://127.0.0.1:8081/" + + +@app.route('/', defaults={'page': None}) +def index(page): + """ + Render the main page of the interface, i.e. `index.html`. + + Args: + page: The name of the specific page to display. + + Returns: + The page to display. 
+    """
+    if not model.db.connection_status: # if there is no connection, display a flash message
+        flash("Could not connect to the MongoDB database! Check the connection.", "danger")
+    return render_template('index.html')
+
+
+@app.route('/algorithms.html', methods=["GET"])
+def get_algorithms_page():
+    """
+    Render the page of the algorithmic interface, i.e. `algorithms.html`.
+
+    Returns:
+        The page to display.
+    """
+    return render_template('algorithms.html')
+
+
+@app.route('/details-iris.html', methods=["GET"])
+def get_details_iris():
+    """
+    Get all information about the given IRIS.
+
+    Returns:
+        A page that contains all information about the given IRIS (descriptive, grouped and raw indicators).
+    """
+    code_iris = request.args['code_iris']
+    iris = model.get_iris_from_code(code_iris)
+    dict_code_label = model.parse_json_to_dict(model.json_iris_indicator_code_to_label)
+    if iris is None:
+        flash("No corresponding iris for code " + code_iris + ".", "warning")
+    return render_template('details-iris.html', iris=iris, dict_code_label=dict_code_label)
+
+
+@app.route('/getClassifiers', methods=["GET"])
+def get_classifiers():
+    """
+    Get list of available classifiers (stored in AVAILABLE_CLASSIFIERS).
+
+    Returns:
+        A list containing the names of the available classifiers.
+    """
+    return {"classifiers": list(AVAILABLE_CLASSIFIERS.keys())}
+
+
+@app.route('/getParameters', methods=["GET"])
+def get_parameters():
+    """
+    Get the parameters of the given classifier, given its name.
+
+    Returns:
+        A dictionary containing for each parameter its name, its types, its default value, and its definition.
+    """
+    if 'name' in request.args:
+        name = request.args['name']
+        parameters = signature(name)
+        return parameters
+    else:
+        return {} # empty response when no classifier name is given
+
+
+@app.route('/run', methods=["GET"])
+@ignore_warnings(category=FutureWarning)
+def run_algorithm():
+    """
+    Run the given classifier on the data with the specified parameters.
+
+    Returns:
+        The computed accuracies for each EV and each list. The top-k values are also returned.
+    """
+    # 1. get parameters specified by the user
+    clf_name = request.args['clf']
+    parameters = json.loads(request.args['parameters'])
+    train_size = parameters["train_size"]
+    test_size = parameters["test_size"]
+    remove_outliers = parameters["remove_outliers"]
+    remove_rural = parameters["remove_rural"]
+
+    # 2. create an instance of the given classifier and tune it with the user's parameters
+    clf = get_classifier(clf_name)
+    clf = set_classifier(clf, parameters)
+    log.info(clf)
+
+    # 3. run experiment on data to get accuracies for each EV and each list of selected indicators
+    data = Data(normalization="density", filtering=True)
+    data.init_all_in_one()
+    accuracies = compute_all_accuracies(data, clf, train_size, test_size, remove_outliers, remove_rural)
+    return {"results": accuracies, "tops_k": TOPS_K}
+
+
+@app.route('/predict_iris', methods=["GET"])
+def predict_iris():
+    """
+    Predict the environment (i.e. the 6 EVs) of the given IRIS.
+
+    Returns:
+        Predictions for each EV
+    """
+    iris_code_to_predict = request.args['iris_code']
+    clf_name = request.args['algorithm_name']
+    clf = get_classifier(clf_name)
+
+    data = Data(normalization="density", filtering=True)
+    data.init_all_in_one()
+    predictions = predict_one_iris(iris_code_to_predict, data, clf, 0.8, 0.2, False) # train_size=0.8, test_size=0.2, remove_outliers=False
+    return {"predictions": predictions}
+
+
+@app.route('/getIrisPolygon', methods=["GET"])
+def get_iris_for_polygon():
+    """
+    Get the list of IRIS in the given polygon. The polygon is defined by two points given in the AJAX request.
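+
+    Example request (illustrative coordinates around Lyon): /getIrisPolygon?lat1=45.75&lng1=4.79&lat2=45.78&lng2=4.88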
+
+    Returns:
+        A list of the IRIS that are in the given polygon.
+    """
+    lat1 = float(request.args['lat1'])
+    lng1 = float(request.args['lng1'])
+    lat2 = float(request.args['lat2'])
+    lng2 = float(request.args['lng2'])
+    iris = model.get_iris_for_polygon(lat1, lng1, lat2, lng2)
+    if iris is None or len(iris) == 0:
+        flash("No iris found in the area.", "warning")
+    else:
+        flash(str(len(iris)) + " iris found in the area.", "success")
+    return json.dumps({'status': 'OK', 'geojson': iris})
+
+
+@app.route('/countIrisPolygon', methods=["GET"])
+def count_iris_for_polygon():
+    """
+    Count the number of IRIS in the given polygon. The polygon is defined by two points given in the AJAX request.
+
+    Returns:
+        The number of IRIS that are in the given polygon.
+    """
+    lat1 = float(request.args['lat1'])
+    lng1 = float(request.args['lng1'])
+    lat2 = float(request.args['lat2'])
+    lng2 = float(request.args['lng2'])
+    nb_iris = model.count_iris_for_polygon(lat1, lng1, lat2, lng2)
+    return json.dumps({'status': 'OK', 'nbIris': nb_iris})
+
+
+@app.route('/searchCode', methods=["GET"])
+def get_iris_from_code():
+    """
+    Get an IRIS object (represented by a dictionary) given its code. The code is a string of 9 digits.
+
+    Returns:
+        An object that represents the IRIS corresponding to the given code.
+    """
+    code_iris = request.args['codeIris']
+    iris = model.get_iris_from_code(code_iris)
+    if iris is None:
+        flash("No corresponding iris for code " + code_iris + ".", "warning")
+    else:
+        flash("Found iris " + code_iris + ".", "success")
+    return json.dumps({'status': 'OK', 'geojson': iris})
+
+
+@app.route('/searchName', methods=["GET"])
+def get_iris_from_name():
+    """
+    Get IRIS given a name. The search is done on both the IRIS name and the city name.
+
+    Returns:
+        A list of IRIS corresponding to the given name.
+    """
+    query = request.args['querySearch']
+    iris = model.get_iris_from_name(query)
+    if iris is None or len(iris) == 0:
+        flash("No corresponding iris for query " + query + ".", "warning")
+    else:
+        flash(str(len(iris)) + " iris found for query " + query + ".", "success")
+    return json.dumps({'status': 'OK', 'geojson': iris})
+
+
+@app.route('/add_iris_to_csv', methods=["GET"])
+def add_iris_to_csv():
+    """
+    Add an assessed IRIS to a CSV file. Not available in production mode.
+
+    Returns:
+        A message explaining the status of the request, i.e. OK if the IRIS has been added, KO otherwise.
+    """
+    assessed_values = []
+    for env in ENVIRONMENT_VALUES:
+        assessed_values.append(request.args[env])
+    message = add_assessment_to_file(request.args['code_iris'], assessed_values)
+    return json.dumps({"status": message})
+
+
+@app.route('/favicon.ico')
+@app.route('/<page>/favicon.ico')
+def favicon(page=None): # the unused `page` argument lets the nested route match
+    """
+    Display the favicon.
+
+    Returns:
+        The favicon.
+    """
+    return send_from_directory(os.path.join(app.root_path, 'static'), 'favicon.png', mimetype='image/favicon.png')
+
+
+if __name__ == '__main__':
+    webbrowser.open_new(url)
+    app.config['SEND_FILE_MAX_AGE_DEFAULT'] = 0 # do not cache files, especially static files such as JS
+    app.secret_key = 's3k_5Et#fL45k_#ranD0m-(StuF7)'
+    app.run(port=8081) # debug = True</code></pre>
+</details>
+</section>
+<section>
+</section>
+<section>
+</section>
+<section>
+<h2 class="section-title" id="header-functions">Functions</h2>
+<dl>
+<dt id="main.add_iris_to_csv"><code class="name flex">
+<span>def <span class="ident">add_iris_to_csv</span></span>(<span>)</span>
+</code></dt>
+<dd>
+<div class="desc"><p>Add an assessed IRIS to a CSV file.
Not available in production mode.</p>
+<h2 id="returns">Returns</h2>
+<p>A message explaining the status of the request, i.e. OK if the IRIS has been added, KO otherwise.</p></div>
+<details class="source">
+<summary>
+<span>Expand source code</span>
+</summary>
+<pre><code class="python">@app.route('/add_iris_to_csv', methods=["GET"])
+def add_iris_to_csv():
+    """
+    Add an assessed IRIS to a CSV file. Not available in production mode.
+
+    Returns:
+        A message explaining the status of the request, i.e. OK if the IRIS has been added, KO otherwise.
+    """
+    assessed_values = []
+    for env in ENVIRONMENT_VALUES:
+        assessed_values.append(request.args[env])
+    message = add_assessment_to_file(request.args['code_iris'], assessed_values)
+    return json.dumps({"status": message})</code></pre>
+</details>
+</dd>
+<dt id="main.count_iris_for_polygon"><code class="name flex">
+<span>def <span class="ident">count_iris_for_polygon</span></span>(<span>)</span>
+</code></dt>
+<dd>
+<div class="desc"><p>Count the number of IRIS in the given polygon. The polygon is defined by two points given in the AJAX request.</p>
+<h2 id="returns">Returns</h2>
+<p>The number of IRIS that are in the given polygon.</p></div>
+<details class="source">
+<summary>
+<span>Expand source code</span>
+</summary>
+<pre><code class="python">@app.route('/countIrisPolygon', methods=["GET"])
+def count_iris_for_polygon():
+    """
+    Count the number of IRIS in the given polygon. The polygon is defined by two points given in the AJAX request.
+
+    Returns:
+        The number of IRIS that are in the given polygon.
+    """
+    lat1 = float(request.args['lat1'])
+    lng1 = float(request.args['lng1'])
+    lat2 = float(request.args['lat2'])
+    lng2 = float(request.args['lng2'])
+    nb_iris = model.count_iris_for_polygon(lat1, lng1, lat2, lng2)
+    return json.dumps({'status': 'OK', 'nbIris': nb_iris})</code></pre>
+</details>
+</dd>
+<dt id="main.favicon"><code class="name flex">
+<span>def <span class="ident">favicon</span></span>(<span>page=None</span>)</span>
+</code></dt>
+<dd>
+<div class="desc"><p>Display the favicon.</p>
+<h2 id="returns">Returns</h2>
+<p>The favicon.</p></div>
+<details class="source">
+<summary>
+<span>Expand source code</span>
+</summary>
+<pre><code class="python">@app.route('/favicon.ico')
+@app.route('/<page>/favicon.ico')
+def favicon(page=None): # the unused `page` argument lets the nested route match
+    """
+    Display the favicon.
+
+    Returns:
+        The favicon.
+    """
+    return send_from_directory(os.path.join(app.root_path, 'static'), 'favicon.png', mimetype='image/favicon.png')</code></pre>
+</details>
+</dd>
+<dt id="main.get_algorithms_page"><code class="name flex">
+<span>def <span class="ident">get_algorithms_page</span></span>(<span>)</span>
+</code></dt>
+<dd>
+<div class="desc"><p>Render the page of the algorithmic interface, i.e. <code>algorithms.html</code>.</p>
+<h2 id="returns">Returns</h2>
+<p>The page to display.</p></div>
+<details class="source">
+<summary>
+<span>Expand source code</span>
+</summary>
+<pre><code class="python">@app.route('/algorithms.html', methods=["GET"])
+def get_algorithms_page():
+    """
+    Render the page of the algorithmic interface, i.e. `algorithms.html`.
+
+    Returns:
+        The page to display.
+ """ + return render_template('algorithms.html')</code></pre> +</details> +</dd> +<dt id="main.get_classifiers"><code class="name flex"> +<span>def <span class="ident">get_classifiers</span></span>(<span>)</span> +</code></dt> +<dd> +<div class="desc"><p>Get list of available classifiers (stored in AVAILABLE_CLASSIFIERS).</p> +<h2 id="returns">Returns</h2> +<p>A list containing the names of the available classifiers.</p></div> +<details class="source"> +<summary> +<span>Expand source code</span> +</summary> +<pre><code class="python">@app.route('/getClassifiers', methods=["GET"]) +def get_classifiers(): + """ + Get list of available classifiers (stored in AVAILABLE_CLASSIFIERS). + + Returns: + A list containing the names of the available classifiers. + """ + return {"classifiers": list(AVAILABLE_CLASSIFIERS.keys())}</code></pre> +</details> +</dd> +<dt id="main.get_details_iris"><code class="name flex"> +<span>def <span class="ident">get_details_iris</span></span>(<span>)</span> +</code></dt> +<dd> +<div class="desc"><p>Get all information about the given IRIS.</p> +<h2 id="returns">Returns</h2> +<p>A page that contains all information about the given IRIS (descriptive, grouped and raw indicators).</p></div> +<details class="source"> +<summary> +<span>Expand source code</span> +</summary> +<pre><code class="python">@app.route('/details-iris.html', methods=["GET"]) +def get_details_iris(): + """ + Get all information about the given IRIS. + + Returns: + A page that contains all information about the given IRIS (descriptive, grouped and raw indicators). + """ + code_iris = request.args['code_iris'] + iris = model.get_iris_from_code(code_iris) + dict_code_label = model.parse_json_to_dict(model.json_iris_indicator_code_to_label) + if iris is None: + flash("No corresponding iris for code " + code_iris + ".", "warning") + return render_template('details-iris.html', iris=iris, dict_code_label=dict_code_label)</code></pre> +</details> +</dd> +<dt id="main.get_iris_for_polygon"><code class="name flex"> +<span>def <span class="ident">get_iris_for_polygon</span></span>(<span>)</span> +</code></dt> +<dd> +<div class="desc"><p>Get the list of IRIS in the given polygon. The polygon is defined by two points given in the AJAX request.</p> +<h2 id="returns">Returns</h2> +<p>A list of the IRIS that are in the given polygon.</p></div> +<details class="source"> +<summary> +<span>Expand source code</span> +</summary> +<pre><code class="python">@app.route('/getIrisPolygon', methods=["GET"]) +def get_iris_for_polygon(): + """ + Get the list of IRIS in the given polygon. The polygon is defined by two points given in the AJAX request. + + Returns: + A list of the IRIS that are in the given polygon. + """ + lat1 = float(request.args['lat1']) + lng1 = float(request.args['lng1']) + lat2 = float(request.args['lat2']) + lng2 = float(request.args['lng2']) + iris = model.get_iris_for_polygon(lat1, lng1, lat2, lng2) + if iris is None or len(iris) == 0: + flash("No iris found in the area.", "warning") + else: + flash(str(len(iris)) + " iris found in the area.", "success") + return json.dumps({'status': 'OK', 'geojson': iris})</code></pre> +</details> +</dd> +<dt id="main.get_iris_from_code"><code class="name flex"> +<span>def <span class="ident">get_iris_from_code</span></span>(<span>)</span> +</code></dt> +<dd> +<div class="desc"><p>Get an IRIS object (represented by a dictionary) given its code. 
The code is a string of 9 digits.</p>
+<h2 id="returns">Returns</h2>
+<p>An object that represents the IRIS corresponding to the given code.</p></div>
+<details class="source">
+<summary>
+<span>Expand source code</span>
+</summary>
+<pre><code class="python">@app.route('/searchCode', methods=["GET"])
+def get_iris_from_code():
+    """
+    Get an IRIS object (represented by a dictionary) given its code. The code is a string of 9 digits.
+
+    Returns:
+        An object that represents the IRIS corresponding to the given code.
+    """
+    code_iris = request.args['codeIris']
+    iris = model.get_iris_from_code(code_iris)
+    if iris is None:
+        flash("No corresponding iris for code " + code_iris + ".", "warning")
+    else:
+        flash("Found iris " + code_iris + ".", "success")
+    return json.dumps({'status': 'OK', 'geojson': iris})</code></pre>
+</details>
+</dd>
+<dt id="main.get_iris_from_name"><code class="name flex">
+<span>def <span class="ident">get_iris_from_name</span></span>(<span>)</span>
+</code></dt>
+<dd>
+<div class="desc"><p>Get IRIS given a name. The search is done on both the IRIS name and the city name.</p>
+<h2 id="returns">Returns</h2>
+<p>A list of IRIS corresponding to the given name.</p></div>
+<details class="source">
+<summary>
+<span>Expand source code</span>
+</summary>
+<pre><code class="python">@app.route('/searchName', methods=["GET"])
+def get_iris_from_name():
+    """
+    Get IRIS given a name. The search is done on both the IRIS name and the city name.
+
+    Returns:
+        A list of IRIS corresponding to the given name.
+    """
+    query = request.args['querySearch']
+    iris = model.get_iris_from_name(query)
+    if iris is None or len(iris) == 0:
+        flash("No corresponding iris for query " + query + ".", "warning")
+    else:
+        flash(str(len(iris)) + " iris found for query " + query + ".", "success")
+    return json.dumps({'status': 'OK', 'geojson': iris})</code></pre>
+</details>
+</dd>
+<dt id="main.get_parameters"><code class="name flex">
+<span>def <span class="ident">get_parameters</span></span>(<span>)</span>
+</code></dt>
+<dd>
+<div class="desc"><p>Get the parameters of the given classifier, given its name.</p>
+<h2 id="returns">Returns</h2>
+<p>A dictionary containing for each parameter its name, its types, its default value, and its definition.</p></div>
+<details class="source">
+<summary>
+<span>Expand source code</span>
+</summary>
+<pre><code class="python">@app.route('/getParameters', methods=["GET"])
+def get_parameters():
+    """
+    Get the parameters of the given classifier, given its name.
+
+    Returns:
+        A dictionary containing for each parameter its name, its types, its default value, and its definition.
+    """
+    if 'name' in request.args:
+        name = request.args['name']
+        parameters = signature(name)
+        return parameters
+    else:
+        return {} # empty response when no classifier name is given</code></pre>
+</details>
+</dd>
+<dt id="main.index"><code class="name flex">
+<span>def <span class="ident">index</span></span>(<span>page)</span>
+</code></dt>
+<dd>
+<div class="desc"><p>Render the main page of the interface, i.e. <code>index.html</code>.</p>
+<h2 id="args">Args</h2>
+<dl>
+<dt><strong><code>page</code></strong></dt>
+<dd>The name of the specific page to display.</dd>
+</dl>
+<h2 id="returns">Returns</h2>
+<p>The page to display.</p></div>
+<details class="source">
+<summary>
+<span>Expand source code</span>
+</summary>
+<pre><code class="python">@app.route('/', defaults={'page': None})
+def index(page):
+    """
+    Render the main page of the interface, i.e. `index.html`.
+
+    Args:
+        page: The name of the specific page to display.
+
+    Returns:
+        The page to display.
+    """
+    if not model.db.connection_status: # if there is no connection, display a flash message
+        flash("Could not connect to the MongoDB database! Check the connection.", "danger")
+    return render_template('index.html')</code></pre>
+</details>
+</dd>
+<dt id="main.predict_iris"><code class="name flex">
+<span>def <span class="ident">predict_iris</span></span>(<span>)</span>
+</code></dt>
+<dd>
+<div class="desc"><p>Predict the environment (i.e. the 6 EVs) of the given IRIS.</p>
+<h2 id="returns">Returns</h2>
+<dl>
+<dt><code>Predictions for each EV</code></dt>
+<dd> </dd>
+</dl></div>
+<details class="source">
+<summary>
+<span>Expand source code</span>
+</summary>
+<pre><code class="python">@app.route('/predict_iris', methods=["GET"])
+def predict_iris():
+    """
+    Predict the environment (i.e. the 6 EVs) of the given IRIS.
+
+    Returns:
+        Predictions for each EV
+    """
+    iris_code_to_predict = request.args['iris_code']
+    clf_name = request.args['algorithm_name']
+    clf = get_classifier(clf_name)
+
+    data = Data(normalization="density", filtering=True)
+    data.init_all_in_one()
+    predictions = predict_one_iris(iris_code_to_predict, data, clf, 0.8, 0.2, False) # train_size=0.8, test_size=0.2, remove_outliers=False
+    return {"predictions": predictions}</code></pre>
+</details>
+</dd>
+<dt id="main.run_algorithm"><code class="name flex">
+<span>def <span class="ident">run_algorithm</span></span>(<span>)</span>
+</code></dt>
+<dd>
+<div class="desc"><p>Run the given classifier on the data with the specified parameters.</p>
+<h2 id="returns">Returns</h2>
+<p>The computed accuracies for each EV and each list. The top-k values are also returned.</p></div>
+<details class="source">
+<summary>
+<span>Expand source code</span>
+</summary>
+<pre><code class="python">@app.route('/run', methods=["GET"])
+@ignore_warnings(category=FutureWarning)
+def run_algorithm():
+    """
+    Run the given classifier on the data with the specified parameters.
+
+    Returns:
+        The computed accuracies for each EV and each list. The top-k values are also returned.
+    """
+    # 1. get parameters specified by the user
+    clf_name = request.args['clf']
+    parameters = json.loads(request.args['parameters'])
+    train_size = parameters["train_size"]
+    test_size = parameters["test_size"]
+    remove_outliers = parameters["remove_outliers"]
+    remove_rural = parameters["remove_rural"]
+
+    # 2. create an instance of the given classifier and tune it with the user's parameters
+    clf = get_classifier(clf_name)
+    clf = set_classifier(clf, parameters)
+    log.info(clf)
+
+    # 3. 
run experiment on data to get accuracies for each EV and each list of selected indicators + data = Data(normalization="density", filtering=True) + data.init_all_in_one() + accuracies = compute_all_accuracies(data, clf, train_size, test_size, remove_outliers, remove_rural) + return {"results": accuracies, "tops_k": TOPS_K}</code></pre> +</details> +</dd> +</dl> +</section> +<section> +</section> +</article> +<nav id="sidebar"> +<h1>Index</h1> +<div class="toc"> +<ul></ul> +</div> +<ul id="index"> +<li><h3><a href="#header-functions">Functions</a></h3> +<ul class=""> +<li><code><a title="main.add_iris_to_csv" href="#main.add_iris_to_csv">add_iris_to_csv</a></code></li> +<li><code><a title="main.count_iris_for_polygon" href="#main.count_iris_for_polygon">count_iris_for_polygon</a></code></li> +<li><code><a title="main.favicon" href="#main.favicon">favicon</a></code></li> +<li><code><a title="main.get_algorithms_page" href="#main.get_algorithms_page">get_algorithms_page</a></code></li> +<li><code><a title="main.get_classifiers" href="#main.get_classifiers">get_classifiers</a></code></li> +<li><code><a title="main.get_details_iris" href="#main.get_details_iris">get_details_iris</a></code></li> +<li><code><a title="main.get_iris_for_polygon" href="#main.get_iris_for_polygon">get_iris_for_polygon</a></code></li> +<li><code><a title="main.get_iris_from_code" href="#main.get_iris_from_code">get_iris_from_code</a></code></li> +<li><code><a title="main.get_iris_from_name" href="#main.get_iris_from_name">get_iris_from_name</a></code></li> +<li><code><a title="main.get_parameters" href="#main.get_parameters">get_parameters</a></code></li> +<li><code><a title="main.index" href="#main.index">index</a></code></li> +<li><code><a title="main.predict_iris" href="#main.predict_iris">predict_iris</a></code></li> +<li><code><a title="main.run_algorithm" href="#main.run_algorithm">run_algorithm</a></code></li> +</ul> +</li> +</ul> +</nav> +</main> +<footer id="footer"> +<p>Generated by <a href="https://pdoc3.github.io/pdoc"><cite>pdoc</cite> 0.8.1</a>.</p> +</footer> +<script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/highlight.min.js"></script> +<script>hljs.initHighlightingOnLoad()</script> +</body> +</html> \ No newline at end of file diff --git a/doc/model.html b/doc/model.html new file mode 100644 index 00000000..b976e00f --- /dev/null +++ b/doc/model.html @@ -0,0 +1,490 @@ +<!doctype html> +<html lang="en"> +<head> +<meta charset="utf-8"> +<meta name="viewport" content="width=device-width, initial-scale=1, minimum-scale=1" /> +<meta name="generator" content="pdoc 0.8.1" /> +<title>model API documentation</title> +<meta name="description" content="" /> +<link href='https://cdnjs.cloudflare.com/ajax/libs/normalize/8.0.0/normalize.min.css' rel='stylesheet'> +<link href='https://cdnjs.cloudflare.com/ajax/libs/10up-sanitize.css/8.0.0/sanitize.min.css' rel='stylesheet'> +<link href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/styles/github.min.css" rel="stylesheet"> +<style>.flex{display:flex !important}body{line-height:1.5em}#content{padding:20px}#sidebar{padding:30px;overflow:hidden}#sidebar > *:last-child{margin-bottom:2cm}.http-server-breadcrumbs{font-size:130%;margin:0 0 15px 0}#footer{font-size:.75em;padding:5px 30px;border-top:1px solid #ddd;text-align:right}#footer p{margin:0 0 0 1em;display:inline-block}#footer p:last-child{margin-right:30px}h1,h2,h3,h4,h5{font-weight:300}h1{font-size:2.5em;line-height:1.1em}h2{font-size:1.75em;margin:1em 0 .50em 
0}h3{font-size:1.4em;margin:25px 0 10px 0}h4{margin:0;font-size:105%}a{color:#058;text-decoration:none;transition:color .3s ease-in-out}a:hover{color:#e82}.title code{font-weight:bold}h2[id^="header-"]{margin-top:2em}.ident{color:#900}pre code{background:#f8f8f8;font-size:.8em;line-height:1.4em}code{background:#f2f2f1;padding:1px 4px;overflow-wrap:break-word}h1 code{background:transparent}pre{background:#f8f8f8;border:0;border-top:1px solid #ccc;border-bottom:1px solid #ccc;margin:1em 0;padding:1ex}#http-server-module-list{display:flex;flex-flow:column}#http-server-module-list div{display:flex}#http-server-module-list dt{min-width:10%}#http-server-module-list p{margin-top:0}.toc ul,#index{list-style-type:none;margin:0;padding:0}#index code{background:transparent}#index h3{border-bottom:1px solid #ddd}#index ul{padding:0}#index h4{margin-top:.6em;font-weight:bold}@media (min-width:200ex){#index .two-column{column-count:2}}@media (min-width:300ex){#index .two-column{column-count:3}}dl{margin-bottom:2em}dl dl:last-child{margin-bottom:4em}dd{margin:0 0 1em 3em}#header-classes + dl > dd{margin-bottom:3em}dd dd{margin-left:2em}dd p{margin:10px 0}.name{background:#eee;font-weight:bold;font-size:.85em;padding:5px 10px;display:inline-block;min-width:40%}.name:hover{background:#e0e0e0}.name > span:first-child{white-space:nowrap}.name.class > span:nth-child(2){margin-left:.4em}.inherited{color:#999;border-left:5px solid #eee;padding-left:1em}.inheritance em{font-style:normal;font-weight:bold}.desc h2{font-weight:400;font-size:1.25em}.desc h3{font-size:1em}.desc dt code{background:inherit}.source summary,.git-link-div{color:#666;text-align:right;font-weight:400;font-size:.8em;text-transform:uppercase}.source summary > *{white-space:nowrap;cursor:pointer}.git-link{color:inherit;margin-left:1em}.source pre{max-height:500px;overflow:auto;margin:0}.source pre code{font-size:12px;overflow:visible}.hlist{list-style:none}.hlist li{display:inline}.hlist li:after{content:',\2002'}.hlist li:last-child:after{content:none}.hlist .hlist{display:inline;padding-left:1em}img{max-width:100%}.admonition{padding:.1em .5em;margin-bottom:1em}.admonition-title{font-weight:bold}.admonition.note,.admonition.info,.admonition.important{background:#aef}.admonition.todo,.admonition.versionadded,.admonition.tip,.admonition.hint{background:#dfd}.admonition.warning,.admonition.versionchanged,.admonition.deprecated{background:#fd4}.admonition.error,.admonition.danger,.admonition.caution{background:lightpink}</style> +<style media="screen and (min-width: 700px)">@media screen and (min-width:700px){#sidebar{width:30%;height:100vh;overflow:auto;position:sticky;top:0}#content{width:70%;max-width:100ch;padding:3em 4em;border-left:1px solid #ddd}pre code{font-size:1em}.item .name{font-size:1em}main{display:flex;flex-direction:row-reverse;justify-content:flex-end}.toc ul ul,#index ul{padding-left:1.5em}.toc > ul > li{margin-top:.5em}}</style> +<style media="print">@media print{#sidebar h1{page-break-before:always}.source{display:none}}@media print{*{background:transparent !important;color:#000 !important;box-shadow:none !important;text-shadow:none !important}a[href]:after{content:" (" attr(href) ")";font-size:90%}a[href][title]:after{content:none}abbr[title]:after{content:" (" attr(title) ")"}.ir a:after,a[href^="javascript:"]:after,a[href^="#"]:after{content:""}pre,blockquote{border:1px solid #999;page-break-inside:avoid}thead{display:table-header-group}tr,img{page-break-inside:avoid}img{max-width:100% 
!important}@page{margin:0.5cm}p,h2,h3{orphans:3;widows:3}h1,h2,h3,h4,h5,h6{page-break-after:avoid}}</style>
+</head>
+<body>
+<main>
+<article id="content">
+<header>
+<h1 class="title">Module <code>model</code></h1>
+</header>
+<section id="section-intro">
+<details class="source">
+<summary>
+<span>Expand source code</span>
+</summary>
+<pre><code class="python">#!/usr/bin/env python
+# encoding: utf-8
+# =============================================================================
+# model.py: methods for getting data from MongoDB (using mongiris) and transforming it
+# =============================================================================
+
+import json
+import os
+import re
+
+from mongiris.api import Mongiris
+
+# connection to the IRIS collection in MongoDB
+db = Mongiris()
+iris_collection = db.collection_iris
+json_iris_indicator_code_to_label = 'static/data/dictionnaire-indicateurs.json'
+
+
+def get_iris_for_polygon(lat1, lng1, lat2, lng2):
+    """
+    Get IRIS that are in a box represented by given coordinates.
+
+    Args:
+        lat1: a float for latitude of the first point of the box
+        lng1: a float for longitude of the first point of the box
+        lat2: a float for latitude of the second point of the box
+        lng2: a float for longitude of the second point of the box
+
+    Returns:
+        a list of IRIS that are in the box defined by given latitudes and longitudes
+    """
+    # polygon = db.convert_geojson_box_to_polygon(lng1, lat1, lng2, lat2)
+    polygon = Mongiris.convert_geojson_box_to_polygon(lng1, lat1, lng2, lat2)
+    # iris = db.geo_within(iris_collection, polygon)
+    iris = db.intersect(iris_collection, polygon)
+    return iris
+
+
+def count_iris_for_polygon(lat1, lng1, lat2, lng2):
+    """
+    Count IRIS that are in a box represented by given coordinates.
+
+    Args:
+        lat1: a float for latitude of the first point of the box
+        lng1: a float for longitude of the first point of the box
+        lat2: a float for latitude of the second point of the box
+        lng2: a float for longitude of the second point of the box
+
+    Returns:
+        an integer representing the number of IRIS that are in the box defined by given latitudes and longitudes
+    """
+    polygon = db.convert_geojson_box_to_polygon(lng1, lat1, lng2, lat2)
+    iris = db.geo_within(iris_collection, polygon)
+    if iris is None: return 0
+    return len(iris)
+
+
+def get_iris_from_code(code_iris):
+    """
+    Get an IRIS given its code (9 digits).
+
+    Args:
+        code_iris: a string corresponding to the code of the IRIS (the code should be a 9 digits string)
+
+    Returns:
+        an object that represents the IRIS corresponding to the given code
+    """
+    iris = db.get_iris_from_code(code_iris)
+    return iris
+
+
+def get_iris_from_name(name):
+    """
+    Get IRIS given a name.
+
+    Args:
+        name: a string corresponding to the name of the IRIS
+
+    Returns:
+        a list of IRIS documents matching the given name
+    """
+    # the query string (name) is searched in both the name of the iris and the name of the city
+    regx = re.compile(name, re.IGNORECASE)
+    query_clause = {"$or": [{"properties.NOM_IRIS": {"$regex": regx}}, {"properties.NOM_COM": {"$regex": regx}}]}
+    iris = db.find_documents(iris_collection, query_clause)
+    return iris
+
+
+def parse_json_to_dict(json_file_path):
+    """
+    Convert a JSON file to a dictionary object.
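+
+    Example (illustrative): parse_json_to_dict(json_iris_indicator_code_to_label) loads the dictionary mapping indicator codes to labels.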
+ + Args: + json_file_path: a string containing the path of the file to load + + Returns: + a dictionary containing the data in the file located in the given path + """ + assert(os.path.isfile(json_file_path)) + with open(json_file_path) as data_file: + data = json.load(data_file) + data_file.close() + return data + + +def get_coords_from_code(code): + """ + Get geometry of the IRIS corresponding to the given code. + + Args: + code: a string corresponding to the code of the IRIS. The code should be a 9 digits string + + Returns: + a list representing coordinates of the IRIS + """ + iris = db.get_iris_from_code(code) + if iris: + return db.get_geojson_polygon(iris["geometry"]["coordinates"]) + else: + return None + + +def get_indicators_list(): + """ + Get all INSEE indicators that are stored in the database. This corresponds to the collection 'collindic'. + + Returns: + a list containing all names of indicators, e.g. ['POP0002', 'POP0204', ..., 'P14_RP_MAISON'] + """ + list_indicators = db.find_all(db.collection_indic) + return [indicator["short_label"] for indicator in list_indicators] + + +def get_indicators_dict(): + """ + Get all INSEE indicators that are stored in the database. This corresponds to the collection 'collindic'. + + Returns: + a dictionary containing all names of indicators, e.g. {'POP0002': 'Population aged from 0 to 2 y.o.', ..., 'P14_RP_MAISON': 'Number of principal residences'} + """ + list_indicators = db.find_all(db.collection_indic) + return {indicator["short_label"]: indicator["full_label"] for indicator in list_indicators}</code></pre> +</details> +</section> +<section> +</section> +<section> +</section> +<section> +<h2 class="section-title" id="header-functions">Functions</h2> +<dl> +<dt id="model.count_iris_for_polygon"><code class="name flex"> +<span>def <span class="ident">count_iris_for_polygon</span></span>(<span>lat1, lng1, lat2, lng2)</span> +</code></dt> +<dd> +<div class="desc"><p>Count IRIS that are in a box represented by given coordinates.</p> +<h2 id="args">Args</h2> +<dl> +<dt><strong><code>lat1</code></strong></dt> +<dd>a float for latitude of the first point of the box</dd> +<dt><strong><code>lng1</code></strong></dt> +<dd>a float for longitude of the first point of the box</dd> +<dt><strong><code>lat2</code></strong></dt> +<dd>a float for latitude of the second point of the box</dd> +<dt><strong><code>lng2</code></strong></dt> +<dd>a float for longitude of the second point of the box</dd> +</dl> +<h2 id="returns">Returns</h2> +<dl> +<dt><code>an integer representing the number</code> of <code>IRIS that are in the box defined by given latitudes and longitudes</code></dt> +<dd> </dd> +</dl></div> +<details class="source"> +<summary> +<span>Expand source code</span> +</summary> +<pre><code class="python">def count_iris_for_polygon(lat1, lng1, lat2, lng2): + """ + Count IRIS that are in a box represented by given coordinates. 
+ + Args: + lat1: a float for latitude of the first point of the box + lng1: a float for longitude of the first point of the box + lat2: a float for latitude of the second point of the box + lng2: a float for longitude of the second point of the box + + Returns: + an integer representing the number of IRIS that are in the box defined by given latitudes and longitudes + """ + polygon = db.convert_geojson_box_to_polygon(lng1, lat1, lng2, lat2) + iris = db.geo_within(iris_collection, polygon) + if iris is None: return 0 + return len(iris)</code></pre> +</details> +</dd> +<dt id="model.get_coords_from_code"><code class="name flex"> +<span>def <span class="ident">get_coords_from_code</span></span>(<span>code)</span> +</code></dt> +<dd> +<div class="desc"><p>Get geometry of the IRIS corresponding to the given code.</p> +<h2 id="args">Args</h2> +<dl> +<dt><strong><code>code</code></strong></dt> +<dd>a string corresponding to the code of the IRIS. The code should be a 9 digits string</dd> +</dl> +<h2 id="returns">Returns</h2> +<dl> +<dt><code>a list representing coordinates</code> of <code>the IRIS</code></dt> +<dd> </dd> +</dl></div> +<details class="source"> +<summary> +<span>Expand source code</span> +</summary> +<pre><code class="python">def get_coords_from_code(code): + """ + Get geometry of the IRIS corresponding to the given code. + + Args: + code: a string corresponding to the code of the IRIS. The code should be a 9 digits string + + Returns: + a list representing coordinates of the IRIS + """ + iris = db.get_iris_from_code(code) + if iris: + return db.get_geojson_polygon(iris["geometry"]["coordinates"]) + else: + return None</code></pre> +</details> +</dd> +<dt id="model.get_indicators_dict"><code class="name flex"> +<span>def <span class="ident">get_indicators_dict</span></span>(<span>)</span> +</code></dt> +<dd> +<div class="desc"><p>Get all INSEE indicators that are stored in the database. This corresponds to the collection 'collindic'.</p> +<h2 id="returns">Returns</h2> +<dl> +<dt><code>a dictionary containing all names</code> of <code>indicators, e.g. {'POP0002': 'Population aged from 0 to 2 y.o.', ..., 'P14_RP_MAISON': 'Number</code> of <code>principal residences'}</code></dt> +<dd> </dd> +</dl></div> +<details class="source"> +<summary> +<span>Expand source code</span> +</summary> +<pre><code class="python">def get_indicators_dict(): + """ + Get all INSEE indicators that are stored in the database. This corresponds to the collection 'collindic'. + + Returns: + a dictionary containing all names of indicators, e.g. {'POP0002': 'Population aged from 0 to 2 y.o.', ..., 'P14_RP_MAISON': 'Number of principal residences'} + """ + list_indicators = db.find_all(db.collection_indic) + return {indicator["short_label"]: indicator["full_label"] for indicator in list_indicators}</code></pre> +</details> +</dd> +<dt id="model.get_indicators_list"><code class="name flex"> +<span>def <span class="ident">get_indicators_list</span></span>(<span>)</span> +</code></dt> +<dd> +<div class="desc"><p>Get all INSEE indicators that are stored in the database. This corresponds to the collection 'collindic'.</p> +<h2 id="returns">Returns</h2> +<dl> +<dt><code>a list containing all names</code> of <code>indicators, e.g. ['POP0002', 'POP0204', ..., 'P14_RP_MAISON']</code></dt> +<dd> </dd> +</dl></div> +<details class="source"> +<summary> +<span>Expand source code</span> +</summary> +<pre><code class="python">def get_indicators_list(): + """ + Get all INSEE indicators that are stored in the database. 
This corresponds to the collection 'collindic'. + + Returns: + a list containing all names of indicators, e.g. ['POP0002', 'POP0204', ..., 'P14_RP_MAISON'] + """ + list_indicators = db.find_all(db.collection_indic) + return [indicator["short_label"] for indicator in list_indicators]</code></pre> +</details> +</dd> +<dt id="model.get_iris_for_polygon"><code class="name flex"> +<span>def <span class="ident">get_iris_for_polygon</span></span>(<span>lat1, lng1, lat2, lng2)</span> +</code></dt> +<dd> +<div class="desc"><p>Get IRIS that are in a box represented by given coordinates.</p> +<h2 id="args">Args</h2> +<dl> +<dt><strong><code>lat1</code></strong></dt> +<dd>a float for latitude of the first point of the box</dd> +<dt><strong><code>lng1</code></strong></dt> +<dd>a float for longitude of the first point of the box</dd> +<dt><strong><code>lat2</code></strong></dt> +<dd>a float for latitude of the second point of the box</dd> +<dt><strong><code>lng2</code></strong></dt> +<dd>a float for longitude of the second point of the box</dd> +</dl> +<h2 id="returns">Returns</h2> +<dl> +<dt><code>a list</code> of <code>IRIS that are in the box defined by given latitudes and longitudes</code></dt> +<dd> </dd> +</dl></div> +<details class="source"> +<summary> +<span>Expand source code</span> +</summary> +<pre><code class="python">def get_iris_for_polygon(lat1, lng1, lat2, lng2): + """ + Get IRIS that are in a box represented by given coordinates. + + Args: + lat1: a float for latitude of the first point of the box + lng1: a float for longitude of the first point of the box + lat2: a float for latitude of the second point of the box + lng2: a float for longitude of the second point of the box + + Returns: + a list of IRIS that are in the box defined by given latitudes and longitudes + """ + # polygon = db.convert_geojson_box_to_polygon(lng1, lat1, lng2, lat2) + polygon = Mongiris.convert_geojson_box_to_polygon(lng1, lat1, lng2, lat2) + # iris = db.geo_within(iris_collection, polygon) + iris = db.intersect(iris_collection, polygon) + return iris</code></pre> +</details> +</dd> +<dt id="model.get_iris_from_code"><code class="name flex"> +<span>def <span class="ident">get_iris_from_code</span></span>(<span>code_iris)</span> +</code></dt> +<dd> +<div class="desc"><p>Get an IRIS given its code (9 digits).</p> +<h2 id="args">Args</h2> +<dl> +<dt><strong><code>code_iris</code></strong></dt> +<dd>a string corresponding to the code of the IRIS (the code should be a 9 digits string)</dd> +</dl> +<h2 id="returns">Returns</h2> +<dl> +<dt><code>an object that represents the IRIS corresponding to the given code</code></dt> +<dd> </dd> +</dl></div> +<details class="source"> +<summary> +<span>Expand source code</span> +</summary> +<pre><code class="python">def get_iris_from_code(code_iris): + """ + Get an IRIS given its code (9 digits). 
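+
+    Example (illustrative): get_iris_from_code("693830301") returns the IRIS of the Part-Dieu district in Lyon.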
+
+    Args:
+        code_iris: a string corresponding to the code of the IRIS (the code should be a 9 digits string)
+
+    Returns:
+        an object that represents the IRIS corresponding to the given code
+    """
+    iris = db.get_iris_from_code(code_iris)
+    return iris</code></pre>
+</details>
+</dd>
+<dt id="model.get_iris_from_name"><code class="name flex">
+<span>def <span class="ident">get_iris_from_name</span></span>(<span>name)</span>
+</code></dt>
+<dd>
+<div class="desc"><p>Get IRIS given a name.</p>
+<h2 id="args">Args</h2>
+<dl>
+<dt><strong><code>name</code></strong></dt>
+<dd>a string corresponding to the name of the IRIS</dd>
+</dl>
+<h2 id="returns">Returns</h2>
+<dl>
+<dt><code>a list of IRIS documents matching the given name</code></dt>
+<dd> </dd>
+</dl></div>
+<details class="source">
+<summary>
+<span>Expand source code</span>
+</summary>
+<pre><code class="python">def get_iris_from_name(name):
+    """
+    Get IRIS given a name.
+
+    Args:
+        name: a string corresponding to the name of the IRIS
+
+    Returns:
+        a list of IRIS documents matching the given name
+    """
+    # the query string (name) is searched in both the name of the iris and the name of the city
+    regx = re.compile(name, re.IGNORECASE)
+    query_clause = {"$or": [{"properties.NOM_IRIS": {"$regex": regx}}, {"properties.NOM_COM": {"$regex": regx}}]}
+    iris = db.find_documents(iris_collection, query_clause)
+    return iris</code></pre>
+</details>
+</dd>
+<dt id="model.parse_json_to_dict"><code class="name flex">
+<span>def <span class="ident">parse_json_to_dict</span></span>(<span>json_file_path)</span>
+</code></dt>
+<dd>
+<div class="desc"><p>Convert a JSON file to a dictionary object.</p>
+<h2 id="args">Args</h2>
+<dl>
+<dt><strong><code>json_file_path</code></strong></dt>
+<dd>a string containing the path of the file to load</dd>
+</dl>
+<h2 id="returns">Returns</h2>
+<dl>
+<dt><code>a dictionary containing the data in the file located in the given path</code></dt>
+<dd> </dd>
+</dl></div>
+<details class="source">
+<summary>
+<span>Expand source code</span>
+</summary>
+<pre><code class="python">def parse_json_to_dict(json_file_path):
+    """
+    Convert a JSON file to a dictionary object.
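+
+    Example (illustrative): parse_json_to_dict(json_iris_indicator_code_to_label) loads the dictionary mapping indicator codes to labels.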
+ + Args: + json_file_path: a string containing the path of the file to load + + Returns: + a dictionary containing the data in the file located in the given path + """ + assert(os.path.isfile(json_file_path)) + with open(json_file_path) as data_file: + data = json.load(data_file) + data_file.close() + return data</code></pre> +</details> +</dd> +</dl> +</section> +<section> +</section> +</article> +<nav id="sidebar"> +<h1>Index</h1> +<div class="toc"> +<ul></ul> +</div> +<ul id="index"> +<li><h3><a href="#header-functions">Functions</a></h3> +<ul class=""> +<li><code><a title="model.count_iris_for_polygon" href="#model.count_iris_for_polygon">count_iris_for_polygon</a></code></li> +<li><code><a title="model.get_coords_from_code" href="#model.get_coords_from_code">get_coords_from_code</a></code></li> +<li><code><a title="model.get_indicators_dict" href="#model.get_indicators_dict">get_indicators_dict</a></code></li> +<li><code><a title="model.get_indicators_list" href="#model.get_indicators_list">get_indicators_list</a></code></li> +<li><code><a title="model.get_iris_for_polygon" href="#model.get_iris_for_polygon">get_iris_for_polygon</a></code></li> +<li><code><a title="model.get_iris_from_code" href="#model.get_iris_from_code">get_iris_from_code</a></code></li> +<li><code><a title="model.get_iris_from_name" href="#model.get_iris_from_name">get_iris_from_name</a></code></li> +<li><code><a title="model.parse_json_to_dict" href="#model.parse_json_to_dict">parse_json_to_dict</a></code></li> +</ul> +</li> +</ul> +</nav> +</main> +<footer id="footer"> +<p>Generated by <a href="https://pdoc3.github.io/pdoc"><cite>pdoc</cite> 0.8.1</a>.</p> +</footer> +<script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/highlight.min.js"></script> +<script>hljs.initHighlightingOnLoad()</script> +</body> +</html> \ No newline at end of file diff --git a/doc/predict.html b/doc/predict.html new file mode 100644 index 00000000..78813fa5 --- /dev/null +++ b/doc/predict.html @@ -0,0 +1,345 @@ +<!doctype html> +<html lang="en"> +<head> +<meta charset="utf-8"> +<meta name="viewport" content="width=device-width, initial-scale=1, minimum-scale=1" /> +<meta name="generator" content="pdoc 0.8.1" /> +<title>predict API documentation</title> +<meta name="description" content="" /> +<link href='https://cdnjs.cloudflare.com/ajax/libs/normalize/8.0.0/normalize.min.css' rel='stylesheet'> +<link href='https://cdnjs.cloudflare.com/ajax/libs/10up-sanitize.css/8.0.0/sanitize.min.css' rel='stylesheet'> +<link href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/styles/github.min.css" rel="stylesheet"> +<style>.flex{display:flex !important}body{line-height:1.5em}#content{padding:20px}#sidebar{padding:30px;overflow:hidden}#sidebar > *:last-child{margin-bottom:2cm}.http-server-breadcrumbs{font-size:130%;margin:0 0 15px 0}#footer{font-size:.75em;padding:5px 30px;border-top:1px solid #ddd;text-align:right}#footer p{margin:0 0 0 1em;display:inline-block}#footer p:last-child{margin-right:30px}h1,h2,h3,h4,h5{font-weight:300}h1{font-size:2.5em;line-height:1.1em}h2{font-size:1.75em;margin:1em 0 .50em 0}h3{font-size:1.4em;margin:25px 0 10px 0}h4{margin:0;font-size:105%}a{color:#058;text-decoration:none;transition:color .3s ease-in-out}a:hover{color:#e82}.title code{font-weight:bold}h2[id^="header-"]{margin-top:2em}.ident{color:#900}pre code{background:#f8f8f8;font-size:.8em;line-height:1.4em}code{background:#f2f2f1;padding:1px 4px;overflow-wrap:break-word}h1 
code{background:transparent}pre{background:#f8f8f8;border:0;border-top:1px solid #ccc;border-bottom:1px solid #ccc;margin:1em 0;padding:1ex}#http-server-module-list{display:flex;flex-flow:column}#http-server-module-list div{display:flex}#http-server-module-list dt{min-width:10%}#http-server-module-list p{margin-top:0}.toc ul,#index{list-style-type:none;margin:0;padding:0}#index code{background:transparent}#index h3{border-bottom:1px solid #ddd}#index ul{padding:0}#index h4{margin-top:.6em;font-weight:bold}@media (min-width:200ex){#index .two-column{column-count:2}}@media (min-width:300ex){#index .two-column{column-count:3}}dl{margin-bottom:2em}dl dl:last-child{margin-bottom:4em}dd{margin:0 0 1em 3em}#header-classes + dl > dd{margin-bottom:3em}dd dd{margin-left:2em}dd p{margin:10px 0}.name{background:#eee;font-weight:bold;font-size:.85em;padding:5px 10px;display:inline-block;min-width:40%}.name:hover{background:#e0e0e0}.name > span:first-child{white-space:nowrap}.name.class > span:nth-child(2){margin-left:.4em}.inherited{color:#999;border-left:5px solid #eee;padding-left:1em}.inheritance em{font-style:normal;font-weight:bold}.desc h2{font-weight:400;font-size:1.25em}.desc h3{font-size:1em}.desc dt code{background:inherit}.source summary,.git-link-div{color:#666;text-align:right;font-weight:400;font-size:.8em;text-transform:uppercase}.source summary > *{white-space:nowrap;cursor:pointer}.git-link{color:inherit;margin-left:1em}.source pre{max-height:500px;overflow:auto;margin:0}.source pre code{font-size:12px;overflow:visible}.hlist{list-style:none}.hlist li{display:inline}.hlist li:after{content:',\2002'}.hlist li:last-child:after{content:none}.hlist .hlist{display:inline;padding-left:1em}img{max-width:100%}.admonition{padding:.1em .5em;margin-bottom:1em}.admonition-title{font-weight:bold}.admonition.note,.admonition.info,.admonition.important{background:#aef}.admonition.todo,.admonition.versionadded,.admonition.tip,.admonition.hint{background:#dfd}.admonition.warning,.admonition.versionchanged,.admonition.deprecated{background:#fd4}.admonition.error,.admonition.danger,.admonition.caution{background:lightpink}</style> +<style media="screen and (min-width: 700px)">@media screen and (min-width:700px){#sidebar{width:30%;height:100vh;overflow:auto;position:sticky;top:0}#content{width:70%;max-width:100ch;padding:3em 4em;border-left:1px solid #ddd}pre code{font-size:1em}.item .name{font-size:1em}main{display:flex;flex-direction:row-reverse;justify-content:flex-end}.toc ul ul,#index ul{padding-left:1.5em}.toc > ul > li{margin-top:.5em}}</style> +<style media="print">@media print{#sidebar h1{page-break-before:always}.source{display:none}}@media print{*{background:transparent !important;color:#000 !important;box-shadow:none !important;text-shadow:none !important}a[href]:after{content:" (" attr(href) ")";font-size:90%}a[href][title]:after{content:none}abbr[title]:after{content:" (" attr(title) ")"}.ir a:after,a[href^="javascript:"]:after,a[href^="#"]:after{content:""}pre,blockquote{border:1px solid #999;page-break-inside:avoid}thead{display:table-header-group}tr,img{page-break-inside:avoid}img{max-width:100% !important}@page{margin:0.5cm}p,h2,h3{orphans:3;widows:3}h1,h2,h3,h4,h5,h6{page-break-after:avoid}}</style> +</head> +<body> +<main> +<article id="content"> +<header> +<h1 class="title">Module <code>predict</code></h1> +</header> +<section id="section-intro"> +<details class="source"> +<summary> +<span>Expand source code</span> +</summary> +<pre><code class="python">import logging +from 
collections import OrderedDict
+
+from predihood.classes.Data import Data
+from predihood.classes.Dataset import Dataset
+from predihood.classes.MethodPrediction import MethodPrediction
+from predihood.config import ENVIRONMENT_VARIABLES, TRAIN_SIZE, TEST_SIZE
+from predihood.selection import retrieve_lists
+from predihood.utility_functions import check_dataset_size, get_most_frequent
+from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
+from sklearn.exceptions import ConvergenceWarning
+from sklearn.linear_model import LogisticRegression, SGDClassifier
+from sklearn.neighbors import KNeighborsClassifier, NearestCentroid
+from sklearn.svm import SVC
+from sklearn.utils._testing import ignore_warnings
+
+log = logging.getLogger(__name__)
+
+
+# define classifiers used in the experimental validation
+CLASSIFIERS = [
+    LogisticRegression(penalty="l2"),
+    KNeighborsClassifier(n_neighbors=30),
+    RandomForestClassifier(n_estimators=300),
+    SVC(kernel="rbf"),
+    SGDClassifier(),
+    NearestCentroid(),
+    AdaBoostClassifier()
+]
+
+
+@ignore_warnings(category=ConvergenceWarning)
+def compute_all_accuracies(data, clf, train_size, test_size, remove_outliers=False, remove_rural=False):
+    """
+    Compute accuracies for each EV and each list with the given classifier.
+
+    Args:
+        data: a Data object that contains assessed IRIS and their attributes
+        clf: an object which is a classifier (with `fit` and `predict` methods)
+        train_size: the size of the training sample, given as a proportion (e.g. 0.8)
+        test_size: the size of the test sample, given as a proportion (e.g. 0.2)
+        remove_outliers: True to remove from the dataset IRIS that are detected as outliers, False otherwise
+        remove_rural: True to remove IRIS that are in the countryside to avoid bias while predicting, False otherwise
+
+    Returns:
+        a dictionary of results for each EV and each list of selected indicators
+    """
+    log.info("... 
Computing accuracies ...") + train_size, test_size = check_dataset_size(train_size, test_size) + + data_not_filtered = Data(normalization="density", filtering=False) + data_not_filtered.init_all_in_one() + + lists = retrieve_lists() + results = {} + for j, env in enumerate(ENVIRONMENT_VARIABLES): + results[env] = OrderedDict() + log.debug("--- %s ---", env) + + dataset = Dataset(data_not_filtered, env, selected_indicators=data_not_filtered.indicators, train_size=train_size, test_size=test_size, outliers=remove_outliers, _type='supervised') + dataset.init_all_in_one() + if remove_rural: dataset.remove_rural_iris() + + mean_classifier = 0.0 + algo = MethodPrediction(name="", dataset=dataset, classifier=clf) + algo.fit() + algo.compute_performance() + results[env]["accuracy_none"] = algo.accuracy + results[env]["accuracies"] = OrderedDict() + log.info("accuracy for %s without filtering: %f", env, algo.accuracy) + + for top_k, lst in lists.items(): + dataset = Dataset(data, env, selected_indicators=lst[env], train_size=train_size, test_size=test_size, outliers=remove_outliers, _type='supervised') + dataset.init_all_in_one() + if remove_rural: dataset.remove_rural_iris() + algo2 = MethodPrediction(name='', dataset=dataset, classifier=clf) + algo2.fit() + algo2.compute_performance() + mean_classifier += algo2.accuracy + results[env]["accuracies"][str(top_k)] = algo2.accuracy + log.info("accuracy for %s with %s: %f", env, top_k, algo2.accuracy) + print("means:", results[env]) + mean_classifier /= len(CLASSIFIERS) + results[env]["mean"] = mean_classifier + log.info("mean for classifier: %f", mean_classifier) + results = OrderedDict(results) + log.info(results) + return results + + +def predict_one_iris(iris_code, data, clf, train_size, test_size, remove_outliers=False): + """ + Predict the 6 EV for the given IRIS, the given data and the given classifier. + + Args: + iris_code: a string that contains the code of the IRIS (9 digits) + data: a Data object on which classifier will learn + clf: an object (classifier) to perform the prediction + train_size: an integer corresponding to the size of the train sample + test_size: an integer corresponding to the size of the test sample + remove_outliers: True to remove from the dataset IRIS that are detected as outliers, False else + + Returns: + A dictionary containing predictions for each EV. 
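+        For instance, the returned dictionary may look like {"building_type": {"most_frequent": "Houses", "count_frequent": 7}, ...} (hypothetical values).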
+ """ + train_size, test_size = check_dataset_size(train_size, test_size) + lists = retrieve_lists() + + predictions = {} + for j, env in enumerate(ENVIRONMENT_VARIABLES): + log.info("predicting for %s", env) + predictions_lst = [] + for top_k, lst in lists.items(): + dataset = Dataset(data, env, selected_indicators=lst[env], train_size=train_size, test_size=test_size, outliers=remove_outliers, _type="supervised") + dataset.init_all_in_one() + algorithm = MethodPrediction(name='', dataset=dataset, classifier=clf) + algorithm.fit() + algorithm.predict(iris_code) + predictions_lst.append(algorithm.prediction) + predictions[env] = get_most_frequent(predictions_lst) # get the most frequent value and the number of occurrences + print(predictions) # TODO: give an example of the dictionary + return predictions + + +if __name__ == '__main__': + + # Create data + data = Data(normalization="population", filtering=True) + data.init_all_in_one() + + # Compute accuracies for each EV and each top-k + compute_all_accuracies(data, RandomForestClassifier(), TRAIN_SIZE, TEST_SIZE) + + # Predict EV of the "Part-Dieu" IRIS, which is he CBD of Lyon (Central Business District) + predict_one_iris("693830301", data, RandomForestClassifier(), TRAIN_SIZE, TEST_SIZE)</code></pre> +</details> +</section> +<section> +</section> +<section> +</section> +<section> +<h2 class="section-title" id="header-functions">Functions</h2> +<dl> +<dt id="predict.compute_all_accuracies"><code class="name flex"> +<span>def <span class="ident">compute_all_accuracies</span></span>(<span>data, clf, train_size, test_size, remove_outliers=False, remove_rural=False)</span> +</code></dt> +<dd> +<div class="desc"><p>Compute accuracies for each EV and each list with the given classifier.</p> +<h2 id="args">Args</h2> +<dl> +<dt><strong><code>data</code></strong></dt> +<dd>a Data object that contains assessed IRIS and their attributes</dd> +<dt><strong><code>clf</code></strong></dt> +<dd>an object which is a classifier (with <code>fit</code> and <code><a title="predict" href="#predict">predict</a></code> methods)</dd> +<dt><strong><code>train_size</code></strong></dt> +<dd>an integer corresponding to the size of the training sample</dd> +<dt><strong><code>test_size</code></strong></dt> +<dd>an integer corresponding to the size of the test sample</dd> +<dt><strong><code>remove_outliers</code></strong></dt> +<dd>True to remove from the dataset IRIS that are detected as outliers, False else</dd> +<dt><strong><code>remove_rural</code></strong></dt> +<dd>True to remove IRIS that are in the countryside to avoid bias while predicting, False else</dd> +</dl> +<h2 id="returns">Returns</h2> +<dl> +<dt><code>a dictionary</code> of <code>results for each EV and each list</code> of <code>selected indicators</code></dt> +<dd> </dd> +</dl></div> +<details class="source"> +<summary> +<span>Expand source code</span> +</summary> +<pre><code class="python">@ignore_warnings(category=ConvergenceWarning) +def compute_all_accuracies(data, clf, train_size, test_size, remove_outliers=False, remove_rural=False): + """ + Compute accuracies for each EV and each list with the given classifier. 
+ + Args: + data: a Data object that contains assessed IRIS and their attributes + clf: an object which is a classifier (with `fit` and `predict` methods) + train_size: an integer corresponding to the size of the training sample + test_size: an integer corresponding to the size of the test sample + remove_outliers: True to remove IRIS detected as outliers from the dataset, False otherwise + remove_rural: True to remove IRIS located in the countryside to avoid bias while predicting, False otherwise + + Returns: + a dictionary of results for each EV and each list of selected indicators + """ + log.info("... Computing accuracies ...") + train_size, test_size = check_dataset_size(train_size, test_size) + + data_not_filtered = Data(normalization="density", filtering=False) + data_not_filtered.init_all_in_one() + + lists = retrieve_lists() + results = {} + for j, env in enumerate(ENVIRONMENT_VARIABLES): + results[env] = OrderedDict() + log.debug("--- %s ---", env) + + dataset = Dataset(data_not_filtered, env, selected_indicators=data_not_filtered.indicators, train_size=train_size, test_size=test_size, outliers=remove_outliers, _type='supervised') + dataset.init_all_in_one() + if remove_rural: dataset.remove_rural_iris() + + mean_classifier = 0.0 + algo = MethodPrediction(name="", dataset=dataset, classifier=clf) + algo.fit() + algo.compute_performance() + results[env]["accuracy_none"] = algo.accuracy + results[env]["accuracies"] = OrderedDict() + log.info("accuracy for %s without filtering: %f", env, algo.accuracy) + + for top_k, lst in lists.items(): + dataset = Dataset(data, env, selected_indicators=lst[env], train_size=train_size, test_size=test_size, outliers=remove_outliers, _type='supervised') + dataset.init_all_in_one() + if remove_rural: dataset.remove_rural_iris() + algo2 = MethodPrediction(name='', dataset=dataset, classifier=clf) + algo2.fit() + algo2.compute_performance() + mean_classifier += algo2.accuracy + results[env]["accuracies"][str(top_k)] = algo2.accuracy + log.info("accuracy for %s with %s: %f", env, top_k, algo2.accuracy) + print("means:", results[env]) + mean_classifier /= len(lists) # average accuracy over all top-k lists + results[env]["mean"] = mean_classifier + log.info("mean for classifier: %f", mean_classifier) + results = OrderedDict(results) + log.info(results) + return results</code></pre> +</details> +</dd> +<dt id="predict.predict_one_iris"><code class="name flex"> +<span>def <span class="ident">predict_one_iris</span></span>(<span>iris_code, data, clf, train_size, test_size, remove_outliers=False)</span> +</code></dt> +<dd> +<div class="desc"><p>Predict the 6 EV of the given IRIS with the given data and the given classifier.</p> +<h2 id="args">Args</h2> +<dl> +<dt><strong><code>iris_code</code></strong></dt> +<dd>a string that contains the code of the IRIS (9 digits)</dd> +<dt><strong><code>data</code></strong></dt> +<dd>a Data object on which the classifier will learn</dd> +<dt><strong><code>clf</code></strong></dt> +<dd>an object (classifier) to perform the prediction</dd> +<dt><strong><code>train_size</code></strong></dt> +<dd>an integer corresponding to the size of the train sample</dd> +<dt><strong><code>test_size</code></strong></dt> +<dd>an integer corresponding to the size of the test sample</dd> +<dt><strong><code>remove_outliers</code></strong></dt> +<dd>True to remove IRIS detected as outliers from the dataset, False otherwise</dd> +</dl> +<h2 id="returns">Returns</h2> +<p>A dictionary containing predictions for each EV.</p></div> +<details class="source"> +<summary> +<span>Expand 
source code</span> +</summary> +<pre><code class="python">def predict_one_iris(iris_code, data, clf, train_size, test_size, remove_outliers=False): + """ + Predict the 6 EV of the given IRIS with the given data and the given classifier. + + Args: + iris_code: a string that contains the code of the IRIS (9 digits) + data: a Data object on which the classifier will learn + clf: an object (classifier) to perform the prediction + train_size: an integer corresponding to the size of the train sample + test_size: an integer corresponding to the size of the test sample + remove_outliers: True to remove IRIS detected as outliers from the dataset, False otherwise + + Returns: + A dictionary containing predictions for each EV. + """ + train_size, test_size = check_dataset_size(train_size, test_size) + lists = retrieve_lists() + + predictions = {} + for j, env in enumerate(ENVIRONMENT_VARIABLES): + log.info("predicting for %s", env) + predictions_lst = [] + for top_k, lst in lists.items(): + dataset = Dataset(data, env, selected_indicators=lst[env], train_size=train_size, test_size=test_size, outliers=remove_outliers, _type="supervised") + dataset.init_all_in_one() + algorithm = MethodPrediction(name='', dataset=dataset, classifier=clf) + algorithm.fit() + algorithm.predict(iris_code) + predictions_lst.append(algorithm.prediction) + predictions[env] = get_most_frequent(predictions_lst) # get the most frequent value and the number of occurrences + print(predictions) # TODO: give an example of the dictionary + return predictions</code></pre> +</details> +</dd> +</dl> +</section> +<section> +</section> +</article> +<nav id="sidebar"> +<h1>Index</h1> +<div class="toc"> +<ul></ul> +</div> +<ul id="index"> +<li><h3><a href="#header-functions">Functions</a></h3> +<ul class=""> +<li><code><a title="predict.compute_all_accuracies" href="#predict.compute_all_accuracies">compute_all_accuracies</a></code></li> +<li><code><a title="predict.predict_one_iris" href="#predict.predict_one_iris">predict_one_iris</a></code></li> +</ul> +</li> +</ul> +</nav> +</main> +<footer id="footer"> +<p>Generated by <a href="https://pdoc3.github.io/pdoc"><cite>pdoc</cite> 0.8.1</a>.</p> +</footer> +<script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/highlight.min.js"></script> +<script>hljs.initHighlightingOnLoad()</script> +</body> +</html> \ No newline at end of file diff --git a/doc/selection.html b/doc/selection.html new file mode 100644 index 00000000..58c8aa73 --- /dev/null +++ b/doc/selection.html @@ -0,0 +1,406 @@ +<!doctype html> +<html lang="en"> +<head> +<meta charset="utf-8"> +<meta name="viewport" content="width=device-width, initial-scale=1, minimum-scale=1" /> +<meta name="generator" content="pdoc 0.8.1" /> +<title>selection API documentation</title> +<meta name="description" content="" /> +<link href='https://cdnjs.cloudflare.com/ajax/libs/normalize/8.0.0/normalize.min.css' rel='stylesheet'> +<link href='https://cdnjs.cloudflare.com/ajax/libs/10up-sanitize.css/8.0.0/sanitize.min.css' rel='stylesheet'> +<link href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/styles/github.min.css" rel="stylesheet"> +<style>.flex{display:flex !important}body{line-height:1.5em}#content{padding:20px}#sidebar{padding:30px;overflow:hidden}#sidebar > *:last-child{margin-bottom:2cm}.http-server-breadcrumbs{font-size:130%;margin:0 0 15px 0}#footer{font-size:.75em;padding:5px 30px;border-top:1px solid #ddd;text-align:right}#footer p{margin:0 0 0 1em;display:inline-block}#footer 
p:last-child{margin-right:30px}h1,h2,h3,h4,h5{font-weight:300}h1{font-size:2.5em;line-height:1.1em}h2{font-size:1.75em;margin:1em 0 .50em 0}h3{font-size:1.4em;margin:25px 0 10px 0}h4{margin:0;font-size:105%}a{color:#058;text-decoration:none;transition:color .3s ease-in-out}a:hover{color:#e82}.title code{font-weight:bold}h2[id^="header-"]{margin-top:2em}.ident{color:#900}pre code{background:#f8f8f8;font-size:.8em;line-height:1.4em}code{background:#f2f2f1;padding:1px 4px;overflow-wrap:break-word}h1 code{background:transparent}pre{background:#f8f8f8;border:0;border-top:1px solid #ccc;border-bottom:1px solid #ccc;margin:1em 0;padding:1ex}#http-server-module-list{display:flex;flex-flow:column}#http-server-module-list div{display:flex}#http-server-module-list dt{min-width:10%}#http-server-module-list p{margin-top:0}.toc ul,#index{list-style-type:none;margin:0;padding:0}#index code{background:transparent}#index h3{border-bottom:1px solid #ddd}#index ul{padding:0}#index h4{margin-top:.6em;font-weight:bold}@media (min-width:200ex){#index .two-column{column-count:2}}@media (min-width:300ex){#index .two-column{column-count:3}}dl{margin-bottom:2em}dl dl:last-child{margin-bottom:4em}dd{margin:0 0 1em 3em}#header-classes + dl > dd{margin-bottom:3em}dd dd{margin-left:2em}dd p{margin:10px 0}.name{background:#eee;font-weight:bold;font-size:.85em;padding:5px 10px;display:inline-block;min-width:40%}.name:hover{background:#e0e0e0}.name > span:first-child{white-space:nowrap}.name.class > span:nth-child(2){margin-left:.4em}.inherited{color:#999;border-left:5px solid #eee;padding-left:1em}.inheritance em{font-style:normal;font-weight:bold}.desc h2{font-weight:400;font-size:1.25em}.desc h3{font-size:1em}.desc dt code{background:inherit}.source summary,.git-link-div{color:#666;text-align:right;font-weight:400;font-size:.8em;text-transform:uppercase}.source summary > *{white-space:nowrap;cursor:pointer}.git-link{color:inherit;margin-left:1em}.source pre{max-height:500px;overflow:auto;margin:0}.source pre code{font-size:12px;overflow:visible}.hlist{list-style:none}.hlist li{display:inline}.hlist li:after{content:',\2002'}.hlist li:last-child:after{content:none}.hlist .hlist{display:inline;padding-left:1em}img{max-width:100%}.admonition{padding:.1em .5em;margin-bottom:1em}.admonition-title{font-weight:bold}.admonition.note,.admonition.info,.admonition.important{background:#aef}.admonition.todo,.admonition.versionadded,.admonition.tip,.admonition.hint{background:#dfd}.admonition.warning,.admonition.versionchanged,.admonition.deprecated{background:#fd4}.admonition.error,.admonition.danger,.admonition.caution{background:lightpink}</style> +<style media="screen and (min-width: 700px)">@media screen and (min-width:700px){#sidebar{width:30%;height:100vh;overflow:auto;position:sticky;top:0}#content{width:70%;max-width:100ch;padding:3em 4em;border-left:1px solid #ddd}pre code{font-size:1em}.item .name{font-size:1em}main{display:flex;flex-direction:row-reverse;justify-content:flex-end}.toc ul ul,#index ul{padding-left:1.5em}.toc > ul > li{margin-top:.5em}}</style> +<style media="print">@media print{#sidebar h1{page-break-before:always}.source{display:none}}@media print{*{background:transparent !important;color:#000 !important;box-shadow:none !important;text-shadow:none !important}a[href]:after{content:" (" attr(href) ")";font-size:90%}a[href][title]:after{content:none}abbr[title]:after{content:" (" attr(title) ")"}.ir a:after,a[href^="javascript:"]:after,a[href^="#"]:after{content:""}pre,blockquote{border:1px solid 
#999;page-break-inside:avoid}thead{display:table-header-group}tr,img{page-break-inside:avoid}img{max-width:100% !important}@page{margin:0.5cm}p,h2,h3{orphans:3;widows:3}h1,h2,h3,h4,h5,h6{page-break-after:avoid}}</style> +</head> +<body> +<main> +<article id="content"> +<header> +<h1 class="title">Module <code>selection</code></h1> +</header> +<section id="section-intro"> +<details class="source"> +<summary> +<span>Expand source code</span> +</summary> +<pre><code class="python">import json +import logging +import numpy as np +import os +import pandas as pd + +from predihood.classes.Data import Data +from predihood.classes.Dataset import Dataset +from predihood.classes.MethodSelection import MethodSelection +from predihood.config import TOPS_K, ENVIRONMENT_VARIABLES, FOLDER_SELECTED_INDICATORS, FILE_HIERARCHY +from predihood.utility_functions import apply_hierarchy +from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier + +log = logging.getLogger(__name__) + + +def generate_all_data(): + """ + Generate all datasets, i.e. one with density normalization, one with population normalization and one without normalization. + The three are filtered. Generated datasets are located in generated_files/datasets. + """ + data = Data(normalization="density", filtering=True) + data.init_all_in_one() + data = Data(normalization="population", filtering=True) + data.init_all_in_one() + data = Data(normalization=None, filtering=True) + data.init_all_in_one() + + +def retrieve_lists(top_k=None): + """ + Get all lists generated for each top-k or for the given top-k if provided. + + Args: + top_k: an integer corresponding to the top-k of the list to retrieve + + Returns: + a dictionary containing the lists of indicators for each top-k + """ + lists = {} + + if top_k: + # a top-k is specified, so the list of indicators of size top-k is retrieved + lst = retrieve_one_list(top_k) + lists[str(top_k)] = lst + else: + # no top-k is provided, so all lists from 10 to 100 indicators are retrieved + for top_k in TOPS_K: + lst = retrieve_one_list(top_k) + lists[str(top_k)] = lst + return lists + + +def retrieve_one_list(top_k): + """ + Retrieve the list of selected INSEE indicators corresponding to the given top-k. + + Args: + top_k: an integer corresponding to the size of the list to retrieve + + Returns: + a dictionary associating each EV with the indicators of the list of size top-k + """ + if not os.path.exists(os.path.join(FOLDER_SELECTED_INDICATORS, "list" + str(top_k) + ".csv")): generate_lists() + indicators_csv = pd.read_csv(os.path.join(FOLDER_SELECTED_INDICATORS, "list" + str(top_k) + ".csv"), header=None) + lst = json.loads(indicators_csv.drop(indicators_csv.columns[0], axis=1).to_json(orient="index")) + list_temp = {} + for key, value in lst.items(): + list_temp[ENVIRONMENT_VARIABLES[int(key)]] = [value2 for key2, value2 in value.items() if value2 is not None] + return list_temp + + +def generate_lists(): + """ + Generate lists of INSEE indicators that are relevant for prediction. + This selection process is based on: + - removing fully correlated indicators + - selecting a limited number among the most relevant indicators (using Random Forest and Extra Tree classifiers) + - taking into account the diversity of categories of INSEE indicators based on a hierarchy of these indicators + """ + # 1. Create data + data = Data(normalization="density", filtering=True) + data.init_all_in_one() + + # 2. 
Run heat map and get fully correlated indicators by using a correlation matrix + dataset = Dataset(data, "building_type", "unsupervised") + dataset.init_all_in_one() + heat_map = MethodSelection(name="heat map", dataset=dataset, parameters={"method": "spearman"}) + heat_map.compute_selection() + fully_correlated_indicators = heat_map.best_indicators + log.info("fully correlated indicators: %d %s", len(fully_correlated_indicators), ", ".join(fully_correlated_indicators)) + + # 3. Select a limited number (top-k) of indicators by using Random Forest and Extra Tree classifiers. + # Then take into account the diversity of indicators by using a hierarchy of the INSEE indicators + hierarchy = pd.read_csv(FILE_HIERARCHY, sep="\t") + + # 4. For each size of list (i.e. for each top-k), select a limited number of indicators with Extra Tree and Random Forest classifiers + # Then merge the two results to obtain a single list of relevant indicators. + for top_k in TOPS_K: + log.info("constructing list of %d indicators", top_k) + all_lists = [] # to keep lists of indicators for each EV (for the current top-k) + + for env in ENVIRONMENT_VARIABLES: + dataset = Dataset(data, env, indicators_to_remove=fully_correlated_indicators, _type="supervised") + dataset.init_all_in_one() + + # a. get best indicators according to Extra Tree classifier + fi_et = MethodSelection(name="feature importance ET", dataset=dataset, classifier=ExtraTreesClassifier(), parameters={"top_k": top_k}) + fi_et.fit() + fi_et.compute_selection() + best_indicators_ET = fi_et.best_indicators + + # b. get best indicators according to Random Forest classifier + fi_rf = MethodSelection(name="feature importance RF", dataset=dataset, classifier=RandomForestClassifier(), parameters={"top_k": top_k}) + fi_rf.fit() + fi_rf.compute_selection() + best_indicators_RF = fi_rf.best_indicators + + # c. merge indicators that have been selected by ET and RF classifiers + # in this step, if an indicator has been selected by both classifiers, its score is the sum of its RF score and its ET score. + best_indicators_ET.extend(best_indicators_RF) + all_selected_indicators = best_indicators_ET # all selected indicators, i.e. union between RF and ET + keys = set([element[0] for element in all_selected_indicators]) # store the names of the selected indicators + merged_indicators_temp = {key: 0 for key in keys} + + for i in range(len(all_selected_indicators)): + indicator = all_selected_indicators[i] + merged_indicators_temp[indicator[0]] += indicator[1] # adding score + + # transform it into a list of sub-lists, e.g. [[indicator1, score1], ..., [indicatorN, scoreN]] + merged_indicators = [[key, merged_indicators_temp[key]] for key in merged_indicators_temp] + + # d. apply hierarchy on selected indicators to take into account the diversity of categories of indicators + indicators_hierarchy = apply_hierarchy(merged_indicators, hierarchy) + + # get the names of each selected indicator + selected_indicators_names = [indicator[0] for indicator in indicators_hierarchy] + + # e. add uncorrelated indicators of heat map to the lists + all_lists.append(selected_indicators_names) + + # C. 
Transform lists of selected indicators for the current top-k and save them as a CSV file + # indexes = {i: ENVIRONMENT_VARIABLES[i] for i in range(len(ENVIRONMENT_VARIABLES))} + selected_indicators = pd.DataFrame(np.array(all_lists).tolist()) + selected_indicators.to_csv(os.path.join(FOLDER_SELECTED_INDICATORS, "list"+str(top_k)+".csv"), header=False) + + +if __name__ == "__main__": + generate_all_data() + generate_lists()</code></pre> +</details> +</section> +<section> +</section> +<section> +</section> +<section> +<h2 class="section-title" id="header-functions">Functions</h2> +<dl> +<dt id="selection.generate_all_data"><code class="name flex"> +<span>def <span class="ident">generate_all_data</span></span>(<span>)</span> +</code></dt> +<dd> +<div class="desc"><p>Generate all datasets, i.e. one with density normalization, one with population normalization and one without normalization. +The three are filtered. Generated datasets are located in generated_files/datasets.</p></div> +<details class="source"> +<summary> +<span>Expand source code</span> +</summary> +<pre><code class="python">def generate_all_data(): + """ + Generate all datasets, i.e. one with density normalization, one with population normalization and one without normalization. + The three are filtered. Generated datasets are located in generated_files/datasets. + """ + data = Data(normalization="density", filtering=True) + data.init_all_in_one() + data = Data(normalization="population", filtering=True) + data.init_all_in_one() + data = Data(normalization=None, filtering=True) + data.init_all_in_one()</code></pre> +</details> +</dd> +<dt id="selection.generate_lists"><code class="name flex"> +<span>def <span class="ident">generate_lists</span></span>(<span>)</span> +</code></dt> +<dd> +<div class="desc"><p>Generate lists of INSEE indicators that are relevant for prediction. +This selection process is based on: +- removing fully correlated indicators +- selecting a limited number among the most relevant indicators (using Random Forest and Extra Tree classifiers) +- taking into account the diversity of categories of INSEE indicators based on a hierarchy of these indicators</p></div> +<details class="source"> +<summary> +<span>Expand source code</span> +</summary> +<pre><code class="python">def generate_lists(): + """ + Generate lists of INSEE indicators that are relevant for prediction. + This selection process is based on: + - removing fully correlated indicators + - selecting a limited number among the most relevant indicators (using Random Forest and Extra Tree classifiers) + - taking into account the diversity of categories of INSEE indicators based on a hierarchy of these indicators + """ + # 1. Create data + data = Data(normalization="density", filtering=True) + data.init_all_in_one() + + # 2. Run heat map and get fully correlated indicators by using a correlation matrix + dataset = Dataset(data, "building_type", "unsupervised") + dataset.init_all_in_one() + heat_map = MethodSelection(name="heat map", dataset=dataset, parameters={"method": "spearman"}) + heat_map.compute_selection() + fully_correlated_indicators = heat_map.best_indicators + log.info("fully correlated indicators: %d %s", len(fully_correlated_indicators), ", ".join(fully_correlated_indicators)) + + # 3. Select a limited number (top-k) of indicators by using Random Forest and Extra Tree classifiers. + # Then take into account the diversity of indicators by using a hierarchy of the INSEE indicators + hierarchy = pd.read_csv(FILE_HIERARCHY, sep="\t") + + # 4. 
For each size of list (i.e. for each top-k), select a limited number of indicators with Extra Tree and Random Forest classifiers + # Then merge the two results to obtain a single list of relevant indicators. + for top_k in TOPS_K: + log.info("constructing list of %d indicators", top_k) + all_lists = [] # to keep lists of indicators for each EV (for the current top-k) + + for env in ENVIRONMENT_VARIABLES: + dataset = Dataset(data, env, indicators_to_remove=fully_correlated_indicators, _type="supervised") + dataset.init_all_in_one() + + # a. get best indicators according to Extra Tree classifier + fi_et = MethodSelection(name="feature importance ET", dataset=dataset, classifier=ExtraTreesClassifier(), parameters={"top_k": top_k}) + fi_et.fit() + fi_et.compute_selection() + best_indicators_ET = fi_et.best_indicators + + # b. get best indicators according to Random Forest classifier + fi_rf = MethodSelection(name="feature importance RF", dataset=dataset, classifier=RandomForestClassifier(), parameters={"top_k": top_k}) + fi_rf.fit() + fi_rf.compute_selection() + best_indicators_RF = fi_rf.best_indicators + + # c. merge indicators that have been selected by ET and RF classifiers + # in this step, if an indicator has been selected by both classifiers, its score is the sum of its RF score and its ET score. + best_indicators_ET.extend(best_indicators_RF) + all_selected_indicators = best_indicators_ET # all selected indicators, i.e. union between RF and ET + keys = set([element[0] for element in all_selected_indicators]) # store the names of the selected indicators + merged_indicators_temp = {key: 0 for key in keys} + + for i in range(len(all_selected_indicators)): + indicator = all_selected_indicators[i] + merged_indicators_temp[indicator[0]] += indicator[1] # adding score + + # transform it into a list of sub-lists, e.g. [[indicator1, score1], ..., [indicatorN, scoreN]] + merged_indicators = [[key, merged_indicators_temp[key]] for key in merged_indicators_temp] + + # d. apply hierarchy on selected indicators to take into account the diversity of categories of indicators + indicators_hierarchy = apply_hierarchy(merged_indicators, hierarchy) + + # get the names of each selected indicator + selected_indicators_names = [indicator[0] for indicator in indicators_hierarchy] + + # e. add uncorrelated indicators of heat map to the lists + all_lists.append(selected_indicators_names) + + # C. 
Transform lists of selected indicators for the current top-k and save them as a CSV file + # indexes = {i: ENVIRONMENT_VARIABLES[i] for i in range(len(ENVIRONMENT_VARIABLES))} + selected_indicators = pd.DataFrame(np.array(all_lists).tolist()) + selected_indicators.to_csv(os.path.join(FOLDER_SELECTED_INDICATORS, "list"+str(top_k)+".csv"), header=False)</code></pre> +</details> +</dd> +<dt id="selection.retrieve_lists"><code class="name flex"> +<span>def <span class="ident">retrieve_lists</span></span>(<span>top_k=None)</span> +</code></dt> +<dd> +<div class="desc"><p>Get all lists generated for each top-k or for the given top-k if provided.</p> +<h2 id="args">Args</h2> +<dl> +<dt><strong><code>top_k</code></strong></dt> +<dd>an integer corresponding to the top-k of the list to retrieve</dd> +</dl> +<h2 id="returns">Returns</h2> +<dl> +<dt><code>a dictionary containing the lists</code> of <code>indicators for each top-k</code></dt> +<dd>&nbsp;</dd> +</dl></div> +<details class="source"> +<summary> +<span>Expand source code</span> +</summary> +<pre><code class="python">def retrieve_lists(top_k=None): + """ + Get all lists generated for each top-k or for the given top-k if provided. + + Args: + top_k: an integer corresponding to the top-k of the list to retrieve + + Returns: + a dictionary containing the lists of indicators for each top-k + """ + lists = {} + + if top_k: + # a top-k is specified, so the list of indicators of size top-k is retrieved + lst = retrieve_one_list(top_k) + lists[str(top_k)] = lst + else: + # no top-k is provided, so all lists from 10 to 100 indicators are retrieved + for top_k in TOPS_K: + lst = retrieve_one_list(top_k) + lists[str(top_k)] = lst + return lists</code></pre> +</details> +</dd> +<dt id="selection.retrieve_one_list"><code class="name flex"> +<span>def <span class="ident">retrieve_one_list</span></span>(<span>top_k)</span> +</code></dt> +<dd> +<div class="desc"><p>Retrieve the list of selected INSEE indicators corresponding to the given top-k.</p> +<h2 id="args">Args</h2> +<dl> +<dt><strong><code>top_k</code></strong></dt> +<dd>an integer corresponding to the size of the list to retrieve</dd> +</dl> +<h2 id="returns">Returns</h2> +<dl> +<dt><code>a dictionary associating each EV with the indicators</code> of <code>the list</code> of <code>size top-k</code></dt> +<dd>&nbsp;</dd> +</dl></div> +<details class="source"> +<summary> +<span>Expand source code</span> +</summary> +<pre><code class="python">def retrieve_one_list(top_k): + """ + Retrieve the list of selected INSEE indicators corresponding to the given top-k. 
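+    If the corresponding CSV file does not exist yet, generate_lists() is called first to create it.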
+ + Args: + top_k: an integer corresponding to the size of the list to retrieve + + Returns: + a dictionary associating each EV with the indicators of the list of size top-k + """ + if not os.path.exists(os.path.join(FOLDER_SELECTED_INDICATORS, "list" + str(top_k) + ".csv")): generate_lists() + indicators_csv = pd.read_csv(os.path.join(FOLDER_SELECTED_INDICATORS, "list" + str(top_k) + ".csv"), header=None) + lst = json.loads(indicators_csv.drop(indicators_csv.columns[0], axis=1).to_json(orient="index")) + list_temp = {} + for key, value in lst.items(): + list_temp[ENVIRONMENT_VARIABLES[int(key)]] = [value2 for key2, value2 in value.items() if value2 is not None] + return list_temp</code></pre> +</details> +</dd> +</dl> +</section> +<section> +</section> +</article> +<nav id="sidebar"> +<h1>Index</h1> +<div class="toc"> +<ul></ul> +</div> +<ul id="index"> +<li><h3><a href="#header-functions">Functions</a></h3> +<ul class=""> +<li><code><a title="selection.generate_all_data" href="#selection.generate_all_data">generate_all_data</a></code></li> +<li><code><a title="selection.generate_lists" href="#selection.generate_lists">generate_lists</a></code></li> +<li><code><a title="selection.retrieve_lists" href="#selection.retrieve_lists">retrieve_lists</a></code></li> +<li><code><a title="selection.retrieve_one_list" href="#selection.retrieve_one_list">retrieve_one_list</a></code></li> +</ul> +</li> +</ul> +</nav> +</main> +<footer id="footer"> +<p>Generated by <a href="https://pdoc3.github.io/pdoc"><cite>pdoc</cite> 0.8.1</a>.</p> +</footer> +<script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/highlight.min.js"></script> +<script>hljs.initHighlightingOnLoad()</script> +</body> +</html> \ No newline at end of file diff --git a/doc/utility_functions.html b/doc/utility_functions.html new file mode 100644 index 00000000..80aa0b07 --- /dev/null +++ b/doc/utility_functions.html @@ -0,0 +1,1264 @@ +<!doctype html> +<html lang="en"> +<head> +<meta charset="utf-8"> +<meta name="viewport" content="width=device-width, initial-scale=1, minimum-scale=1" /> +<meta name="generator" content="pdoc 0.8.1" /> +<title>utility_functions API documentation</title> +<meta name="description" content="" /> +<link href='https://cdnjs.cloudflare.com/ajax/libs/normalize/8.0.0/normalize.min.css' rel='stylesheet'> +<link href='https://cdnjs.cloudflare.com/ajax/libs/10up-sanitize.css/8.0.0/sanitize.min.css' rel='stylesheet'> +<link href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/styles/github.min.css" rel="stylesheet"> +<style>.flex{display:flex !important}body{line-height:1.5em}#content{padding:20px}#sidebar{padding:30px;overflow:hidden}#sidebar > *:last-child{margin-bottom:2cm}.http-server-breadcrumbs{font-size:130%;margin:0 0 15px 0}#footer{font-size:.75em;padding:5px 30px;border-top:1px solid #ddd;text-align:right}#footer p{margin:0 0 0 1em;display:inline-block}#footer p:last-child{margin-right:30px}h1,h2,h3,h4,h5{font-weight:300}h1{font-size:2.5em;line-height:1.1em}h2{font-size:1.75em;margin:1em 0 .50em 0}h3{font-size:1.4em;margin:25px 0 10px 0}h4{margin:0;font-size:105%}a{color:#058;text-decoration:none;transition:color .3s ease-in-out}a:hover{color:#e82}.title code{font-weight:bold}h2[id^="header-"]{margin-top:2em}.ident{color:#900}pre code{background:#f8f8f8;font-size:.8em;line-height:1.4em}code{background:#f2f2f1;padding:1px 4px;overflow-wrap:break-word}h1 code{background:transparent}pre{background:#f8f8f8;border:0;border-top:1px solid #ccc;border-bottom:1px solid #ccc;margin:1em 
0;padding:1ex}#http-server-module-list{display:flex;flex-flow:column}#http-server-module-list div{display:flex}#http-server-module-list dt{min-width:10%}#http-server-module-list p{margin-top:0}.toc ul,#index{list-style-type:none;margin:0;padding:0}#index code{background:transparent}#index h3{border-bottom:1px solid #ddd}#index ul{padding:0}#index h4{margin-top:.6em;font-weight:bold}@media (min-width:200ex){#index .two-column{column-count:2}}@media (min-width:300ex){#index .two-column{column-count:3}}dl{margin-bottom:2em}dl dl:last-child{margin-bottom:4em}dd{margin:0 0 1em 3em}#header-classes + dl > dd{margin-bottom:3em}dd dd{margin-left:2em}dd p{margin:10px 0}.name{background:#eee;font-weight:bold;font-size:.85em;padding:5px 10px;display:inline-block;min-width:40%}.name:hover{background:#e0e0e0}.name > span:first-child{white-space:nowrap}.name.class > span:nth-child(2){margin-left:.4em}.inherited{color:#999;border-left:5px solid #eee;padding-left:1em}.inheritance em{font-style:normal;font-weight:bold}.desc h2{font-weight:400;font-size:1.25em}.desc h3{font-size:1em}.desc dt code{background:inherit}.source summary,.git-link-div{color:#666;text-align:right;font-weight:400;font-size:.8em;text-transform:uppercase}.source summary > *{white-space:nowrap;cursor:pointer}.git-link{color:inherit;margin-left:1em}.source pre{max-height:500px;overflow:auto;margin:0}.source pre code{font-size:12px;overflow:visible}.hlist{list-style:none}.hlist li{display:inline}.hlist li:after{content:',\2002'}.hlist li:last-child:after{content:none}.hlist .hlist{display:inline;padding-left:1em}img{max-width:100%}.admonition{padding:.1em .5em;margin-bottom:1em}.admonition-title{font-weight:bold}.admonition.note,.admonition.info,.admonition.important{background:#aef}.admonition.todo,.admonition.versionadded,.admonition.tip,.admonition.hint{background:#dfd}.admonition.warning,.admonition.versionchanged,.admonition.deprecated{background:#fd4}.admonition.error,.admonition.danger,.admonition.caution{background:lightpink}</style> +<style media="screen and (min-width: 700px)">@media screen and (min-width:700px){#sidebar{width:30%;height:100vh;overflow:auto;position:sticky;top:0}#content{width:70%;max-width:100ch;padding:3em 4em;border-left:1px solid #ddd}pre code{font-size:1em}.item .name{font-size:1em}main{display:flex;flex-direction:row-reverse;justify-content:flex-end}.toc ul ul,#index ul{padding-left:1.5em}.toc > ul > li{margin-top:.5em}}</style> +<style media="print">@media print{#sidebar h1{page-break-before:always}.source{display:none}}@media print{*{background:transparent !important;color:#000 !important;box-shadow:none !important;text-shadow:none !important}a[href]:after{content:" (" attr(href) ")";font-size:90%}a[href][title]:after{content:none}abbr[title]:after{content:" (" attr(title) ")"}.ir a:after,a[href^="javascript:"]:after,a[href^="#"]:after{content:""}pre,blockquote{border:1px solid #999;page-break-inside:avoid}thead{display:table-header-group}tr,img{page-break-inside:avoid}img{max-width:100% !important}@page{margin:0.5cm}p,h2,h3{orphans:3;widows:3}h1,h2,h3,h4,h5,h6{page-break-after:avoid}}</style> +</head> +<body> +<main> +<article id="content"> +<header> +<h1 class="title">Module <code>utility_functions</code></h1> +</header> +<section id="section-intro"> +<details class="source"> +<summary> +<span>Expand source code</span> +</summary> +<pre><code class="python">import ast +import inspect +import json +import logging +import numpy as np +import pandas as pd +import re +import requests +import stringdist + 
+from area import area +from predihood import model +from predihood.config import AVAILABLE_CLASSIFIERS, TRAIN_SIZE, TEST_SIZE, ENVIRONMENT_VARIABLES, FOLDER_DATASETS, FOLDER_DISTRIBUTION, TRANSLATION, OLD_PREFIX, NEW_PREFIX, FILE_MANUAL_ASSESSMENT + +log = logging.getLogger(__name__) + + +# 1. IRIS functions +def address_to_code(address): + """ + Get IRIS code from address. + + Args: + address: a string containing the address of the iris + + Returns: + a string containing the code of the searched address + """ + response = requests.get("https://pyris.datajazz.io/api/search/", params=[("q", address)]) + json_response = response.json() + return str(json_response["complete_code"]) if "complete_code" in json_response else None + + +def address_to_city(address): + """ + Get city from address. + + Args: + address: a string containing the address of the iris + + Returns: + a string containing the city of the searched address + """ + response = requests.get("https://pyris.datajazz.io/api/search/", params=[("q", address)]) + json_response = response.json() + return str(json_response["name"]) if "name" in json_response else None + + +def append_indicator(raw_indicator, iris, lst, append_col, indicators): + """ + Append the value of an INSEE indicator to a list corresponding to the data of the assessed IRIS (departure or arrival). + + Args: + raw_indicator: a string containing the name of the indicator, e.g. P14_POP + iris: the iris object that contains the indicator + lst: the list where to append the indicator's value + append_col: True to append the indicator to the cols variable + indicators: the list of the columns of the dataset, i.e. the indicators + + Returns: + the list containing the indicators' values and the other one containing the indicators' names. + """ + if raw_indicator in iris["properties"]["raw_indicators"]: + val_indicator = iris["properties"]["raw_indicators"][raw_indicator] + # append NaN if indicator is null, else append the value + lst.append(np.nan) if (val_indicator is None or not val_indicator) else lst.append(val_indicator) + else: + lst.append(np.nan) + if append_col: indicators.append(raw_indicator) + return lst, indicators + + +def append_target(row, target, lst): + """ + Append the target to the current list (departure or arrival). + + Args: + row: a list corresponding to the current row of dataset + target: a string containing the target to append, i.e. the value of the EV + lst: the list where the target is appended + + Returns: + the list with the appended target + """ + if target == OLD_PREFIX + "geographical_position" or target == NEW_PREFIX + "geographical_position": + # get only geo position without the city, e.g. South-East Lyon gives South-East + if str(row[target]) == "nan": + city = np.nan + else: + city = row[target].split(" ")[0] + # city_name = ''.join(split_city[1:len(split_city)]) + value = TRANSLATION[city] if city in TRANSLATION else np.nan + lst.append(value) + else: + value = TRANSLATION[row[target]] if row[target] in TRANSLATION else np.nan + lst.append(value) + + return lst + + +def indicator_full_to_short_label(full_label): + """ + Convert the full label of an indicator to its short label. + + Args: + full_label: a string containing the full label of the indicator, e.g. Population 0-2 y.o. in 2014 + + Returns: + the short label of the given indicator, e.g. 
P14_POP0002 + """ + indicators = model.get_indicators_dict() + key_list = list(indicators.keys()) + val_list = list(indicators.values()) + if full_label not in indicators.values(): + return key_list[val_list.index(full_label + " ")] + else: + return key_list[val_list.index(full_label)] + + +def indicator_short_to_full_label(short_label): + """ + Convert the short label of an indicator to its full label. + + Args: + short_label: a string containing the short label of the indicator, e.g. P14_POP0002 + + Returns: + the full label of the indicator, e.g. Population 0-2 y.o. in 2014 + """ + indicators = model.get_indicators_dict() + return indicators[short_label] + + +def apply_hierarchy(selected_indicators, hierarchy): + """ + Apply the hierarchy to the given indicators in order to merge child indicators up into their parents. + + Args: + selected_indicators: a list of indicators selected by the feature importance process. + hierarchy: the hierarchy of the indicators, i.e. for each indicator, its level in the hierarchy and its first ancestor. + + Returns: + the new list of selected indicators, where some children have been merged into their parents. + """ + list_indicators_FI = [selected_indicators[j][0] for j in range(len(selected_indicators))] + indexes_to_remove = [] + for i in range(len(selected_indicators)): + index_row = hierarchy.index[hierarchy["INDICATOR"] == selected_indicators[i][0]].tolist() + if len(index_row) == 0: + continue # skip this indicator because it does not exist in the hierarchy + else: + index_row = index_row[0] + level = hierarchy.iloc[index_row, 2] + ancestor = hierarchy.iloc[index_row, 3] + if level > 1: + if ancestor in list_indicators_FI: + index_ancestor = list_indicators_FI.index(ancestor) if ancestor in list_indicators_FI else None + while index_ancestor in indexes_to_remove: + ancestor2 = hierarchy.iloc[hierarchy.index[hierarchy["INDICATOR"] == ancestor].tolist()[0], 3] # name of ancestor + ancestor = ancestor2 + index_ancestor = list_indicators_FI.index(ancestor) if ancestor in list_indicators_FI else None + if hierarchy.iloc[hierarchy.index[hierarchy["INDICATOR"] == ancestor].tolist()[0], 2] == 1: + break + if index_ancestor not in indexes_to_remove: + selected_indicators[index_ancestor][1] += selected_indicators[i][1] + indexes_to_remove.append(i) + for index in sorted(indexes_to_remove, reverse=True): # remove in reverse order so as not to throw off subsequent indexes + del selected_indicators[index] + return selected_indicators + + +def get_classifier(name): + """ + Get an instance of a classifier with its name. + + Args: + name: a string containing the name of the desired classifier + + Returns: + an instance of a classifier + """ + classifier = AVAILABLE_CLASSIFIERS[name] + return classifier() + + +def set_classifier(classifier, parameters): + """ + Tune the classifier with the given parameters. + + Args: + classifier: an instance of the classifier to tune + parameters: a dictionary containing the tuning parameters + + Returns: + an instance of the tuned classifier + """ + keys_clf = list(classifier.get_params().keys()) + # remove None parameters and parameters that don't exist in sklearn (e.g. train and test size) + parameters = {key: value for key, value in parameters.items() if value != "" and key in keys_clf} + classifier.set_params(**parameters) + return classifier + + +def signature(chosen_algorithm): + """ + Get the signature of an algorithm, i.e. its parameters, the default values and the type of each parameter. 
The documentation of the algorithm must be in NumPy style. + + Args: + chosen_algorithm: the name of the algorithm as a string, e.g. RandomForestClassifier + + Returns: + the signature of the given algorithm, i.e. a dictionary containing for each parameter: + - a list of the accepted types + - the default value + - a description of the parameter (e.g. "The train_size parameter aims at tuning the size of the sample during the learning step.") + """ + # special case for no selection + if chosen_algorithm == "Algorithm": return json.dumps({}) + try: + # model = eval(_chosen_algorithm) # never use eval on untrusted strings + model = get_classifier(chosen_algorithm) + doc = model.__doc__ # TODO: specify case when there is no doc (user-implemented algorithm) + param_section = "Parameters" + dashes = "-" * len(param_section) # ------- + number_spaces = doc.find(dashes) - (doc.find(param_section) + len(param_section)) + attribute_section = "Attributes\n" + # sub_doc is the param section of the docs (i.e. without attributes and some text) + sub_doc = doc[doc.find(param_section) + len(param_section) + number_spaces + len(dashes) + len("\n"):doc.find(attribute_section)] + except Exception: + raise Exception("This algorithm does not exist for the moment...") + params = inspect.getfullargspec(model.__init__).args[1:] # get parameters' names -- [1:] to remove self parameter + defaults = inspect.getfullargspec(model.__init__).defaults # get default values + assert len(params) == len(defaults) + parameters = {} + for i in range(len(params)): + param_name = str(params[i]) + " : " + index_param = sub_doc.find(param_name) + index_next_newline = sub_doc[index_param:].find("\n") # find returns the first occurrence + parameter_string = sub_doc[index_param:index_param + index_next_newline] + doc_param = sub_doc[index_param + index_next_newline:] + index_end_sentence = re.search("(\.\s)", doc_param).start() # search for the first sentence + first_sentence = doc_param[:index_end_sentence + 1] + # format first sentence to have a prettier display. 
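+        # replacing newlines with spaces may leave runs of double spaces; the loop below collapses them until only single spaces remain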
+        first_sentence = first_sentence.replace("\n", " ") + while " " in first_sentence: + first_sentence = first_sentence.replace(" ", " ") + types_and_default = parameter_string[len(param_name):] + if "{" in types_and_default and "}" in types_and_default: # for cases like {"auto", "kd_tree", "brute"}, optional + types_and_default = types_and_default.replace("{", '') + types_and_default = types_and_default.replace("}", '') + if " or " in types_and_default: types_and_default = types_and_default.replace(" or ", ", ") + types_defaults_split = types_and_default.split(", ") + types = [] + default = -1 + variants = ["optional (default=", "optional (default = ", "optional", "(default=", "(default = ", "default ", + "default: ", "default="] # DO NOT CHANGE THE ORDER OF ITEMS + for item in types_defaults_split: + if not any(value in item for value in variants): + if item.startswith("length"): + pass # exceptions + else: + types.append(item) # item is a type + else: + for value in variants: + if value in item: + if value.startswith("optional ("): + default = item.split(value)[1][:-1] + elif value.startswith("(default"): + default = item.split(value)[1][:-1] + elif value.startswith("default"): + default = item.split(value)[1] + elif value == "optional": + default = "None" + break # do not iterate over other values + if default != -1 and default != "None": + type_of_default = str(type(ast.literal_eval(str(default))).__name__) + else: + type_of_default = "str" + types[:] = ["int" if x == "integer" else x for x in types] # replace "integer" by "int" + types[:] = ["bool" if x == "boolean" else x for x in types] # replace "boolean" by "bool" + types[:] = ["str" if x == "string" else x for x in types] # replace "string" by "str" + if len(types) == 0: types.append(type_of_default) # fill missing types + types[:] = [x for x in types if "None" not in x and "NoneType" not in x] # remove None type + parameters[param_name[:-3]] = {"types": types, "default": default, "description": first_sentence} # -3 to remove " : " + return parameters + + +def check_dataset_size(train_size, test_size): + """ + Check the train and test sizes and update them with defaults or divide them by 100 if needed. + + Args: + train_size: an integer or a float corresponding to the value for train size (a fraction between 0 and 1 or a percentage between 1 and 100) + test_size: an integer or a float corresponding to the value for test size (a fraction between 0 and 1 or a percentage between 1 and 100) + + Returns: + the train and test sizes + """ + if 0 < train_size < 1 and 0 < test_size < 1 and train_size + test_size == 1: + return train_size, test_size # sizes are valid fractions + elif 0 < train_size < 1 and 0 < test_size < 1: + train_size = TRAIN_SIZE # 0.8 + test_size = TEST_SIZE # 0.2 + + if 1 <= train_size < 100 and 1 <= test_size < 100 and train_size + test_size == 100: + return train_size / 100, test_size / 100 # sizes given as percentages + elif 1 <= train_size < 100 and 1 <= test_size < 100: + train_size = TRAIN_SIZE # 0.8 + test_size = TEST_SIZE # 0.2 + return train_size, test_size + + +# 2. list functions +def intersection(lst1, lst2): + """ + Intersect two lists. + + Args: + lst1: a list corresponding to the first list to be intersected + lst2: a list corresponding to the second list to be intersected + + Returns: + a list corresponding to the result of the intersection of the two given lists. + """ + return list(set(lst1) & set(lst2)) + + +def union(lst1, lst2): + """ + Unify two lists without repetitions. 
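+    Example: union([1, 2], [2, 3]) gives [1, 2, 3] (the order of elements is not guaranteed, since sets are used).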
+ + Args: + lst1: a list corresponding to the first list to unify + lst2: a list corresponding to the second list to unify + + Returns: + a list corresponding to the union of the two lists + """ + return list(set(lst1) | set(lst2)) + + +def similarity(value, lst): + """ + Check whether a value is similar enough to the data. + + Args: + value: the value on which the similarity is computed + lst: the list containing other values to check similarity + + Returns: + the index of the similar value, -2 if the value is already present, -1 if no value is similar enough + """ + for i in range(len(lst)): + if value in lst[i]: return -2 # this value has already been appended + for elem in lst[i]: + if not isinstance(elem, float): + dissimilarity = stringdist.levenshtein(str(elem), str(value)) # compute the Levenshtein distance + if dissimilarity == 1: return i # if the given value has only one difference with the current value, store it as a similarity + return -1 + + +def get_most_frequent(lst): + """ + Get the most frequent item in a list. If several elements are equally frequent, the first one is returned. + + Args: + lst: the list in which to find the most frequent element + + Returns: + a dictionary containing the most frequent element of the given list and its count + """ + most_frequent_element = max(set(lst), key=lst.count) + dictionary = {"most_frequent": most_frequent_element, "count_frequent": lst.count(most_frequent_element)} + return dictionary + + +# 3. plot functions +def auto_label(rectangles, axes): + """ + Add a text label above each bar in rectangles, displaying its value. + + Args: + rectangles: the bars of the plot. + axes: the axes of the plot. + """ + for rect in rectangles: + height = rect.get_height() + axes.annotate("{}".format(height), xy=(rect.get_x() + rect.get_width() / 2, height), xytext=(0, 3), textcoords="offset points", ha="center", va="bottom") + + +def add_assessment_to_file(code_iris, values): + """ + Add an assessed IRIS to the CSV file. 
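+    If the IRIS is already present in the file, the string "iris already assessed" is returned instead.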
+ + Args: + code_iris: a string corresponding to the code of the IRIS (9 digits) + values: the values of the 6 EV that represent the environment of the assessed IRIS + + Returns: + the string "okay" if the assessed IRIS has been added to the CSV file + """ + df = pd.read_csv(FILE_MANUAL_ASSESSMENT) + codes = df["CODE"].tolist() + codes_lst = [str(elem) for elem in codes] + if code_iris in codes_lst: + return "iris already assessed" + else: + iris_data = model.get_iris_from_code(code_iris) + iris_coords = model.get_coords_from_code(code_iris) + area_iris = area(iris_coords) / 1000000 if iris_coords is not None else None + density_iris = iris_data["properties"]["raw_indicators"]["P14_POP"] / area_iris if area_iris is not None and area_iris > 0 else None + + iris = [] + cols = ["CODE", "AREA", "DENSITY"] + # adding code, area and density + iris.append(code_iris) + iris.append(area_iris) + iris.append(density_iris) + + # adding insee indicators + indicators = model.get_indicators_list() + indicators_to_remove = ["IRIS", "REG", "DEP", "UU2010", "COM", "LIBCOM", "TRIRIS", "GRD_QUART", "LIBIRIS", "TYP_IRIS", "MODIF_IRIS", "LAB_IRIS", "LIB_IRIS", "LIB_COM", "CODGEO", "LIBGEO"] + for indicator in indicators_to_remove: + if indicator in indicators: + indicators.remove(indicator) + + for raw_indicator in indicators: + iris, cols = append_indicator(raw_indicator, iris_data, iris, True, cols) + + iris.extend(values) # adding assessed values + cols.extend(ENVIRONMENT_VARIABLES) + df = pd.DataFrame([iris]) + df.to_csv(FILE_MANUAL_ASSESSMENT, mode="a", index=False, header=False) # DO NOT ERASE HEADER IN THE CSV FILE + return "okay"</code></pre> +</details> +</section> +<section> +</section> +<section> +</section> +<section> +<h2 class="section-title" id="header-functions">Functions</h2> +<dl> +<dt id="utility_functions.add_assessment_to_file"><code class="name flex"> +<span>def <span class="ident">add_assessment_to_file</span></span>(<span>code_iris, values)</span> +</code></dt> +<dd> +<div class="desc"><p>Add an assessed IRIS to the CSV file.</p> +<h2 id="args">Args</h2> +<dl> +<dt><strong><code>code_iris</code></strong></dt> +<dd>a string corresponding to the code of the IRIS (9 digits)</dd> +<dt><strong><code>values</code></strong></dt> +<dd>the values of the 6 EV that represent the environment of the assessed IRIS</dd> +</dl> +<h2 id="returns">Returns</h2> +<dl> +<dt><code>the string "okay" if the assessed IRIS has been added to the CSV file</code></dt> +<dd> </dd> +</dl></div> +<details class="source"> +<summary> +<span>Expand source code</span> +</summary> +<pre><code class="python">def add_assessment_to_file(code_iris, values): + """ + Add an assessed IRIS to the CSV file. 
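+    If the IRIS is already present in the file, the string "iris already assessed" is returned instead.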
+ + Args: + code_iris: a string corresponding to the code of the IRIS (9 digits) + values: the values of the 6 EV that represent the environment of the assessed IRIS + + Returns: + the string "okay" if the assessed IRIS has been added to the CSV file + """ + df = pd.read_csv(FILE_MANUAL_ASSESSMENT) + codes = df["CODE"].tolist() + codes_lst = [str(elem) for elem in codes] + if code_iris in codes_lst: + return "iris already assessed" + else: + iris_data = model.get_iris_from_code(code_iris) + iris_coords = model.get_coords_from_code(code_iris) + area_iris = area(iris_coords) / 1000000 if iris_coords is not None else None + density_iris = iris_data["properties"]["raw_indicators"]["P14_POP"] / area_iris if area_iris is not None and area_iris > 0 else None + + iris = [] + cols = ["CODE", "AREA", "DENSITY"] + # adding code, area and density + iris.append(code_iris) + iris.append(area_iris) + iris.append(density_iris) + + # adding insee indicators + indicators = model.get_indicators_list() + indicators_to_remove = ["IRIS", "REG", "DEP", "UU2010", "COM", "LIBCOM", "TRIRIS", "GRD_QUART", "LIBIRIS", "TYP_IRIS", "MODIF_IRIS", "LAB_IRIS", "LIB_IRIS", "LIB_COM", "CODGEO", "LIBGEO"] + for indicator in indicators_to_remove: + if indicator in indicators: + indicators.remove(indicator) + + for raw_indicator in indicators: + iris, cols = append_indicator(raw_indicator, iris_data, iris, True, cols) + + iris.extend(values) # adding assessed values + cols.extend(ENVIRONMENT_VARIABLES) + df = pd.DataFrame([iris]) + df.to_csv(FILE_MANUAL_ASSESSMENT, mode="a", index=False, header=False) # DO NOT ERASE HEADER IN THE CSV FILE + return "okay"</code></pre> +</details> +</dd> +<dt id="utility_functions.address_to_city"><code class="name flex"> +<span>def <span class="ident">address_to_city</span></span>(<span>address)</span> +</code></dt> +<dd> +<div class="desc"><p>Get city from address.</p> +<h2 id="args">Args</h2> +<dl> +<dt><strong><code>address</code></strong></dt> +<dd>a string containing the address of the iris</dd> +</dl> +<h2 id="returns">Returns</h2> +<dl> +<dt><code>a string containing the city</code> of <code>the searched address</code></dt> +<dd> </dd> +</dl></div> +<details class="source"> +<summary> +<span>Expand source code</span> +</summary> +<pre><code class="python">def address_to_city(address): + """ + Get city from address. + + Args: + address: a string containing the address of the iris + + Returns: + a string containing the city of the searched address + """ + response = requests.get("https://pyris.datajazz.io/api/search/", params=[("q", address)]) + json_response = response.json() + return str(json_response["name"]) if "name" in json_response else None</code></pre> +</details> +</dd> +<dt id="utility_functions.address_to_code"><code class="name flex"> +<span>def <span class="ident">address_to_code</span></span>(<span>address)</span> +</code></dt> +<dd> +<div class="desc"><p>Get IRIS code from address.</p> +<h2 id="args">Args</h2> +<dl> +<dt><strong><code>address</code></strong></dt> +<dd>a string containing the address of the iris</dd> +</dl> +<h2 id="returns">Returns</h2> +<dl> +<dt><code>a string containing the code</code> of <code>the searched address</code></dt> +<dd> </dd> +</dl></div> +<details class="source"> +<summary> +<span>Expand source code</span> +</summary> +<pre><code class="python">def address_to_code(address): + """ + Get IRIS code from address. 
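+    The lookup is delegated to the PyRIS web service; None is returned when no IRIS matches the address.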
+
+    Args:
+        address: a string containing the address of the iris
+
+    Returns:
+        a string containing the code of the searched address
+    """
+    response = requests.get("https://pyris.datajazz.io/api/search/", params=[("q", address)])
+    json_response = response.json()
+    return str(json_response["complete_code"]) if "complete_code" in json_response else None</code></pre>
+</details>
+</dd>
+<dt id="utility_functions.append_indicator"><code class="name flex">
+<span>def <span class="ident">append_indicator</span></span>(<span>raw_indicator, iris, lst, append_col, indicators)</span>
+</code></dt>
+<dd>
+<div class="desc"><p>Append the value of an INSEE indicator to a list corresponding to the data of the assessed IRIS (departure or arrival).</p>
+<h2 id="args">Args</h2>
+<dl>
+<dt><strong><code>raw_indicator</code></strong></dt>
+<dd>a string containing the name of the indicator, e.g. P14_POP</dd>
+<dt><strong><code>iris</code></strong></dt>
+<dd>the iris object that contains the indicator</dd>
+<dt><strong><code>lst</code></strong></dt>
+<dd>the list to which the indicator's value is appended</dd>
+<dt><strong><code>append_col</code></strong></dt>
+<dd>True to append the indicator to the cols variable</dd>
+<dt><strong><code>indicators</code></strong></dt>
+<dd>the list of the columns of the dataset, i.e. the indicators</dd>
+</dl>
+<h2 id="returns">Returns</h2>
+<p>the list containing indicators' values and the one containing indicator names.</p></div>
+<details class="source">
+<summary>
+<span>Expand source code</span>
+</summary>
+<pre><code class="python">def append_indicator(raw_indicator, iris, lst, append_col, indicators):
+    """
+    Append the value of an INSEE indicator to a list corresponding to the data of the assessed IRIS (departure or arrival).
+
+    Args:
+        raw_indicator: a string containing the name of the indicator, e.g. P14_POP
+        iris: the iris object that contains the indicator
+        lst: the list to which the indicator's value is appended
+        append_col: True to append the indicator to the cols variable
+        indicators: the list of the columns of the dataset, i.e. the indicators
+
+    Returns:
+        the list containing indicators' values and the one containing indicator names.
+    """
+    if raw_indicator in iris["properties"]["raw_indicators"]:
+        val_indicator = iris["properties"]["raw_indicators"][raw_indicator]
+        # append NaN if indicator is null, else append the value
+        lst.append(np.nan) if (val_indicator is None or not val_indicator) else lst.append(val_indicator)
+    else:
+        lst.append(np.nan)
+    if append_col: indicators.append(raw_indicator)
+    return lst, indicators</code></pre>
+</details>
+</dd>
+<dt id="utility_functions.append_target"><code class="name flex">
+<span>def <span class="ident">append_target</span></span>(<span>row, target, lst)</span>
+</code></dt>
+<dd>
+<div class="desc"><p>Append the target to the current list (departure or arrival).</p>
+<h2 id="args">Args</h2>
+<dl>
+<dt><strong><code>row</code></strong></dt>
+<dd>a list corresponding to the current row of the dataset</dd>
+<dt><strong><code>target</code></strong></dt>
+<dd>a string containing the target to append, i.e.
the value of the EV</dd>
+<dt><strong><code>lst</code></strong></dt>
+<dd>the list to which the target is appended</dd>
+</dl>
+<h2 id="returns">Returns</h2>
+<dl>
+<dt><code>the list with the appended target</code></dt>
+<dd> </dd>
+</dl></div>
+<details class="source">
+<summary>
+<span>Expand source code</span>
+</summary>
+<pre><code class="python">def append_target(row, target, lst):
+    """
+    Append the target to the current list (departure or arrival).
+
+    Args:
+        row: a list corresponding to the current row of the dataset
+        target: a string containing the target to append, i.e. the value of the EV
+        lst: the list to which the target is appended
+
+    Returns:
+        the list with the appended target
+    """
+    if target == OLD_PREFIX + "geographical_position" or target == NEW_PREFIX + "geographical_position":
+        # get only the geo position without the city, e.g. South-East Lyon gives South-East
+        if str(row[target]) == "nan":
+            city = np.nan
+        else:
+            city = row[target].split(" ")[0]
+            # city_name = ''.join(split_city[1:len(split_city)])
+        value = TRANSLATION[city] if city in TRANSLATION else np.nan
+        lst.append(value)
+    else:
+        value = TRANSLATION[row[target]] if row[target] in TRANSLATION else np.nan
+        lst.append(value)
+
+    return lst</code></pre>
+</details>
+</dd>
+<dt id="utility_functions.apply_hierarchy"><code class="name flex">
+<span>def <span class="ident">apply_hierarchy</span></span>(<span>selected_indicators, hierarchy)</span>
+</code></dt>
+<dd>
+<div class="desc"><p>Apply the hierarchy to the given indicators in order to merge child indicators up into their parents.</p>
+<h2 id="args">Args</h2>
+<dl>
+<dt><strong><code>selected_indicators</code></strong></dt>
+<dd>a list of indicators selected by the feature importance process.</dd>
+<dt><strong><code>hierarchy</code></strong></dt>
+<dd>the hierarchy of the indicators, i.e. for each indicator, its level in the hierarchy and its first ancestor.</dd>
+</dl>
+<h2 id="returns">Returns</h2>
+<p>the new list of selected indicators, in which some children have been merged up into their parents.</p></div>
+<details class="source">
+<summary>
+<span>Expand source code</span>
+</summary>
+<pre><code class="python">def apply_hierarchy(selected_indicators, hierarchy):
+    """
+    Apply the hierarchy to the given indicators in order to merge child indicators up into their parents.
+
+    Args:
+        selected_indicators: a list of indicators selected by the feature importance process.
+        hierarchy: the hierarchy of the indicators, i.e. for each indicator, its level in the hierarchy and its first ancestor.
+
+    Returns:
+        the new list of selected indicators, in which some children have been merged up into their parents.
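+
+    Example:
+        assuming the hierarchy declares P14_POP0002 as a child of P14_POP and both
+        are selected, the importance of P14_POP0002 is added to that of P14_POP and
+        P14_POP0002 is then removed from the returned list.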
+    """
+    list_indicators_FI = [selected_indicators[j][0] for j in range(len(selected_indicators))]
+    indexes_to_remove = []
+    for i in range(len(selected_indicators)):
+        index_row = hierarchy.index[hierarchy["INDICATOR"] == selected_indicators[i][0]].tolist()
+        if len(index_row) == 0:
+            continue  # skip this indicator because it does not exist in the hierarchy
+        else:
+            index_row = index_row[0]
+        level = hierarchy.iloc[index_row, 2]
+        ancestor = hierarchy.iloc[index_row, 3]
+        if level > 1:
+            if ancestor in list_indicators_FI:
+                index_ancestor = list_indicators_FI.index(ancestor) if ancestor in list_indicators_FI else None
+                while index_ancestor in indexes_to_remove:
+                    ancestor2 = hierarchy.iloc[hierarchy.index[hierarchy["INDICATOR"] == ancestor].tolist()[0], 3]  # name of ancestor
+                    ancestor = ancestor2
+                    index_ancestor = list_indicators_FI.index(ancestor) if ancestor in list_indicators_FI else None
+                    if hierarchy.iloc[hierarchy.index[hierarchy["INDICATOR"] == ancestor].tolist()[0], 2] == 1:
+                        break
+                if index_ancestor not in indexes_to_remove:
+                    selected_indicators[index_ancestor][1] += selected_indicators[i][1]
+                    indexes_to_remove.append(i)
+    for index in sorted(indexes_to_remove, reverse=True):  # remove in reverse order so as not to throw off subsequent indexes
+        del selected_indicators[index]
+    return selected_indicators</code></pre>
+</details>
+</dd>
+<dt id="utility_functions.auto_label"><code class="name flex">
+<span>def <span class="ident">auto_label</span></span>(<span>rectangles, axes)</span>
+</code></dt>
+<dd>
+<div class="desc"><p>Adds a text label above each bar in rectangles, displaying its value.</p>
+<h2 id="args">Args</h2>
+<dl>
+<dt><strong><code>rectangles</code></strong></dt>
+<dd>the bars of the plot.</dd>
+<dt><strong><code>axes</code></strong></dt>
+<dd>the axes of the plot.</dd>
+</dl></div>
+<details class="source">
+<summary>
+<span>Expand source code</span>
+</summary>
+<pre><code class="python">def auto_label(rectangles, axes):
+    """
+    Adds a text label above each bar in rectangles, displaying its value.
+
+    Args:
+        rectangles: the bars of the plot.
+        axes: the axes of the plot.
+    """
+    for rect in rectangles:
+        height = rect.get_height()
+        axes.annotate("{}".format(height), xy=(rect.get_x() + rect.get_width() / 2, height), xytext=(0, 3), textcoords="offset points", ha="center", va="bottom")</code></pre>
+</details>
+</dd>
+<dt id="utility_functions.check_dataset_size"><code class="name flex">
+<span>def <span class="ident">check_dataset_size</span></span>(<span>train_size, test_size)</span>
+</code></dt>
+<dd>
+<div class="desc"><p>Check the train and test sizes and update them with the defaults or divide them by 100 if needed.</p>
+<h2 id="args">Args</h2>
+<dl>
+<dt><strong><code>train_size</code></strong></dt>
+<dd>an integer or a float corresponding to the value for train size (should be between 0 and 1)</dd>
+<dt><strong><code>test_size</code></strong></dt>
+<dd>an integer or a float corresponding to the value for test size (should be between 0 and 1)</dd>
+</dl>
+<h2 id="returns">Returns</h2>
+<dl>
+<dt><code>the train and test sizes</code></dt>
+<dd> </dd>
+</dl></div>
+<details class="source">
+<summary>
+<span>Expand source code</span>
+</summary>
+<pre><code class="python">def check_dataset_size(train_size, test_size):
+    """
+    Check the train and test sizes and update them with the defaults or divide them by 100 if needed.
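+    Valid fractions must sum to 1 and valid percentages to 100; any other combination falls back to the default sizes.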
+
+    Args:
+        train_size: an integer or a float corresponding to the value for train size (should be between 0 and 1)
+        test_size: an integer or a float corresponding to the value for test size (should be between 0 and 1)
+
+    Returns:
+        the train and test sizes
+    """
+    if 0 < train_size < 1 and 0 < test_size < 1 and train_size + test_size == 1:
+        return train_size, test_size  # valid fractions, return as-is
+    elif 0 < train_size < 1 and 0 < test_size < 1:
+        train_size = TRAIN_SIZE  # 0.8
+        test_size = TEST_SIZE  # 0.2
+
+    if 1 <= train_size < 100 and 1 <= test_size < 100 and train_size + test_size == 100:
+        return train_size / 100, test_size / 100  # valid percentages, convert to fractions
+    elif 1 <= train_size < 100 and 1 <= test_size < 100:
+        train_size = TRAIN_SIZE  # 0.8
+        test_size = TEST_SIZE  # 0.2
+    return train_size, test_size</code></pre>
+</details>
+</dd>
+<dt id="utility_functions.get_classifier"><code class="name flex">
+<span>def <span class="ident">get_classifier</span></span>(<span>name)</span>
+</code></dt>
+<dd>
+<div class="desc"><p>Get an instance of a classifier with its name.</p>
+<h2 id="args">Args</h2>
+<dl>
+<dt><strong><code>name</code></strong></dt>
+<dd>a string containing the name of the desired classifier</dd>
+</dl>
+<h2 id="returns">Returns</h2>
+<dl>
+<dt><code>an instance</code> of <code>a classifier</code></dt>
+<dd> </dd>
+</dl></div>
+<details class="source">
+<summary>
+<span>Expand source code</span>
+</summary>
+<pre><code class="python">def get_classifier(name):
+    """
+    Get an instance of a classifier with its name.
+
+    Args:
+        name: a string containing the name of the desired classifier
+
+    Returns:
+        an instance of a classifier
+    """
+    classifier = AVAILABLE_CLASSIFIERS[name]
+    return classifier()</code></pre>
+</details>
+</dd>
+<dt id="utility_functions.get_most_frequent"><code class="name flex">
+<span>def <span class="ident">get_most_frequent</span></span>(<span>lst)</span>
+</code></dt>
+<dd>
+<div class="desc"><p>Get the most frequent item in a list. If several elements are equally frequent, the first one is returned.</p>
+<h2 id="args">Args</h2>
+<dl>
+<dt><strong><code>lst</code></strong></dt>
+<dd>the list in which to find the most frequent element</dd>
+</dl>
+<h2 id="returns">Returns</h2>
+<dl>
+<dt><code>a dictionary containing the most frequent element</code> of <code>the given list and its count</code></dt>
+<dd> </dd>
+</dl></div>
+<details class="source">
+<summary>
+<span>Expand source code</span>
+</summary>
+<pre><code class="python">def get_most_frequent(lst):
+    """
+    Get the most frequent item in a list. If several elements are equally frequent, the first one is returned.
+
+    Args:
+        lst: the list in which to find the most frequent element
+
+    Returns:
+        a dictionary containing the most frequent element of the given list and its count
+    """
+    most_frequent_element = max(set(lst), key=lst.count)
+    dictionary = {"most_frequent": most_frequent_element, "count_frequent": lst.count(most_frequent_element)}
+    return dictionary</code></pre>
+</details>
+</dd>
+<dt id="utility_functions.indicator_full_to_short_label"><code class="name flex">
+<span>def <span class="ident">indicator_full_to_short_label</span></span>(<span>full_label)</span>
+</code></dt>
+<dd>
+<div class="desc"><p>Convert the full label of an indicator to its short label.</p>
+<h2 id="args">Args</h2>
+<dl>
+<dt><strong><code>full_label</code></strong></dt>
+<dd>a string containing the full label of the indicator, e.g. Population 0-2 y.o. in 2014</dd>
+</dl>
+<h2 id="returns">Returns</h2>
+<dl>
+<dt><code>the short label</code> of <code>the given indicator, e.g.
P14_POP0002</code></dt> +<dd> </dd> +</dl></div> +<details class="source"> +<summary> +<span>Expand source code</span> +</summary> +<pre><code class="python">def indicator_full_to_short_label(full_label): + """ + Convert the full label of an indicator to its short label. + + Args: + full_label: a string containing the full label of the indicator, e.g. Population 0-2 y.o. in 2014 + + Returns: + the short label of the given indicator, e.g. P14_POP0002 + """ + indicators = model.get_indicators_dict() + key_list = list(indicators.keys()) + val_list = list(indicators.values()) + if full_label not in indicators.values(): + return key_list[val_list.index(full_label + " ")] + else: + return key_list[val_list.index(full_label)]</code></pre> +</details> +</dd> +<dt id="utility_functions.indicator_short_to_full_label"><code class="name flex"> +<span>def <span class="ident">indicator_short_to_full_label</span></span>(<span>short_label)</span> +</code></dt> +<dd> +<div class="desc"><p>Convert the short label of an indicator to its full label.</p> +<h2 id="args">Args</h2> +<dl> +<dt><strong><code>short_label</code></strong></dt> +<dd>a string containing the short label of the indicator, e.g. P14_POP0002</dd> +</dl> +<h2 id="returns">Returns</h2> +<dl> +<dt><code>the full label</code> of <code>the indicator, e.g. Population 0-2 y.o. in 2014</code></dt> +<dd> </dd> +</dl></div> +<details class="source"> +<summary> +<span>Expand source code</span> +</summary> +<pre><code class="python">def indicator_short_to_full_label(short_label): + """ + Convert the short label of an indicator to its full label. + + Args: + short_label: a string containing the short label of the indicator, e.g. P14_POP0002 + + Returns: + the full label of the indicator, e.g. Population 0-2 y.o. in 2014 + """ + indicators = model.get_indicators_dict() + return indicators[short_label]</code></pre> +</details> +</dd> +<dt id="utility_functions.intersection"><code class="name flex"> +<span>def <span class="ident">intersection</span></span>(<span>lst1, lst2)</span> +</code></dt> +<dd> +<div class="desc"><p>Intersect two lists.</p> +<h2 id="args">Args</h2> +<dl> +<dt><strong><code>lst1</code></strong></dt> +<dd>a list corresponding to the first list to be intersected</dd> +<dt><strong><code>lst2</code></strong></dt> +<dd>a list corresponding to the second list to be intersected</dd> +</dl> +<h2 id="returns">Returns</h2> +<p>a list corresponding to the result of the intersection of the two given lists.</p></div> +<details class="source"> +<summary> +<span>Expand source code</span> +</summary> +<pre><code class="python">def intersection(lst1, lst2): + """ + Intersect two lists. + + Args: + lst1: a list corresponding to the first list to be intersected + lst2: a list corresponding to the second list to be intersected + + Returns: + a list corresponding to the result of the intersection of the two given lists. 
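+
+    Example:
+        >>> intersection([1, 2, 3], [2, 4])
+        [2]
+
+        Note that the element order of the result is not guaranteed since Python sets are used.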
+ """ + return list(set(lst1) & set(lst2))</code></pre> +</details> +</dd> +<dt id="utility_functions.set_classifier"><code class="name flex"> +<span>def <span class="ident">set_classifier</span></span>(<span>classifier, parameters)</span> +</code></dt> +<dd> +<div class="desc"><p>Tune the classifier with the given parameters.</p> +<h2 id="args">Args</h2> +<dl> +<dt><strong><code>classifier</code></strong></dt> +<dd>an instance of the classifier to tune</dd> +<dt><strong><code>parameters</code></strong></dt> +<dd>a dictionary containing the tuning parameters</dd> +</dl> +<h2 id="returns">Returns</h2> +<dl> +<dt><code>an instance</code> of <code>the tuned classifier</code></dt> +<dd> </dd> +</dl></div> +<details class="source"> +<summary> +<span>Expand source code</span> +</summary> +<pre><code class="python">def set_classifier(classifier, parameters): + """ + Tune the classifier with the given parameters. + + Args: + classifier: an instance of the classifier to tune + parameters: a dictionary containing the tuning parameters + + Returns: + an instance of the tuned classifier + """ + keys_clf = list(classifier.get_params().keys()) + # remove None parameters and parameters that don't exist in sklearn (e.g. train and test size) + parameters = {key: value for key, value in parameters.items() if value != "" and key in keys_clf} + classifier.set_params(**parameters) + return classifier</code></pre> +</details> +</dd> +<dt id="utility_functions.signature"><code class="name flex"> +<span>def <span class="ident">signature</span></span>(<span>chosen_algorithm)</span> +</code></dt> +<dd> +<div class="desc"><p>Get the signature of an algorithm, i.e. its parameters, the default values and the type of each parameter. The documentation of the algorithm must be in NumPy style.</p> +<h2 id="args">Args</h2> +<dl> +<dt><strong><code>chosen_algorithm</code></strong></dt> +<dd>the name of the algorithm in str, e.g. RandomForestClassifier</dd> +</dl> +<h2 id="returns">Returns</h2> +<p>the signature of the given algorithm, i.e. a dictionary containing for each parameter: +- a list of the accepted types +- the default value +- a description of the parameter (e.g. "The train_size parameter aims at tuning the size of the sample during the learning step.")</p></div> +<details class="source"> +<summary> +<span>Expand source code</span> +</summary> +<pre><code class="python">def signature(chosen_algorithm): + """ + Get the signature of an algorithm, i.e. its parameters, the default values and the type of each parameter. The documentation of the algorithm must be in NumPy style. + + Args: + chosen_algorithm: the name of the algorithm in str, e.g. RandomForestClassifier + + Returns: + the signature of the given algorithm, i.e. a dictionary containing for each parameter: + - a list of the accepted types + - the default value + - a description of the parameter (e.g. "The train_size parameter aims at tuning the size of the sample during the learning step.") + """ + # special case for no selection + if chosen_algorithm == "Algorithm": return json.dumps({}) + try: + # model = eval(_chosen_algorithm) # never use eval on untrusted strings + model = get_classifier(chosen_algorithm) + doc = model.__doc__ # TODO: specify case when there is no doc (user-implemented algorithm) + param_section = "Parameters" + dashes = "-" * len(param_section) # ------- + number_spaces = doc.find(dashes) - (doc.find(param_section) + len(param_section)) + attribute_section = "Attributes\n" + # sub_doc is the param section of the docs (i.e. 
without attributes and some text)
+        sub_doc = doc[doc.find(param_section) + len(param_section) + number_spaces + len(dashes) + len("\n"):doc.find(attribute_section)]
+    except:
+        raise Exception("This algorithm does not exist for the moment...")
+    params = inspect.getfullargspec(model.__init__).args[1:]  # get parameter names -- [1:] to remove the self parameter
+    defaults = inspect.getfullargspec(model.__init__).defaults  # get default values
+    assert len(params) == len(defaults)
+    parameters = {}
+    for i in range(len(params)):
+        param_name = str(params[i]) + " : "
+        index_param = sub_doc.find(param_name)
+        index_next_newline = sub_doc[index_param:].find("\n")  # find returns the first occurrence
+        parameter_string = sub_doc[index_param:index_param + index_next_newline]
+        doc_param = sub_doc[index_param + index_next_newline:]
+        index_end_sentence = re.search("(\.\s)", doc_param).start()  # search for the first sentence
+        first_sentence = doc_param[:index_end_sentence + 1]
+        # format first sentence to have a prettier display.
+        first_sentence = first_sentence.replace("\n", " ")
+        while "  " in first_sentence:
+            first_sentence = first_sentence.replace("  ", " ")
+        types_and_default = parameter_string[len(param_name):]
+        if "{" in types_and_default and "}" in types_and_default:  # for cases like {"auto", "kd_tree", "brute"}, optional
+            types_and_default = types_and_default.replace("{", '')
+            types_and_default = types_and_default.replace("}", '')
+        if " or " in types_and_default: types_and_default = types_and_default.replace(" or ", ", ")
+        types_defaults_split = types_and_default.split(", ")
+        types = []
+        default = -1
+        variants = ["optional (default=", "optional (default = ", "optional", "(default=", "(default = ", "default ",
+                "default: ", "default="]  # DO NOT CHANGE THE ORDER OF ITEMS
+        for item in types_defaults_split:
+            if not any(value in item for value in variants):
+                if item.startswith("length"):
+                    pass  # skip type descriptions starting with "length"
+                else:
+                    types.append(item)  # item is a type
+            else:
+                for value in variants:
+                    if value in item:
+                        if value.startswith("optional ("):
+                            default = item.split(value)[1][:-1]
+                        elif value.startswith("(default"):
+                            default = item.split(value)[1][:-1]
+                        elif value.startswith("default"):
+                            default = item.split(value)[1]
+                        elif value == "optional":
+                            default = "None"
+                        break  # do not iterate over other values
+        if default != -1 and default != "None":
+            type_of_default = str(type(ast.literal_eval(str(default))).__name__)
+        else:
+            type_of_default = "str"
+        types[:] = ["int" if x == "integer" else x for x in types]  # replace "integer" by "int"
+        types[:] = ["bool" if x == "boolean" else x for x in types]  # replace "boolean" by "bool"
+        types[:] = ["str" if x == "string" else x for x in types]  # replace "string" by "str"
+        if len(types) == 0: types.append(type_of_default)  # fill missing types
+        types[:] = [x for x in types if "None" not in x and "NoneType" not in x]  # remove None type
+        parameters[param_name[:-3]] = {"types": types, "default": default, "description": first_sentence}  # -3 to remove " : "
+    return parameters</code></pre>
+</details>
+</dd>
+<dt id="utility_functions.similarity"><code class="name flex">
+<span>def <span class="ident">similarity</span></span>(<span>value, lst)</span>
+</code></dt>
+<dd>
+<div class="desc"><p>Check if a value is similar enough to the data.</p>
+<h2 id="args">Args</h2>
+<dl>
+<dt><strong><code>value</code></strong></dt>
+<dd>the value on which the similarity is computed</dd>
+<dt><strong><code>lst</code></strong></dt>
+<dd>the list containing
other values to check similarity</dd>
+</dl>
+<h2 id="returns">Returns</h2>
+<dl>
+<dt><code>the index</code> of <code>the similar value, -2 if the value is already present, -1 if no value is similar enough</code></dt>
+<dd> </dd>
+</dl></div>
+<details class="source">
+<summary>
+<span>Expand source code</span>
+</summary>
+<pre><code class="python">def similarity(value, lst):
+    """
+    Check if a value is similar enough to the data.
+
+    Args:
+        value: the value on which the similarity is computed
+        lst: the list containing other values to check similarity
+
+    Returns:
+        the index of the similar value, -2 if the value is already present, -1 if no value is similar enough
+    """
+    for i in range(len(lst)):
+        if value in lst[i]: return -2  # this value has already been appended
+        for elem in lst[i]:
+            if not isinstance(elem, float):
+                dissimilarity = stringdist.levenshtein(str(elem), str(value))  # compute the Levenshtein distance
+                if dissimilarity == 1: return i  # if the given value differs from the current value by only one character, treat it as similar
+    return -1</code></pre>
+</details>
+</dd>
+<dt id="utility_functions.union"><code class="name flex">
+<span>def <span class="ident">union</span></span>(<span>lst1, lst2)</span>
+</code></dt>
+<dd>
+<div class="desc"><p>Unify two lists without repetitions.</p>
+<h2 id="args">Args</h2>
+<dl>
+<dt><strong><code>lst1</code></strong></dt>
+<dd>a list corresponding to the first list to append</dd>
+<dt><strong><code>lst2</code></strong></dt>
+<dd>a list corresponding to the second list to append</dd>
+</dl>
+<h2 id="returns">Returns</h2>
+<dl>
+<dt><code>a list corresponding to the <a title="utility_functions.union" href="#utility_functions.union">union()</a></code> of <code>the two lists</code></dt>
+<dd> </dd>
+</dl></div>
+<details class="source">
+<summary>
+<span>Expand source code</span>
+</summary>
+<pre><code class="python">def union(lst1, lst2):
+    """
+    Unify two lists without repetitions.
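+    The result is computed with set operations, so the order of the elements is not guaranteed.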
+ + Args: + lst1: a list corresponding to the first list to append + lst2: a list corresponding to the second list to append + + Returns: + a list corresponding to the union of the two lists + """ + return list(set(lst1) | set(lst2))</code></pre> +</details> +</dd> +</dl> +</section> +<section> +</section> +</article> +<nav id="sidebar"> +<h1>Index</h1> +<div class="toc"> +<ul></ul> +</div> +<ul id="index"> +<li><h3><a href="#header-functions">Functions</a></h3> +<ul class=""> +<li><code><a title="utility_functions.add_assessment_to_file" href="#utility_functions.add_assessment_to_file">add_assessment_to_file</a></code></li> +<li><code><a title="utility_functions.address_to_city" href="#utility_functions.address_to_city">address_to_city</a></code></li> +<li><code><a title="utility_functions.address_to_code" href="#utility_functions.address_to_code">address_to_code</a></code></li> +<li><code><a title="utility_functions.append_indicator" href="#utility_functions.append_indicator">append_indicator</a></code></li> +<li><code><a title="utility_functions.append_target" href="#utility_functions.append_target">append_target</a></code></li> +<li><code><a title="utility_functions.apply_hierarchy" href="#utility_functions.apply_hierarchy">apply_hierarchy</a></code></li> +<li><code><a title="utility_functions.auto_label" href="#utility_functions.auto_label">auto_label</a></code></li> +<li><code><a title="utility_functions.check_dataset_size" href="#utility_functions.check_dataset_size">check_dataset_size</a></code></li> +<li><code><a title="utility_functions.get_classifier" href="#utility_functions.get_classifier">get_classifier</a></code></li> +<li><code><a title="utility_functions.get_most_frequent" href="#utility_functions.get_most_frequent">get_most_frequent</a></code></li> +<li><code><a title="utility_functions.indicator_full_to_short_label" href="#utility_functions.indicator_full_to_short_label">indicator_full_to_short_label</a></code></li> +<li><code><a title="utility_functions.indicator_short_to_full_label" href="#utility_functions.indicator_short_to_full_label">indicator_short_to_full_label</a></code></li> +<li><code><a title="utility_functions.intersection" href="#utility_functions.intersection">intersection</a></code></li> +<li><code><a title="utility_functions.set_classifier" href="#utility_functions.set_classifier">set_classifier</a></code></li> +<li><code><a title="utility_functions.signature" href="#utility_functions.signature">signature</a></code></li> +<li><code><a title="utility_functions.similarity" href="#utility_functions.similarity">similarity</a></code></li> +<li><code><a title="utility_functions.union" href="#utility_functions.union">union</a></code></li> +</ul> +</li> +</ul> +</nav> +</main> +<footer id="footer"> +<p>Generated by <a href="https://pdoc3.github.io/pdoc"><cite>pdoc</cite> 0.8.1</a>.</p> +</footer> +<script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/highlight.min.js"></script> +<script>hljs.initHighlightingOnLoad()</script> +</body> +</html> \ No newline at end of file diff --git a/predihood/classes/Dataset.py b/predihood/classes/Dataset.py index 0b002155..b3bbfbf8 100644 --- a/predihood/classes/Dataset.py +++ b/predihood/classes/Dataset.py @@ -2,6 +2,7 @@ import logging import pandas as pd from predihood.config import TRAIN_SIZE, TEST_SIZE, ENVIRONMENT_VARIABLES, RANDOM_STATE, FILE_ENV +from predihood.utility_functions import check_dataset_size from sklearn.ensemble import IsolationForest from sklearn.model_selection import train_test_split 
@@ -9,23 +10,23 @@ log = logging.getLogger(__name__)
 
 
 class Dataset:
+    """
+    This class represents assessed IRIS with their indicators and EV values. There are options, such as removing outliers or rural IRIS.
+    """
     def __init__(self, data, env, _type, selected_indicators=None, indicators_to_remove=None, train_size=TRAIN_SIZE, test_size=TEST_SIZE, outliers=False):
         """
         Constructor of the Dataset class. Initialize attributes.
 
         Args:
             data: an instance of Data class. Don"t forget to initialize data after created it with "init_all_in_one()" method
-            env: a string representing the environment variable. Must be in ["batiment", "usage", "paysage", "morpho", "geo", "social"]
+            env: a string representing the EV, i.e. a value in ["building_type", "building_usage", "landscape", "morphological_position", "geographical_position", "social_class"]
             selected_indicators: a list containing the indicators to keep in the dataset
             indicators_to_remove: a list containing the indicators to remove in the dataset
-            train_size: percentage of the dataset used for training. Must be between > 0 and < 1 (e.g. 0.8 represents 80% of the dataset). The sum between train and test sizes must equal to 1
-            test_size: percentage of the dataset user for testing. Must be between > 0 and < 1 (e.g. 0.2 represents 20% of the dataset). The sum between train and test sizes must equal to 1
-            outliers: True or False to remove outliers from dataset. Outliers are detected with IsolationForest algorithm
-        """
-        # if not isinstance(data, Data): raise Exception("Dataset must be an instance of Data")
-        # else: self.dataset = data  # an instance of Data
-        # self.data_ref = data  # store a copy oof the data object, needed for prediction
-        self.data = data.data[:]  # copy by value, not by reference
+            train_size: an integer or a float corresponding to the size of the dataset used for training
+            test_size: an integer or a float corresponding to the size of the dataset used for testing
+            outliers: True or False to remove outliers from dataset (detected with IsolationForest algorithm)
+        """
+        self.data = data.data[:]  # data must be a Data object
         self.indicators = data.indicators[:]
         self.filtering = data.filtering
         self.normalization = data.normalization
@@ -38,34 +39,28 @@ class Dataset:
         self.Y_train = None
         self.X_test = None
         self.Y_test = None
-        if env in ENVIRONMENT_VARIABLES:
-            self.env = env
-        else:
-            self.env = "batiment"
-        self.train_size = train_size
-        self.test_size = test_size
+        if env in ENVIRONMENT_VARIABLES: self.env = env
+        else: self.env = "building_type"
+        self.train_size, self.test_size = check_dataset_size(train_size, test_size)
         self.outliers = outliers
 
     def init_all_in_one(self):
         """
-        Initialize the dataset by initializing X and Y and generating X_train, Y_train, X_test, Y_test and removing outliers if needed
+        Initialize the dataset by initializing X and Y, generating X_train, Y_train, X_test and Y_test, and removing outliers if needed.
+        When the type is "unsupervised", splitting data into X and Y is not relevant (as there are no train/test sets).
         """
         if self.type == "supervised":
+            if self.outliers:
+                self.remove_outliers()
             self.init_X()
             self.init_Y()
             self.train_test()
-            if self.outliers:
-                self.remove_outliers()
-                self.init_X()
-                self.init_Y()
-                self.train_test()
 
     def init_X(self):
         """
         Initialize self.X by getting indicators in dataset.
         """
         assert self.data is not None
-
         if self.selected_indicators is not None:  # select given indicators
             self.X = self.data.loc[:, self.selected_indicators]
         if self.indicators_to_remove:  # remove given indicators
@@ -83,9 +78,9 @@ class Dataset:
 
     def init_Y(self):
         """
-        Initialize self.Y by getting environmental variables in dataset.
+        Initialize self.Y by getting EV in dataset.
         """
-        if self.data is None: raise Exception("You must have a non empty dataset")  # dataset.read()
+        assert self.data is not None
         self.Y = self.data[self.env].values
 
     def train_test(self):
@@ -100,14 +95,14 @@ class Dataset:
         """
         Detect and remove IRIS that are outliers from dataset.
         """
-        isolation_forest = IsolationForest(random_state=RANDOM_STATE, n_estimators=100)
+        isolation_forest = IsolationForest(random_state=RANDOM_STATE, n_estimators=100)  # IsolationForest is used to detect outliers
        isolation_forest.fit(self.X_train)
         predictions = isolation_forest.predict(self.X_test)
         for i in range(len(predictions)):
             if predictions[i] == -1:
                 code = self.data.loc[i, "CODE"]
                 log.debug(code, "has been removed since it is an outlier.")
-                self.data = self.data.drop(i, axis=0)  # delete rows which are detected as outliers
+                self.data = self.data.drop(i, axis=0)  # delete IRIS that are detected as outliers
 
     def remove_rural_iris(self):
         """
@@ -117,19 +112,17 @@ class Dataset:
 
     def get_environment_variable(self):
         """
-        Get information about environment variable
+        Get values for a given EV for each assessed IRIS and store it in a CSV file.
         """
-        if self.env not in ENVIRONMENT_VARIABLES:
-            return None
-        else:
-            data_env = pd.DataFrame(self.data[['CODE', self.env]])
-            data_env.to_csv(FILE_ENV)
+        assert self.env in ENVIRONMENT_VARIABLES
+        data_env = pd.DataFrame(self.data[['CODE', self.env]])
+        data_env.to_csv(FILE_ENV)
 
     def get_all_environmental_variable(self):
+        """
+        Get EV for each assessed IRIS and store it in a CSV file.
+        """
         columns = ['CODE']
         columns.extend(ENVIRONMENT_VARIABLES)
         data_env = pd.DataFrame(self.data[columns])
         data_env.to_csv(FILE_ENV)
-
-    def __str__(self):
-        return "Class Dataset: " + "train_size=" + str(self.train_size) + " test_size=" + str(self.test_size)
diff --git a/predihood/classes/Method.py b/predihood/classes/Method.py
index 33da2dfe..e227dd76 100644
--- a/predihood/classes/Method.py
+++ b/predihood/classes/Method.py
@@ -2,17 +2,28 @@ from predihood.config import RANDOM_STATE
 
 
 class Method:
+    """
+    This class represents the general concept of a Method that is applied to data. There are two specializations of Method: MethodSelection and MethodPrediction.
+    """
     def __init__(self, name, dataset=None, classifier=None):
+        """
+        Constructor of the Method class. Initialize attributes.
+
+        Args:
+            name: a string that represents the name of the method, e.g. "feature selection" or "correlation matrix"
+            dataset: a Dataset object on which the method will be applied
+            classifier: an object that can be applied to data (e.g. via its fit() method)
+        """
         self.name = name
         self.dataset = dataset
         self.classifier = classifier
-        self.params = None  # same as return
+        self.parameters = None  # same as return
 
     def fit(self):
         """
-        Fit the classifier on train set.
+        Fit the classifier on the dataset.
         """
-        self.classifier.random_state = RANDOM_STATE
+        self.classifier.random_state = RANDOM_STATE  # define random_state for reproducibility
         if self.dataset.type == "supervised":
             self.classifier.fit(X=self.dataset.X_train, y=self.dataset.Y_train)
         else:
diff --git a/predihood/classes/MethodCleaning.py b/predihood/classes/MethodCleaning.py
index 3ed30cba..49a42fd4 100644
--- a/predihood/classes/MethodCleaning.py
+++ b/predihood/classes/MethodCleaning.py
@@ -8,21 +8,24 @@ import pandas as pd
 
 from predihood.classes.Method import Method
 from predihood.config import ENVIRONMENT_VARIABLES, FILE_CLEANED_DATA, FOLDER_DISTRIBUTION, OLD_PREFIX, NEW_PREFIX
-from predihood.utility_functions import sim, auto_label
+from predihood.utility_functions import similarity, auto_label
 
 log = logging.getLogger(__name__)
 warnings.simplefilter(action='ignore', category=FutureWarning)
 
 
 class MethodCleaning(Method):
+    """
+    This class represents the method for cleaning data given by the French company.
+    """
     def __init__(self, name, dataset):
         """
         Constructor of the MethodCleansing class. Initialize attributes.
         """
         Method.__init__(self, name, dataset)
-        self.values_by_env = {}  # dict to store values for each environment variable, e.g. [Maisons, Maison]
-        self.columns_dep = []  # departure columns, e.g. Abatiment, Ausage,...
-        self.columns_arr = []  # arrival columns, e.g. Nbatiment, Nusage, ...
+        self.values_by_env = {}  # dictionary to store values for each EV, e.g. [House, Houses]
+        self.columns_dep = []  # departure columns, e.g. old_building_type, old_building_usage,...
+        self.columns_arr = []  # arrival columns, e.g. new_building_type, new_building_usage, ...
         self.columns = {
             "occupation": {
                 "name_dep": "old_occupation",
@@ -50,27 +53,28 @@ class MethodCleaning(Method):
         # plot variables
         self.before = {}  # departure values for each EV
         self.after = {}  # arrival values for each EV
-        self.labels = {}  # labels for EV, e.g. maisons, immeubles, grand ensemble...
+        self.labels = {}  # labels for EV, e.g. urban, green areas, forest and country-side for the landscape variable
 
     def clean(self):
         """
-        Clean data from bad naming conventions.
+        Clean data from bad naming conventions. The idea of this function is to create a dictionary with all spellings for each value of each EV, e.g. [["Houses", "House"], ["Upper middle", "upper middle", "upper midddle"], ["Green areas"], ["Countryside"]].
+        This dictionary is constructed by computing similarities between values, storing each spelling, and finally letting the user choose the best one.
         """
-        log.info("The data needs to be cleaned. For each list, write the correct word. For each environment variable, you will get its number of corrections and its error rate.")
-        # 1. getting wrong values in a dict ordered by env variable
+        log.info("The data needs to be cleaned. For each list, write the correct word. For each EV, you will get its number of corrections and its error rate.")
+        # 1. 
getting wrong values in a dictionary ordered by env variable
         self.values_by_env = {}
         for col_dep, col_arr in zip(self.columns_dep, self.columns_arr):
             col_name = col_dep.name[len(OLD_PREFIX):]
             self.values_by_env[col_name] = []
             for val in col_dep.unique():  # get possible values for the current column
-                index = sim(val, self.values_by_env[col_name])
+                index = similarity(val, self.values_by_env[col_name])
                 # if the value is similar to another, add it, else create a new array with it
                 if index >= 0:
                     self.values_by_env[col_name][index].append(val)
                 elif index == -1:
                     self.values_by_env[col_name].append([val])
             for val in col_arr.unique():
-                index = sim(val, self.values_by_env[col_name])
+                index = similarity(val, self.values_by_env[col_name])
                 if index >= 0:
                     self.values_by_env[col_name][index].append(val)
                 elif index == -1:
@@ -94,7 +98,7 @@ class MethodCleaning(Method):
                 self.dataset.loc[self.dataset[col_name_new] == label, col_name_new] = chosen_label
             size = int(self.dataset.count()[OLD_PREFIX + key]) + int(self.dataset.count()[NEW_PREFIX + key])
             mean_error = ((nb_replacement_dep + nb_replacement_arr) / size) * 100
-            log.debug("%d IRIS have been corrected for the environment variable %s, corresponding to an error rate of %.0f %%", (nb_replacement_dep + nb_replacement_arr), key, mean_error)
+            log.debug("%d IRIS have been corrected for the EV %s, corresponding to an error rate of %.0f %%", (nb_replacement_dep + nb_replacement_arr), key, mean_error)
 
         # 3. removing outliers from data
         count = 0
@@ -109,9 +113,11 @@ class MethodCleaning(Method):
 
     def create_before_after_labels(self, name_dep, name_arr):
         """
-        Creates the arrays 'before', 'after' and 'labels' from data.
-        :param name_dep: the name of the departure column, e.g. Aoccup, Abatiment, Ausage...
-        :param name_arr: the name of the arrival column, e.g. Noccup, Nbatiment, Nusage...
+        Creates the lists 'before', 'after' and 'labels' from data.
+
+        Args:
+            name_dep: a string containing the name of the departure column, e.g. old_building_type, old_building_usage...
+            name_arr: a string containing the name of the arrival column, e.g. new_building_type, new_building_usage...
         """
         all_repartition = {}
         self.before = {}
@@ -137,7 +143,7 @@ class MethodCleaning(Method):
             else:
                 self.after[status] = value
                 # self.dataset[values_after].value_counts()[status]
 
-        # 2. merge before and after data in the sale dict
+        # 2. merge before and after data in the same dictionary
         for status in self.before:
             all_repartition[status] = [self.before[status], 0]
         for status in self.after:
@@ -146,7 +152,7 @@ class MethodCleaning(Method):
             else:
                 all_repartition[status][1] = self.after[status]
 
-        # 3. convert dict in 3 arrays
+        # 3. convert the dictionary into 3 lists
         self.before = []
         self.after = []
         self.labels = []
@@ -159,8 +165,10 @@ class MethodCleaning(Method):
     def create_bar_chart(self, name, title):
         """
         Plot before/after charts.
-        :param name: the name of the target to plot, i.e. environment variable, e.g. usage, batiment, ...
-        :param title: the title of the plot.
+
+        Args:
+            name: a string containing the name of the EV to plot, e.g. building_type, building_usage, landscape, ...
+            title: a string containing the title of the plot
         """
         x = np.arange(len(self.labels))  # the label locations
         width = 0.35
@@ -186,9 +194,10 @@ class MethodCleaning(Method):
     def to_chart(self, env, name, title):
         """
         Create before/after data and plot it.
-    :param env: the target to plot, i.e. the environment variable, e.g. usage, paysage...
-    :param name: the name to save the file.
-    :param title: the title of the plot.
+    Args:
+        env: a string containing the EV to plot, e.g. building_type, building_usage, landscape...
+        name: a string containing the name to save the file
+        title: a string containing the title of the plot
         """
         self.create_before_after_labels(self.columns[env]["name_dep"], self.columns[env]["name_arr"])
         self.create_bar_chart(name, title)
diff --git a/predihood/classes/MethodPrediction.py b/predihood/classes/MethodPrediction.py
index 8516c8c4..34d96f37 100644
--- a/predihood/classes/MethodPrediction.py
+++ b/predihood/classes/MethodPrediction.py
@@ -14,6 +14,9 @@ log = logging.getLogger(__name__)
 
 
 class MethodPrediction(Method):
+    """
+    This class represents a method for predicting EV and computing performance at a national level.
+    """
     def __init__(self, name, dataset, classifier):
         """
         Constructor of the MethodPrediction class. Initialize attributes.
@@ -27,14 +30,14 @@ class MethodPrediction(Method):
 
     def compute_performance(self):
         """
-        Compute performance metrics like accuracy, confusion matrix, ...
+        Compute performance metrics, i.e. accuracy.
         """
         scores = cross_val_score(self.classifier, self.dataset.X, self.dataset.Y, cv=5)
         self.accuracy = scores.mean() * 100
 
     def predict(self, iris_code=None):
         """
-        Predict environment variables for the given iris. The environment variable to predict is stored in the dataset as "env" variable
+        Predict one EV for the given iris. The EV to predict is stored in the dataset as the "env" attribute.
         """
         iris_object = model.get_iris_from_code(iris_code)
         iris_area = area(model.get_coords_from_code(iris_code)) / 1000000
diff --git a/predihood/classes/MethodSelection.py b/predihood/classes/MethodSelection.py
index 71aa5f25..dcf8c37c 100644
--- a/predihood/classes/MethodSelection.py
+++ b/predihood/classes/MethodSelection.py
@@ -6,10 +6,12 @@ import seaborn as sns
 
 from predihood.classes.Method import Method
 from predihood.config import RANDOM_STATE, TITLES
-from predihood.utility_functions import draw_features_importance
 
 
 class MethodSelection(Method):
+    """
+    This class represents a method for selecting a subset of indicators among all INSEE indicators.
+    """
     def __init__(self, name, dataset, classifier=None, transform=False, parameters=None):
         """
         Constructor of the MethodSelection class. Initialize attributes.
@@ -31,33 +33,24 @@
         else:
             self.classifier.fit(self.dataset.X_train, self.dataset.Y_train)
 
-    def results(self):
+    def compute_selection(self):
         """
         Get results of the classifier according to its name.
         """
         if self.name == "feature importance ET" or self.name == "feature importance RF":
-            if not self.classifier: self.fit()  # TODO: a modifier
-            importance = self.classifier.feature_importances_
-            indicators_importance = {self.dataset.indicators[i]: importance[i] for i in range(len(importance))}  # create a dict to associate each indicator with its importance
-            indicators_importance = {k: v for k, v in sorted(indicators_importance.items(), key=lambda item: item[1], reverse=True)}  # order dict by value in descending order
-
-            if self.parameters["top_k"] == "MAX":
-                k_best = [[key, value] for key, value in indicators_importance.items()]
-            else:
-                k_best = [[key, value] for key, value in indicators_importance.items()][:self.parameters["top_k"]]
+            importance = self.classifier.feature_importances_  # the classifier must have been fitted beforehand
+            indicators_importance = {self.dataset.indicators[i]: importance[i] for i in range(len(importance))}  # create a dictionary to associate each indicator with its importance
+            indicators_importance = {k: v for k, v in sorted(indicators_importance.items(), key=lambda item: item[1], reverse=True)}  # order dictionary by value in descending order
+            k_best = [[key, value] for key, value in indicators_importance.items()][:self.parameters["top_k"]]
             self.best_indicators = k_best
-            self.parameters = {
-                "importance": importance,
-                "k_best": k_best,
-            }
-        elif self.name == "heat map EV-agnostic":
+        elif self.name == "heat map":
             fig, ax = plt.subplots()
             ax.xaxis.tick_top()
 
             # get indicators that are fully correlated (i.e. corr=1)
-            temp_data = self.dataset.data[self.dataset.indicators][:]  # get only INSEE indicators (!= CODE, AREA, EVs)
-            corr_matrix = temp_data.corr(method="spearman").abs()
+            temp_data = self.dataset.data[self.dataset.indicators][:]  # get only INSEE indicators (!= CODE, AREA, EV)
+            corr_matrix = temp_data.corr(method=self.parameters["method"]).abs()
             sns.heatmap(corr_matrix)
 
             upper = corr_matrix.where(np.triu(np.ones(corr_matrix.shape), k=1).astype(np.bool))
@@ -70,8 +63,5 @@ class MethodSelection(Method):
             if TITLES: plt.title("Correlation matrix: filtering = " + self.dataset.filtering + ", normalization = " + self.dataset.normalization)
             plt.show()
-            self.parameters = {
-                "fig": fig
-            }
         else:
-            raise Exception("Unknown name. Choose among [\"feature importance\", \"heat map\"].")
+            raise Exception("Unknown name. Choose among [\"feature importance ET\", \"feature importance RF\", \"heat map\"].")
diff --git a/predihood/cleaning.py b/predihood/cleaning.py
index 07d6b916..63ffbab8 100644
--- a/predihood/cleaning.py
+++ b/predihood/cleaning.py
@@ -10,6 +10,9 @@ log = logging.getLogger(__name__)
 
 
 def clean():
+    """
+    Clean Home in Love data and generate some charts to study the distribution of the data.
+    """
     # 1. read data from Excel file
     data = pd.read_excel(FILE_DATA_HIL)
 
@@ -23,7 +26,7 @@ def clean():
     data.columns = columns
     data.head()
 
-    # 3. clean data
+    # 3. clean data by changing misspelled values
     cleaning = MethodCleaning("cleaning", data)
     cleaning.clean()
     log.info("Many plots have be generated in " + FOLDER_DISTRIBUTION)
 
     # 4. 
distribution between women and men women_men = data["sex"].value_counts() labels = "Women", "Men" - sizes = [women_men["Femme"], women_men["Homme"]] # getting number of women and number of men + number_men_women = [women_men["Femme"], women_men["Homme"]] # getting number of women and number of men colors = ["salmon", "lightblue"] - plt.pie(sizes, labels=labels, colors=colors, autopct='%i%%', shadow=True, startangle=90) + plt.pie(number_men_women, labels=labels, colors=colors, autopct='%i%%', shadow=True, startangle=90) plt.axis("equal") plt.title("Distribution of gender") plt.show() # 5. distribution between ages data_temp = data - data_temp = data_temp.dropna() + data_temp = data_temp.dropna() # remove NaN values ages_plot = [] total_plot = [] - min_age = int(min(data_temp["age"])) - max_age = int(max(data_temp["age"])) + min_age, max_age = int(min(data_temp["age"])), int(max(data_temp["age"])) for counter in range(min_age, max_age + 1): total_plot.append(data_temp.loc[data_temp.age == float(counter), "age"].count()) ages_plot.append(counter) mean = np.average(ages_plot, weights=total_plot) + # First view: bar chart plt.bar(ages_plot, total_plot) plt.axvline(x=mean, color="red") # draw median age as a line plt.xlabel("Age (in years)") @@ -57,6 +60,7 @@ def clean(): plt.title("Distribution of age") plt.show() + # Second view: histogram ages = data_temp["age"] plt.hist(ages, facecolor="gray", align="mid") plt.xlabel("Age (in years)") @@ -82,7 +86,7 @@ def clean(): plt.show() # 8. distribution between geographic positions - geo = pd.concat([data[OLD_PREFIX + "morphological_position"], data[NEW_PREFIX + "morphological_position"]], ignore_index=True) + geo = pd.concat([data[OLD_PREFIX + "geographical_position"], data[NEW_PREFIX + "geographical_position"]], ignore_index=True) split_geo = [geo[i].split()[0] if not isinstance(geo[i], float) else "" for i in range(len(geo))] set_geo = set(split_geo) uniques = [split_geo.count(elem) for elem in set_geo] diff --git a/predihood/generated_files/distribution-plots/distribution_geo.png b/predihood/generated_files/distribution-plots/distribution_geo.png index 3cbb5edd82f645c6ca24b0c88992a9064779a630..5abe54d063395825b32ef78a4be13f790738b3d1 100644 GIT binary patch literal 28987 zcmdSBcQ}{*|2KXjlxQL?Ng7l}5)mzlN;YLj2pQR%hK59xw!L@C-VIU7ipW-q$lm*Y zyt>Bc^Zgz7eILi~IDY^9uH(9{!rS|Op0DwIKGy5JrFi-{3)4C#ilSKLWTllUiq?yw zXxbMu;5#DK&7JsTk?j#V)y4S7ZSnb=_<D)8>{(liT7HiFM-z82))e0qvpaUiPQ}XD z&hea$5oK}C&ibO2-9=LaE(aqUTT?4bVSXWg;ayx8?Ch+?1qA-{bABru69F$uNSdO! 
zC^_lFs!q>-x464ij4doI%mf50U5p7n!(v$e)h6&+7mvxU9bNI>E)ut%U6URb@(B-o z8F+tRm-u=<_IO#_RWJ7>%l(WxqhicqocVp`M(FqY>SwN&5<jzb1Rj33xpY7;_4|}t zzPPia=Rv*Ht`Va&&17{NiYhj3R;r@MEpU0Rr114KW||fF?)w%0pMH_eYDcq>VfuMn z!TfY@V~%aCAE#tOclQO!@diVOE}m`MdZHid*1Ws3KIq=PNVB?xUy=S&%8&OL@yI=X z{CM4do2Uo7wPIeHR3;p|&F<0_>}WSoFrn+XasU4P;x?T(ebxx@-!3s2U5D$p^sY0j zi&u$dU9loeqCb0R>cEYqUTfE`O;C!wK0W#?vLaZbOMHH2peXUu_jfMiUrj{Ly<X(( z;=(5+q{PX|nQ42SWqnNL!3FJ)4+S*hm816>yj`4O)ut=q*smJEeJmx>Z_9DN9fE>Y z4fgo7M-G#N9qXU8=e{+_&`fq#^k2z-duu(jzCWjAXHO5gTAgvxU%!404-HjE9J^gx z;OZ#1;Q2`FrSYA{bW>X$!`Rr^j~_ptys><f`mI%4E$0j{6U_Uahofle=<L6}U4koX z9URzb$uMYUV`DS=<Ds5eZJd(X=a@SVeT~s!VO)3C3cq7t<=EdGpOmyK!?LAuJkQIE zQcKm<NlQ!9VD`fJeJC$acRy&~bDt+(`qrvNhbXH3uI0E;#*vK0JR%}$u?nGI?(?dQ z4!&7#-jZwYGCNd?0o-oUoUIgiKQNG6R8%x**V&r>EDsNl6LNAn25l=>t{fg6O?S_p zTZ#eK@aCg_c6Q1JiaY#{TchiASj7BGZFhBKU7@>c!AyUi#$0lpS%KLJPGMu;n$Mpt zLf_n2zCL*)7MT)j0_8N(;`p(o<d|BbdWLzba<tE~jp96V!xIzrQzKnL4;~mho;EW} zn4O(XX~@dU%j<Ix$a1Hliu;ljCF#0<{)}PdbUGv_C+E=TI?=E8^@Y(^xrsK<B5fU= z439Y%``$0|fp(+aN?Un&E?~f?bcHD@YfEBX!qPSTJ0v9X!cOMwX5&?s=I7^k&SVRI zLtp$kR>8cj&^>eD{rmSm{{9cMva&wh=Z##W=cd$>=csXSn|y7)vu(-Sx7{O^2kRZ% z+_%eNENWj?N+qt`A`_<?uRJ_H{-rEn3){MN30Sr4RcbVp^^BP3d_Gz8!X_20cDz{` z1|>f6cE4(DY;A>Gv))+HjkGwn#XFDu%xQET>xnKala;HvLcf%C<;w7b^OHw6Z{GYw z@Z1#)@CAv9<{Vp>p^`NjcE6PK?u1#LKUezx-u55Ae#MWD+F3HvQG*M(^lGc3xft$a zK4qn)$1N=_^95fC;;u)!DtjB!3_ny<sF(P#*;<UdCGIcjpbd-UqG+kRO*yt*_n$pW zdSUc2;Y^wVy-tNiU-@HUai_tsD_5@k)^*z4;&Pf-^)?oemtpqfdjF1hoj>#p=lih{ z#MaVs;oJ8vWvdlaRG2lN==4XojRVE^9Yn|S8XC?>PPSjgz78@lFksfOw<vI(_;PFY z_H>&cAKW(0tTINi@lrYaT>mn|a?YerPeq$_c{(mG_9Er!{Q2|Us0KB*m6EaJeN9c7 zk3Qn|3MN{g_SU_)bm`KIGrJm#Jd1ctvAdaASn7fuTjPt153y_zQ^d;74BoVMEw6xp zfcsdTMgo>se{L_o<m})ZahK6J9^L~7w3U>UG&9VCcCp{azZL@DXQin{6m{AV1H`ms ziIS}B%@CI!UiZn48!t4IbHDpZ)J7g(zWeNpxBauxCm$vpzRJL-SC}7ld#A0*rzg?Z zt}Qxt?AWiTR)y7ZN|C=kY=4%oEf{*YUNikdz&?w{sG~QQnRb>11V#mHIi8@EWtpky zgyku6u|d$2e_qdt;knlwiW9xgm0%_|zUG>NoRIvIv4-z1!|e5j{=GR^vs!1*p2d#N zj~<d2$F{c*mGGQ*vma>H>8c1;d1?H~ZhF)-Stn0@x#Z~GT)STpzdpzA<mHt!`uITY z^l3j*N0lOFzP{rS)b~nGt%*Kq-kg1Dow!|Gj6x{C+=mYzl-1QAw7O5|G_^%`g?j3) z-hTQ%Pm6uM4xeUv+#X@!9|HqcJp!*-3>nZS#Jn+)W~F``dm1wmdER3i2TM4rtE!^X z7=L<#;j2Er$(=`T8n<NsyFtjlzxnHzQdIf66dj2m0X<zQDXHnUh51;|`KjZm(V4bB zY&(tDRGoatpIlp8JDdHsSLV-TY~Hg+c@4j|qBj$pUA>Ouuavz0n%Sw546{1E=;&y< zK$p?(b4$H;swXKXswdl_qMQs|#(5yFl+*PT>Pj6JH0%2HiED+8Wpl+w9B(BiCbnd= z-#+Fy{!N4|+w4A_Z0p1L)H>9`bc?2aO(rwreS9i0z7GWS_~m+93-VU2TE)6%O)Od# zhPvg;ac%8q{My-JXfz4B1uobw5f+z9?^xT~syH}grkUrhJwUe@wb9l5OOleN<`b8R z{@n@+3VDY92j{aRA|ghcZ7buob8HgyiV8zq`}dR2_m`TE>&qYh@v--{_fpd2YB5R{ zUtb)<t}W`&{1CJ=HnngX6<Cf>E#dwFNlDvJ>wb!mty63vz{ht26RwytxLwX)a{Q6U z^snuGjTt9vN}7rfF3i3pr5XLFu_6&|Fy?e{f@qtY3MrJh!4wTW_p$5dznZdAj~qFo z8hd)<)Z}FMjZGfeQ%m#Y*b^gL?mMj5t`Kq`y$bbPEkgR%f%P}kjGS-xrj?F$?oKJ? 
[remainder of the base85-encoded GIT binary patch for predihood/distribution-plots/distribution_geo.png omitted: unreadable binary image data]
diff --git a/predihood/main.py b/predihood/main.py
index a3556fe8..54771dc0 100644
--- a/predihood/main.py
+++ b/predihood/main.py
@@ -28,8 +28,10 @@ url = "http://127.0.0.1:8081/"
 def index(page):
     """
     Render the main page of the interface, i.e. `index.html`.
+
     Args:
         page: The name of the specific page to display.
+
     Returns:
         The page to display.
     """
@@ -42,6 +44,7 @@ def index(page):
 def get_algorithms_page():
     """
     Render the page of the algorithmic interface, i.e. `algorithms.html`.
+
     Returns:
         The page to display.
     """
@@ -52,6 +55,7 @@
 def get_details_iris():
     """
     Get all information about the given IRIS.
+
     Returns:
         A page that contains all information about the given IRIS (descriptive, grouped and raw indicators).
     """
@@ -67,6 +71,7 @@
 def get_classifiers():
     """
     Get list of available classifiers (stored in AVAILABLE_CLASSIFIERS).
+
     Returns:
         A list containing the names of the available classifiers.
     """
@@ -75,6 +80,12 @@
 
 @app.route('/getParameters', methods=["GET"])
 def get_parameters():
+    """
+    Get the parameters of the given classifier, given its name.
+
+    Returns:
+        A dictionary containing for each parameter its name, its types, its default value, and its definition.
+    """
     if 'name' in request.args:
         name = request.args['name']
         parameters = signature(name)
@@ -88,6 +99,7 @@
 def run_algorithm():
     """
     Run classifier on data with the specified parameters.
+
     Returns:
         The computed accuracies for each EV and each list. The top-k are also returned.
     """
@@ -115,6 +127,7 @@
 def predict_iris():
     """
     Predict the environment (i.e. 6 EV) of the given IRIS.
+
     Returns:
         Predictions for each EV
     """
@@ -130,6 +143,12 @@
 @app.route('/getIrisPolygon', methods=["GET"])
 def get_iris_for_polygon():
+    """
+    Get the list of IRIS in the given polygon. The polygon is defined by two points given in the AJAX request.
+
+    Returns:
+        A list of the IRIS that are in the given polygon.
+    """
     lat1 = float(request.args['lat1'])
     lng1 = float(request.args['lng1'])
     lat2 = float(request.args['lat2'])
     lng2 = float(request.args['lng2'])
@@ -144,6 +163,12 @@
 @app.route('/countIrisPolygon', methods=["GET"])
 def count_iris_for_polygon():
+    """
+    Count the number of IRIS in the given polygon. The polygon is defined by two points given in the AJAX request.
+
+    Returns:
+        The number of IRIS that are in the given polygon.
+    """
     lat1 = float(request.args['lat1'])
     lng1 = float(request.args['lng1'])
     lat2 = float(request.args['lat2'])
     lng2 = float(request.args['lng2'])
@@ -154,6 +179,12 @@
 
 @app.route('/searchCode', methods=["GET"])
 def get_iris_from_code():
+    """
+    Get an IRIS object (represented by a dictionary) given its code. The code is a string of 9 digits.
+
+    Returns:
+        An object that represents the IRIS corresponding to the given code.
+    """
     code_iris = request.args['codeIris']
     iris = model.get_iris_from_code(code_iris)
     if iris is None:
@@ -165,6 +196,12 @@
 
 @app.route('/searchName', methods=["GET"])
 def get_iris_from_name():
+    """
+    Get IRIS given its name. The search is done on both the IRIS name and the city name.
+
+    Returns:
+        A list of IRIS corresponding to the given name.
+    """
     query = request.args['querySearch']
     iris = model.get_iris_from_name(query)
     if iris is None or len(iris) == 0:
@@ -174,19 +211,14 @@
     return json.dumps({'status': 'OK', 'geojson': iris})
 
 
-@app.route('/getEnvironmentValues', methods=["GET"])
-def get_environment_values():
-    variables_with_values = {}
-    for env in ENVIRONMENT_VALUES:
-        temp = []
-        for key in ENVIRONMENT_VALUES[env]:
-            temp.append(ENVIRONMENT_VALUES[env][key])  # get english values
-        variables_with_values[env] = temp
-    return json.dumps(variables_with_values)  # {"result": variables_with_values}
-
-
 @app.route('/add_iris_to_csv', methods=["GET"])
 def add_iris_to_csv():
+    """
+    Add an assessed IRIS to a CSV file. Not available in production mode.
+
+    Returns:
+        A message that explains the status of the request, i.e. OK if the IRIS has been added, KO otherwise.
+    """
     assessed_values = []
     for env in ENVIRONMENT_VALUES:
         assessed_values.append(request.args[env])
@@ -197,6 +229,12 @@
 
 @app.route('/favicon.ico')
 @app.route('/<page>/favicon.ico')
 def favicon():
+    """
+    Display the favicon.
+
+    Returns:
+        The favicon.
+    """
     return send_from_directory(os.path.join(app.root_path, 'static'), 'favicon.png', mimetype='image/favicon.png')
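[Editor's example.] The endpoints documented in the main.py diff above can be exercised over HTTP once the Flask app is running at the url declared at line 28 of the file (http://127.0.0.1:8081/). A minimal sketch, assuming the third-party `requests` package is installed and the server is up; the coordinates and the 9-digit IRIS code are hypothetical values chosen for illustration:

    import requests

    base = "http://127.0.0.1:8081"

    # Count the IRIS inside the bounding box defined by two (lat, lng) corners.
    box = {"lat1": 45.74, "lng1": 4.82, "lat2": 45.78, "lng2": 4.90}
    print(requests.get(base + "/countIrisPolygon", params=box).text)

    # Look up an IRIS by its 9-digit code (hypothetical code shown).
    print(requests.get(base + "/searchCode", params={"codeIris": "693880101"}).text)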
+ """ lat1 = float(request.args['lat1']) lng1 = float(request.args['lng1']) lat2 = float(request.args['lat2']) @@ -154,6 +179,12 @@ def count_iris_for_polygon(): @app.route('/searchCode', methods=["GET"]) def get_iris_from_code(): + """ + Get an IRIS object (represented by a dictionary) given its code. The code is a string of 9 digits. + + Returns: + An object that represents the IRIS corresponding to the given code. + """ code_iris = request.args['codeIris'] iris = model.get_iris_from_code(code_iris) if iris is None: @@ -165,6 +196,12 @@ def get_iris_from_code(): @app.route('/searchName', methods=["GET"]) def get_iris_from_name(): + """ + Get IRIS given its name. The search is done on IRIS's name and IRIS' city. + + Returns: + A list of IRIS corresponding to the given name. + """ query = request.args['querySearch'] iris = model.get_iris_from_name(query) if iris is None or len(iris) == 0: @@ -174,19 +211,14 @@ def get_iris_from_name(): return json.dumps({'status': 'OK', 'geojson': iris}) -@app.route('/getEnvironmentValues', methods=["GET"]) -def get_environment_values(): - variables_with_values = {} - for env in ENVIRONMENT_VALUES: - temp = [] - for key in ENVIRONMENT_VALUES[env]: - temp.append(ENVIRONMENT_VALUES[env][key]) # get english values - variables_with_values[env] = temp - return json.dumps(variables_with_values) # {"result": variables_with_values} - - @app.route('/add_iris_to_csv', methods=["GET"]) def add_iris_to_csv(): + """ + Adds an assessed IRIS to a CSV file. Not available in production mode. + + Returns: + A message that explain the status of the request, i.e. OK if the IRIS has been added, KO else. + """ assessed_values = [] for env in ENVIRONMENT_VALUES: assessed_values.append(request.args[env]) @@ -197,6 +229,12 @@ def add_iris_to_csv(): @app.route('/favicon.ico') @app.route('/<page>/favicon.ico') def favicon(): + """ + Display the favicon. + + Returns: + The favicon. + """ return send_from_directory(os.path.join(app.root_path, 'static'), 'favicon.png', mimetype='image/favicon.png') diff --git a/predihood/model.py b/predihood/model.py index 748fe7bc..24917137 100644 --- a/predihood/model.py +++ b/predihood/model.py @@ -19,13 +19,15 @@ json_iris_indicator_code_to_label = 'static/data/dictionnaire-indicateurs.json' def get_iris_for_polygon(lat1, lng1, lat2, lng2): """ Get IRIS that are in a box represented by given coordinates. + Args: - lat1: A float for latitude of the first point of the box. - lng1: A float for longitude of the first point of the box. - lat2: A float for latitude of the second point of the box. - lng2: A float for longitude of the second point of the box. + lat1: a float for latitude of the first point of the box + lng1: a float for longitude of the first point of the box + lat2: a float for latitude of the second point of the box + lng2: a float for longitude of the second point of the box + Returns: - A list of IRIS that are in the box defined by given latitudes and longitudes. + a list of IRIS that are in the box defined by given latitudes and longitudes """ # polygon = db.convert_geojson_box_to_polygon(lng1, lat1, lng2, lat2) polygon = Mongiris.convert_geojson_box_to_polygon(lng1, lat1, lng2, lat2) @@ -37,13 +39,15 @@ def get_iris_for_polygon(lat1, lng1, lat2, lng2): def count_iris_for_polygon(lat1, lng1, lat2, lng2): """ Count IRIS that are in a box represented by given coordinates. + Args: - lat1: A float for latitude of the first point of the box. - lng1: A float for longitude of the first point of the box. 
diff --git a/predihood/model.py b/predihood/model.py index 748fe7bc..24917137 100644 --- a/predihood/model.py +++ b/predihood/model.py @@ -19,13 +19,15 @@ json_iris_indicator_code_to_label = 'static/data/dictionnaire-indicateurs.json' def get_iris_for_polygon(lat1, lng1, lat2, lng2): """ Get IRIS that are in a box represented by given coordinates. + Args: - lat1: A float for latitude of the first point of the box. - lng1: A float for longitude of the first point of the box. - lat2: A float for latitude of the second point of the box. - lng2: A float for longitude of the second point of the box. + lat1: a float for latitude of the first point of the box + lng1: a float for longitude of the first point of the box + lat2: a float for latitude of the second point of the box + lng2: a float for longitude of the second point of the box + Returns: - A list of IRIS that are in the box defined by given latitudes and longitudes. + a list of IRIS that are in the box defined by given latitudes and longitudes """ # polygon = db.convert_geojson_box_to_polygon(lng1, lat1, lng2, lat2) polygon = Mongiris.convert_geojson_box_to_polygon(lng1, lat1, lng2, lat2) @@ -37,13 +39,15 @@ def get_iris_for_polygon(lat1, lng1, lat2, lng2): def count_iris_for_polygon(lat1, lng1, lat2, lng2): """ Count IRIS that are in a box represented by given coordinates. + Args: - lat1: A float for latitude of the first point of the box. - lng1: A float for longitude of the first point of the box. - lat2: A float for latitude of the second point of the box. - lng2: A float for longitude of the second point of the box. + lat1: a float for latitude of the first point of the box + lng1: a float for longitude of the first point of the box + lat2: a float for latitude of the second point of the box + lng2: a float for longitude of the second point of the box + Returns: - An integer representing the number of IRIS that are in the box defined by given latitudes and longitudes. + an integer representing the number of IRIS that are in the box defined by given latitudes and longitudes """ polygon = db.convert_geojson_box_to_polygon(lng1, lat1, lng2, lat2) iris = db.geo_within(iris_collection, polygon) @@ -54,10 +58,12 @@ def count_iris_for_polygon(lat1, lng1, lat2, lng2): def get_iris_from_code(code_iris): """ Get an IRIS given its code (9 digits). + Args: - code_iris: A string corresponding to the code of the IRIS. The code should be a 9 digits string. + code_iris: a string corresponding to the code of the IRIS (the code should be a 9-digit string) + Returns: - An object that represents the IRIS corresponding to the given code. + an object that represents the IRIS corresponding to the given code """ iris = db.get_iris_from_code(code_iris) return iris @@ -66,10 +72,12 @@ def get_iris_from_code(code_iris): def get_iris_from_name(name): """ Get an IRIS given its name. + Args: - name: A string corresponding to the name of the IRIS. + name: a string corresponding to the name of the IRIS + Returns: - An object that represents the IRIS corresponding to the given name. + an object that represents the IRIS corresponding to the given name """ # the query string (name) is searched in both the name of the iris and the name of the city regx = re.compile(name, re.IGNORECASE) @@ -81,10 +89,12 @@ def get_iris_from_name(name): def parse_json_to_dict(json_file_path): """ Convert a JSON file to a dictionary object. + Args: - json_file_path: A string containing the path of the file to load. + json_file_path: a string containing the path of the file to load + Returns: - A dictionary containing the data in the file located in the given path. + a dictionary containing the data in the file located at the given path """ assert(os.path.isfile(json_file_path)) with open(json_file_path) as data_file: @@ -96,10 +106,12 @@ def parse_json_to_dict(json_file_path): def get_coords_from_code(code): """ Get geometry of the IRIS corresponding to the given code. + Args: - code: A string corresponding to the code of the IRIS. The code should be a 9 digits string. + code: a string corresponding to the code of the IRIS. The code should be a 9-digit string + Returns: - A list representing coordinates of the IRIS. + a list representing coordinates of the IRIS """ iris = db.get_iris_from_code(code) if iris: @@ -111,8 +123,9 @@ def get_coords_from_code(code): def get_indicators_list(): """ Get all INSEE indicators that are stored in the database. This corresponds to the collection 'collindic'. + Returns: - A list containing all names of indicators, e.g. ['POP0002', 'POP0204', ..., 'P14_RP_MAISON']. + a list containing all names of indicators, e.g. ['POP0002', 'POP0204', ..., 'P14_RP_MAISON'] """ list_indicators = db.find_all(db.collection_indic) return [indicator["short_label"] for indicator in list_indicators] @@ -121,8 +134,9 @@ def get_indicators_list(): def get_indicators_dict(): """ Get all INSEE indicators that are stored in the database. This corresponds to the collection 'collindic'. + Returns: - A dictionary containing all names of indicators, e.g. {'POP0002': 'Population aged from 0 to 2 y.o.', ..., 'P14_RP_MAISON': 'Number of principal residences'}. + a dictionary containing all names of indicators, e.g. {'POP0002': 'Population aged from 0 to 2 y.o.', ..., 'P14_RP_MAISON': 'Number of principal residences'} """ list_indicators = db.find_all(db.collection_indic) return {indicator["short_label"]: indicator["full_label"] for indicator in list_indicators}
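Both polygon functions rely on convert_geojson_box_to_polygon to turn two corner points into a GeoJSON box before calling db.geo_within. A sketch of the structure such a helper is expected to produce (an assumption for illustration; the exact output of the Mongiris helper may differ):

def box_to_polygon(lng1, lat1, lng2, lat2):
    # Build a closed GeoJSON ring from two opposite corners of the box.
    return {
        "type": "Polygon",
        "coordinates": [[
            [lng1, lat1],
            [lng1, lat2],
            [lng2, lat2],
            [lng2, lat1],
            [lng1, lat1],  # first point repeated to close the ring
        ]],
    }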
diff --git a/predihood/predict.py b/predihood/predict.py index 7f85ce08..d67729b2 100644 --- a/predihood/predict.py +++ b/predihood/predict.py @@ -4,7 +4,7 @@ from collections import OrderedDict from predihood.classes.Data import Data from predihood.classes.Dataset import Dataset from predihood.classes.MethodPrediction import MethodPrediction -from predihood.config import ENVIRONMENT_VARIABLES, TOPS_K, FOLDER_SELECTED_INDICATORS, RANDOM_STATE, FILE_LIST_DISTRIBUTION +from predihood.config import ENVIRONMENT_VARIABLES, TRAIN_SIZE, TEST_SIZE from predihood.selection import retrieve_lists from predihood.utility_functions import check_dataset_size, get_most_frequent from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier @@ -33,15 +33,17 @@ CLASSIFIERS = [ def compute_all_accuracies(data, clf, train_size, test_size, remove_outliers=False, remove_rural=False): """ Compute accuracies for each EV and each list with the given classifier. + Args: - data: a Data object that contains assessed IRIS and their attributes. - clf: an object which is a classifier (with `fit` and `predict` methods). - train_size: the size of the training sample. - test_size: the size of the test sample. + data: a Data object that contains assessed IRIS and their attributes + clf: an object which is a classifier (with `fit` and `predict` methods) + train_size: an integer or a float corresponding to the size of the training sample + test_size: an integer or a float corresponding to the size of the test sample remove_outliers: True to remove from the dataset IRIS that are detected as outliers, False else - remove_rural: True to remove IRIS that are in the countryside to avoid bias while predicting + remove_rural: True to remove IRIS that are in the countryside to avoid bias while predicting, False otherwise + Returns: - A dict of results for each EV and each list of selected indicators. + a dictionary of results for each EV and each list of selected indicators """ log.info("... Computing accuracies ...") train_size, test_size = check_dataset_size(train_size, test_size) @@ -86,18 +88,20 @@ def compute_all_accuracies(data, clf, train_size, test_size, remove_outliers=Fal return results -def predict_one_iris(iris_code, data, clf, train_size, test_size, remove_outliers): +def predict_one_iris(iris_code, data, clf, train_size, test_size, remove_outliers=False): """ Predict the 6 EV for the given IRIS, the given data and the given classifier. + Args: - iris_code: A string that contains the code of the IRIS (9 digits). - data: A Data object on which classifier will learn. - clf: An object (classifier) to perform the prediction. - train_size: The size of the train sample. - test_size: The size of the test sample. - remove_outliers: True to remove from the dataset IRIS that are detected as outliers, False else.
+ iris_code: a string that contains the code of the IRIS (9 digits) + data: a Data object on which the classifier will learn + clf: an object (classifier) to perform the prediction + train_size: an integer or a float corresponding to the size of the train sample + test_size: an integer or a float corresponding to the size of the test sample + remove_outliers: True to remove from the dataset IRIS that are detected as outliers, False otherwise + Returns: - Predictions for each EV. + A dictionary containing predictions for each EV. """ train_size, test_size = check_dataset_size(train_size, test_size) lists = retrieve_lists() @@ -109,40 +113,23 @@ def predict_one_iris(iris_code, data, clf, train_size, test_size, remove_outlier for top_k, lst in lists.items(): dataset = Dataset(data, env, selected_indicators=lst[env], train_size=train_size, test_size=test_size, outliers=remove_outliers, _type="supervised") dataset.init_all_in_one() - algo = MethodPrediction(name='', dataset=dataset, classifier=clf) - algo.fit() - algo.predict(iris_code) - predictions_lst.append(algo.prediction) + algorithm = MethodPrediction(name='', dataset=dataset, classifier=clf) + algorithm.fit() + algorithm.predict(iris_code) + predictions_lst.append(algorithm.prediction) predictions[env] = get_most_frequent(predictions_lst) # get the most frequent value and the number of occurrences - print(predictions) # TODO + print(predictions) # TODO: give an example of the dictionary return predictions if __name__ == '__main__': - # data = Data() - # data.init_all_in_one() - # dataset = Dataset(data, "geo") - # dataset.get_environment_variable() # Create data - # data = Data(normalization=None, filtering=False) - # data.init_all_in_one() - # - # data = Data(normalization="density", filtering=False) - # data.init_all_in_one() - # - # data = Data(normalization="population", filtering=False) - # data.init_all_in_one() - # - # data = Data(normalization=None, filtering=True) - # data.init_all_in_one() data = Data(normalization="population", filtering=True) data.init_all_in_one() - # # Run expes on data - # expe1(data) - # expe2(data) - # expe3(data) - compute_all_accuracies(data, RandomForestClassifier(), 0.8, 0.2) - # expe5(data, RandomForestClassifier(), 0.8, 0.2) + # Compute accuracies for each EV and each top-k + compute_all_accuracies(data, RandomForestClassifier(), TRAIN_SIZE, TEST_SIZE) + + # Predict EV of the "Part-Dieu" IRIS, which is the CBD (Central Business District) of Lyon + predict_one_iris("693830301", data, RandomForestClassifier(), TRAIN_SIZE, TEST_SIZE)
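For each EV, predict_one_iris gathers one prediction per top-k list and keeps the majority value through get_most_frequent (defined in predihood/utility_functions.py below). A self-contained sketch of that vote, with made-up EV values:

# Hypothetical per-list predictions for one EV.
predictions_lst = ["Houses", "Houses", "Towers", "Houses"]
most_frequent_element = max(set(predictions_lst), key=predictions_lst.count)
result = {"most_frequent": most_frequent_element, "count_frequent": predictions_lst.count(most_frequent_element)}
print(result)  # {'most_frequent': 'Houses', 'count_frequent': 3}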
diff --git a/predihood/selection.py b/predihood/selection.py index dbfe22e6..3dc25892 100644 --- a/predihood/selection.py +++ b/predihood/selection.py @@ -7,26 +7,17 @@ import pandas as pd from predihood.classes.Data import Data from predihood.classes.Dataset import Dataset from predihood.classes.MethodSelection import MethodSelection -from predihood.config import TOPS_K, ENVIRONMENT_VARIABLES, FOLDER_SELECTED_INDICATORS, FILE_HIERARCHY, FILE_LIST_DISTRIBUTION +from predihood.config import TOPS_K, ENVIRONMENT_VARIABLES, FOLDER_SELECTED_INDICATORS, FILE_HIERARCHY -from predihood.utility_functions import apply_hierarchy, union +from predihood.utility_functions import apply_hierarchy from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier log = logging.getLogger(__name__) -DEFAULTS = { - "threshold_HM": 0.4, - "min_col_HM": 0.5 -} - -PARAMETERS = { - "threshold_HM": DEFAULTS["threshold_HM"], - "min_col_HM": DEFAULTS["min_col_HM"] -} - def generate_all_data(): """ - Generate all datasets, i.e. one with density normalization, one for population normalization and one without normalization. + Generate all datasets, i.e. one with density normalization, one with population normalization, and one without normalization. + The three are filtered. Generated datasets are located in generated_files/datasets. """ data = Data(normalization="density", filtering=True) data.init_all_in_one() @@ -38,75 +29,97 @@ def generate_all_data(): def retrieve_lists(top_k=None): """ - Get all lists generated for each top-k. - :param top_k: Specify a top_k to get the list of the top_k. - :return: a dict containing the lists of indicators for each top-k + Get the lists generated for each top-k, or the list for the given top-k if provided. + + Args: + top_k: an integer corresponding to the top-k of the list to retrieve + + Returns: + a dictionary containing the lists of indicators for each top-k """ lists = {} if top_k: - if not os.path.exists(os.path.join(FOLDER_SELECTED_INDICATORS, "list" + str(top_k) + ".csv")): generate_lists() - indicators_csv = pd.read_csv(os.path.join(FOLDER_SELECTED_INDICATORS, "list" + str(top_k) + ".csv"), header=None) - list = json.loads(indicators_csv.drop(indicators_csv.columns[0], axis=1).to_json(orient="index")) # drop([0]) to remove column with row indexes, axis=1 to delete column - list_temp = {} - for key, value in list.items(): - list_temp[ENVIRONMENT_VARIABLES[int(key)]] = [value2 for key2, value2 in value.items() if value2 is not None] - lists[str(top_k)] = list_temp + # a top-k is specified, so the list of indicators of size top-k is retrieved + lst = retrieve_one_list(top_k) + lists[str(top_k)] = lst else: + # no top-k is provided, so all lists from 10 to 100 indicators are retrieved for top_k in TOPS_K: - if not os.path.exists(os.path.join(FOLDER_SELECTED_INDICATORS, "list" + str(top_k) + ".csv")): generate_lists() - indicators_csv = pd.read_csv(os.path.join(FOLDER_SELECTED_INDICATORS, "list" + str(top_k) + ".csv"), header=None) - list = json.loads(indicators_csv.drop(indicators_csv.columns[0], axis=1).to_json(orient="index")) # drop([0]) to remove column with row indexes, axis=1 to delete column - list_temp = {} - for key, value in list.items(): - list_temp[ENVIRONMENT_VARIABLES[int(key)]] = [value2 for key2, value2 in value.items() if value2 is not None] - lists[str(top_k)] = list_temp + lst = retrieve_one_list(top_k) + lists[str(top_k)] = lst return lists +def retrieve_one_list(top_k): + """ + Retrieve the list of selected INSEE indicators corresponding to the given top-k. + + Args: + top_k: an integer corresponding to the size of the list to retrieve + + Returns: + a dictionary containing, for each EV, the indicators of the list of size top-k + """ + if not os.path.exists(os.path.join(FOLDER_SELECTED_INDICATORS, "list" + str(top_k) + ".csv")): generate_lists() + indicators_csv = pd.read_csv(os.path.join(FOLDER_SELECTED_INDICATORS, "list" + str(top_k) + ".csv"), header=None) + lst = json.loads(indicators_csv.drop(indicators_csv.columns[0], axis=1).to_json(orient="index")) + list_temp = {} + for key, value in lst.items(): + list_temp[ENVIRONMENT_VARIABLES[int(key)]] = [value2 for key2, value2 in value.items() if value2 is not None] + return list_temp + + def generate_lists(): + """ + Generate lists of INSEE indicators that are relevant for prediction.
+ This selection process is based on: + - removing fully correlated indicators + - selecting a limited number among the most relevant indicators (using Random Forest and Extra Tree classifiers) + - taking into account the diversity of categories of INSEE indicators based on a hierarchy of these indicators + """ # 1. Create data data = Data(normalization="density", filtering=True) data.init_all_in_one() - # # 2. Run heat map and get less correlated indicators - dataset = Dataset(data, "batiment", "unsupervised") + # 2. Run heat map and get fully correlated indicators by using a correlation matrix + dataset = Dataset(data, "building_type", "unsupervised") dataset.init_all_in_one() - heat_map = MethodSelection(name="heat map EV-agnostic", dataset=dataset, parameters=PARAMETERS) - heat_map.results() - # heat_map.draw_and_save() + heat_map = MethodSelection(name="heat map", dataset=dataset, parameters={"method": "spearman"}) + heat_map.compute_selection() fully_correlated_indicators = heat_map.best_indicators - log.info("fully correlated indicators: %d %s", len(fully_correlated_indicators), fully_correlated_indicators) + log.info("fully correlated indicators: %d %s", len(fully_correlated_indicators), ", ".join(fully_correlated_indicators)) + # 3. Select a limited number (top-k) of indicators by using Random Forest and Extra Tree classifiers. + # Then take into account the diversity of indicators by using a hierarchy of the INSEE indicators hierarchy = pd.read_csv(FILE_HIERARCHY, sep="\t") - # # 4. Run Feature selection and heat map EV-oriented for each EV - # # to construct primary and secondary lists of selected indicators # TODO + # 4. For each size of list (i.e. for each top-k), select a limited number of indicators with Extra Tree and Random Forest classifiers + # Then merge the two results to obtain a single list of relevant indicators. for top_k in TOPS_K: log.info("constructing list of %d indicators", top_k) all_lists = [] # to keep lists of indicators for each EV (for the current top-k) - PARAMETERS["top_k"] = top_k for env in ENVIRONMENT_VARIABLES: - # B. FEATURE IMPORTANCE on uncorrelated indicators (the ones that are not chosen by heat map) to select the most relevant ones - dataset = Dataset(data, env, indicators_to_remove=fully_correlated_indicators, _type="supervised") # WARNING: fill _type parameter + dataset = Dataset(data, env, indicators_to_remove=fully_correlated_indicators, _type="supervised") dataset.init_all_in_one() - # a. get best indicators for ET - fi_et = MethodSelection(name="feature importance ET", dataset=dataset, classifier=ExtraTreesClassifier(), parameters=PARAMETERS) + # a. get best indicators according to Extra Tree classifier + fi_et = MethodSelection(name="feature importance ET", dataset=dataset, classifier=ExtraTreesClassifier(), parameters={"top_k": top_k}) fi_et.fit() - fi_et.results() - best_indicators_FI_ET = fi_et.best_indicators + fi_et.compute_selection() + best_indicators_ET = fi_et.best_indicators - # b. get best indicators for RF - fi_rf = MethodSelection(name="feature importance RF", dataset=dataset, classifier=RandomForestClassifier(), parameters=PARAMETERS) + # b. get best indicators according to Random Forest classifier + fi_rf = MethodSelection(name="feature importance RF", dataset=dataset, classifier=RandomForestClassifier(), parameters={"top_k": top_k}) fi_rf.fit() - fi_rf.results() - best_indicators_FI_RF = fi_rf.best_indicators + fi_rf.compute_selection() + best_indicators_RF = fi_rf.best_indicators - # c. merge best indicators from ET and RF
- best_indicators_FI_ET.extend(best_indicators_FI_RF) - all_selected_indicators = best_indicators_FI_ET # all selected indicators, i.e. union between RF and ET + # c. merge indicators that have been selected by ET and RF classifiers + # in this step, if an indicator has been selected by both classifiers, its score is the sum of its RF score and its ET score. + best_indicators_ET.extend(best_indicators_RF) + all_selected_indicators = best_indicators_ET # all selected indicators, i.e. union between RF and ET keys = set([element[0] for element in all_selected_indicators]) # store indicators' names selected merged_indicators_temp = {key: 0 for key in keys} @@ -114,22 +127,22 @@ indicator = all_selected_indicators[i] merged_indicators_temp[indicator[0]] += indicator[1] # adding score - # transform it into a list of sublists, e.g. [[indicator1, score1], ..., indicatorN, scoreN]] + # transform it into a list of sub-lists, e.g. [[indicator1, score1], ..., [indicatorN, scoreN]] merged_indicators = [[key, merged_indicators_temp[key]] for key in merged_indicators_temp] - # d. apply hierarchy on selected indicators to keep the best ancestors - selected_indicators = apply_hierarchy(merged_indicators, hierarchy) + # d. apply hierarchy on selected indicators to take into account the diversity of categories of indicators + indicators_hierarchy = apply_hierarchy(merged_indicators, hierarchy) # get the names of each selected indicator - selected_indicators_names = [indic[0] for indic in selected_indicators] + selected_indicators_names = [indicator[0] for indicator in indicators_hierarchy] # e. add uncorrelated indicators of heat map to the lists all_lists.append(selected_indicators_names) # C. Transform lists of selected indicators for the current top-k and save it as a CSV file # indexes = {i: ENVIRONMENT_VARIABLES[i] for i in range(len(ENVIRONMENT_VARIABLES))} - selected_indicators_prim = pd.DataFrame(np.array(all_lists).tolist(), index=None, columns=None) - selected_indicators_prim.to_csv(os.path.join(FOLDER_SELECTED_INDICATORS, "list"+str(top_k)+".csv"), header=False) + selected_indicators = pd.DataFrame(np.array(all_lists).tolist()) + selected_indicators.to_csv(os.path.join(FOLDER_SELECTED_INDICATORS, "list"+str(top_k)+".csv"), header=False) if __name__ == "__main__":
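Step c above concatenates the ET and RF selections, so an indicator picked by both classifiers ends up with the sum of its two scores. A standalone sketch with invented indicator scores:

best_indicators_ET = [["P14_POP", 12], ["P14_RP_MAISON", 8]]  # [indicator, score]; scores invented
best_indicators_RF = [["P14_POP", 10], ["POP0002", 5]]
best_indicators_ET.extend(best_indicators_RF)
keys = set(element[0] for element in best_indicators_ET)
merged_indicators_temp = {key: 0 for key in keys}
for name, score in best_indicators_ET:
    merged_indicators_temp[name] += score
merged_indicators = [[key, merged_indicators_temp[key]] for key in merged_indicators_temp]
print(merged_indicators)  # e.g. [['P14_POP', 22], ['P14_RP_MAISON', 8], ['POP0002', 5]] (order arbitrary)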
diff --git a/predihood/utility_functions.py b/predihood/utility_functions.py index d282e679..178055e5 100644 --- a/predihood/utility_functions.py +++ b/predihood/utility_functions.py @@ -2,10 +2,7 @@ import ast import inspect import json import logging -import math -import matplotlib.pyplot as plt import numpy as np -import os import pandas as pd import re import requests @@ -13,58 +10,77 @@ import stringdist from area import area from predihood import model +from predihood.config import AVAILABLE_CLASSIFIERS, TRAIN_SIZE, TEST_SIZE, ENVIRONMENT_VARIABLES, FOLDER_DATASETS, FOLDER_DISTRIBUTION, TRANSLATION, OLD_PREFIX, NEW_PREFIX, FILE_MANUAL_ASSESSMENT log = logging.getLogger(__name__) -#################### iris function #################### -from predihood.config import AVAILABLE_CLASSIFIERS, TRAIN_SIZE, TEST_SIZE, ENVIRONMENT_VARIABLES, FOLDER_DATASETS, FOLDER_DISTRIBUTION, TRANSLATION, OLD_PREFIX, NEW_PREFIX, FILE_MANUAL_ASSESSMENT - - +# 1. IRIS function def address_to_code(address): """ Get IRIS code from address. - :param address: the address of the iris. - :return: the code of the iris. + + Args: + address: a string containing the address of the iris + + Returns: + a string containing the code of the searched address """ - response = requests.get("https://pyris.datajazz.io/api/search/", params=[('q', address)]) + response = requests.get("https://pyris.datajazz.io/api/search/", params=[("q", address)]) json_response = response.json() return str(json_response["complete_code"]) if "complete_code" in json_response else None def address_to_city(address): - response = requests.get("https://pyris.datajazz.io/api/search/", params=[('q', address)]) + """ + Get city from address. + + Args: + address: a string containing the address of the iris + + Returns: + a string containing the city of the searched address + """ + response = requests.get("https://pyris.datajazz.io/api/search/", params=[("q", address)]) json_response = response.json() return str(json_response["name"]) if "name" in json_response else None -def append_indicator(raw_indicator, iris, arr, append_col, indicators): +def append_indicator(raw_indicator, iris, lst, append_col, indicators): """ - Append an indicator in the current array (departure or arrival). - :param raw_indicator: the name of the indicator, e.g. P14_POP. - :param iris: the iris object that contains the indicator. - :param arr: the array where to append the indicator's value. - :param append_col: True to append the indicator to the cols variable. - :param indicators: the list of the columns of the dataset, i.e. the indicators. - :return: the array containing indicator' values and the other one containing indicators names. + Append the value of an INSEE indicator to a list corresponding to the data of the assessed IRIS (departure or arrival). + + Args: + raw_indicator: a string containing the name of the indicator, e.g. P14_POP + iris: the iris object that contains the indicator + lst: the list to which the indicator's value is appended + append_col: True to append the indicator to the cols variable + indicators: the list of the columns of the dataset, i.e. the indicators + + Returns: + the list containing indicator values and the list containing indicator names """ if raw_indicator in iris["properties"]["raw_indicators"]: val_indicator = iris["properties"]["raw_indicators"][raw_indicator] - arr.append(np.nan) if (val_indicator is None or not val_indicator) else arr.append( val_indicator) # append 0 if indicator is null, else append the value + # append NaN if indicator is null, else append the value + lst.append(np.nan) if (val_indicator is None or not val_indicator) else lst.append(val_indicator) else: - arr.append(np.nan) + lst.append(np.nan) if append_col: indicators.append(raw_indicator) - return arr, indicators + return lst, indicators -def append_target(row, target, arr): +def append_target(row, target, lst): """ - Append the target to the current array (departure or arrival). - :param row: the current row of dataset. - :param target: the target to append, i.e. the value of the environment variable. - :param arr: the array where the target is append. - :return: the array with the appended target. + Append the target to the current list (departure or arrival). + + Args: + row: a list corresponding to the current row of the dataset + target: a string containing the target to append, i.e. the value of the EV + lst: the list to which the target is appended + + Returns: + the list with the appended target """ if target == OLD_PREFIX + "geographical_position" or target == NEW_PREFIX + "geographical_position": # get only geo position without the city, e.g.
South-East Lyon gives South-East @@ -73,20 +89,24 @@ def append_target(row, target, arr): else: city = row[target].split(" ")[0] # city_name = ''.join(split_city[1:len(split_city)]) - value = TRANSLATION[city] if city in TRANSLATION else np.nan # + " " + city_name - arr.append(value) # if (value is None or not value or str(value) == "nan") else arr.append(value) + value = TRANSLATION[city] if city in TRANSLATION else np.nan + lst.append(value) else: value = TRANSLATION[row[target]] if row[target] in TRANSLATION else np.nan - arr.append(value) # if (value is None or not value or str(value) == "nan") else arr.append(value) + lst.append(value) - return arr + return lst def indicator_full_to_short_label(full_label): """ Convert the full label of an indicator to its short label. - :param full_label: the full label of the indicator, e.g. Population 0-2 y.o. in 2014. - :return: the short label of the indicator, e.g. P14_POP0002. + + Args: + full_label: a string containing the full label of the indicator, e.g. Population 0-2 y.o. in 2014 + + Returns: + the short label of the given indicator, e.g. P14_POP0002 """ indicators = model.get_indicators_dict() key_list = list(indicators.keys()) @@ -100,24 +120,32 @@ def indicator_full_to_short_label(full_label): def indicator_short_to_full_label(short_label): """ Convert the short label of an indicator to its full label. - :param short_label: the short label of the indicator, e.g. P14_POP0002. - :return: the full label of the indicator, e.g. Population 0-2 y.o. in 2014. + + Args: + short_label: a string containing the short label of the indicator, e.g. P14_POP0002 + + Returns: + the full label of the indicator, e.g. Population 0-2 y.o. in 2014 """ indicators = model.get_indicators_dict() return indicators[short_label] -def apply_hierarchy(best_indicators, hierarchy): +def apply_hierarchy(selected_indicators, hierarchy): """ Apply hierarchy on the given indicators in order to go up children indicators in their parents. - :param best_indicators: the best indicators selected by the feature importance. - :param hierarchy: the hierarchy of the indicators, for each indicator, there are its level in the hierarchy and its first ancestor. - :return: the new best indicators list, with some children go up in their parents. + + Args: + selected_indicators: a list of indicators selected by the feature importance process. + hierarchy: the hierarchy of the indicators, i.e. for each indicator, its level in the hierarchy and its first ancestor. + + Returns: + the new list of selected indicators, where some child indicators have been merged into their parents.
""" - list_indicators_FI = [best_indicators[j][0] for j in range(len(best_indicators))] + list_indicators_FI = [selected_indicators[j][0] for j in range(len(selected_indicators))] indexes_to_remove = [] - for i in range(len(best_indicators)): - index_row = hierarchy.index[hierarchy['INDICATOR'] == best_indicators[i][0]].tolist() + for i in range(len(selected_indicators)): + index_row = hierarchy.index[hierarchy["INDICATOR"] == selected_indicators[i][0]].tolist() if len(index_row) == 0: continue # pass this indicator because it does not exist in the hierarchy else: @@ -128,66 +156,79 @@ def apply_hierarchy(best_indicators, hierarchy): if ancestor in list_indicators_FI: index_ancestor = list_indicators_FI.index(ancestor) if ancestor in list_indicators_FI else None while index_ancestor in indexes_to_remove: - ancestor2 = hierarchy.iloc[hierarchy.index[hierarchy['INDICATOR'] == ancestor].tolist()[0], 3] # name of ancestor + ancestor2 = hierarchy.iloc[hierarchy.index[hierarchy["INDICATOR"] == ancestor].tolist()[0], 3] # name of ancestor ancestor = ancestor2 index_ancestor = list_indicators_FI.index(ancestor) if ancestor in list_indicators_FI else None - if hierarchy.iloc[hierarchy.index[hierarchy['INDICATOR'] == ancestor].tolist()[0], 2] == 1: + if hierarchy.iloc[hierarchy.index[hierarchy["INDICATOR"] == ancestor].tolist()[0], 2] == 1: break if index_ancestor not in indexes_to_remove: - best_indicators[index_ancestor][1] += best_indicators[i][1] + selected_indicators[index_ancestor][1] += selected_indicators[i][1] indexes_to_remove.append(i) - for index in sorted(indexes_to_remove, - reverse=True): # remove in reverse order to do not throw off subsequent indexes - del best_indicators[index] - return best_indicators + for index in sorted(indexes_to_remove,reverse=True): # remove in reverse order to do not throw off subsequent indexes + del selected_indicators[index] + return selected_indicators def get_classifier(name): """ Get an instance of a classifier with its name. - :param name: a string containing the name of the desired classifier - :return: an instance of a classifier + + Args: + name: a string containing the name of the desired classifier + + Returns: + an instance of a classifier """ classifier = AVAILABLE_CLASSIFIERS[name] return classifier() -def set_classifier(clf, parameters): +def set_classifier(classifier, parameters): """ - Tune the classifier with the user's parameters. - :param clf: instance of classifier to tune - :param parameters: a dict containing the tuning parameters - :return: an instance of the tuned classifier + Tune the classifier with the given parameters. + + Args: + classifier: an instance of the classifier to tune + parameters: a dictionary containing the tuning parameters + + Returns: + an instance of the tuned classifier """ - keys_clf = list(clf.get_params().keys()) + keys_clf = list(classifier.get_params().keys()) # remove None parameters and parameters that don't exist in sklearn (e.g. train and test size) parameters = {key: value for key, value in parameters.items() if value != "" and key in keys_clf} - clf.set_params(**parameters) - return clf + classifier.set_params(**parameters) + return classifier def signature(chosen_algorithm): """ - Get the signature of an algorithm, i.e. its parameters, the default values and the type of each parameter. - :param chosen_algorithm: the name of the algorithm in str, e.g. 
def signature(chosen_algorithm): """ - Get the signature of an algorithm, i.e. its parameters, the default values and the type of each parameter. - :param chosen_algorithm: the name of the algorithm in str, e.g. RandomForestClassifier - :return: the instance of the classifier and a dict containing each parameter associated with its default value and the type of the parameter. + Get the signature of an algorithm, i.e. its parameters, the default values and the type of each parameter. The documentation of the algorithm must be in NumPy style. + + Args: + chosen_algorithm: the name of the algorithm as a string, e.g. RandomForestClassifier + + Returns: + the signature of the given algorithm, i.e. a dictionary containing for each parameter: + - a list of the accepted types + - the default value + - a description of the parameter (e.g. "The train_size parameter aims at tuning the size of the sample during the learning step.") """ # special case for no selection if chosen_algorithm == "Algorithm": return json.dumps({}) try: # model = eval(_chosen_algorithm) # never use eval on untrusted strings model = get_classifier(chosen_algorithm) - doc = model.__doc__ + doc = model.__doc__ # TODO: specify case when there is no doc (user-implemented algorithm) param_section = "Parameters" dashes = "-" * len(param_section) # ------- number_spaces = doc.find(dashes) - (doc.find(param_section) + len(param_section)) attribute_section = "Attributes\n" # sub_doc is the param section of the docs (i.e. without attributes and some text) - sub_doc = doc[doc.find(param_section) + len(param_section) + number_spaces + len(dashes) + len("\n"):doc.find( attribute_section)] + sub_doc = doc[doc.find(param_section) + len(param_section) + number_spaces + len(dashes) + len("\n"):doc.find(attribute_section)] except: raise Exception("This algorithm does not exist for the moment...") params = inspect.getfullargspec(model.__init__).args[1:] # get parameter names -- [1:] to remove the 'self' parameter defaults = inspect.getfullargspec(model.__init__).defaults # get default values assert len(params) == len(defaults) parameters = {} @@ -197,36 +238,36 @@ def signature(chosen_algorithm): index_next_newline = sub_doc[index_param:].find("\n") # find returns the first occurrence parameter_string = sub_doc[index_param:index_param + index_next_newline] doc_param = sub_doc[index_param + index_next_newline:] - index_end_sentence = re.search('(\.\s)', doc_param).start() # search for the first sentence + index_end_sentence = re.search("(\.\s)", doc_param).start() # search for the first sentence first_sentence = doc_param[:index_end_sentence + 1] # format first sentence to have a prettier display.
first_sentence = first_sentence.replace("\n", " ") while "  " in first_sentence: first_sentence = first_sentence.replace("  ", " ") types_and_default = parameter_string[len(param_name):] - if '{' in types_and_default and '}' in types_and_default: # for cases like {'auto', 'kd_tree', 'brute'}, optional - types_and_default = types_and_default.replace('{', '') - types_and_default = types_and_default.replace('}', '') - if ' or ' in types_and_default: types_and_default = types_and_default.replace(' or ', ', ') - types_defaults_split = types_and_default.split(', ') + if "{" in types_and_default and "}" in types_and_default: # for cases like {"auto", "kd_tree", "brute"}, optional + types_and_default = types_and_default.replace("{", '') + types_and_default = types_and_default.replace("}", '') + if " or " in types_and_default: types_and_default = types_and_default.replace(" or ", ", ") + types_defaults_split = types_and_default.split(", ") types = [] default = -1 - variants = ['optional (default=', 'optional (default = ', 'optional', '(default=', '(default = ', 'default ', - 'default: ', 'default='] # DO NOT CHANGE THE ORDER OF ITEMS + variants = ["optional (default=", "optional (default = ", "optional", "(default=", "(default = ", "default ", + "default: ", "default="] # DO NOT CHANGE THE ORDER OF ITEMS for item in types_defaults_split: if not any(value in item for value in variants): - if item.startswith('length'): + if item.startswith("length"): pass # exceptions else: types.append(item) # item is a type else: for value in variants: if value in item: - if value.startswith('optional ('): + if value.startswith("optional ("): default = item.split(value)[1][:-1] - elif value.startswith('(default'): + elif value.startswith("(default"): default = item.split(value)[1][:-1] - elif value.startswith('default'): + elif value.startswith("default"): default = item.split(value)[1] elif value == "optional": default = "None" @@ -235,23 +276,25 @@ def signature(chosen_algorithm): type_of_default = str(type(ast.literal_eval(str(default))).__name__) else: type_of_default = "str" - types[:] = ['int' if x == 'integer' else x for x in types] # replace 'integer' by 'int' - types[:] = ['bool' if x == 'boolean' else x for x in types] # replace 'boolean' by 'bool' - types[:] = ['str' if x == 'string' else x for x in types] # replace 'boolean' by 'bool' + types[:] = ["int" if x == "integer" else x for x in types] # replace "integer" by "int" + types[:] = ["bool" if x == "boolean" else x for x in types] # replace "boolean" by "bool" + types[:] = ["str" if x == "string" else x for x in types] # replace "string" by "str" if len(types) == 0: types.append(type_of_default) # fill missing types - types[:] = [x for x in types if 'None' not in x and 'NoneType' not in x] # remove None type - description = "" - parameters[param_name[:-3]] = {"types": types, "default": default, - "description": first_sentence} # -3 to remove ' : ' + types[:] = [x for x in types if "None" not in x and "NoneType" not in x] # remove None type + parameters[param_name[:-3]] = {"types": types, "default": default, "description": first_sentence} # -3 to remove " : " return parameters
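# Sketch of the size normalization performed by check_dataset_size below: sizes may be
# given as fractions (0.8) or as percentages (80), the latter being divided by 100.
# The exact fallback rules live in the elided body of the hunk, so this helper is an
# illustrative assumption only, not the project's implementation.
def normalize_size(size, default):
    if 0 < size < 1:
        return size  # already a fraction
    if 1 <= size <= 100:
        return size / 100  # e.g. 80 -> 0.8
    return default  # fall back to a default such as TRAIN_SIZE or TEST_SIZE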
def check_dataset_size(train_size, test_size): """ Check train and test size and update with defaults or divided by 100 if needed. - :param train_size: The value for train size (should be between 0 and 1). - :param test_size: The value for test size (should be between 0 and 1). - :return: The values for train and test sizes. + + Args: + train_size: an integer or a float corresponding to the value for train size (should be between 0 and 1) + test_size: an integer or a float corresponding to the value for test size (should be between 0 and 1) + + Returns: + the train and test sizes """ if 0 < train_size < 1 and 0 < test_size < 1 and train_size + test_size == 1: return train_size, test_size # default case @@ -267,15 +310,17 @@ def check_dataset_size(train_size, test_size): return train_size, test_size -#################### list functions #################### - - +# 2. list functions def intersection(lst1, lst2): """ Intersect two lists. - :param lst1: the first list to be intersected. - :param lst2: the second list to be intersected. - :return: the result of the intersection of the two lists. + + Args: + lst1: a list corresponding to the first list to be intersected + lst2: a list corresponding to the second list to be intersected + + Returns: + a list corresponding to the result of the intersection of the two given lists """ return list(set(lst1) & set(lst2)) @@ -283,123 +328,79 @@ def intersection(lst1, lst2): def union(lst1, lst2): """ Unify two lists without repetitions. - :param lst1: the first list to append. - :param lst2: the second list to append. - :return: the union of the two lists. + + Args: + lst1: a list corresponding to the first list to unify + lst2: a list corresponding to the second list to unify + + Returns: + a list corresponding to the union of the two lists """ return list(set(lst1) | set(lst2)) -def sim(val, arr): - """Check if a value is enough similar to the data. - :param val: the value on which the similarity is computed. - :param arr: the array containing other values to check similarity. - :return: the index of the similar value, -1 if no value os enough similar. +def similarity(value, lst): """ + Check if a value is similar enough to the data. + + Args: + value: the value on which the similarity is computed + lst: the list containing other values to check similarity + + Returns: + the index of the similar value, -1 if no value is similar enough + """ + for i in range(len(lst)): + if value in lst[i]: return -2 # this value has already been appended + for elem in lst[i]: if not isinstance(elem, float): - dissim = stringdist.levenshtein(str(elem), str(val)) - if dissim == 1: return i + dissimilarity = stringdist.levenshtein(str(elem), str(value)) # compute the Levenshtein distance + if dissimilarity == 1: return i # if the given value differs from the current value by only one edit, consider it similar return -1 def get_most_frequent(lst): - """Get the most frequent item in a list. If many elements are frequent, it returns the first one. - :param lst: the list to find the most frequent element. + """ + Get the most frequent item in a list. If many elements are frequent, it returns the first one. + + Args: + lst: the list to find the most frequent element + + Returns: + a dictionary containing the most frequent element of the given list and its count + """ most_frequent_element = max(set(lst), key=lst.count) dictionary = {"most_frequent": most_frequent_element, "count_frequent": lst.count(most_frequent_element)} return dictionary -#################### plot functions #################### - - +# 3. plot functions def auto_label(rectangles, axes): """ Adds a text label above each bar in rectangles, displaying its value.
- :param rects: the bars of the plot. - :param ax: the axes of the plot. + Adds a text label above each bar in rectangles, displaying its value. + + Args: + rectangles: the bars of the plot. + axes: the axes of the plot. """ - for rect in rects: + for rect in rectangles: height = rect.get_height() - ax.annotate('{}'.format(height), xy=(rect.get_x() + rect.get_width() / 2, height), xytext=(0, 3), - textcoords="offset points", ha='center', va='bottom') - - -def draw_features_importance(importance, std, indices, X_names, env, clf, xlim_min, xlim_max, threshold): - """ - Plot (and save) features importance in a bar chart order by importance value. - :param importance: the importance of each value. - :param std: the standard deviation for each feature. - :param indices: the indices to order the plot. - :param X_names: the names of the features. - :param env: the name of the considered environmental variable. - :param clf: the name of the classifier used to compute importance. - :param threshold: the indicators under this threshold are considered not significant. - """ - j = 0 - step = 20 - while j < len(indices): - next_j = j + step - if next_j >= len(indices): - range_indices = len(indices) - j - next_j = len(indices) - else: - range_indices = step - - # Plot the feature importance of the forest - current_indices = np.argsort(importance)[::-1][j:next_j] - fig, ax = plt.subplots() - fig.set_size_inches(30, 20) + axes.annotate("{}".format(height), xy=(rect.get_x() + rect.get_width() / 2, height), xytext=(0, 3), textcoords="offset points", ha="center", va="bottom") - plt.title("Importance des indicateurs INSEE selon " + str(clf)) - plt.barh(range(range_indices), importance[current_indices], yerr=std[current_indices][::-1], align="center", - color='orange') - plt.axvline(x=threshold, color='red', linewidth=4) - plt.yticks(range(range_indices), [X_names[i] for i in current_indices]) - plt.ylim([-1, range_indices]) - plt.xlim([xlim_min, xlim_max]) - ax.invert_yaxis() # labels read top-to-bottom - ax.set_xlabel('Importance') - for i, v in enumerate(importance[current_indices]): ax.text(0, i + 0.15, str(round(v, 4)), color='black') - if not os.path.exists(FOLDER_FEATURE_SELECTION + str(env)): os.makedirs(FOLDER_FEATURE_SELECTION + str(env)) - filename = os.path.join(FOLDER_FEATURE_SELECTION, str(env), "feature-importance-" + str(clf) + str(j) + ".png") - fig.savefig(filename) - j += step - plt.close(fig) - - -def distribution_data(): - """ - Plot distribution of the supervised data, i.e. the appraised iris to show how targets are distributed. +def add_assessment_to_file(code_iris, values): """ - if os.path.exists(os.path.join(FOLDER_DATASETS, "data_density.csv")): - data = pd.read_csv(os.path.join(FOLDER_DATASETS, "data_density.csv")) - else: - return - - for env in data.loc[:, ENVIRONMENT_VARIABLES].columns: - value_env = data.loc[:, env].unique() - distribution = [len(data[data[env] == value]) for value in value_env] - - if env == "geo": - fig, ax = plt.subplots(figsize=(50, 5)) - else: - fig, ax = plt.subplots(figsize=(10, 5)) - plt.bar(value_env, distribution) - plt.xticks(rotation='vertical') - plt.show() - fig.savefig(os.path.join(FOLDER_DISTRIBUTION, "distribution-" + str(env) + ".png")) - log.debug("Plot generated for %s", env) + Add an assessed IRIS to the CSV file. 
+ Args: + code_iris: a string corresponding to the code of the IRIS (9 digits) + values: the values of the 6 EV that represent the environment of the assessed IRIS -def add_assessment_to_file(code_iris, values): + Returns: + the string "okay" if the assessed IRIS has been added to the CSV file + """ df = pd.read_csv(FILE_MANUAL_ASSESSMENT) - codes = df['CODE'].tolist() + codes = df["CODE"].tolist() codes_lst = [str(elem) for elem in codes] if code_iris in codes_lst: return "iris already assessed" @@ -418,8 +419,7 @@ def add_assessment_to_file(code_iris, values): # adding insee indicators indicators = model.get_indicators_list() - indicators_to_remove = ['IRIS', 'REG', 'DEP', 'UU2010', 'COM', 'LIBCOM', 'TRIRIS', 'GRD_QUART', 'LIBIRIS', - 'TYP_IRIS', 'MODIF_IRIS', 'LAB_IRIS', 'LIB_IRIS', 'LIB_COM', 'CODGEO', 'LIBGEO'] + indicators_to_remove = ["IRIS", "REG", "DEP", "UU2010", "COM", "LIBCOM", "TRIRIS", "GRD_QUART", "LIBIRIS", "TYP_IRIS", "MODIF_IRIS", "LAB_IRIS", "LIB_IRIS", "LIB_COM", "CODGEO", "LIBGEO"] for indicator in indicators_to_remove: if indicator in indicators: indicators.remove(indicator) @@ -429,17 +429,6 @@ def add_assessment_to_file(code_iris, values): iris.extend(values) # adding assessed values cols.extend(ENVIRONMENT_VARIABLES) - df = pd.DataFrame([iris]) #, columns=cols) - df.to_csv(FILE_MANUAL_ASSESSMENT, mode='a', index=False, header=False) # WARNING: don't erase header in the csv file + df = pd.DataFrame([iris]) + df.to_csv(FILE_MANUAL_ASSESSMENT, mode="a", index=False, header=False) # DO NOT ERASE HEADER IN THE CSV FILE return "okay" - - -if __name__ == '__main__': - # log.info("%s", indicator_full_to_short_label("Pop 0-2 ans en 2014 (princ)")) - # log.info("%s", indicator_short_to_full_label("P14_POP0002")) - - log.info(signature("RandomForestClassifier")) - - # distribution_data() # works only if dataset have been generated - - # pass -- GitLab
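As a closing illustration of the renamed similarity() helper: stringdist.levenshtein returns the edit distance between two strings, and the helper treats a distance of exactly 1 as "similar". A short, runnable sketch with made-up values:

import stringdist

values = [["South-East", "North-West"], ["Houses"]]  # made-up rows of already-seen values
candidate = "South-Eest"  # one substitution away from "South-East"
for i, row in enumerate(values):
    for elem in row:
        if stringdist.levenshtein(str(elem), str(candidate)) == 1:
            print("similar to an element of row", i)  # prints: similar to an element of row 0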