Skip to content
Snippets Groups Projects
Commit 9bde30f6 authored by Nelly Barret
Browse files

[M] working on similarity between curves

parent 71599410
No related branches found
No related tags found
No related merge requests found
...@@ -8,12 +8,10 @@ def generate_charts(): ...@@ -8,12 +8,10 @@ def generate_charts():
data = Data(normalize="density", filter=True) data = Data(normalize="density", filter=True)
data.init_all_in_one() data.init_all_in_one()
lists = get_selected_indicators_lists(10) lists = get_selected_indicators_lists(10)
print(lists) for j, env in enumerate(["batiment"]):
for j, env in enumerate(["batiment", "usage"]):
print(env)
dataset = Dataset(data, env, selected_indicators=lists["10"][env], train_size=0.8, test_size=0.2) dataset = Dataset(data, env, selected_indicators=lists["10"][env], train_size=0.8, test_size=0.2)
dataset.init_all_in_one() dataset.init_all_in_one()
algo = Chart(name='chart', dataset=dataset) algo = Chart(name='chart', dataset=dataset, number_of_iris=4)
algo.compute_trendline() algo.compute_trendline()
......
...@@ -6,37 +6,108 @@ from scipy.interpolate import interp1d ...@@ -6,37 +6,108 @@ from scipy.interpolate import interp1d
from predihood.classes.Method import Method from predihood.classes.Method import Method
def point_distance(y1, y2):
    """Return the absolute difference between two ordinates.

    Both arguments are converted with ``float`` first, so numeric strings
    are accepted as well as numbers.

    :param y1: ordinate of the first point.
    :param y2: ordinate of the second point.
    :return: ``abs(float(y2) - float(y1))`` as a float.
    """
    return abs(float(y2) - float(y1))
def similarity_point(y1, y2, step):
    """Score how close two ordinates are, on a scale from 0 to 1.

    The score is 1 when the two values are equal and decreases linearly
    with their distance, reaching 0 once the distance is ``2 * step`` or
    more.

    :param y1: ordinate of the first point.
    :param y2: ordinate of the second point.
    :param step: tolerance unit; points further apart than ``2 * step``
        are considered too different.
    :return: 1 for identical points, 0 for points that are too different,
        otherwise a float strictly between 0 and 1.
    """
    distance = point_distance(y1, y2)
    if distance == 0:
        return 1  # points are the same
    if step <= 0:
        # No tolerance at all: any non-zero gap is "too different".
        # This also avoids dividing by zero below.
        return 0
    ratio = distance / (2 * step)
    if ratio > 1:
        return 0  # points are too different
    # Invert the ratio so that a small distance yields a score close to 1,
    # consistent with the two boundary cases above.
    return 1 - ratio
def slope(x1, y1, x2, y2):
    """Return the inclination, in degrees, of the segment (x1, y1) -> (x2, y2).

    The angle is ``degrees(arctan(dy / dx))``. When the segment goes left to
    right and downwards (``x1 < x2`` and ``y1 > y2``), 360 is added so the
    result is positive (e.g. 315 instead of -45).

    :param x1: abscissa of the first point.
    :param y1: ordinate of the first point.
    :param x2: abscissa of the second point.
    :param y2: ordinate of the second point.
    :return: the angle in degrees; 0 when both ordinates are 0 or when the
        two points coincide; 90 / 270 for a vertical segment going up / down.
    """
    if y1 == 0 and y2 == 0:
        return 0
    if x1 == x2:
        # Vertical segment: the leading coefficient is undefined (division
        # by zero), so return the limit angle directly.
        if y2 == y1:
            return 0.0
        return 90.0 if y2 > y1 else 270.0
    leading_coeff = (y2 - y1) / (x2 - x1)  # slope of the line through the two points
    teta = np.degrees(np.arctan(leading_coeff))  # incline of the line between the two points
    if x1 < x2 and y1 > y2:
        teta += 360
    return teta
def similarity(data1, data2, max_distance, nb_points):
    """Compare two curves point by point and return an averaged score.

    For every pair of points taken in lockstep from ``data1`` and ``data2``
    the score is the vertical distance divided by ``max_distance``, minus a
    slope factor. The slope factor is the absolute difference between the
    sines of the inclinations of the segments leaving each point; it is 0
    for the last point, which has no following segment.

    NOTE(review): the per-point term grows with the distance between the
    curves, so the result looks like a dissimilarity rather than a
    similarity -- confirm the intended direction.

    :param data1: list of ``(x, y)`` pairs; ``x`` is also used as the index
        of the point inside the list (the next point is ``data1[x + 1]``).
    :param data2: list of ``(x, y)`` pairs, same convention as ``data1``.
    :param max_distance: value used to normalise vertical distances; must
        not be 0.
    :param nb_points: divisor applied to the accumulated score; must not
        be 0.
    :return: the accumulated per-point scores divided by ``nb_points``.
    """
    somme = 0
    for point1, point2 in zip(data1, data2):
        x1, y1 = point1[0], point1[1]
        x2, y2 = point2[0], point2[1]
        # Named ``score`` so the local does not shadow this function's name.
        score = point_distance(y1, y2) / max_distance
        if x1 + 1 < len(data1) and x2 + 1 < len(data2):
            next_x1, next_y1 = data1[x1 + 1][0], data1[x1 + 1][1]
            next_x2, next_y2 = data2[x2 + 1][0], data2[x2 + 1][1]
            print("next is (", next_x1, ";", next_y1, "), (", next_x2, ";", next_y2, ")")
            # slope() returns degrees while np.sin expects radians.
            sinus1 = np.sin(np.radians(slope(x1, y1, next_x1, next_y1)))
            sinus2 = np.sin(np.radians(slope(x2, y2, next_x2, next_y2)))
            slope_factor = abs(sinus1 - sinus2)
            print(sinus1, "-", sinus2, "=", slope_factor)
        else:
            slope_factor = 0
        score -= slope_factor
        somme += score
    somme /= nb_points
    return somme
class Chart(Method):
    """Plot one chart per dataset row and compare the resulting curves.

    Each of the first ``number_of_iris`` rows of ``dataset.data`` is drawn
    as a curve over ``dataset.selected_indicators``; curves are then
    compared pairwise with ``similarity``.
    """

    def __init__(self, name, dataset, number_of_iris=12):
        """Create the chart method.

        :param name: name forwarded to ``Method.__init__``.
        :param dataset: dataset forwarded to ``Method.__init__``; its
            ``data``, ``selected_indicators`` and ``env`` attributes are
            read by the other methods.
        :param number_of_iris: number of rows to plot; an odd value falls
            back to 12.
        """
        Method.__init__(self, name, dataset)
        self.chart = None
        self.dataset = dataset
        self.trendline = None
        self.number_of_iris = number_of_iris if number_of_iris % 2 == 0 else 12
        self.iris_per_line = 2  # number of sub-figures per row of the grid
        self.step = 0  # spacing of the value-axis ticks, set by compute_trendline

    def compute_trendline(self):  # TODO: check order of selected indicators
        """Draw one sub-figure per row, then run ``compute_similarity``.

        Sets ``self.step`` to one fifth of the largest indicator value found
        in the plotted rows.
        """
        print("compute trendline")
        indicators = self.dataset.selected_indicators
        sample = self.dataset.data.head(self.number_of_iris)

        # Ceiling division, so the grid always has enough cells.
        nb_rows = -(-self.number_of_iris // self.iris_per_line)
        # squeeze=False keeps ``axs`` two-dimensional even with a single row,
        # so ``axs[i, j]`` indexing below always works.
        fig, axs = plt.subplots(nb_rows, self.iris_per_line, figsize=(15, 15), squeeze=False)  # rows, columns

        # The maximum is the same for every sub-figure: compute it once.
        max_value = sample[indicators].values.max() if len(sample) else 0
        self.step = max_value / 5

        for position, (index, row) in enumerate(sample.iterrows()):
            # Fill the grid left to right, top to bottom.
            i, j = divmod(position, self.iris_per_line)
            data = [row[indicator] for indicator in indicators]
            x = np.arange(0, len(data))
            f = interp1d(x, data)
            axs[i, j].axis(ymin=0, ymax=max_value)
            axs[i, j].set_xticks(x)
            if self.step > 0:
                # Value ticks belong on the y axis; putting them on the x axis
                # would overwrite the indicator ticks set just above.
                # A zero step is skipped because np.arange rejects it.
                axs[i, j].set_yticks(np.arange(0, max_value, step=self.step))
            axs[i, j].plot(x, data, 'o', x, f(x), '-')
            title = str(row['CODE']) + " - " + str(self.dataset.env)
            axs[i, j].set_title(title)
        fig.show()
        self.compute_similarity()

    def compute_similarity(self):
        """Print the ``similarity`` score of every pair of plotted rows.

        ``self.step`` is used as the normalising distance, so it must be
        non-zero; ``compute_trendline`` sets it before calling this method.
        """
        indicators = self.dataset.selected_indicators
        sample = self.dataset.data.head(self.number_of_iris)
        for index1, row1 in sample.iterrows():
            for index2, row2 in sample.iterrows():
                if index1 > index2:  # compare charts only once (don't compare 2 and 1 and after 1 and 2)
                    # Build fresh point lists for each pair; reusing the lists
                    # across pairs would mix points from different rows.
                    data1 = [(i, row1[indicator]) for i, indicator in enumerate(indicators)]
                    data2 = [(i, row2[indicator]) for i, indicator in enumerate(indicators)]
                    sim_percentage = similarity(data1, data2, self.step, len(indicators))
                    print(row1['CODE'], "/", row2['CODE'], " ->", sim_percentage)
                    print(data1)
                    print(data2)
if __name__ == '__main__':
    # Manual check: print the angle of the segment going from (0, 1) to (1, 0).
    descending_angle = slope(0, 1, 1, 0)
    print(descending_angle)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment