Commit 6600a0e7 authored by George Marchment

Added function to get the most uniform selection of clusters

parent 5948cd65
Pipeline #14616 failed in 2 minutes and 1 second
@@ -469,11 +469,52 @@ George Marchment, Bryan Brancotte, Marie Schmit, Frédéric Lemoine, Sarah Cohen
             self.generate_user_view(relevant_processes = random_relevant_processes, render_graphs=False)
             clusters = self.graph.get_clusters_from_user_view()
             #We want the number of clusters to be at least x% of the size of the workflows
-            if(len(clusters)>=reduction_alpha*len(self.get_processes_called()) and len(clusters)<min_nb_clusters):
+            if(len(clusters)>=reduction_alpha*len(self.get_processes_called()) and
+                len(clusters)<min_nb_clusters):
                 min_relevant_processes = random_relevant_processes
                 min_nb_clusters = len(clusters)
         return min_relevant_processes
+    #reduction_beta is the maximum number of clusters allowed, given as a percentage of the number of processes
+    #For example if there are 10 processes and reduction_beta = 0.8 -> we want a maximum of 8 clusters
+    #Likewise, if reduction_beta = 0.6 -> we want a maximum of 6 clusters
+    #reduction_alpha is the same idea, but for the minimum number of clusters
+    def get_relevant_processes_which_uniformizes_cluster_distribution(self, reduction_alpha = 0.2, reduction_beta = 0.8, number_of_tries = 50):
+        import numpy as np
+        min_uniform_score, min_relevant_processes = np.inf, []
+        already_tried = []
+        for i in range(number_of_tries):
+            random_relevant_processes = self.generate_random_relevant_processes()
+            escape = 0
+            while(escape<100 and set(random_relevant_processes) in already_tried):
+                escape+=1
+                random_relevant_processes = self.generate_random_relevant_processes()
+            #If we reach the escape limit, it means we've already searched the majority of the possibilities
+            if(escape>=100):
+                return min_relevant_processes
+            already_tried.append(set(random_relevant_processes))
+            #Get the clusters from the user view
+            self.generate_user_view(relevant_processes = random_relevant_processes, render_graphs=False)
+            clusters = self.graph.get_clusters_from_user_view()
+            clusters_2_size = []
+            for c in clusters:
+                nb_processes = 0
+                for ele in c:
+                    if(ele.get_type()=="Process"):
+                        nb_processes+=1
+                clusters_2_size.append(nb_processes)
+            #Uniformity score: sum of squared relative deviations of the cluster sizes from their mean (lower = more uniform)
+            score = 0
+            average = np.mean(clusters_2_size)
+            for x in clusters_2_size:
+                score += ((average-x)/average)**2
+            if(len(clusters)>=reduction_alpha*len(self.get_processes_called()) and
+                len(clusters)<=reduction_beta*len(self.get_processes_called()) and
+                score<min_uniform_score):
+                min_relevant_processes = random_relevant_processes
+                min_uniform_score = score
+        return min_relevant_processes
     #Method that returns the order of execution for each executor
     def get_order_execution_executors(self):
         dico = {}
......
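The comments above the new method describe reduction_alpha and reduction_beta as fractions of the number of processes that bound the accepted number of clusters. Below is a minimal standalone sketch of that acceptance window; the process count of 10 and the helper name cluster_count_is_acceptable are made up for illustration, while in the method itself the count comes from len(self.get_processes_called()).

# Illustrative sketch of the acceptance window, not part of the commit.
nb_processes = 10
reduction_alpha, reduction_beta = 0.2, 0.8

def cluster_count_is_acceptable(nb_clusters):
    # At least alpha*N and at most beta*N clusters -> here between 2 and 8.
    return (nb_clusters >= reduction_alpha * nb_processes
            and nb_clusters <= reduction_beta * nb_processes)

print(cluster_count_is_acceptable(1))   # False: fewer than 2 clusters
print(cluster_count_is_acceptable(5))   # True
print(cluster_count_is_acceptable(9))   # False: more than 8 clusters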
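The uniformity score computed in the loop is the sum of squared relative deviations of the cluster sizes from their mean, so smaller values mean the processes are spread more evenly across clusters. The standalone check below uses the same formula; the cluster-size lists and the helper name uniformity_score are made up for illustration.

import numpy as np

def uniformity_score(cluster_sizes):
    # Same formula as in the new method: sum of squared relative
    # deviations from the mean cluster size (lower = more uniform).
    average = np.mean(cluster_sizes)
    return sum(((average - x) / average) ** 2 for x in cluster_sizes)

print(uniformity_score([3, 3, 4]))   # ~0.06 -> nearly uniform
print(uniformity_score([1, 1, 8]))   # ~2.94 -> one dominant cluster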
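How the new selector is meant to be used can only be inferred from the diff: it calls generate_user_view with render_graphs=False internally, which suggests the caller re-renders the view once a selection is chosen. The sketch below assumes an already-built object of the class this diff modifies; only the two method calls shown above appear in the commit, and the wrapper function name is hypothetical.

# Hypothetical usage sketch; "workflow" is assumed to be an instance of the
# class modified in this commit.
def render_uniform_view(workflow):
    # Pick the selection of relevant processes that spreads processes most
    # evenly across clusters...
    relevant = workflow.get_relevant_processes_which_uniformizes_cluster_distribution(
        reduction_alpha=0.2, reduction_beta=0.8, number_of_tries=50)
    # ...then regenerate the user view, this time rendering the graphs.
    workflow.generate_user_view(relevant_processes=relevant, render_graphs=True)
    return relevant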