#The reduction_alpha is the minimum number of clusters expressed as a fraction of the processes.
#For example, with 10 processes and reduction_alpha = 0.2 we want at least 2 clusters;
#likewise reduction_alpha = 0.4 means we want at least 4 clusters.
def get_relevant_processes_which_minimize_nb_of_clusters(self, reduction_alpha = 0.2, number_of_tries = 50):
    """Randomly search for the set of relevant processes that minimizes the cluster count.

    Repeatedly draws random candidate sets of relevant processes (skipping
    already-tried candidates), builds the corresponding user view, and keeps
    the candidate that produces the fewest clusters while still having at
    least ``reduction_alpha * number_of_processes`` clusters.

    Parameters
    ----------
    reduction_alpha : float
        Lower bound on the accepted cluster count, as a fraction of the
        number of processes called in the workflow.
    number_of_tries : int
        Maximum number of random candidates to evaluate.

    Returns
    -------
    list
        The best set of relevant processes found; may be the initial empty
        list if no candidate satisfied the threshold.
    """
    # float("inf") replaces the original `numpy` import, which was pulled in
    # only to provide an infinity sentinel.
    best_nb_clusters = float("inf")
    best_relevant_processes = []
    # Set of frozensets gives O(1) duplicate detection (the original kept a
    # list of sets and scanned it linearly for every candidate).
    already_tried = set()
    # The cluster-count lower bound is loop-invariant, so compute it once.
    min_accepted_clusters = reduction_alpha * len(self.get_processes_called())
    for _ in range(number_of_tries):
        random_relevant_processes = self.generate_random_relevant_processes()
        escape = 0
        while escape < 100 and frozenset(random_relevant_processes) in already_tried:
            escape += 1
            random_relevant_processes = self.generate_random_relevant_processes()
        # 100 consecutive duplicates means we've most likely already searched
        # the majority of the possibilities -> stop early with the best so far.
        if escape >= 100:
            return best_relevant_processes
        already_tried.add(frozenset(random_relevant_processes))
        # Build the user view for this candidate and read back its clusters.
        self.generate_user_view(relevant_processes = random_relevant_processes, render_graphs=False)
        clusters = self.graph.get_clusters_from_user_view()
        # Keep the candidate only if it has enough clusters (at least
        # reduction_alpha * nb_processes) and improves on the best so far.
        if len(clusters) >= min_accepted_clusters and len(clusters) < best_nb_clusters:
            best_relevant_processes = random_relevant_processes
            best_nb_clusters = len(clusters)
    return best_relevant_processes