Commit 67a4eeb5 authored by George Marchment

Added the concordance criteria for the automatic selection of relevant processes

parent 6600a0e7
Pipeline #14617 failed in 2 minutes and 9 seconds
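In short, the new get_relevant_which_minizes_the_number_of_conditions method samples random sets of relevant processes, clusters the workflow with convert_workflow_2_user_view, and keeps the set whose worst cluster needs the fewest conditions, while constraining the number of clusters to lie between reduction_alpha and reduction_beta times the number of called processes. A minimal usage sketch follows; the import path, the Workflow class instantiation and the file name are assumptions, only the two method calls appear in this diff.

# Hypothetical usage sketch -- import path, class name and constructor
# arguments are assumptions; only the two method calls below appear in this commit.
from src.workflow import Workflow

w = Workflow("main.nf")
# New in this commit: pick the relevant processes whose clustering needs
# the fewest conditions per cluster.
relevant = w.get_relevant_which_minizes_the_number_of_conditions(
    reduction_alpha=0.2, reduction_beta=0.8, number_of_tries=50)
# Also changed in this commit: convert_workflow_2_user_view now returns the
# rewritten code plus a dict describing each cluster
# ({"nb_executors": ..., "nb_conditions": ...}).
code, organisation = w.convert_workflow_2_user_view(
    relevant_processes=relevant, render_graphs=False)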
@@ -455,7 +455,9 @@
import numpy as np
min_nb_clusters, min_relevant_processes = np.inf, []
already_tried = []
print('-'*number_of_tries+">")
for i in range(number_of_tries):
print('.', end='')
random_relevant_processes = self.generate_random_relevant_processes()
escape = 0
while(escape<100 and set(random_relevant_processes) in already_tried):
@@ -483,7 +485,9 @@
import numpy as np
min_uniform_score, min_relevant_processes = np.inf, []
already_tried = []
print('-'*number_of_tries+">")
for i in range(number_of_tries):
print('.', end='')
random_relevant_processes = self.generate_random_relevant_processes()
escape = 0
while(escape<100 and set(random_relevant_processes) in already_tried):
@@ -514,6 +518,48 @@
min_uniform_score = score
return min_relevant_processes
#reduction_alpha is the same as above
#reduction_beta is the same as above
def get_relevant_which_minizes_the_number_of_conditions(self, reduction_alpha = 0.2, reduction_beta = 0.8, number_of_tries = 50):
import numpy as np
import copy
min_condition_score, min_relevant_processes = np.inf, []
already_tried = []
w_save = copy.deepcopy(self)
number_processes_called = len(self.get_processes_called())
print('-'*number_of_tries+">")
for i in range(number_of_tries):
print('.', end='')
w = copy.deepcopy(w_save)
random_relevant_processes = w.generate_random_relevant_processes()
escape = 0
while(escape<100 and set(random_relevant_processes) in already_tried):
escape+=1
random_relevant_processes = w.generate_random_relevant_processes()
#Hitting the escape limit means we've already searched the majority of the possibilities
if(escape>=100):
return min_relevant_processes
already_tried.append(set(random_relevant_processes))
_, cluster_organisation = w.convert_workflow_2_user_view(relevant_processes=random_relevant_processes, render_graphs = False)
tab_nb_executors_per_cluster, tab_nb_conditions_per_cluster = [], []
for c in cluster_organisation:
tab_nb_executors_per_cluster.append(cluster_organisation[c]["nb_executors"])
tab_nb_conditions_per_cluster.append(cluster_organisation[c]["nb_conditions"])
score = np.max(tab_nb_conditions_per_cluster)
#score = np.mean(tab_nb_conditions_per_cluster)
#score = np.median(tab_nb_conditions_per_cluster)
#Ratio
#score = np.max(np.array(tab_nb_conditions_per_cluster)/np.array(tab_nb_executors_per_cluster))
if(len(cluster_organisation)>=reduction_alpha*number_processes_called and
len(cluster_organisation)<=reduction_beta*number_processes_called and
score<min_condition_score):
min_relevant_processes = random_relevant_processes
min_condition_score = score
return min_relevant_processes
#Method that returns the order of execution for each executor
def get_order_execution_executors(self):
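To make the acceptance rule in the new method above concrete, here is a small self-contained sketch of a single scoring step; the cluster names and counts are invented for illustration.

import numpy as np

# Invented example of what convert_workflow_2_user_view's second return
# value looks like for one candidate set of relevant processes.
cluster_organisation = {
    "cluster_a": {"nb_executors": 4, "nb_conditions": 1},
    "cluster_b": {"nb_executors": 3, "nb_conditions": 3},
    "cluster_c": {"nb_executors": 5, "nb_conditions": 2},
}
number_processes_called = 10
reduction_alpha, reduction_beta = 0.2, 0.8

# Score = worst-case number of conditions over the clusters.
score = np.max([c["nb_conditions"] for c in cluster_organisation.values()])
# The candidate is only kept if the number of clusters stays between
# alpha and beta times the number of called processes (here 2 <= 3 <= 8)
# and if it improves on the best score found so far.
acceptable = (reduction_alpha * number_processes_called
              <= len(cluster_organisation)
              <= reduction_beta * number_processes_called)
print(score, acceptable)  # -> 3 True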
@@ -1076,11 +1122,16 @@
calls_in_operations = []
non_relevant_name = 1
subworkflow_clusters_to_add, subworkflow_cluster_calls_to_add = [], []
#Dictionary mapping each cluster to information about its number of executors and conditions
clusters_2_organisation = {}
#subworkflow_clusters_to_add, subworkflow_cluster_calls_to_add = [], []
index_cluster = len(clusters)
#We replace the last clusters first -> by definition, the outputs of the last clusters aren't used anywhere else in the workflow
for elements in list(reversed(clusters)):
nb_executors = 0
channels_to_replace_outside_of_cluster = []
#Check that there is at least one process in the cluster
@@ -1093,11 +1144,12 @@
processes_added = []
things_added_in_cluster = []
if(len(elements)>=1 and at_least_one_process):
name, body, take, emit = "", "", "", ""
first_element = True
for ele in elements:
nb_executors+=1
if(ele.get_type()=="Process"):
#Determine the name of the created subworkflow cluster
@@ -1263,6 +1315,7 @@
channels_to_replace_outside_of_cluster.append((old_output_names[i], param_out_name))
#If there is only a single condition in the subworkflow cluster -> then we add it when the call is made
if(len(conditions_in_subworkflow)==1):
#TODO -> I think the "else" case needs to be removed, because the empty channel created may sometimes overwrite an existing one
subworkfow_call = f"if({conditions_in_subworkflow[0].split('$$__$$')[0]}) {{\n{subworkfow_call_case_true}\n}} else {{\n{subworkfow_call_case_false}\n}}"
else:
subworkfow_call = subworkfow_call_case_true
@@ -1298,6 +1351,9 @@
code = replace_group1(code, pattern, new)
#code = code.replace(old, new)
#Since I've added the conditions myself -> I can just count them by searching for this simple pattern
clusters_2_organisation[subworkflow_code] = {"nb_executors":nb_executors, "nb_conditions":subworkflow_code.count("if(")}
#Add the subworkflow definitions
#-------------------------------------
code = code.replace(f'{subworkflow_section}', f"{subworkflow_code}\n\n{subworkflow_section}")
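For illustration, this is roughly what one recorded entry looks like; the generated subworkflow snippet below is invented, only the dict layout and the count("if(") trick come from this commit.

# Invented example of generated subworkflow code; in the tool it is produced
# by the rewriting logic above.
subworkflow_code = (
    "workflow cluster_1 {\n"
    "    take: reads\n"
    "    main:\n"
    "    if(params.run_qc) {\n"
    "        fastqc(reads)\n"
    "    }\n"
    "}\n"
)
nb_executors = 1
clusters_2_organisation = {}
# The conditions were emitted by this code itself, so counting the literal
# "if(" pattern is enough to know how many conditions the cluster holds.
clusters_2_organisation[subworkflow_code] = {
    "nb_executors": nb_executors,
    "nb_conditions": subworkflow_code.count("if("),  # -> 1
}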
@@ -1332,7 +1388,7 @@
f.write(code)
f.close()
self.rewrite_and_initialise(code, self.processes_2_remove, render_graphs=render_graphs)
return code
return code, clusters_2_organisation
#return code
#
##So basically when retrieving a thing (process or subworkflow)