From 67a4eeb5b2ad4517dd40aa3fe06919b2332cf1eb Mon Sep 17 00:00:00 2001 From: George Marchment <georgemarchment@yahoo.fr> Date: Tue, 15 Apr 2025 10:10:06 +0200 Subject: [PATCH] Added the concordance criteria for the automatic selection of relevant processes --- src/workflow.py | 62 ++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 59 insertions(+), 3 deletions(-) diff --git a/src/workflow.py b/src/workflow.py index 0e156e0..02d0650 100644 --- a/src/workflow.py +++ b/src/workflow.py @@ -455,7 +455,9 @@ George Marchment, Bryan Brancotte, Marie Schmit, Frédéric Lemoine, Sarah Cohen import numpy as np min_nb_clusters, min_relevant_processes = np.inf, [] already_tried = [] + print('-'*number_of_tries+">") for i in range(number_of_tries): + print('.', end='') random_relevant_processes = self.generate_random_relevant_processes() escape = 0 while(escape<100 and set(random_relevant_processes) in already_tried): @@ -483,7 +485,9 @@ George Marchment, Bryan Brancotte, Marie Schmit, Frédéric Lemoine, Sarah Cohen import numpy as np min_uniform_score, min_relevant_processes = np.inf, [] already_tried = [] + print('-'*number_of_tries+">") for i in range(number_of_tries): + print('.', end='') random_relevant_processes = self.generate_random_relevant_processes() escape = 0 while(escape<100 and set(random_relevant_processes) in already_tried): @@ -514,6 +518,48 @@ George Marchment, Bryan Brancotte, Marie Schmit, Frédéric Lemoine, Sarah Cohen min_uniform_score = score return min_relevant_processes + + #reduction_alpha is the same as above + #reduction_beta is the same as above + def get_relevant_which_minizes_the_number_of_conditions(self, reduction_alpha = 0.2, reduction_beta = 0.8, number_of_tries = 50): + import numpy as np + import copy + min_condition_score, min_relevant_processes = np.inf, [] + already_tried = [] + w_save = copy.deepcopy(self) + number_processes_called = len(self.get_processes_called()) + print('-'*number_of_tries+">") + for i in 
range(number_of_tries): + print('.', end='') + w = copy.deepcopy(w_save) + random_relevant_processes = w.generate_random_relevant_processes() + escape = 0 + while(escape<100 and set(random_relevant_processes) in already_tried): + escape+=1 + random_relevant_processes = w.generate_random_relevant_processes() + #Cause it means we've already searched the majority of the possibilities + if(escape>=100): + return min_relevant_processes + already_tried.append(set(random_relevant_processes)) + _, cluster_organisation = w.convert_workflow_2_user_view(relevant_processes=random_relevant_processes, render_graphs = False) + + tab_nb_executors_per_cluster, tab_nb_conditions_per_cluster = [], [] + for c in cluster_organisation: + tab_nb_executors_per_cluster.append(cluster_organisation[c]["nb_executors"]) + tab_nb_conditions_per_cluster.append(cluster_organisation[c]["nb_conditions"]) + + score = np.max(tab_nb_conditions_per_cluster) + #score = np.mean(tab_nb_conditions_per_cluster) + #score = np.median(tab_nb_conditions_per_cluster) + #Ratio + #score = np.max(np.array(tab_nb_conditions_per_cluster)/np.array(tab_nb_executors_per_cluster)) + + if(len(cluster_organisation)>=reduction_alpha*number_processes_called and + len(cluster_organisation)<=reduction_beta*number_processes_called and + score<min_condition_score): + min_relevant_processes = random_relevant_processes + min_condition_score = score + return min_relevant_processes #Method that returns the order of execution for each executor def get_order_execution_executors(self): @@ -1076,11 +1122,16 @@ George Marchment, Bryan Brancotte, Marie Schmit, Frédéric Lemoine, Sarah Cohen calls_in_operations = [] non_relevant_name = 1 - subworkflow_clusters_to_add, subworkflow_cluster_calls_to_add = [], [] + #This is a dict mapping each cluster to info about the number of executors and conditions + clusters_2_organisation = {} + + #subworkflow_clusters_to_add, subworkflow_cluster_calls_to_add = [], [] index_cluster = len(clusters) #We replace the
last clusters first -> this is cause the outputs of the last clusters aren't used anywhere else in the workflow by definition for elements in list(reversed(clusters)): + nb_executors = 0 + channels_to_replace_outside_of_cluster = [] #Check that there is at least one process in cluster @@ -1093,11 +1144,12 @@ George Marchment, Bryan Brancotte, Marie Schmit, Frédéric Lemoine, Sarah Cohen processes_added = [] things_added_in_cluster = [] if(len(elements)>=1 and at_least_one_process): + name, body, take, emit = "", "", "", "" first_element = True for ele in elements: - + nb_executors+=1 if(ele.get_type()=="Process"): #Determine the name of the created subworkflow cluster @@ -1263,6 +1315,7 @@ George Marchment, Bryan Brancotte, Marie Schmit, Frédéric Lemoine, Sarah Cohen channels_to_replace_outside_of_cluster.append((old_output_names[i], param_out_name)) #If there was only one single condition in the subworkflow cluster -> then we add it when the call is done if(len(conditions_in_subworkflow)==1): + #TODO -> I think the case "else" -> needs to be removed cause sometimes the empty channel created may overwrite an existing one subworkfow_call = f"if({conditions_in_subworkflow[0].split('$$__$$')[0]}) {{\n{subworkfow_call_case_true}\n}} else {{\n{subworkfow_call_case_false}\n}}" else: subworkfow_call = subworkfow_call_case_true @@ -1298,6 +1351,9 @@ George Marchment, Bryan Brancotte, Marie Schmit, Frédéric Lemoine, Sarah Cohen code = replace_group1(code, pattern, new) #code = code.replace(old, new) + #Since i've added the conditions myself -> i can just count them by searching for this simple pattern + clusters_2_organisation[subworkflow_code] = {"nb_executors":nb_executors, "nb_conditions":subworkflow_code.count("if(")} + #Add the subworkflow defintions #------------------------------------- code = code.replace(f'{subworkflow_section}', f"{subworkflow_code}\n\n{subworkflow_section}") @@ -1332,7 +1388,7 @@ George Marchment, Bryan Brancotte, Marie Schmit, Frédéric
Lemoine, Sarah Cohen f.write(code) f.close() self.rewrite_and_initialise(code, self.processes_2_remove, render_graphs=render_graphs) - return code + return code, clusters_2_organisation #return code # ##So basically when retriving a thing (process or subworkflow) -- GitLab