From 30ccf5a8e6d35079dd1b7695d6c936d72c7a0998 Mon Sep 17 00:00:00 2001
From: George Marchment <georgemarchment@yahoo.fr>
Date: Tue, 6 May 2025 16:21:53 +0200
Subject: [PATCH] Added small update

---
 src/code_.py        |  2 +-
 src/constant.py     |  2 +-
 src/main.py         |  2 +-
 src/outils_graph.py |  2 +-
 src/workflow.py     | 24 ++++++++++++++----------
 5 files changed, 18 insertions(+), 14 deletions(-)

diff --git a/src/code_.py b/src/code_.py
index e9eb67b..f8f4bb7 100644
--- a/src/code_.py
+++ b/src/code_.py
@@ -157,7 +157,7 @@ class Code:
         while(searching and timeout<constant.WHILE_UPPER_BOUND):
             searching = False
             #TODO -> do the same with flatMap -> 668
-            for word in ["map", "flatMap", "view"]:
+            for word in ["map", "flatMap", "view", "ifEmpty"]:
                 for end_char in ['{', '\(']:
                     pattern = fr"(\.|\|)\s*"+word+r"\s*"+end_char
                     for match in re.finditer(pattern, code):
diff --git a/src/constant.py b/src/constant.py
index a5cd310..c88a4e3 100644
--- a/src/constant.py
+++ b/src/constant.py
@@ -21,7 +21,7 @@ LIST_OPERATORS = ["distinct", "filter", "first", "last", "randomSample", "take",
                   "branch","choice","multiMap","into","separate","tap",
                   "count","countBy","min","max","sum","toInteger",
                   "close","dump","ifEmpty","print","println","set","view",
-                  "map_modified","reduce_modified", "flatMap_modified", "view_modified", #The articifiel operations
+                  "map_modified","reduce_modified", "flatMap_modified", "view_modified", "ifEmpty_modified", #The articifiel operations
                   "empty", "of", "fromPath", "fromList", "subscribe", "value", "from"]#This last line is added by me:)
diff --git a/src/main.py b/src/main.py
index 5bfec99..b6514af 100644
--- a/src/main.py
+++ b/src/main.py
@@ -64,7 +64,7 @@ class Main(Nextflow_Building_Blocks):
             if(new!=old):
                 temp = code
                 code = code.replace(old, new, 1)
-                if(temp==code):
+                if(temp==code and old.split()!=new.split()):
                     print(exe)
                     print(code)
                     print("- old", f'"{old}"')
diff --git a/src/outils_graph.py b/src/outils_graph.py
index 303d088..4473d51 100644
--- a/src/outils_graph.py
+++ b/src/outils_graph.py
@@ -1002,7 +1002,7 @@ def relev_user_view_builder(dico_param, relevant_modules, alias_2_tools):
         new_nodes[i].sort()
         new_name = get_name_new_node(get_names_tab(dico, new_nodes[i]), relevant_modules, tag, alias_2_tools)
         name_printed = new_name.split(tag)[0]
-        shape = "ellipse"
+        shape = "rectangle"
         if(name_printed==""):
             shape = "point"
         node = {"id": '_$$_'.join(new_nodes[i]).replace('<', '').replace('>', ''),
diff --git a/src/workflow.py b/src/workflow.py
index 4ca2a7c..bce20cb 100644
--- a/src/workflow.py
+++ b/src/workflow.py
@@ -572,16 +572,18 @@ George Marchment, Bryan Brancotte, Marie Schmit, Frédéric Lemoine, Sarah Cohen
         import copy
         min_score, min_processes = np.inf, []
         already_tried = []
-        working_workflow = copy.deepcopy(self)
+        #working_workflow = copy.deepcopy(self)
 
-        processes_called = working_workflow.get_processes_called()
+        processes_called = self.get_processes_called()
         number_processes_called = len(processes_called)
         all_process_as_relevant = []
         for p in processes_called:
             all_process_as_relevant.append(p.get_alias())
         all_process_as_relevant = list(set(all_process_as_relevant))
-        working_workflow.rewrite_workflow_remove_subworkflows(relevant_processes = all_process_as_relevant, render_graphs = False)
-        w_save = copy.deepcopy(working_workflow)
+        #working_workflow.rewrite_workflow_remove_subworkflows(relevant_processes = all_process_as_relevant, render_graphs = False)
+        #w_save = copy.deepcopy(working_workflow)
+        w_save = copy.deepcopy(self)
+        w = copy.deepcopy(self)
         scripts_2_tools = {}
         print("Extracting the tools from the processes")
         print('-'*len(processes_called)+">")
@@ -597,7 +599,7 @@ George Marchment, Bryan Brancotte, Marie Schmit, Frédéric Lemoine, Sarah Cohen
         for i in range(number_of_tries):
             print('.', end='')
             #print(i/number_of_tries*100)
-            w = copy.deepcopy(w_save)
+            #w = copy.deepcopy(w_save)
 
             if(process_pre_selection == "bioinfo"):
                 random_relevant_processes = w.get_random_relevant_processes_which_use_bioinformatics_tools(scripts_2_tools = scripts_2_tools)
@@ -644,7 +646,7 @@ George Marchment, Bryan Brancotte, Marie Schmit, Frédéric Lemoine, Sarah Cohen
 
             return nb_conditions_in_clusters
 
-        def get_score_from_set_relevant_processes(w_save, random_relevant_processes):
+        def get_score_from_set_relevant_processes(w, random_relevant_processes):
             #w = copy.deepcopy(w_save)
             #_, cluster_organisation = w.convert_workflow_2_user_view(relevant_processes=random_relevant_processes, render_graphs = False)
             #print(random_relevant_processes)
@@ -655,7 +657,7 @@ George Marchment, Bryan Brancotte, Marie Schmit, Frédéric Lemoine, Sarah Cohen
             #     tab_nb_processes_per_cluster_1.append(cluster_organisation[c]["nb_processes"])
             #     tab_nb_conditions_per_cluster_1.append(cluster_organisation[c]["nb_conditions"])
 
-            w = copy.deepcopy(w_save)
+            #w = copy.deepcopy(w_save)
             w.generate_user_view(relevant_processes = random_relevant_processes, render_graphs=False, use_process_dependency_graph = False)
             clusters = w.graph.get_clusters_from_user_view()
             cluster_with_processes = []
@@ -671,7 +673,7 @@ George Marchment, Bryan Brancotte, Marie Schmit, Frédéric Lemoine, Sarah Cohen
 
             for cluster in cluster_with_processes:
                 tab_nb_executors_per_cluster.append(len(cluster))
-            print(np.array(tab_nb_executors_per_cluster).sum())
+
             #Number condtions per cluster
             tab_nb_conditions_per_cluster = get_nb_conditions_in_clusters(cluster_with_processes)
             #Number of processes per cluster
@@ -700,10 +702,12 @@ George Marchment, Bryan Brancotte, Marie Schmit, Frédéric Lemoine, Sarah Cohen
             average_number_of_process_per_cluster = np.mean(tab_nb_processes_per_cluster)
             for x in tab_nb_processes_per_cluster:
                 uniformity_variance += (average_number_of_process_per_cluster-x)**2/nb_clusters
-            dico_results = {"min_nb_clusters":(nb_clusters / number_processes_called), "min_nb_non_relevant_cluster":(nb_non_relevant_clusters / nb_clusters), "uniformity":(uniformity_variance / number_processes_called), "concordance":np.max(np.array(tab_nb_conditions_per_cluster)/np.array(tab_nb_executors_per_cluster)) }
+
+            min_nb_clusters_value = (nb_clusters / number_processes_called)**2
+            dico_results = {"min_nb_clusters":min_nb_clusters_value, "min_nb_non_relevant_cluster":(nb_non_relevant_clusters / nb_clusters), "uniformity":(uniformity_variance / number_processes_called), "concordance":np.max(np.array(tab_nb_conditions_per_cluster)/np.array(tab_nb_executors_per_cluster)) }
             score = concordance_factor * np.max(np.array(tab_nb_conditions_per_cluster)/np.array(tab_nb_executors_per_cluster)) + \
                     uniformity_factor * (uniformity_variance / number_processes_called) + \
-                    min_nb_clusters_factor * (nb_clusters / number_processes_called) + \
+                    min_nb_clusters_factor * min_nb_clusters_value + \
                     min_nb_non_relevant_cluster_factor * (nb_non_relevant_clusters / nb_clusters)
 
             return score, cluster_with_processes, dico_results
--
GitLab