From 19346fe40ac630985e5d00ca2b8e742d572ecb69 Mon Sep 17 00:00:00 2001 From: George Marchment <georgemarchment@yahoo.fr> Date: Tue, 11 Mar 2025 13:44:29 +0100 Subject: [PATCH] Made the rewrite deterministic -> debugged -> by removing the zoom opti + removed the artificial nodes from the calculation of the user view --- src/graph.py | 3 +- src/nextflow_file.py | 27 +++++++------- src/operation.py | 2 +- src/outils_graph.py | 86 +++++++++++++++++++++++++++++++++++--------- src/process.py | 2 +- src/workflow.py | 20 +++++++++-- 6 files changed, 106 insertions(+), 34 deletions(-) diff --git a/src/graph.py b/src/graph.py index c04159d..11d9872 100644 --- a/src/graph.py +++ b/src/graph.py @@ -143,6 +143,7 @@ class Graph(): def get_specification_graph(self, dirc = 'graphs', filename = "specification_graph", render_graphs = True): generate_graph(self.get_output_dir()/ dirc /filename, self.full_dico, render_graphs = render_graphs) + generate_graph(self.get_output_dir()/ dirc /(filename+"_without_artificial_nodes"), remove_artificial_nodes(self.full_dico), render_graphs = render_graphs) def get_specification_graph_wo_labels(self, filename = "specification_graph_wo_labels", render_graphs = True): generate_graph(self.get_output_dir()/'graphs'/filename, self.full_dico, label_edge=False, label_node=False, render_graphs = render_graphs) @@ -310,7 +311,7 @@ class Graph(): #For now i'm only gonna work from the flattened dico #self.initialise_flattened_dico(self.dico_process_dependency_graph) self.initialise_flattened_dico(self.full_dico) - dico = self.dico_flattened + dico = remove_artificial_nodes(self.dico_flattened) user_view, self.new_nodes_user_view = relev_user_view_builder(dico, relevant_modules=relevant_processes) diff --git a/src/nextflow_file.py b/src/nextflow_file.py index c19e022..b7a523f 100644 --- a/src/nextflow_file.py +++ b/src/nextflow_file.py @@ -160,13 +160,14 @@ class Nextflow_File(Nextflow_Building_Blocks): #---------------------- #This method extracts the 
"main" workflow from the file def extract_main(self): - from .main import Main - #This returns the code without the comments - code = "\n"+self.get_code()+"\n" - #Find pattern - twice = False - for match in re.finditer(constant.WORKFLOW_HEADER_2, code): - if(self.first_file): + if(self.first_file): + from .main import Main + #This returns the code without the comments + code = "\n"+self.get_code()+"\n" + #Find pattern + twice = False + for match in re.finditer(constant.WORKFLOW_HEADER_2, code): + start = match.span(1)[0] end = extract_curly(code, match.span(1)[1])#This function is defined in the functions file self.main = Main(code= code[start:end], nextflow_file=self) @@ -174,9 +175,9 @@ class Nextflow_File(Nextflow_Building_Blocks): #TODO turn into biofow insight error raise Exception(f"Found multiple 'main workflows' in {self.get_file_address()}") twice = True - else: - #TODO add num - BioFlowInsightError("A 'main' workflow was found in the Nextflow file") + if(self.main==None): + raise BioFlowInsightError("A 'main' workflow was not found in the Nextflow file") + #---------------------- #FUNCTIONS @@ -194,8 +195,10 @@ class Nextflow_File(Nextflow_Building_Blocks): start = match.span(0)[0] end = extract_curly(code, match.span(0)[1])#This function is defined in the functions file #f = Code(code=code[start:end], origin=self) - f = Function(code = code[start:end], name = match.group(2), origin =self) - self.functions.append(f) + #Fobiden names of functions + if(match.group(2) not in ['if']): + f = Function(code = code[start:end], name = match.group(2), origin =self) + self.functions.append(f) def get_functions(self): return self.functions diff --git a/src/operation.py b/src/operation.py index 1a88d6e..5e838ab 100644 --- a/src/operation.py +++ b/src/operation.py @@ -817,7 +817,7 @@ class Operation(Executor): fillcolor = "" - dico['nodes'].append({'id':str(self), 'name':"", "shape":"point", 'xlabel': code, 'fillcolor':fillcolor}) + 
dico['nodes'].append({'id':str(self), 'name':"", "shape":"point", 'xlabel': code, 'fillcolor':fillcolor, "artificial": self.get_artificial_status()}) for o in self.origins: #Case origins is a channel diff --git a/src/outils_graph.py b/src/outils_graph.py index 17f24dc..0283822 100644 --- a/src/outils_graph.py +++ b/src/outils_graph.py @@ -596,14 +596,14 @@ def nr_path_pred(r, n, dico, R): #Added a dico so it knows what it's already searched dico_rSucc = {} def rSucc(n, dico, R, outputs): - try: - tab = dico_rSucc[n] - except: - tab = [] - for r in set(R).union(set(outputs)): - if(nr_path_succ(n, r, dico, R+list(outputs))): - tab.append(r) - dico_rSucc[n] = tab + #try: + # tab = dico_rSucc[n] + #except: + tab = [] + for r in set(R).union(set(outputs)): + if(nr_path_succ(n, r, dico, R+list(outputs))): + tab.append(r) + dico_rSucc[n] = tab return tab def rSuccM(M, dico, R, outputs): @@ -615,14 +615,14 @@ def rSuccM(M, dico, R, outputs): #Added a dico so it knows what it's already searched dico_rPred = {} def rPred(n, dico, R, inputs): - try: - tab = dico_rPred[n] - except: - tab = [] - for r in set(R).union(set(inputs)): - if(nr_path_pred(r, n, dico, R+list(inputs))): - tab.append(r) - dico_rPred[n] = tab + #try: + # tab = dico_rPred[n] + #except: + tab = [] + for r in set(R).union(set(inputs)): + if(nr_path_pred(r, n, dico, R+list(inputs))): + tab.append(r) + dico_rPred[n] = tab return tab def rPredM(M, dico, R, inputs): @@ -1082,4 +1082,58 @@ def check_if_equal(dicoA, dicoB): #TO do that we rewrite the structure using a commun language (without using the ids) -> then just check if the translated structures are the same return translate_dico(dicoA) ==translate_dico(dicoB) +#This function removes the artificial nodes from the dico +#MAtching the nodes together between the artificial nodes +def remove_artificial_nodes(param_dico): + dico = copy.copy(param_dico) + def get_list_artificial_nodes_rec(dico, tab): + for n in dico["nodes"]: + if(n["artificial"]): + 
tab+=[n["id"]] + for sub in dico["subworkflows"]: + tab = get_list_artificial_nodes_rec(dico["subworkflows"][sub], tab) + return tab + def get_list_artificial_nodes(dico): + tab = [] + return get_list_artificial_nodes_rec(dico, tab) + + def remove_node_rec(dico, node_id): + to_remove = [] + for n in dico["nodes"]: + if(n["id"]==node_id): + to_remove.append(n) + for n in to_remove: + dico["nodes"].remove(n) + for sub in dico["subworkflows"]: + remove_node_rec(dico["subworkflows"][sub], node_id) + def remove_node(dico, node_id): + remove_node_rec(dico, node_id) + + def get_links_and_remove_edges_rec(dico, node_id, links): + to_remove = [] + for e in dico["edges"]: + if(e["A"]==node_id): + links["sink"].append({"sink": e["B"], "label": e["label"]}) + to_remove.append(e) + if(e["B"]==node_id): + links["source"].append({"source": e["A"], "label": e["label"]}) + to_remove.append(e) + for e in to_remove: + dico["edges"].remove(e) + for sub in dico["subworkflows"]: + get_links_and_remove_edges_rec(dico["subworkflows"][sub], node_id, links) + def get_links_and_remove_edges(dico, node_id, links): + get_links_and_remove_edges_rec(dico, node_id, links) + + for n_id in get_list_artificial_nodes(dico): + remove_node(dico, n_id) + links = {} + links["source"] = [] + links["sink"] = [] + get_links_and_remove_edges(dico, n_id, links) + for source in links["source"]: + for sink in links["sink"]: + edge = {"A" : source['source'], "B" : sink['sink'], "label" : source['label']} + dico["edges"].append(edge) + return dico diff --git a/src/process.py b/src/process.py index 85c480e..b4cef1a 100644 --- a/src/process.py +++ b/src/process.py @@ -416,7 +416,7 @@ class Process(Nextflow_Building_Blocks): return self.printed_name def get_structure(self, dico): - dico['nodes'].append({'id':str(self), 'name':self.get_name_to_print(), "shape":"ellipse", 'xlabel':"", 'fillcolor':''}) + dico['nodes'].append({'id':str(self), 'name':self.get_name_to_print(), "shape":"ellipse", 'xlabel':"", 
'fillcolor':'', "artificial": False}) def initialise_inputs_outputs(self): DSL = self.nextflow_file.get_DSL() diff --git a/src/workflow.py b/src/workflow.py index be488f2..efaab88 100644 --- a/src/workflow.py +++ b/src/workflow.py @@ -765,6 +765,7 @@ George Marchment, Bryan Brancotte, Marie Schmit, Frédéric Lemoine, Sarah Cohen relevant_processes = self.check_relevant_processes_in_workflow(relevant_processes) self.generate_user_view(relevant_processes = relevant_processes, processes_2_remove = [], render_graphs=render_graphs) clusters = self.graph.get_clusters_from_user_view() + print(len(clusters)) broken_subworkflows = get_workflows_broken(get_subworkflow_2_executors(), get_clusters_with_calls(clusters)) #While there still are broken workflows -> need to redo the analysis while(len(broken_subworkflows)>0): @@ -794,6 +795,14 @@ George Marchment, Bryan Brancotte, Marie Schmit, Frédéric Lemoine, Sarah Cohen #Get the topological order executors_in_order = self.get_order_execution_executors() new_clusters = [] + #for clust in clusters: + # print("*") + # for c in clust: + # if(c.get_type()=="Process"): + # print(c, c.get_code()[:20]) + # else: + # print(c, c.get_code()[:20], c.artificial) + for cluster in clusters: tab = [] for e in executors_in_order: @@ -802,9 +811,14 @@ George Marchment, Bryan Brancotte, Marie Schmit, Frédéric Lemoine, Sarah Cohen new_clusters.append(tab) clusters = new_clusters #clusters = self.graph.get_topogical_order(clusters) - #for e in executors_in_order: - # print(e.get_code()[:20]) - #print(executors_in_order) + #print('_________________') + #for clust in clusters: + # print("*") + # for c in clust: + # if(c.get_type()=="Process"): + # print(c, c.get_code()[:20]) + # else: + # print(c, c.get_code()[:20], c.artificial) #Creating the subworkflows from clusters -- GitLab