From 106ffe4d2f43f7001252c980277e279d289191cf Mon Sep 17 00:00:00 2001 From: George Marchment <georgemarchment@yahoo.fr> Date: Wed, 16 Apr 2025 10:55:38 +0200 Subject: [PATCH] Updated the selection of random processes + updated the extraction of paths and files (there was an issue with the groups) --- src/constant.py | 6 ++++-- src/process.py | 11 ++++++----- src/workflow.py | 39 +++++++++++++++++++++++---------------- 3 files changed, 33 insertions(+), 23 deletions(-) diff --git a/src/constant.py b/src/constant.py index b46c2b5..400e439 100644 --- a/src/constant.py +++ b/src/constant.py @@ -123,8 +123,10 @@ END_PIPE_OPERATOR = r"\s*(\s*\|\s*\w+)+" # PROCESS #-------------------------- -FILE = r'file +(\w+) *\n|file *\( *(\w+) *\) *\n' -PATH = r'path +(\w+) *\n|path *\( *(\w+) *\) *\n' +FILE1 = r'file +(\w+) *\n' +FILE2 = r'file *\( *(\w+) *\) *\n' +PATH1 = r'path +(\w+) *\n' +PATH2 = r'path *\( *(\w+) *\) *\n' FROM = r'[^\w]from ([^\n]+)\n' INPUT = r"\n\s*input *:" INTO = r'into +([\w, ]+)' diff --git a/src/process.py b/src/process.py index 8d4c181..6beb287 100644 --- a/src/process.py +++ b/src/process.py @@ -289,7 +289,7 @@ class Process(Nextflow_Building_Blocks): placed = False #Case there is a single channel as an input -> doesn't use from to import channel -> uses file (see https://github.com/nextflow-io/nextflow/blob/45ceadbdba90b0b7a42a542a9fc241fb04e3719d/docs/process.rst) - patterns = [constant.FILE, constant.PATH] + patterns = [constant.FILE1, constant.FILE2, constant.PATH1, constant.PATH2] for pattern in patterns: for match in re.finditer(pattern, line+"\n"): #In the first case it's "file ch" in the second "file (ch)" @@ -423,10 +423,11 @@ class Process(Nextflow_Building_Blocks): tab.append(self.outputs[-1]) self.outputs_per_line.append(tab) - pattern = constant.FILE - for match in re.finditer(pattern, code): - add_channel(match.group(1)) - self.outputs_per_line.append([self.outputs[-1]]) + patterns = [constant.FILE1, constant.FILE2] + for pattern in 
patterns: + for match in re.finditer(pattern, code): + add_channel(match.group(1)) + self.outputs_per_line.append([self.outputs[-1]]) #Function that extracts the inputs from a process (for DSLS workflows) def initialise_outputs_DSL2(self): diff --git a/src/workflow.py b/src/workflow.py index 144cf2f..0d0abe0 100644 --- a/src/workflow.py +++ b/src/workflow.py @@ -431,12 +431,13 @@ George Marchment, Bryan Brancotte, Marie Schmit, Frédéric Lemoine, Sarah Cohen #So that means there are the 2 nodes which form the cycle edge in the relevant processes #-> it means we need to regenerated relevant processes searching = True - break - - name_select = [] - for p in sampled: - name_select.append(p.get_alias()) - return name_select + if(not searching): + name_select = [] + for p in sampled: + name_select.append(p.get_alias()) + return name_select + else: + print("here") #This methods generates a random set of processes to consider as relavant @@ -1036,21 +1037,27 @@ George Marchment, Bryan Brancotte, Marie Schmit, Frédéric Lemoine, Sarah Cohen def get_object(address): address = int(re.findall(r"\dx\w+", address)[0], base=16) return ctypes.cast(address, ctypes.py_object).value - #Check that there are no cycles which will break the creation of the user view: - edges_create_cycles = self.graph.get_edges_that_create_cycle() - edges_create_cycles_objects = [] - for e in edges_create_cycles: - edges_create_cycles_objects.append((get_object(e[0]), get_object(e[1]))) - for e in edges_create_cycles_objects: - n1 = e[0].get_alias() - n2 = e[1].get_alias() - if(n1 in relevant_processes and n2 in relevant_processes): - raise BioFlowInsightError(f"The processes '{n1}' and '{n2}' cannot both be relevant processes since there is a dependency apparant in the workflow between the 2") + ternary_operation_dico = self.ternary_operation_dico map_element_dico = self.map_element_dico if(self.duplicate): + + #Check that there are no cycles which will break the creation of the user view: + 
edges_create_cycles = self.graph.get_edges_that_create_cycle() + #if(len(edges_create_cycles)>0): + # #TODO -> add link for further details for the user + # raise BioFlowInsightError("An indirect cycle was detected in the workflow. This happens often when a same channel is used multiple times in the workflow. This is a norm that we do not recommend. Try rewritting the workflow in a different way. See [link] for more details", type="Cycle detected") + edges_create_cycles_objects = [] + for e in edges_create_cycles: + edges_create_cycles_objects.append((get_object(e[0]), get_object(e[1]))) + for e in edges_create_cycles_objects: + n1 = e[0].get_alias() + n2 = e[1].get_alias() + if(n1 in relevant_processes and n2 in relevant_processes): + raise BioFlowInsightError(f"The processes '{n1}' and '{n2}' cannot both be relevant processes since there is a dependency apparant in the workflow between the 2") + #First check if there are any duplicate operations #That method is in the "get_order_execution_executors" method -> so we just run that first self.check_multiple_subworkflow() -- GitLab