diff --git a/src/graph.py b/src/graph.py index e0e5dc58f0801f96967db05001c26b96cdd01766..2fd13eca88c66faaf91fc7066b7b3a0847a956a0 100644 --- a/src/graph.py +++ b/src/graph.py @@ -471,6 +471,27 @@ class Graph(): return False else: return False + + #This method returns a list of processes + def get_edges_that_create_cycle(self): + links_flattened = initia_link_dico_rec(get_flatten_dico(self.get_process_dependency_graph())) + not_source_2_sink = [] + node_2_sink = [] + for node in links_flattened: + if(links_flattened[node]==[]): + node_2_sink.append(node) + else: + not_source_2_sink+=links_flattened[node] + not_source_2_sink = set(not_source_2_sink) + source_2_node = list(set(links_flattened.keys()).difference(not_source_2_sink)) + links_flattened_source_sink = links_flattened.copy() + links_flattened_source_sink["source"], links_flattened_source_sink["sink"] = source_2_node, [] + for node in node_2_sink: + links_flattened_source_sink[node].append("sink") + + #The simple loops are included in this + _, edges_create_cycles = get_number_cycles(links_flattened_source_sink) + return edges_create_cycles #============================ #METADATA FROM GRAPH diff --git a/src/process.py b/src/process.py index 14c7b635518f5bd660707708d350061bc15d34a7..75cbb1f7de71f1746aa73c82eca1784ebb452df4 100644 --- a/src/process.py +++ b/src/process.py @@ -105,6 +105,11 @@ class Process(Nextflow_Building_Blocks): def get_script_code(self): + code = " "+self.script_code+" " + if(self.script_code.count('"""')==2): + return self.script_code.split('"""')[1] + if(self.script_code.count("'''")==2): + return self.script_code.split("'''")[1] return self.script_code def get_name(self): @@ -446,8 +451,9 @@ class Process(Nextflow_Building_Blocks): def initialise_inputs_outputs(self): DSL = self.nextflow_file.get_DSL() if(DSL=="DSL1"): - self.initialise_inputs_DSL1() - self.initialise_outputs_DSL1() + if(self.origin!=None): + self.initialise_inputs_DSL1() + self.initialise_outputs_DSL1() elif(DSL=="DSL2"): self.initialise_inputs_DSL2() self.initialise_outputs_DSL2() diff --git a/src/subworkflow.py b/src/subworkflow.py index 8c4c0576fa392bc3086218f77ccf51025c99a2db..d9c8ff016b55dfcd82538479b9aca39eaf4814ee 100644 --- a/src/subworkflow.py +++ b/src/subworkflow.py @@ -103,13 +103,33 @@ class Subworkflow(Main): for o in self.emit: code = code.replace(o.get_code(get_OG = True), o.simplify_code(return_tab = False), 1) - #Renaming the takes in the subworkflow + #Renaming the takes in the subworkflow (only body) + code_up_to_emit, code_after_emit = code, "" + for match in re.finditer(constant.EMIT_SUBWORKFLOW, code): + start, _ = match.span(0) + code_up_to_emit = code[:start] + code_after_emit = code[start:] name = self.get_alias() for t in self.take: if(len(t.get_gives())!=1): raise Exception("This shoudn't happen") ch = t.get_gives()[0] - code = replace_group1(code, fr"[^\w]({re.escape(ch.get_code())})[^\w]", f"{ch.get_code()}_{name}") + code_up_to_emit = replace_group1(code_up_to_emit, fr"[^\w]({re.escape(ch.get_code())})[^\w]", f"{ch.get_code()}_{name}") + + #Renaming the takes in the emits -> if the takes are given as emits + for e in self.emit: + channels_take = [] + for t in self.take: + channels_take.append(t.get_gives()[0]) + re_write_channel = False + for o in e.origins: + if(o in channels_take): + re_write_channel = True + if(re_write_channel): + ch = e.origins[0] + code_after_emit = replace_group1(code_after_emit, fr"[^\w]({re.escape(ch.get_code())})[^\w]", f"{ch.get_code()}_{name}") + + code = code_up_to_emit+code_after_emit return code diff --git a/src/workflow.py b/src/workflow.py index 10c4d0cd21ca3d14d3ab7dcdf22bb0b4359e02e7..7e496aff14ab9f6942bd8637319ffcbb499fe4e2 100644 --- a/src/workflow.py +++ b/src/workflow.py @@ -379,6 +379,7 @@ George Marchment, Bryan Brancotte, Marie Schmit, Frédéric Lemoine, Sarah Cohen #This methods generates a random set of processes to consider as relavant #It's not a uniform random it's a bit of a gaussian, centered at 0.5 def generate_random_relevant_processes(self, alpha = -1): + edges_create_cycles = self.graph.get_edges_that_create_cycle() import random #Random value between 0 and 1, centered at 0.5 @@ -394,13 +395,6 @@ George Marchment, Bryan Brancotte, Marie Schmit, Frédéric Lemoine, Sarah Cohen return val if(self.duplicate): - if(alpha == -1): - alpha = get_value() - else: - if(0<=alpha and alpha<=1): - None - else: - raise BioFlowInsightError("alpha is not in the interval [0; 1]") processes_called = [] if(self.get_DSL()=="DSL2"): @@ -410,11 +404,33 @@ George Marchment, Bryan Brancotte, Marie Schmit, Frédéric Lemoine, Sarah Cohen processes_called.append(p) else: processes_called = self.get_first_file().get_processes() - nb_2_select = int(alpha*len(processes_called)) - sampled = random.sample(set(processes_called), nb_2_select) - name_select = [] - for p in sampled: - name_select.append(p.get_alias()) + + searching = True + while(searching): + searching = False + if(alpha == -1): + alpha = get_value() + else: + if(0<=alpha and alpha<=1): + None + else: + raise BioFlowInsightError("alpha is not in the interval [0; 1]") + nb_2_select = int(alpha*len(processes_called)) + sampled = random.sample(set(processes_called), nb_2_select) + + sampled_str = [] + for s in sampled: + sampled_str.append(str(s)) + for e in edges_create_cycles: + if(e[0] in sampled_str and e[1] in sampled_str): + #So that means there are the 2 nodes which form the cycle edge in the relevant processes + #-> it means we need to regenerated relevant processes + searching = True + break + + name_select = [] + for p in sampled: + name_select.append(p.get_alias()) return name_select else: raise BioFlowInsightError("Trying to generate random relevant processes however option 'duplicate' is not activated.") @@ -753,6 +769,21 @@ George Marchment, Bryan Brancotte, Marie Schmit, Frédéric Lemoine, Sarah Cohen def convert_workflow_2_user_view(self, relevant_processes = [], render_graphs = True): self.iniatilise_tab_processes_2_remove() self.graph.initialise(processes_2_remove = self.processes_2_remove) + + def get_object(address): + address = int(re.findall(r"\dx\w+", address)[0], base=16) + return ctypes.cast(address, ctypes.py_object).value + #Check that there are no cycles which will break the creation of the user view: + edges_create_cycles = self.graph.get_edges_that_create_cycle() + edges_create_cycles_objects = [] + for e in edges_create_cycles: + edges_create_cycles_objects.append((get_object(e[0]), get_object(e[1]))) + for e in edges_create_cycles_objects: + n1 = e[0].get_alias() + n2 = e[1].get_alias() + if(n1 in relevant_processes and n2 in relevant_processes): + raise BioFlowInsightError(f"The processes '{n1}' and '{n2}' cannot both be relevant processes since there is a dependency apparant in the workflow between the 2") + ternary_operation_dico = self.ternary_operation_dico map_element_dico = self.map_element_dico @@ -763,6 +794,7 @@ George Marchment, Bryan Brancotte, Marie Schmit, Frédéric Lemoine, Sarah Cohen if(self.get_DSL()=="DSL2"): code = self.simplify_workflow_code() self.rewrite_and_initialise(code, self.processes_2_remove, render_graphs=render_graphs) + if(self.duplicate):