diff --git a/src/constant.py b/src/constant.py index ba73cea71e1ff1280046cb4d8bc33b330c84b4ad..76ee6a62047ed825fa3618ff8728fd4a8ed29ef3 100644 --- a/src/constant.py +++ b/src/constant.py @@ -49,7 +49,7 @@ TOOLS = [ # CALLS #-------------------------- -BEGINNING_CALL = r"(\w+)\s*\(" +BEGINNING_CALL = r"\s(\w+)\s*\(" CALL_ID = r"Call_\d+" END_CALL = r'\s*\(' diff --git a/src/nextflow_file.py b/src/nextflow_file.py index 0be98fa758bf66190ce5297d26ce52bd056f55d7..b78af3bb02c4bfb4ed7c3bc72855c4ad188bef7d 100644 --- a/src/nextflow_file.py +++ b/src/nextflow_file.py @@ -34,7 +34,7 @@ class Nextflow_File(Nextflow_Building_Blocks): self.initialised = False contents = check_file_exists(self.get_file_address(), self) Nextflow_Building_Blocks.__init__(self, contents) - + self.check_file_correctness() #---------------------- #GENERAL @@ -56,6 +56,21 @@ class Nextflow_File(Nextflow_Building_Blocks): def get_DSL(self): return self.workflow.get_DSL() + def check_file_correctness(self): + code = self.get_code() + if(code.count("{")!=code.count("}")): + curly_count = get_curly_count(code) + if(curly_count!=0): + raise BioFlowInsightError(f"Not the same number of opening and closing curlies '{'{}'}' in the file.", num = 16,origin=self) + if(code.count("(")!=code.count(")")): + parenthese_count = get_parenthese_count(code) + if(parenthese_count!=0): + raise BioFlowInsightError(f"Not the same number of opening and closing parentheses '()' in the file.", num = 16, origin=self) + + if(code.count('"""')%2!=0): + raise BioFlowInsightError(f"An odd number of '\"\"\"' was found in the code.", num = 16, origin=self) + + #Method which returns the DSL of the workflow -> by default it's DSL2 #I use the presence of include, subworkflows and into/from in processes as a proxy diff --git a/src/operation.py b/src/operation.py index 77356940cf8dd9cc96de1c19a9d05b1c654ef989..3b2254a06072eb1e234c46106a309a5b0acec430 100644 --- a/src/operation.py +++ b/src/operation.py @@ -706,14 +706,16 @@ class Operation(Executor): searching = True while(searching): searching= False - text = self.get_code(clean_pipe = clean_pipe) + text = " "+self.get_code(clean_pipe = clean_pipe) for c in self.calls: text = text.replace(self.calls[c].get_code(), "") for match in re.finditer(pattern_call, text): if(match.group(1) in to_call): - searching=True start, end = match.span(0) + #We do this cause the first caracter is a " " + start+=1 + searching=True call = Call(code =get_end_call(text, start, end), origin =self) call.initialise() self.calls[str(call)] = call diff --git a/src/root.py b/src/root.py index fd4c755b27103cdef355d4d5164b09ff0513c7ad..23edb5198a24679cb572da4f4dfea5d295f953c1 100644 --- a/src/root.py +++ b/src/root.py @@ -227,7 +227,7 @@ class Root(Nextflow_Building_Blocks): code = self.get_code() #For each block -> remove its code for b in self.blocks: - code = code.replace(b.get_code(), "") + code = code.replace(b.get_code(), "", 1) for match in re.finditer(r"\<src\.process\.Process object at \w+\>", code): for process in self.modules_defined: @@ -240,13 +240,14 @@ class Root(Nextflow_Building_Blocks): #Define the blocks code = self.get_code() conditions = extract_conditions(code) + #TODO -> normally it is not a problem -> cause i've removed the recursive option #But just check that the bodies don't appear twice in the dico #For each condition -> create a block for c in conditions: from .block import Block - body = code[conditions[c][0]:conditions[c][1]] + body = code[conditions[c][0]:conditions[c][1]].strip() c = c.split("$$__$$")[0] import copy block = Block(code=body, origin=self, condition=c, modules_defined=self.modules_defined, existing_channels = copy.copy(self.channels)) @@ -258,22 +259,26 @@ class Root(Nextflow_Building_Blocks): #Case DSL1 -> need to extract the processes which have been defined but rplaced in the code self.extract_defined_processes() + #This is to get the order of execution code = self.get_code() position_2_thing_2_analyse = {} for block in self.blocks: - block_code = block.get_code() - found = False - while(not found or len(block_code)==0): - pos = code.find(block_code) - if(pos!=-1): - position_2_thing_2_analyse[pos] = block - code = code.replace(block_code, "a"*len(block_code), 1) - found = True - else: - block_code = block_code[:-1] - if(not found): - raise Exception("This shouldn't happen") + block_code = block.get_code().strip() + if(block_code!=""): + found = False + while(not found): + if(len(block_code)<=0): + break + pos = code.find(block_code) + if(pos!=-1): + position_2_thing_2_analyse[pos] = block + code = code.replace(block_code, "a"*len(block_code), 1) + found = True + else: + block_code = block_code[:-1] + if(not found): + raise Exception("This shouldn't happen") for process in self.defined_processes: found = False @@ -284,11 +289,12 @@ class Root(Nextflow_Building_Blocks): if(not found): raise Exception("This shouldn't happen") - for e in self.executors: e_code = e.get_code() found = False - while(not found or len(e_code)==0): + while(not found): + if(len(e_code)<=0): + break pos = code.find(e_code) if(pos!=-1): position_2_thing_2_analyse[pos] = e @@ -306,19 +312,6 @@ class Root(Nextflow_Building_Blocks): element.initialise() - #for block in self.blocks: - # #TODO -> this would be the place you put the verification of the conditions - # block.initialise() - # - ##Analyse Executors - #for e in self.executors: - # e.initialise() - - #Initialise each subworkflow being called - #for sub in self.elements_being_called: - # if(sub.get_type()=="Subworkflow"): - # sub.initialise() - def get_process_from_name(self, name): for m in self.modules_defined: if(m.get_type()=="Process" and m.get_alias()==name): @@ -345,7 +338,7 @@ class Root(Nextflow_Building_Blocks): #For each block -> remove its code for b in self.blocks: - code = code.replace(b.get_code(), "") + code = code.replace(b.get_code(), "", 1) things_to_remove = [] #things_to_remove+= self.processes+self.includes+self.subworkflows+self.functions @@ -414,7 +407,7 @@ class Root(Nextflow_Building_Blocks): searching = True while(searching): searching= False - text = code + text = " "+code for e in self.executors: text = text.replace(e.get_code(), "", 1) @@ -422,6 +415,8 @@ class Root(Nextflow_Building_Blocks): if(match.group(1) in to_call): start, end = match.span(0) + #We do this cause the first caracter is a " " + start+=1 txt_call = get_end_call(text, start, end) txt_call = expand_to_pipe_operators(text, txt_call) #If the thing which is extracted is not in the conditon of an if diff --git a/tests/ressources/workflows/wf18/specification_graph.json b/tests/ressources/workflows/wf18/specification_graph.json new file mode 100644 index 0000000000000000000000000000000000000000..7fe9450dccde6c46ff2699a4da0f3c5323eb4a9c --- /dev/null +++ b/tests/ressources/workflows/wf18/specification_graph.json @@ -0,0 +1,139 @@ +{ + "nodes": [ + { + "id": "<src.operation.Operation object at 0x79717dcc3a00>", + "name": "", + "shape": "point", + "xlabel": "a = sub1.out", + "fillcolor": "white" + }, + { + "id": "<src.operation.Operation object at 0x79717db0d3c0>", + "name": "", + "shape": "point", + "xlabel": "b = sub2.out", + "fillcolor": "white" + }, + { + "id": "<src.operation.Operation object at 0x79717db0e2f0>", + "name": "", + "shape": "point", + "xlabel": "b = Channel.empty()", + "fillcolor": "" + } + ], + "edges": [ + { + "A": "<src.operation.Operation object at 0x79717db0d900>", + "B": "<src.operation.Operation object at 0x79717dcc3a00>", + "label": "sub1.out" + }, + { + "A": "<src.operation.Operation object at 0x79717db0dc30>", + "B": "<src.operation.Operation object at 0x79717db0d3c0>", + "label": "sub2.out" + } + ], + "subworkflows": { + "sub1_0": { + "nodes": [ + { + "id": "<src.process.Process object at 0x79717db0d0c0>", + "name": "M1", + "shape": "ellipse", + "xlabel": "", + "fillcolor": "" + }, + { + "id": "<src.process.Process object at 0x79717db0d180>", + "name": "M2", + "shape": "ellipse", + "xlabel": "", + "fillcolor": "" + }, + { + "id": "<src.operation.Operation object at 0x79717db0cdc0>", + "name": "", + "shape": "point", + "xlabel": "M1.out", + "fillcolor": "white" + }, + { + "id": "<src.operation.Operation object at 0x79717db0d900>", + "name": "", + "shape": "point", + "xlabel": "emit: M2.out", + "fillcolor": "" + } + ], + "edges": [ + { + "A": "<src.process.Process object at 0x79717db0d0c0>", + "B": "<src.operation.Operation object at 0x79717db0cdc0>", + "label": "M1.out" + }, + { + "A": "<src.operation.Operation object at 0x79717db0cdc0>", + "B": "<src.process.Process object at 0x79717db0d180>", + "label": "" + }, + { + "A": "<src.process.Process object at 0x79717db0d180>", + "B": "<src.operation.Operation object at 0x79717db0d900>", + "label": "M2.out" + } + ], + "subworkflows": {} + }, + "sub2_0": { + "nodes": [ + { + "id": "<src.process.Process object at 0x79717db0dfc0>", + "name": "M1", + "shape": "ellipse", + "xlabel": "", + "fillcolor": "" + }, + { + "id": "<src.process.Process object at 0x79717db0e110>", + "name": "M2", + "shape": "ellipse", + "xlabel": "", + "fillcolor": "" + }, + { + "id": "<src.operation.Operation object at 0x79717db0e200>", + "name": "", + "shape": "point", + "xlabel": "M1.out", + "fillcolor": "white" + }, + { + "id": "<src.operation.Operation object at 0x79717db0dc30>", + "name": "", + "shape": "point", + "xlabel": "emit: M2.out", + "fillcolor": "" + } + ], + "edges": [ + { + "A": "<src.process.Process object at 0x79717db0dfc0>", + "B": "<src.operation.Operation object at 0x79717db0e200>", + "label": "M1.out" + }, + { + "A": "<src.operation.Operation object at 0x79717db0e200>", + "B": "<src.process.Process object at 0x79717db0e110>", + "label": "" + }, + { + "A": "<src.process.Process object at 0x79717db0e110>", + "B": "<src.operation.Operation object at 0x79717db0dc30>", + "label": "M2.out" + } + ], + "subworkflows": {} + } + } +} \ No newline at end of file diff --git a/tests/ressources/workflows/wf18/test.nf b/tests/ressources/workflows/wf18/test.nf new file mode 100644 index 0000000000000000000000000000000000000000..c142ffbefd02f11ac5bfd8f756278096bb079509 --- /dev/null +++ b/tests/ressources/workflows/wf18/test.nf @@ -0,0 +1,53 @@ + +#!/usr/bin/env nextflow + +process M1 { + output: + path 'chunk_*' + + """ + SOMETHING + """ +} + +process M2 { + input: + path a + + """ + SOMETHING + """ +} + +workflow sub1 { + main: + M1() + M2(M1.out) + + emit: + M2.out +} + +workflow sub2 { + main: + M1() + M2(M1.out) + + emit: + M2.out + +} + + +workflow { + + if (!params.cloudProcess) { sub1(); a = sub1.out } + if (params.cloudProcess) { + sub2() + if (1==1) { b = sub2.out } + else { b = Channel.empty() } + } + +} + + diff --git a/tests/test_workflows_simple_duplicate.py b/tests/test_workflows_simple_duplicate.py index 30c2ed5a9a8ad920ddd76637435d1c55f9bb164d..9f2685ea23108a13343178ac1e1828460c4fbb3a 100644 --- a/tests/test_workflows_simple_duplicate.py +++ b/tests/test_workflows_simple_duplicate.py @@ -16,6 +16,12 @@ class TestWorkflows(unittest.TestCase): json_files = glob.glob(f'tests/ressources/workflows/wf1/*.json', recursive=False) self.assertTrue(w.check_if_equal(json_files[0])) + def test_wfwf18_simple_duplicate(self): + w = Workflow(f"tests/ressources/workflows/wf18", display_info=False, duplicate=True) + w.initialise() + json_files = glob.glob(f'tests/ressources/workflows/wf18/*.json', recursive=False) + self.assertTrue(w.check_if_equal(json_files[0])) + def test_wfwf13_simple_duplicate(self): w = Workflow(f"tests/ressources/workflows/wf13", display_info=False, duplicate=True) w.initialise()