diff --git a/src/call.py b/src/call.py index cd95252154ec2cf852ad1e865dfc457b92287718..65fe210edf79c3131a85a280ab35e360a1354692 100644 --- a/src/call.py +++ b/src/call.py @@ -176,7 +176,8 @@ class Call(Executor): def get_structure(self, dico): if(self.get_first_element_called().get_type()=="Process"): process = self.get_first_element_called() - dico['nodes'].append({'id':str(process), 'name':process.get_alias(), "shape":"ellipse", 'xlabel':"", "fillcolor":""}) + #Add process here + process.get_structure(dico) def add_parameter(p): #Case parameter is a channel @@ -363,7 +364,7 @@ class Call(Executor): param_index = 0 if(first_call.get_type()=="Process" or first_call.get_type()=="Subworkflow"): if(first_call.get_type()=="Process"): - dico['nodes'].append({'id':str(first_call), 'name':first_call.get_alias(), "shape":"ellipse", 'xlabel':"", 'fillcolor':''}) + first_call.get_structure(dico) else: temp_dico = {} temp_dico['nodes'] = [] @@ -422,13 +423,13 @@ class Call(Executor): fun = self.get_function_from_name(tab_call[0]) if(process!=None and subworkflow==None and fun==None): #If the elements need to duplicated -> then we need to duplicate it - temp = process - if(self.get_duplicate_status()): - if(process.get_number_times_called()>0): - temp = copy.deepcopy(process) - temp.set_alias(f"{process.get_name()}_{process.get_number_times_called()}") - self.first_element_called = temp - temp.incremente_number_times_called() + #temp = process + #if(self.get_duplicate_status()): + # if(process.get_number_times_called()>0): + # temp = copy.copy(process) + # temp.set_alias(f"{process.get_name()}_{process.get_number_times_called()}") + self.first_element_called = process + #temp.incremente_number_times_called() if(process==None and subworkflow!=None and fun==None): self.first_element_called = subworkflow if(process==None and subworkflow==None and fun!=None): diff --git a/src/channel.py b/src/channel.py index c51a20547b3202c6183d97ac8a05a4aff6bb17ee..cf2a68c4013865f8daf121e30e472e823f17bf4a 100644 --- a/src/channel.py +++ b/src/channel.py @@ -16,7 +16,9 @@ class Channel(Nextflow_Building_Blocks): def __init__(self, name, origin): self.name = name.strip() self.origin = origin - to_call = self.get_name_processes_subworkflows() + to_call = [] + for m in self.get_modules_defined(): + to_call.append(m.get_alias()) if(self.name in to_call): raise BioFlowInsightError(f"'{self.name}' is trying to be created as a channel{self.get_string_line(self.origin.get_code())}. It already exists as a process or a subworkflow in the nextflow file.", num = 4, origin=self) self.source = [] diff --git a/src/emitted.py b/src/emitted.py index d764792e1a388803809a52e5f2894f48812abf7c..b58a8690a93786dca9c30b9b4ff4722a8d1e95a5 100644 --- a/src/emitted.py +++ b/src/emitted.py @@ -73,11 +73,12 @@ class Emitted(Channel): end = "in the file" if(self.origin.get_type()=="Subworkflow"): end = f"in the subworkflow '{self.origin.get_name()}'" - raise BioFlowInsightError(f"Tried to access the emit '{self.get_code()}' but the {emits.get_type()} '{emits.get_name()}' has not been called {end}.", num = 8, origin=self) + raise BioFlowInsightError(f"Tried to access the emit '{self.get_code()}' but the {emits.get_type()} '{emits.get_name()}' ({emits}) has not been called {end}.", num = 8, origin=self) #Case if the emit emits a process if(emits.get_type()=="Process"): + #emits.get_structure(dico) if(self.emits==None): #for i in range(emits.get_nb_outputs()): # print("here") diff --git a/src/executor.py b/src/executor.py index c4b127f969f757fb5d93ab150f174f2b4e54c000..28f5cff9c6a9de1c1f75faa7596fd41b894e211f 100644 --- a/src/executor.py +++ b/src/executor.py @@ -96,7 +96,9 @@ class Executor(Nextflow_Building_Blocks): pipe = "=".join(pipe.split("=")[1:]) - to_call = self.get_list_name_processes()+self.get_list_name_subworkflows()+self.get_list_name_includes() + to_call = [] + for m in self.get_modules_defined(): + to_call.append(m.get_alias()) searching = True to_replace = [] while(searching): @@ -165,7 +167,7 @@ class Executor(Nextflow_Building_Blocks): #If it's an operation the executor should be outside the parentheses #If it's a call the operator should be inside the parentheses def return_type(self): - list_things_to_call = self.get_name_processes_subworkflows() + list_things_to_call = self.get_modules_defined() is_operation =False code = self.get_code() code = code.replace(' ', '') diff --git a/src/include.py b/src/include.py index 35ccf9ddabff89954803e3de175b7f47a396ec5c..0b2547d8fa7eb6fab516796430905a267aabb489 100644 --- a/src/include.py +++ b/src/include.py @@ -125,7 +125,7 @@ class Include(Nextflow_Building_Blocks): for match in re.finditer(pattern_as, include): found = True if(self.get_duplicate_status()): - thing_as = copy.deepcopy(self.file.get_element_from_name(match.group(1))) + thing_as = copy.copy(self.file.get_element_from_name(match.group(1))) thing_as.set_alias(match.group(3)) self.defines.append(thing_as) else: diff --git a/src/main_DSL2.py b/src/main_DSL2.py index ab27fc75cfd879654424e5a912db873a5758ae12..ae921c877f4da99eb255752a549e17a12b540bcc 100644 --- a/src/main_DSL2.py +++ b/src/main_DSL2.py @@ -1,7 +1,7 @@ from .nextflow_building_blocks import Nextflow_Building_Blocks from .bioflowinsighterror import BioFlowInsightError import re -from .outils import get_dico_from_tab_from_id, extract_conditions +from .outils import * from . import constant @@ -14,6 +14,11 @@ class Main_DSL2(Nextflow_Building_Blocks): self.calls = [] self.initialised = False self.conditions=None + self.modules_defined = [] + self.modules_called = [] + + def get_modules_defined(self): + return self.modules_defined def get_all_executors(self, dico): for e in self.get_executors(): @@ -29,7 +34,6 @@ class Main_DSL2(Nextflow_Building_Blocks): if(o.get_type()=="Call"): first = o.get_first_element_called() if(first.get_type()=="Subworkflow"): - print("here") first.get_all_executors(dico) else: raise Exception("This shouldn't happen") @@ -67,11 +71,35 @@ class Main_DSL2(Nextflow_Building_Blocks): called+=o.get_elements_called() return called - def get_processes(self): - return self.origin.get_processes()+super().get_processes() - + #def get_processes(self): + # return self.origin.get_processes()+super().get_processes() + + #def get_process_from_name(self, name): + # print("here") + # return self.origin.get_process_from_name(name) + def get_process_from_name(self, name): - return self.origin.get_process_from_name(name) + for m in self.modules_called: + if(m.get_type()=="Process" and m.get_alias()==name): + return m + + for m in self.modules_defined: + if(m.get_type()=="Process" and m.get_alias()==name): + #If we're duplicating we need to check that the processes hasn't already been called + #In that case we duplicate it + if(self.get_duplicate_status()): + if(m.get_number_times_called()>0): + import copy + process = copy.deepcopy(m) + self.modules_called.append(process) + return process + else: + m.incremente_number_times_called() + self.modules_called.append(m) + return m + else: + return m + return None def get_processes_called(self, defined = {}): @@ -147,15 +175,18 @@ class Main_DSL2(Nextflow_Building_Blocks): def initialise(self): if(not self.initialised): + #print(self, self.get_all_processes()) self.initialised=True + #Get the modules (Processes defined for the main/subworkflow) + self.modules_defined = self.origin.get_processes()+self.origin.get_subworkflows()+self.origin.get_modules_included() + #Check that includes are not defined in the main or subworkflows self.check_includes() #Extract Executors self.extract_executors() - #Analyse Executors for e in self.executors: @@ -163,6 +194,7 @@ class Main_DSL2(Nextflow_Building_Blocks): + """def add_channels_structure(self, dot): return self.add_channels_structure_temp(dot, self.origin.get_added_operations_structure()) """ @@ -270,4 +302,309 @@ class Main_DSL2(Nextflow_Building_Blocks): c.add_2_rocrate(dico, main_key) dico_main["hasPart"].append({"@id":c.get_rocrate_key(dico)}) - dico["@graph"].append(dico_main) \ No newline at end of file + dico["@graph"].append(dico_main) + + def check_if_there_is_a_thing_called_multiple_times(self): + from collections import Counter + called = [] + for e in self.get_executors(): + if(e.get_type()=="Call"): + for thing in e.get_called(): + called.append(thing.get_name()) + dico = Counter(called) + errors = [] + for thing in dico: + if(dico[thing]>1): + errors.append(thing) + if(len(errors)>1): + if(self.get_type()=="Main DSL2"): + text = "the workflow main" + else: + text = f"the subworkflow {self.get_name()}" + raise BioFlowInsightError(f"The elements {errors} were called multiple times in {text}", num="-64") + return called + + + + def extract_executors(self): + from .operation import Operation + from .call import Call + + #https://github.com/nextflow-io/nextflow/blob/45ceadbdba90b0b7a42a542a9fc241fb04e3719d/docs/operator.rst + #TODO This list needs to be checked if it's exhaustive + + if(self.get_type()=="Subworkflow"): + code = self.get_work() + elif(self.get_type()=="Main DSL2"): + code = self.get_code() + code = re.sub(constant.WORKFLOW_HEADER, "", code) + if(code[-1]!='}'): + raise Exception("This shoudn't happen") + code = code[:-1] + + else: + code = self.get_code() + + things_to_remove = [] + things_to_remove+= self.processes+self.includes+self.subworkflows+self.functions + if(self.main!=None): + things_to_remove+=[self.main] + + for to_remove in things_to_remove: + code = code.replace(to_remove.get_code(get_OG = True), "", 1) + + #We add this to simplify the search of the executors + code = "start\n"+code+"\nend" + + #This function takes an executor (already found and expandes it to the pipe operators) + def expand_to_pipe_operators(text, executor): + #If the executor ends with the pipe operator -> we remove it so that it can be detected by the pattern + if(executor[-1]=="|"): + executor = executor[:-1].strip() + start = text.find(executor)+len(executor) + for match in re.finditer(constant.END_PIPE_OPERATOR, text[start:]): + begining, end = match.span(0) + if(begining==0): + return expand_pipe_operator(text, executor+match.group(0)) + break + return executor + + + + #--------------------------------------------------------------- + #STEP1 - Extract equal operations eg. + # *Case "channel = something" + # *Case "(channel1, channel2) = something" + #--------------------------------------------------------------- + pattern_equal = constant.LIST_EQUALS + + searching = True + while(searching): + searching= False + text = code + for e in self.executors: + text = text.replace(e.get_code(), "", 1) + + for pattern in pattern_equal: + for match in re.finditer(pattern, text): + + start, end = match.span(2) + ope = extract_end_operation(text, start, end) + ope = expand_to_pipe_operators(text, ope) + + #If the thing which is extracted is not in the conditon of an if + if(not checks_in_condition_if(text, ope) and not checks_in_string(text, ope)): + operation = Operation(ope, self) + self.executors.append(operation) + searching= True + break + + #I switched step 2 and step 3 -> cause there were cases where there was operations in the paramters of a call -> they were extracted and removed + #----------------------------------- + #STEP3 - Extract the remaining calls + #----------------------------------- + #These are the processes and subworkflows we need to check are called + if(self.get_DSL()=="DSL2"): + to_call = [] + for m in self.modules_defined: + to_call.append(m.get_alias()) + pattern_call = constant.BEGINNING_CALL + searching = True + while(searching): + searching= False + text = code + for e in self.executors: + text = text.replace(e.get_code(), "", 1) + + for match in re.finditer(pattern_call, text): + if(match.group(1) in to_call): + + start, end = match.span(0) + txt_call = get_end_call(text, start, end) + txt_call = expand_to_pipe_operators(text, txt_call) + #If the thing which is extracted is not in the conditon of an if + if(not checks_in_condition_if(text, txt_call) and not checks_in_string(text, txt_call)): + if(txt_call.find("|")!=-1 and txt_call[txt_call.find("|")-1]!="|" and txt_call[txt_call.find("|")+1]!="|"): + first_thing_called = txt_call.split('|')[-1].strip() + if(first_thing_called in to_call): + call = Call(code =txt_call, origin =self) + self.executors.append(call) + else: + added = True + if(first_thing_called in constant.LIST_OPERATORS): + added = True + if(not added): + for operator in constant.LIST_OPERATORS: + for match in re.finditer(operator+constant.END_OPERATOR, txt_call.split('|')[-1].strip()): + start, end = match.span(0) + if(start==0): + added = True + if(not added): + raise BioFlowInsightError(f"In the executor '{txt_call}', '{first_thing_called}' is neither a process, subworkflow or an operator{self.get_string_line(txt_call)}", num = 14, origin=self) + else: + ope = Operation(code =txt_call, origin =self) + self.executors.append(ope) + else: + #We need to see if we can expand the call to a operation perhaps process().set{ch} + expanded = expand_call_to_operation(text, txt_call)#TODO update this + if(txt_call==expanded): + call = Call(code =txt_call, origin =self) + self.executors.append(call) + else: + ope = Operation(code =expanded, origin =self) + self.executors.append(ope) + + searching = True + break + + + #------------------------------------------------- + #STEP2 - Extract the terms which use the operators + #------------------------------------------------- + pattern_dot = constant.DOT_OPERATOR + searching = True + searched = [] + + + while(searching): + searching= False + text = code + for e in self.executors: + text = text.replace(e.get_code(), "", 1) + + for match in re.finditer(pattern_dot, text): + start, end = match.span(1) + + if(match.group(1) not in constant.ERROR_WORDS): + if(match.group(1) in constant.LIST_OPERATORS): + #TODO -> the function below might not work perfectly but i don't have any other ideas + + + #Use if there is an operator called right before opening the curlies/parenthse + #curly_left, curly_right = get_curly_count(text[:start]), get_curly_count(text[end:]) + parenthese_left, parenthese_right = get_parenthese_count(text[:start]), get_parenthese_count(text[end:]) + + #if(curly_left==0 and curly_right==0 and parenthese_left==0 and parenthese_right==0 and (start, end) not in searched): + #if(parenthese_left==0 and parenthese_right==0 and (start, end, temp) not in searched): + if(parenthese_left==0 and parenthese_right==0): + + + try: + pot = extract_executor_from_middle(text, start, end) + except: + try: + temp = text[start-10:end+10] + except: + temp = text[start:end] + raise BioFlowInsightError(f"Failed to extract the operation or call{self.get_string_line(temp)}. Try rewriting it in a simplified version.", num = 11, origin=self) + + pot = expand_to_pipe_operators(text, pot) + #IF the exact potential hasn't already been searched, then we don't do it + if((start, end, pot) not in searched): + searched.append((start, end, pot)) + #If the thing which is extracted is not in the conditon of an if + if(not checks_in_condition_if(text, pot) and not checks_in_string(text, pot)): + if(self.get_DSL()=="DSL2"): + to_call = [] + for m in self.modules_defined: + to_call.append(m.get_alias()) + if(pot.find("|")!=-1): + if(not checks_in_condition_if(pot, '|') and not checks_in_string(pot, '|')):#TODO checks_in_string is the first occurance + first_thing_called = pot.split('|')[-1].strip() + if(first_thing_called in to_call): + call = Call(code =pot, origin =self) + self.executors.append(call) + elif(first_thing_called in constant.LIST_OPERATORS): + ope = Operation(code =pot, origin =self) + self.executors.append(ope) + else: + raise BioFlowInsightError(f"'{first_thing_called}' is neither a process, subworkflow or an operator. In the executor '{pot}'{self.get_string_line(pot)}.", num=14,origin=self)#TODO -> try rewriting the operation using the standard syntaxe + + else: + from .executor import Executor + executor = Executor(pot, self) + self.executors.append(executor.return_type()) + + else: + from .executor import Executor + executor = Executor(pot, self) + self.executors.append(executor.return_type()) + else: + ope = Operation(pot, self) + self.executors.append(ope) + searching = True + break + + + #--------------------------------------------------------------- + #STEP4 - Extract the Executors which only use the pipe operators (which start with a channel) + #--------------------------------------------------------------- + to_call = [] + for m in self.modules_defined: + to_call.append(m.get_alias()) + + searching = True + while(searching): + searching= False + text = code + for e in self.executors: + text = text.replace(e.get_code(get_OG=True), "", 1) + pattern = constant.BEGINNING_PIPE_OPERATOR + + for match in re.finditer(pattern, text): + txt_call = expand_pipe_operator(text, match.group(0)) + full_executor = txt_call + + #start, end = match.span(0) + ## Check to see if a parameter is given such as in the example 'splitLetters | flatten | convertToUpper | view { it.trim() }' + #params, full_executor = check_if_parameter_is_given_pipe(text, start, end) + #if(params!=''): + # tab_to_call = txt_call.split('|') + # start = f"{tab_to_call[0]}({params})" + # txt_call = start + '|' + '|'.join(tab_to_call[1:]) + # print(start) + #print(params, full_executor) + + #If the thing which is extracted is not in the conditon of an if + if(not checks_in_condition_if(text, full_executor) and not checks_in_string(text, full_executor)): + tab_to_call = txt_call.split('|') + if(tab_to_call[0].strip() in to_call): + start = f"{tab_to_call[0]}()" + txt_call = start + '|' + '|'.join(tab_to_call[1:]) + first_thing_called = txt_call.split('|')[-1].strip() + + if(first_thing_called in to_call): + call = Call(code =txt_call, origin =self, OG_code= full_executor) + self.executors.append(call) + searching = True + break + elif(first_thing_called in constant.LIST_OPERATORS): + ope = Operation(code =txt_call, origin =self, OG_code= full_executor) + self.executors.append(ope) + searching = True + break + else: + added = False + #This is in the case "channel | map {dfvfdvd}" + for ope in constant.LIST_OPERATORS: + if(first_thing_called[:len(ope)]==ope and not added): + ope = Operation(code =txt_call, origin =self, OG_code= full_executor) + self.executors.append(ope) + added = True + searching = True + if(added): + break + elif(not added): + raise BioFlowInsightError(f"In the executor '{txt_call}', '{first_thing_called}' is neither a process, subworkflow or an operator (in the file '{self.get_file_address()}')", num = 14,origin=self) + + #--------------------------------------------------------------------- + #STEP5 - We remove the things which were falsy extracted as executors + #--------------------------------------------------------------------- + to_remove = [] + starting_by_to_remove = ["System.out"] + for e in self.executors: + for r in starting_by_to_remove: + if(e.get_code()[:len(r)]==r): + to_remove.append(e) + for e in to_remove: + self.executors.remove(e) \ No newline at end of file diff --git a/src/nextflow_building_blocks.py b/src/nextflow_building_blocks.py index af0bc0b307ee6839376a7c174f554f868e57e84e..b01127f6fb9c015f8cb6c1bb3420e8650a52d949 100644 --- a/src/nextflow_building_blocks.py +++ b/src/nextflow_building_blocks.py @@ -29,6 +29,10 @@ class Nextflow_Building_Blocks: #--------------------------------- #AUXILIARY METHODS FOR ALL CLASSES #--------------------------------- + + def get_modules_defined(self): + return self.origin.get_modules_defined() + def get_code(self, get_OG = False): return self.code.get_code(get_OG = get_OG) @@ -58,7 +62,8 @@ class Nextflow_Building_Blocks: #Only used by the process or subworkflow def is_called(self, called_from): - if(self.get_type() in ["Process", "Subworkflow"]): + #if(self.get_type() in ["Subworkflow", "Process"]): + if(self.get_type() in ["Subworkflow"]): executors = called_from.origin.get_executors() for exe in executors: @@ -72,6 +77,10 @@ class Nextflow_Building_Blocks: if(self in o.get_elements_called()): return True return False + + elif(self.get_type() in ["Process"]): + if(self.get_number_times_called()>=1): + return True raise Exception("You can't do this!") def get_line(self, bit_of_code): @@ -110,23 +119,18 @@ class Nextflow_Building_Blocks: p = Process(code=code[start:end], origin=self) self.processes.append(p) - def get_list_name_processes(self): - tab = [] - for p in self.get_processes(): - tab.append(p.get_name()) - return tab - def get_process_from_name(self, name): - for p in self.get_processes(): - if(p.get_name()==name): - return p - return None + #def get_process_from_name(self, name): + # for p in self.get_processes(): + # if(p.get_name()==name): + # return p + # return None def get_channels(self): return self.origin.get_channels() - def get_processes(self): - return self.processes + #def get_processes(self): + # return self.processes def get_workflow_code(self): return self.origin.get_workflow_code() @@ -191,282 +195,7 @@ class Nextflow_Building_Blocks: def get_executors(self): return self.executors - def extract_executors(self): - from .operation import Operation - from .call import Call - - #https://github.com/nextflow-io/nextflow/blob/45ceadbdba90b0b7a42a542a9fc241fb04e3719d/docs/operator.rst - #TODO This list needs to be checked if it's exhaustive - - if(self.get_type()=="Subworkflow"): - code = self.get_work() - elif(self.get_type()=="Main DSL2"): - code = self.get_code() - code = re.sub(constant.WORKFLOW_HEADER, "", code) - if(code[-1]!='}'): - raise Exception("This shoudn't happen") - code = code[:-1] - - else: - code = self.get_code() - - things_to_remove = [] - things_to_remove+= self.processes+self.includes+self.subworkflows+self.functions - if(self.main!=None): - things_to_remove+=[self.main] - - for to_remove in things_to_remove: - code = code.replace(to_remove.get_code(get_OG = True), "", 1) - - #We add this to simplify the search of the executors - code = "start\n"+code+"\nend" - - #This function takes an executor (already found and expandes it to the pipe operators) - def expand_to_pipe_operators(text, executor): - #If the executor ends with the pipe operator -> we remove it so that it can be detected by the pattern - if(executor[-1]=="|"): - executor = executor[:-1].strip() - start = text.find(executor)+len(executor) - for match in re.finditer(constant.END_PIPE_OPERATOR, text[start:]): - begining, end = match.span(0) - if(begining==0): - return expand_pipe_operator(text, executor+match.group(0)) - break - return executor - - - - #--------------------------------------------------------------- - #STEP1 - Extract equal operations eg. - # *Case "channel = something" - # *Case "(channel1, channel2) = something" - #--------------------------------------------------------------- - pattern_equal = constant.LIST_EQUALS - searching = True - while(searching): - searching= False - text = code - for e in self.executors: - text = text.replace(e.get_code(), "", 1) - - for pattern in pattern_equal: - for match in re.finditer(pattern, text): - - start, end = match.span(2) - ope = extract_end_operation(text, start, end) - ope = expand_to_pipe_operators(text, ope) - - #If the thing which is extracted is not in the conditon of an if - if(not checks_in_condition_if(text, ope) and not checks_in_string(text, ope)): - operation = Operation(ope, self) - self.executors.append(operation) - searching= True - break - - #I switched step 2 and step 3 -> cause there were cases where there was operations in the paramters of a call -> they were extracted and removed - #----------------------------------- - #STEP3 - Extract the remaining calls - #----------------------------------- - #These are the processes and subworkflows we need to check are called - if(self.get_DSL()=="DSL2"): - to_call = self.get_list_name_processes()+self.get_list_name_subworkflows()+self.get_list_name_includes() - pattern_call = constant.BEGINNING_CALL - searching = True - while(searching): - searching= False - text = code - for e in self.executors: - text = text.replace(e.get_code(), "", 1) - - for match in re.finditer(pattern_call, text): - if(match.group(1) in to_call): - - start, end = match.span(0) - txt_call = get_end_call(text, start, end) - txt_call = expand_to_pipe_operators(text, txt_call) - #If the thing which is extracted is not in the conditon of an if - if(not checks_in_condition_if(text, txt_call) and not checks_in_string(text, txt_call)): - if(txt_call.find("|")!=-1 and txt_call[txt_call.find("|")-1]!="|" and txt_call[txt_call.find("|")+1]!="|"): - first_thing_called = txt_call.split('|')[-1].strip() - if(first_thing_called in to_call): - call = Call(code =txt_call, origin =self) - self.executors.append(call) - else: - added = True - if(first_thing_called in constant.LIST_OPERATORS): - added = True - if(not added): - for operator in constant.LIST_OPERATORS: - for match in re.finditer(operator+constant.END_OPERATOR, txt_call.split('|')[-1].strip()): - start, end = match.span(0) - if(start==0): - added = True - if(not added): - raise BioFlowInsightError(f"In the executor '{txt_call}', '{first_thing_called}' is neither a process, subworkflow or an operator{self.get_string_line(txt_call)}", num = 14, origin=self) - else: - ope = Operation(code =txt_call, origin =self) - self.executors.append(ope) - else: - #We need to see if we can expand the call to a operation perhaps process().set{ch} - expanded = expand_call_to_operation(text, txt_call)#TODO update this - if(txt_call==expanded): - call = Call(code =txt_call, origin =self) - self.executors.append(call) - else: - ope = Operation(code =expanded, origin =self) - self.executors.append(ope) - - searching = True - break - - - #------------------------------------------------- - #STEP2 - Extract the terms which use the operators - #------------------------------------------------- - pattern_dot = constant.DOT_OPERATOR - searching = True - searched = [] - - - while(searching): - searching= False - text = code - for e in self.executors: - text = text.replace(e.get_code(), "", 1) - - for match in re.finditer(pattern_dot, text): - start, end = match.span(1) - - if(match.group(1) not in constant.ERROR_WORDS): - if(match.group(1) in constant.LIST_OPERATORS): - #TODO -> the function below might not work perfectly but i don't have any other ideas - - - #Use if there is an operator called right before opening the curlies/parenthse - #curly_left, curly_right = get_curly_count(text[:start]), get_curly_count(text[end:]) - parenthese_left, parenthese_right = get_parenthese_count(text[:start]), get_parenthese_count(text[end:]) - - #if(curly_left==0 and curly_right==0 and parenthese_left==0 and parenthese_right==0 and (start, end) not in searched): - #if(parenthese_left==0 and parenthese_right==0 and (start, end, temp) not in searched): - if(parenthese_left==0 and parenthese_right==0): - - - try: - pot = extract_executor_from_middle(text, start, end) - except: - try: - temp = text[start-10:end+10] - except: - temp = text[start:end] - raise BioFlowInsightError(f"Failed to extract the operation or call{self.get_string_line(temp)}. Try rewriting it in a simplified version.", num = 11, origin=self) - - pot = expand_to_pipe_operators(text, pot) - #IF the exact potential hasn't already been searched, then we don't do it - if((start, end, pot) not in searched): - searched.append((start, end, pot)) - #If the thing which is extracted is not in the conditon of an if - if(not checks_in_condition_if(text, pot) and not checks_in_string(text, pot)): - if(self.get_DSL()=="DSL2"): - to_call = self.get_list_name_processes()+self.get_list_name_subworkflows()+self.get_list_name_includes() - if(pot.find("|")!=-1): - if(not checks_in_condition_if(pot, '|') and not checks_in_string(pot, '|')):#TODO checks_in_string is the first occurance - first_thing_called = pot.split('|')[-1].strip() - if(first_thing_called in to_call): - call = Call(code =pot, origin =self) - self.executors.append(call) - elif(first_thing_called in constant.LIST_OPERATORS): - ope = Operation(code =pot, origin =self) - self.executors.append(ope) - else: - raise BioFlowInsightError(f"'{first_thing_called}' is neither a process, subworkflow or an operator. In the executor '{pot}'{self.get_string_line(pot)}.", num=14,origin=self)#TODO -> try rewriting the operation using the standard syntaxe - - else: - from .executor import Executor - executor = Executor(pot, self) - self.executors.append(executor.return_type()) - - else: - from .executor import Executor - executor = Executor(pot, self) - self.executors.append(executor.return_type()) - else: - ope = Operation(pot, self) - self.executors.append(ope) - searching = True - break - - - #--------------------------------------------------------------- - #STEP4 - Extract the Executors which only use the pipe operators (which start with a channel) - #--------------------------------------------------------------- - to_call = self.get_list_name_processes()+self.get_list_name_subworkflows()+self.get_list_name_includes() - - searching = True - while(searching): - searching= False - text = code - for e in self.executors: - text = text.replace(e.get_code(get_OG=True), "", 1) - pattern = constant.BEGINNING_PIPE_OPERATOR - - for match in re.finditer(pattern, text): - txt_call = expand_pipe_operator(text, match.group(0)) - full_executor = txt_call - - #start, end = match.span(0) - ## Check to see if a parameter is given such as in the example 'splitLetters | flatten | convertToUpper | view { it.trim() }' - #params, full_executor = check_if_parameter_is_given_pipe(text, start, end) - #if(params!=''): - # tab_to_call = txt_call.split('|') - # start = f"{tab_to_call[0]}({params})" - # txt_call = start + '|' + '|'.join(tab_to_call[1:]) - # print(start) - #print(params, full_executor) - - #If the thing which is extracted is not in the conditon of an if - if(not checks_in_condition_if(text, full_executor) and not checks_in_string(text, full_executor)): - tab_to_call = txt_call.split('|') - if(tab_to_call[0].strip() in to_call): - start = f"{tab_to_call[0]}()" - txt_call = start + '|' + '|'.join(tab_to_call[1:]) - first_thing_called = txt_call.split('|')[-1].strip() - - if(first_thing_called in to_call): - call = Call(code =txt_call, origin =self, OG_code= full_executor) - self.executors.append(call) - searching = True - break - elif(first_thing_called in constant.LIST_OPERATORS): - ope = Operation(code =txt_call, origin =self, OG_code= full_executor) - self.executors.append(ope) - searching = True - break - else: - added = False - #This is in the case "channel | map {dfvfdvd}" - for ope in constant.LIST_OPERATORS: - if(first_thing_called[:len(ope)]==ope and not added): - ope = Operation(code =txt_call, origin =self, OG_code= full_executor) - self.executors.append(ope) - added = True - searching = True - if(added): - break - elif(not added): - raise BioFlowInsightError(f"In the executor '{txt_call}', '{first_thing_called}' is neither a process, subworkflow or an operator (in the file '{self.get_file_address()}')", num = 14,origin=self) - - #--------------------------------------------------------------------- - #STEP5 - We remove the things which were falsy extracted as executors - #--------------------------------------------------------------------- - to_remove = [] - starting_by_to_remove = ["System.out"] - for e in self.executors: - for r in starting_by_to_remove: - if(e.get_code()[:len(r)]==r): - to_remove.append(e) - for e in to_remove: - self.executors.remove(e) #---------------------- diff --git a/src/nextflow_file.py b/src/nextflow_file.py index e25e2f03eca47db88dfaaf403093c38906c30b0c..2871d1d63ed2762c1dc5e762731aedb77d61cefc 100644 --- a/src/nextflow_file.py +++ b/src/nextflow_file.py @@ -13,7 +13,7 @@ from . import constant warnings.filterwarnings("ignore") from .nextflow_building_blocks import Nextflow_Building_Blocks -from .outils import extract_curly, get_curly_count, get_parenthese_count, get_dico_from_tab_from_id, check_file_exists, extract_conditions +from .outils import * from .bioflowinsighterror import BioFlowInsightError @@ -267,25 +267,25 @@ class Nextflow_File(Nextflow_Building_Blocks): #---------------------- #PROCESS #---------------------- - def get_process_from_name(self, name): - for process in self.processes: - if(process.get_name()==name): - return process - if(self.get_duplicate_status()): - for include in self.includes: - defines = include.get_defines() - for d in defines: - if(d.get_alias()==name and d.get_type()=="Process"): - return d - else: - for include in self.includes: - aliases = include.get_aliases() - for a in aliases: - if(a==name and aliases[a].get_type()=="Process"): - return aliases[a] - - return None - raise Exception(f"Process '{name}' couldn't be found in '{self.get_file_address()}'") + #def get_process_from_name(self, name): + # for process in self.processes: + # if(process.get_name()==name): + # return process + # if(self.get_duplicate_status()): + # for include in self.includes: + # defines = include.get_defines() + # for d in defines: + # if(d.get_alias()==name and d.get_type()=="Process"): + # return d + # else: + # for include in self.includes: + # aliases = include.get_aliases() + # for a in aliases: + # if(a==name and aliases[a].get_type()=="Process"): + # return aliases[a] + # + # return None + # raise Exception(f"Process '{name}' couldn't be found in '{self.get_file_address()}'") def get_processes_defined(self, dict = {}): @@ -295,6 +295,9 @@ class Nextflow_File(Nextflow_Building_Blocks): for include in self.includes: _ = include.get_file().get_processes_defined(dict = dict) return dict + + def get_processes(self): + return self.processes def get_processes_called(self): @@ -362,6 +365,9 @@ class Nextflow_File(Nextflow_Building_Blocks): names.append(sub.get_name()) return names + def get_subworkflows(self): + return self.subworkflows + def get_subworkflow_from_name(self, name): for sub in self.subworkflows: if(sub.get_name()==name): @@ -491,6 +497,12 @@ class Nextflow_File(Nextflow_Building_Blocks): def get_includes(self): return self.includes + def get_modules_included(self): + modules = [] + for include in self.get_includes(): + modules+=include.get_defines() + return modules + def get_all_includes(self): if(self.first_file): return self.all_includes @@ -591,7 +603,7 @@ class Nextflow_File(Nextflow_Building_Blocks): print(self.get_file_address()) self.extract_processes() self.extract_functions() - self.extract_executors() + self.DSL1_extract_executors() for e in self.executors: e.initialise() @@ -602,6 +614,183 @@ class Nextflow_File(Nextflow_Building_Blocks): self.initialise_graph() + def DSL1_extract_executors(self): + from .operation import Operation + + code = self.get_code() + + things_to_remove = [] + things_to_remove+= self.processes+self.includes+self.subworkflows+self.functions + + for to_remove in things_to_remove: + code = code.replace(to_remove.get_code(get_OG = True), "", 1) + + #We add this to simplify the search of the executors + code = "start\n"+code+"\nend" + + #This function takes an executor (already found and expandes it to the pipe operators) + def expand_to_pipe_operators(text, executor): + #If the executor ends with the pipe operator -> we remove it so that it can be detected by the pattern + if(executor[-1]=="|"): + executor = executor[:-1].strip() + start = text.find(executor)+len(executor) + for match in re.finditer(constant.END_PIPE_OPERATOR, text[start:]): + begining, end = match.span(0) + if(begining==0): + return expand_pipe_operator(text, executor+match.group(0)) + break + return executor + + + + #--------------------------------------------------------------- + #STEP1 - Extract equal operations eg. + # *Case "channel = something" + # *Case "(channel1, channel2) = something" + #--------------------------------------------------------------- + pattern_equal = constant.LIST_EQUALS + + searching = True + while(searching): + searching= False + text = code + for e in self.executors: + text = text.replace(e.get_code(), "", 1) + + for pattern in pattern_equal: + for match in re.finditer(pattern, text): + + start, end = match.span(2) + ope = extract_end_operation(text, start, end) + ope = expand_to_pipe_operators(text, ope) + + #If the thing which is extracted is not in the conditon of an if + if(not checks_in_condition_if(text, ope) and not checks_in_string(text, ope)): + operation = Operation(ope, self) + self.executors.append(operation) + searching= True + break + + + #------------------------------------------------- + #STEP2 - Extract the terms which use the operators + #------------------------------------------------- + pattern_dot = constant.DOT_OPERATOR + searching = True + searched = [] + + + while(searching): + searching= False + text = code + for e in self.executors: + text = text.replace(e.get_code(), "", 1) + + for match in re.finditer(pattern_dot, text): + start, end = match.span(1) + + if(match.group(1) not in constant.ERROR_WORDS): + if(match.group(1) in constant.LIST_OPERATORS): + #TODO -> the function below might not work perfectly but i don't have any other ideas + + + #Use if there is an operator called right before opening the curlies/parenthse + #curly_left, curly_right = get_curly_count(text[:start]), get_curly_count(text[end:]) + parenthese_left, parenthese_right = get_parenthese_count(text[:start]), get_parenthese_count(text[end:]) + + #if(curly_left==0 and curly_right==0 and parenthese_left==0 and parenthese_right==0 and (start, end) not in searched): + #if(parenthese_left==0 and parenthese_right==0 and (start, end, temp) not in searched): + if(parenthese_left==0 and parenthese_right==0): + + + try: + pot = extract_executor_from_middle(text, start, end) + except: + try: + temp = text[start-10:end+10] + except: + temp = text[start:end] + raise BioFlowInsightError(f"Failed to extract the operation or call{self.get_string_line(temp)}. Try rewriting it in a simplified version.", num = 11, origin=self) + + pot = expand_to_pipe_operators(text, pot) + #IF the exact potential hasn't already been searched, then we don't do it + if((start, end, pot) not in searched): + searched.append((start, end, pot)) + #If the thing which is extracted is not in the conditon of an if + if(not checks_in_condition_if(text, pot) and not checks_in_string(text, pot)): + + ope = Operation(pot, self) + self.executors.append(ope) + searching = True + break + + + #--------------------------------------------------------------- + #STEP4 - Extract the Executors which only use the pipe operators (which start with a channel) + #--------------------------------------------------------------- + to_call = self.get_list_name_processes()+self.get_list_name_subworkflows()+self.get_list_name_includes() + + searching = True + while(searching): + searching= False + text = code + for e in self.executors: + text = text.replace(e.get_code(get_OG=True), "", 1) + pattern = constant.BEGINNING_PIPE_OPERATOR + + for match in re.finditer(pattern, text): + txt_call = expand_pipe_operator(text, match.group(0)) + full_executor = txt_call + + #start, end = match.span(0) + ## Check to see if a parameter is given such as in the example 'splitLetters | flatten | convertToUpper | view { it.trim() }' + #params, full_executor = check_if_parameter_is_given_pipe(text, start, end) + #if(params!=''): + # tab_to_call = txt_call.split('|') + # start = f"{tab_to_call[0]}({params})" + # txt_call = start + '|' + '|'.join(tab_to_call[1:]) + # print(start) + #print(params, full_executor) + + #If the thing which is extracted is not in the conditon of an if + if(not checks_in_condition_if(text, full_executor) and not checks_in_string(text, full_executor)): + tab_to_call = txt_call.split('|') + if(tab_to_call[0].strip() in to_call): + start = f"{tab_to_call[0]}()" + txt_call = start + '|' + '|'.join(tab_to_call[1:]) + first_thing_called = txt_call.split('|')[-1].strip() + + if(first_thing_called in constant.LIST_OPERATORS): + ope = Operation(code =txt_call, origin =self, OG_code= full_executor) + self.executors.append(ope) + searching = True + break + else: + added = False + #This is in the case "channel | map {dfvfdvd}" + for ope in constant.LIST_OPERATORS: + if(first_thing_called[:len(ope)]==ope and not added): + ope = Operation(code =txt_call, origin =self, OG_code= full_executor) + self.executors.append(ope) + added = True + searching = True + if(added): + break + elif(not added): + raise BioFlowInsightError(f"In the executor '{txt_call}', '{first_thing_called}' is neither a process, subworkflow or an operator (in the file '{self.get_file_address()}')", num = 14,origin=self) + + #--------------------------------------------------------------------- + #STEP5 - We remove the things which were falsy extracted as executors + #--------------------------------------------------------------------- + to_remove = [] + starting_by_to_remove = ["System.out"] + for e in self.executors: + for r in starting_by_to_remove: + if(e.get_code()[:len(r)]==r): + to_remove.append(e) + for e in to_remove: + self.executors.remove(e) + #The start parameter is for when we call 'get_structure_DSL2' for the first time def get_structure_DSL2(self, dico, start = False): if(not self.already_added_structure): diff --git a/src/operation.py b/src/operation.py index c438d2aed796f77bbd8bd87dfd7e6f078ba66c7f..5425fbd70e0bdbe543a6bee3084375241fd95ffd 100644 --- a/src/operation.py +++ b/src/operation.py @@ -115,8 +115,10 @@ class Operation(Executor): if( splited[-1] in constant.LIST_OPERATORS): full_code = '.'.join(splited[:-1]) if(name_called not in IGNORE_NAMES): - process = self.origin.get_process_from_name(name_called) - subworkflow = self.origin.get_subworkflow_from_name(name_called) + #print(self.origin.origin.get_name(), self.origin.origin.get_processes()) + process = self.get_process_from_name(name_called) + #print(process) + subworkflow = self.get_subworkflow_from_name(name_called) if(process!=None and subworkflow!=None): raise Exception(f"Problem in get_element -> {name_called} exists as process and subworkflow") @@ -692,7 +694,9 @@ class Operation(Executor): def extract_calls(self, clean_pipe = True): from .call import Call - to_call = self.get_name_processes_subworkflows() + to_call = [] + for m in self.get_modules_defined(): + to_call.append(m.get_alias()) pattern_call = constant.BEGINNING_CALL searching = True while(searching): diff --git a/src/process.py b/src/process.py index b018c639f42ba7b4b6cbd1209d4daf59d3a23d88..29c10c0f939958a0fcc01fce9983c4ee75c3f281 100644 --- a/src/process.py +++ b/src/process.py @@ -15,6 +15,7 @@ class Process(Nextflow_Building_Blocks): self.code = Code(code, origin = self) self.name = "" self.alias = "" + self.printed_name = "" self.inputs = [] self.raw_input_names = []#This is used to convert DSL1 workflows to DSL2 self.outputs = [] @@ -489,9 +490,13 @@ class Process(Nextflow_Building_Blocks): self.name = self.name.replace("'", "") self.name = self.name.replace('"', '') self.alias = self.name + self.printed_name = self.name + + def get_name_to_print(self): + return self.printed_name def get_structure(self, dico): - dico['nodes'].append({'id':str(self), 'name':self.get_name(), "shape":"ellipse", 'xlabel':"", 'fillcolor':''}) + dico['nodes'].append({'id':str(self), 'name':self.get_name_to_print(), "shape":"ellipse", 'xlabel':"", 'fillcolor':''}) def initialise_inputs_outputs(self): DSL = self.origin.get_DSL() diff --git a/src/subworkflow.py b/src/subworkflow.py index 958d23bf190fd3513c7675d73b975124a9304836..920dd29e4a9b7264ab74be4c332c561cd89117cc 100644 --- a/src/subworkflow.py +++ b/src/subworkflow.py @@ -21,6 +21,7 @@ class Subworkflow(Main_DSL2): self.initialised = False self.later_emits = [] + self.number_times_called = 0 def add_to_emits(self, emit):