diff --git a/src/block.py b/src/block.py index 7d50ec98e38e6d1031fcdefaa95549e7e8314e98..4c2d7fd49b6a412ffcc07ae970f6793c8b766a1a 100644 --- a/src/block.py +++ b/src/block.py @@ -10,5 +10,24 @@ class Block(Root): def initialise(self): return super().initialise() + def get_type(self): + return "Root" + def get_channels(self): - return self.channels+self.origin.get_channels() \ No newline at end of file + return self.channels+self.origin.get_channels() + + def get_executors(self): + return self.executors+self.origin.get_executors() + + #def check_in_channels(self, channel): + # for c in self.get_channels(): + # if(c.equal(channel)): + # return True + # return False + # + #def get_channel_from_name(self, name): + # for c in self.get_channels(): + # if(name == c.get_name()): + # return c + # #raise Exception(f"{name} is not in the list of channels") + # return None \ No newline at end of file diff --git a/src/call.py b/src/call.py index 931397680998bf75bca729f76c28241485eb5fa1..9fee23aaeb35af5c7f54463053517cc60455fabb 100644 --- a/src/call.py +++ b/src/call.py @@ -20,9 +20,15 @@ class Call(Executor): self.first_element_called = None self.parameters = []#These are in the order self.OG_code = OG_code + self.initialised = False + self.emits = [] #It's important this is last #self.condition = Condition(self) + + def add_to_emits(self, emitted): + self.emits.append(emitted) + def __str__(self): return f"Call_{id(self)}" @@ -244,6 +250,7 @@ class Call(Executor): add_parameter(p) else: + print(num_inputs, process.get_nb_inputs()) raise BioFlowInsightError(f"Not the same number of parameters given as input for the process '{process.get_alias()}'{self.get_string_line(self.get_code(get_OG=True))}.", num=2, origin=self) elif(self.get_first_element_called().get_type()=="Subworkflow"): @@ -428,13 +435,15 @@ class Call(Executor): # temp = copy.copy(process) # temp.set_alias(f"{process.get_name()}_{process.get_number_times_called()}") if(self.get_duplicate_status()): - process = copy.copy(process) + process = process.copy() + process.initialise() self.first_element_called = process self.origin.add_element_to_elements_being_called(process) #temp.incremente_number_times_called() if(process==None and subworkflow!=None and fun==None): if(self.get_duplicate_status()): - subworkflow = copy.copy(subworkflow) + subworkflow = subworkflow.copy() + subworkflow.initialise() self.first_element_called = subworkflow self.origin.add_element_to_elements_being_called(subworkflow) if(process==None and subworkflow==None and fun!=None): @@ -488,8 +497,10 @@ class Call(Executor): raise Exception(f"I don't know what to do with '{self.get_first_element_called().get_type()}' in the call '{self.get_code()}' (in file ''{self.get_file_address()}'')") def initialise(self): - self.analyse_call(self.get_code(clean_pipe = True)) - self.write_summary() + if(not self.initialised): + self.initialised = True + self.analyse_call(self.get_code(clean_pipe = True)) + self.write_summary() diff --git a/src/emitted.py b/src/emitted.py index 14acf2e004b506e1d8221e72702b46445cd0b299..40745c4e634f537fb25f7c9292865e575dfc196d 100644 --- a/src/emitted.py +++ b/src/emitted.py @@ -9,9 +9,9 @@ class Emitted(Channel): def __init__(self, name, origin, emitted_by): Channel.__init__(self, name=name, origin=origin) + print(self, emitted_by) + self.emitted_by = emitted_by - if(not emitted_by.is_initialised()): - emitted_by.initialise() emitted_by.add_to_emits(self) @@ -28,10 +28,12 @@ class Emitted(Channel): return "Emitted" def set_emits_decimal(self, decimal): - self.emits = self.emitted_by.get_emit()[decimal] + thing_which_emits = self.emitted_by.get_first_element_called() + self.emits = thing_which_emits.get_emit()[decimal] def set_emits_name(self, name): - emitted = self.emitted_by.get_emit() + thing_which_emits = self.emitted_by.get_first_element_called() + emitted = thing_which_emits.get_emit() for o in emitted: code = o.get_code() @@ -56,14 +58,14 @@ class Emitted(Channel): except: self.set_emits_name(name=input) else: - #TODO -> check this - if(self.emitted_by.get_type()=='Process'): + thing_which_emits = self.emitted_by.get_first_element_called() + if(thing_which_emits.get_type()=='Process'): #self.emits = self.emitted_by None - elif(self.emitted_by.get_type()=='Subworkflow'): - if(len(self.emitted_by.emit)!=1): + elif(thing_which_emits.get_type()=='Subworkflow'): + if(len(thing_which_emits.emit)!=1): raise BioFlowInsightError(f"One channel was expected in the emit '{self.get_code()}'. Even though multiple emits are defined for the workflow '{self.emitted_by.get_name()}'", num=6, origin=self) - self.emits = self.emitted_by.emit[0] + self.emits = thing_which_emits.emit[0] else: raise Exception("This shoudn't happen!") diff --git a/src/executor.py b/src/executor.py index 85250d6d2002f12d6a1f62d7d373d444c174b2cb..adf9b5541aaf52695e0dd93eee108cf067ae62ec 100644 --- a/src/executor.py +++ b/src/executor.py @@ -204,5 +204,21 @@ class Executor(Nextflow_Building_Blocks): else: from .call import Call return Call(self.get_code(), self.origin) + + + #Method which returns the call which calls the element called + def get_call_by_name(self, name): + if(self.origin.get_type() in ['Root', 'Block']): + for c in self.origin.get_calls(): + c.initialise() + if(c.first_element_called.get_alias()==name): + return c + return None + + else: + print(self.get_code()) + return self.origin.get_call_by_name(name) + #print(self.origin.get_calls()) + #print(self.origin) diff --git a/src/include.py b/src/include.py index 0bf8662072a5b32cb2bcbc3cc569d501ce332547..9ac48bebaab00b8b32649f453f063300754dc2ea 100644 --- a/src/include.py +++ b/src/include.py @@ -112,7 +112,8 @@ class Include(Nextflow_Building_Blocks): for match in re.finditer(pattern_as, include): found = True if(self.get_duplicate_status()): - thing_as = copy.copy(self.nextflow_file.get_element_from_name(match.group(1))) + #thing_as = copy.copy(self.nextflow_file.get_element_from_name(match.group(1))) + thing_as = self.nextflow_file.get_element_from_name(match.group(1)).copy() thing_as.set_alias(match.group(3)) self.defines[match.group(3)] = thing_as else: diff --git a/src/main.py b/src/main.py index 07b762e73ca98ef942eef91c311350909fd94e75..ed6fdf98d9384210902797fd6213c09e35ffc97d 100644 --- a/src/main.py +++ b/src/main.py @@ -15,6 +15,9 @@ class Main(Nextflow_Building_Blocks): self.initialised = False self.root = None + def get_string_line(self, bit_of_code): + return self.nextflow_file.get_string_line(bit_of_code) + def get_modules_defined(self): return self.nextflow_file.get_modules_defined() @@ -27,128 +30,23 @@ class Main(Nextflow_Building_Blocks): def get_file_address(self): return self.nextflow_file.get_file_address() - def get_all_executors(self, dico): - for e in self.get_executors(): - dico[e] = 1 - - for exe in self.get_executors(): - if(exe.get_type()=="Call"): - first = exe.get_first_element_called() - if(first.get_type()=="Subworkflow"): - first.get_all_executors(dico) - elif(exe.get_type()=="Operation"): - for o in exe.get_origins(): - if(o.get_type()=="Call"): - first = o.get_first_element_called() - if(first.get_type()=="Subworkflow"): - first.get_all_executors(dico) - else: - raise Exception("This shouldn't happen") - def get_output_dir(self): return self.nextflow_file.get_output_dir() - def get_workflow_code(self): - return self.get_code() - - def get_file_conditions(self): - if(self.conditions==None): - self.conditions = extract_conditions(self.get_code()) - return self.conditions - def get_type(self): - return "Main DSL2" - - - def is_initialised(self): - return self.initialised - - def get_all_called(self): - called = [] - for exe in self.get_executors(): - if(exe.get_type()=="Call"): - called+=exe.get_elements_called() - else: - for o in exe.get_origins(): - if(o.get_type()=="Call"): - called+=o.get_elements_called() - return called - - #def get_processes(self): - # return self.origin.get_processes()+super().get_processes() - - #def get_process_from_name(self, name): - # print("here") - # return self.origin.get_process_from_name(name) - - - def get_processes_called(self, defined = {}): - - for c in self.get_all_called(): - if(c.get_type()=="Process"): - defined[c] = [] - elif(c.get_type()=="Subworkflow"): - _ = c.get_processes_called(defined = defined) - - return list(defined.keys()) - - def get_subworkflows_called(self, defined = {}): - for c in self.get_all_called(): - if(c.get_type()=="Subworkflow"): - defined[c] = [] - _ = c.get_subworkflows_called(defined = defined) - return list(defined.keys()) - - def get_functions_called(self, defined = {}): - for c in self.get_all_called(): - if(c.get_type()=="Function"): - defined[c] = [] - elif(c.get_type()=="Subworkflow"): - _ = c.get_functions_called(defined = defined) - return list(defined.keys()) - - - def get_function_from_name(self, name): - return self.origin.get_function_from_name(name) - - def get_list_name_subworkflows(self): - return self.origin.get_list_name_subworkflows() - - def get_list_name_includes(self): - return self.origin.get_list_name_includes() - - - #def get_channel_from_name(self, name): - # channel_file = self.origin.get_channel_from_name(name) - # if(channel_file!=None): - # return channel_file - # return super().get_channel_from_name(name) + return "Main" - """def get_added_operations_structure(self): - return self.origin.get_added_operations_structure()""" - - #def check_in_channels(self, channel): - # found = super().check_in_channels(channel) - # if(not found): - # if(self.origin.get_type()=="Nextflow File"): - # return self.origin.check_in_channels(channel) - # else: - # raise Exception(f"The origin is a '{self.origin.get_type()}' it should be a 'Nextflow File'") - # return found - - def get_subworkflow_from_name(self, name): - return self.origin.get_subworkflow_from_name(name) def check_includes(self): code = self.get_code() pattern = constant.FULL_INCLUDE for match in re.finditer(pattern, code): - if(self.get_type()=="Main DSL2"): + if(self.get_type()=="Main"): raise BioFlowInsightError(f"An include ('{match.group(0)}') was found in the main in the file '{self.get_file_address()}'. FlowInsight does not support this -> see specification list.", num = 12,origin=self) elif(self.get_type()=="Subworkflow"): raise BioFlowInsightError(f"An include ('{match.group(0)}') was found in the subworkflow '{self.get_name()}' in the file '{self.get_file_address()}'. FlowInsight does not support this -> see specification list.", num = 12, origin=self) @@ -172,46 +70,11 @@ class Main(Nextflow_Building_Blocks): self.root.initialise() - ##Extract Executors - #self.extract_executors() - # - ##Analyse Executors - #for e in self.executors: - # e.initialise() - - - - - """def add_channels_structure(self, dot): - return self.add_channels_structure_temp(dot, self.origin.get_added_operations_structure()) - """ - def get_origin(self): - return self.origin - def check_same_origin(self, sub): - return self.get_origin()== sub.get_origin() - ##Add "global" channels and operation to the structure defined in the file - #def get_structure_DSL2(self, dico): - # self.origin.get_structure_DSL2(dico) - def get_structure(self, dico): self.root.get_structure(dico) return dico - - ##Add "global" channels and operation to the structure defined in the file - #self.get_structure_DSL2(dico) - # - # - #for e in self.executors: - # if(e.get_type()=="Operation"): - # e.get_structure(dico) - # elif(e.get_type()=="Call"): - # e.get_structure(dico) - # else: - # raise Exception(f"Executor of type '{e.get_type()}' was extracted in a DSL2 workflow! I don't know what this is! The code is '{e.get_code()}'") - - \ No newline at end of file diff --git a/src/nextflow_file.py b/src/nextflow_file.py index 4e591eb85eb06bd9d70265ddbc1d1e64b5f8fb03..c3724a34aa0b844454e3d2b5cd7afa8ff591c8ed 100644 --- a/src/nextflow_file.py +++ b/src/nextflow_file.py @@ -39,6 +39,10 @@ class Nextflow_File(Nextflow_Building_Blocks): #GENERAL #---------------------- + def get_string_line(self, bit_of_code): + return self.code.get_string_line(bit_of_code) + + #Method that returns the address of the file def get_file_address(self): return Path(os.path.normpath(self.address)) diff --git a/src/operation.py b/src/operation.py index 70bf34154c7c4c375d66b019176a920dcd217b83..d72bbdc0e946f5ac56db8da0d8775d03bd9b2964 100644 --- a/src/operation.py +++ b/src/operation.py @@ -86,6 +86,8 @@ class Operation(Executor): #and Check that the thing extarcted is not WorkflowNameFile like 'WorkflowHgtseq' in nf-core/hgtseq if(name not in constant.ERROR_WORDS_ORIGINS):# and name.lower()!=f"workflow{self.get_name_file().lower()}"): channel = Channel(name=name, origin=self.origin) + #TODO -> problem might be here + #TODO -> this needs to be checked if(self.origin.get_type()!="Subworkflow"): if(not self.origin.check_in_channels(channel)): @@ -119,25 +121,35 @@ class Operation(Executor): full_code = '.'.join(splited[:-1]) if(name_called not in IGNORE_NAMES): #print(self.origin.origin.get_name(), self.origin.origin.get_processes()) - process = self.get_process_from_name(name_called) - #print(process) - subworkflow = self.get_subworkflow_from_name(name_called) - - if(process!=None and subworkflow!=None): - raise Exception(f"Problem in get_element -> {name_called} exists as process and subworkflow") - #Case subworkflow - if(process==None and subworkflow!=None): - emitted = Emitted(name=full_code, origin=self.origin, emitted_by=subworkflow) + call = self.get_call_by_name(name_called) + #process = self.get_process_from_name(name_called) + ##print(process) + #subworkflow = self.get_subworkflow_from_name(name_called) + # + #if(process!=None and subworkflow!=None): + # raise Exception(f"Problem in get_element -> {name_called} exists as process and subworkflow") + ##Case subworkflow + #if(process==None and subworkflow!=None): + # emitted = Emitted(name=full_code, origin=self.origin, emitted_by=subworkflow) + # emitted.set_emits(name_emitted) + ##Case Process + #if(process!=None and subworkflow==None): + # emitted = Emitted(name=full_code, origin=self.origin, emitted_by=process) + # #TODO -> analyse the outputs of the process + # + #if(process==None and subworkflow==None): + # if(name_called[:5]=="Call_"): + # name_called = self.calls[name_called].get_code() + # raise BioFlowInsightError(f"The call for '{name_called}' coudn't be found, before its use in the operation '{self.get_code(get_OG=True)}'{self.get_string_line(self.get_code(get_OG=True))}. Either because the call wasn't made before the operation or that the element it is calling doesn't exist.", num =8, origin=self) + + if(call!=None): + emitted = Emitted(name=full_code, origin=self.origin, emitted_by=call) emitted.set_emits(name_emitted) - #Case Process - if(process!=None and subworkflow==None): - emitted = Emitted(name=full_code, origin=self.origin, emitted_by=process) - #TODO -> analyse the outputs of the process - - if(process==None and subworkflow==None): + else: if(name_called[:5]=="Call_"): name_called = self.calls[name_called].get_code() - raise BioFlowInsightError(f"The call for '{name_called}' coudn't be found, before its use in the operation '{self.get_code(get_OG=True)}'{self.get_string_line(self.get_code(get_OG=True))}. Either because the call wasn't made before the operation or that the element it is calling doesn't exist.", num =8, origin=self) + raise BioFlowInsightError(f"The call for '{name_called}' coudn't be found, before its use in the operation '{self.get_code(get_OG=True)}'{self.get_string_line(self.get_code(get_OG=True))}. Either because the call wasn't made before the operation or that the element it is calling doesn't exist.", num =8, origin=self) + emitted.add_sink(self) self.origins.append(emitted) diff --git a/src/outils.py b/src/outils.py index 240cadd622196b19f31093e0ee8a2547de1badbb..61906be272f0af809b6ff52afde9fb0e6a79b27a 100644 --- a/src/outils.py +++ b/src/outils.py @@ -1041,10 +1041,10 @@ def extract_conditions(code): def process_2_DSL2(code): def replace_file_by_path(match): - if(match.group(1)==" "): - return "path " - else: - return "path(" + if(match.group(1)==" "): + return "path " + else: + return "path(" def replace_set_by_tuple(match): if(match.group(1)==" "): diff --git a/src/process.py b/src/process.py index b5bf580589d65ddcf8e7c10812fbc8eb38179401..0baa5363d6d68d9cfc7ed7bf1d1e98e79f302bb5 100644 --- a/src/process.py +++ b/src/process.py @@ -1,5 +1,6 @@ import re import glob +import copy from .code_ import Code from .condition import Condition @@ -29,6 +30,21 @@ class Process(Nextflow_Building_Blocks): self.called_by = []#List of calls + def copy(self): + process = copy.copy(self) + process.name = "" + process.alias = "" + process.printed_name = "" + process.inputs = [] + process.raw_input_names = []#This is used to convert DSL1 workflows to DSL2 + process.outputs = [] + process.input_code = "" + process.output_code = "" + process.when_code = "" + process.pusblishDir_code = "" + process.script_code = "" + process.called_by = []#List of calls + return process def add_to_emits(self, emit): self.later_emits.append(emit) diff --git a/src/root.py b/src/root.py index d275afd4d84a6e17d52b81b4c34198e78f31a0c5..3006ead384f22cda73ca4e308e278e7d420c30d1 100644 --- a/src/root.py +++ b/src/root.py @@ -31,6 +31,9 @@ class Root(Nextflow_Building_Blocks): return True return False + def get_channels(self): + return self.channels + def add_channel(self, channel): if(not self.check_in_channels(channel)): self.channels.append(channel) @@ -44,6 +47,17 @@ class Root(Nextflow_Building_Blocks): #raise Exception(f"{name} is not in the list of channels") return None + def get_executors(self): + return self.executors + + def get_calls(self): + tab = [] + for c in self.get_executors(): + if(c.get_type()=="Call"): + tab.append(c) + return tab + + def initialise(self): #Define the blocks code = self.get_code() @@ -67,13 +81,9 @@ class Root(Nextflow_Building_Blocks): e.initialise() #Initialise each subworkflow being called - for sub in self.elements_being_called: - if(sub.get_type()=="Subworkflow"): - sub.initialise() - - - def get_channels(self): - return self.channels + #for sub in self.elements_being_called: + # if(sub.get_type()=="Subworkflow"): + # sub.initialise() def get_process_from_name(self, name): for m in self.modules_defined: diff --git a/src/subworkflow.py b/src/subworkflow.py index 7db6cea153e5ffc8292a47baa9ec656ae5afece7..655c65ba1578adc0f9140fa76ee43d1e99ab0562 100644 --- a/src/subworkflow.py +++ b/src/subworkflow.py @@ -1,6 +1,7 @@ import re from . import constant from .code_ import Code +import copy from .root import Root from .main import Main from .bioflowinsighterror import BioFlowInsightError @@ -28,6 +29,27 @@ class Subworkflow(Main): self.called_by = []#List of calls + def copy(self): + sub = copy.copy(self) + sub.alias = self.name + sub.take = [] + sub.takes_channels = [] + sub.work = None + sub.emit = [] + sub.call = [] + sub.initialised = False + sub.later_emits = [] + sub.number_times_called = 0 + sub.called_by = [] + return sub + + def get_call_by_name(self, name): + for c in self.root.get_calls(): + c.initialise() + if(c.first_element_called.get_alias()==name): + return c + return None + def add_to_emits(self, emit): self.later_emits.append(emit) @@ -41,8 +63,6 @@ class Subworkflow(Main): return match.group(0).replace(match.group(1), self.get_alias()) return re.sub(r"workflow\s*(\w+)\s*\{", replacer, code) - def get_call(self): - return self.call def print_summary(self, tab = 0): print(" "*tab+f"* {self.name} ({self})") @@ -165,28 +185,7 @@ class Subworkflow(Main): self.takes_channels.append(channel) self.take = tab - #def initialise_emit(self): - # if(self.emit!=None): - # code = self.emit.get_code().split('\n') - # tab = [] - # for i in range(len(code)): - # code[i] = code[i].strip() - # channel = self.get_channel_from_name(code[i]) - # if(channel!=None): - # tab.append(channel) - # channel.add_sink(Operation(code=channel.get_name(), origin=self)) - # - # else: - # #Case it's an operation - # operation = Operation(code[i], self) - # operation.initialise() - # for gives in operation.get_gives(): - # tab.append(gives) - # #TODO -> check not add origin too! - # gives.add_sink(Operation(code=gives.get_name(), origin=self)) - # #self.add_operation(operation) - # self.executors.append(operation) - # self.emit = tab + def initialise_emit(self): from .operation import Operation @@ -243,6 +242,7 @@ class Subworkflow(Main): def initialise(self): if(not self.initialised): + self.initialised = True self.initialise_parts() self.initialise_takes() self.modules_defined = self.nextflow_file.get_modules_defined() @@ -251,7 +251,7 @@ class Subworkflow(Main): self.root = Root(code=self.get_work(), origin=self, modules_defined=self.modules_defined, subworkflow_inputs = self.takes_channels) self.root.initialise() self.initialise_emit() - self.initialised = True + def get_structure(self, dico): super().get_structure(dico) @@ -264,71 +264,4 @@ class Subworkflow(Main): #ope.set_operation_type("Branch") ope.get_structure(dico, to_remove = True) - def add_2_rocrate(self, dico, parent_key): - sub_key = self.get_rocrate_key(dico) - dico_sub = get_dico_from_tab_from_id(dico, sub_key) - if(dico_sub==None): - dico_sub = {} - dico_sub["@id"] = sub_key - dico_sub["name"] = "Subworkflow" - dico_sub["@type"] = ["SoftwareSourceCode", "ComputationalWorkflow"] - #TODO -> check if this remains true - #dico_main["conformsTo"] = {"@id": "https://bioschemas.org/profiles/ComputationalWorkflow/0.5-DRAFT-2020_07_21"} - #dico_main["dct:conformsTo"]= "https://bioschemas.org/profiles/ComputationalWorkflow/1.0-RELEASE/" - - - #ADD INPUTS - dico_sub["input"] = [] - for input in self.get_takes(): - if(type(input)==str): - name_input = input - else: - name_input = input.get_code() - dico_input = get_dico_from_tab_from_id(dico, name_input) - if(dico_input==None): - dico_input = {"@id":f"#{name_input}", "@name": name_input, "@type": "FormalParameter"} - dico["@graph"].append(dico_input) - dico_sub["input"].append({"@id":dico_input["@id"]}) - #ADD OUTPUTS - dico_sub["output"] = [] - for output in self.get_emit(): - if(type(output)==str): - name_output = output - else: - name_output = output.get_code() - dico_output = get_dico_from_tab_from_id(dico, name_output) - if(dico_output==None): - dico_output = {"@id":f"#{name_output}", "@name": name_output, "@type": "FormalParameter"} - dico["@graph"].append(dico_output) - dico_sub["output"].append({"@id":dico_output["@id"]}) - - - dico_sub["isPartOf"] = [{"@id": parent_key}] - dico_sub["hasPart"] = [] - - - called = [] - for exe in self.get_executors(): - - if(exe.get_type()=="Call"): - called+=exe.get_elements_called() - else: - for o in exe.get_origins(): - if(o.get_type()=="Call"): - called+=o.get_elements_called() - - for c in called: - if(c==self): - raise Exception("This shoudn't happen!") - c.add_2_rocrate(dico, sub_key) - dico_sub["hasPart"].append({"@id":c.get_rocrate_key(dico)}) - - dico["@graph"].append(dico_sub) - else: - if(not check_if_element_in_tab_rocrate(dico_sub["isPartOf"], parent_key)): - dico_sub["isPartOf"].append({"@id":parent_key}) - - - - - + \ No newline at end of file