#I don't need to define the equivalent gives -> cause it's not possible:)
def add_origin_emits(self, full_code, name_called, name_emitted):
    """Register an emitted output as one of the origins of this operation.

    Args:
        full_code: text of the emit as it appears in the operation; a
            trailing operator (last dot-separated part found in
            constant.LIST_OPERATORS) is dropped before it is used as a name.
        name_called: name of the element that emits; it is looked up on
            self.origin as a process and as a subworkflow.
        name_emitted: name handed to Emitted.set_emits when the emitter is
            a subworkflow.

    Raises:
        Exception: name_called resolves to both a process and a subworkflow.
        BioFlowInsightError: name_called resolves to neither.

    Nothing is added when an origin with the same code is already
    registered, or when name_called is one of the ignored names.
    """
    #In the case an operator is extracted at the end of the emit, drop it
    #first: the Emitted object is created under the stripped name, so the
    #duplicate check below has to look for that name as well
    emit_code = full_code
    if emit_code.count('.') >= 2:
        parts = emit_code.split('.')
        if parts[-1] in constant.LIST_OPERATORS:
            emit_code = '.'.join(parts[:-1])

    #Check that it has not already been added (raw or stripped form)
    if any(o.get_code() in (full_code, emit_code) for o in self.origins):
        return

    IGNORE_NAMES = ['params']
    if name_called in IGNORE_NAMES:
        return

    #Function-scope import, as in the original code
    from .emitted import Emitted

    process = self.origin.get_process_from_name(name_called)
    subworkflow = self.origin.get_subworkflow_from_name(name_called)

    if process is not None and subworkflow is not None:
        raise Exception(f"Problem in get_element -> {name_called} exists as process and subworkflow")

    if process is None and subworkflow is None:
        #Show the code of the call rather than its placeholder id
        if name_called[:5] == "Call_":
            name_called = self.calls[name_called].get_code()
        raise BioFlowInsightError(f"The call for '{name_called}' coudn't be found, before its use in the operation '{self.get_code(get_OG=True)}'{self.get_string_line(self.get_code(get_OG=True))}. Either because the call wasn't made before the operation or that the element it is calling doesn't exist.", num =8, origin=self)

    if subworkflow is not None:
        #Case subworkflow
        emitted = Emitted(name=emit_code, origin=self.origin, emitted_by=subworkflow)
        emitted.set_emits(name_emitted)
    else:
        #Case Process
        emitted = Emitted(name=emit_code, origin=self.origin, emitted_by=process)
        #TODO -> analyse the outputs of the process

    emitted.add_sink(self)
    self.origins.append(emitted)


def add_origin(self, name):
    """Register `name` as an origin of this operation, at most once.

    Args:
        name: either the placeholder id of a call (a full match of
            constant.CALL_ID, DSL2 only) or the name of a channel.

    A call is taken from self.calls and appended to self.origins; any
    other name is delegated to self.add_origin_channel.
    """
    #Check that it's not already been added
    if any(name == o.get_code() for o in self.origins):
        return

    #Case it's a call and it's been replaced (only possible in DSL2)
    if self.origin.get_DSL() == "DSL2" and re.fullmatch(constant.CALL_ID, name):
        call = self.calls[name]
        #NOTE(review): the get_code() of a call presumably differs from its
        #placeholder id, so the check above may not catch it -> compare the
        #object itself to avoid appending the same call twice
        if not any(o is call for o in self.origins):
            self.origins.append(call)
        return

    #In every other case it's a channel
    self.add_origin_channel(name)
def get_creators(self, file):
    """Return the first author found in the log of `file`.

    Args:
        file: file whose log is obtained through
            self.fill_log_file(file, reverse = True).

    Returns:
        A one-element list [{"@id": <author name>}] built from the first
        "Author: <name> <<email>>" entry of that log, or None when the
        log holds no such entry.
    """
    info = self.fill_log_file(file, reverse = True)
    #The name and the e-mail must sit on the same line: excluding '<' and
    #newlines from the name stops a stray "Author: ..." without e-mail from
    #swallowing the following lines up to the next '<...>'
    match = re.search(r"Author: ([^<>\n]+)<([^>\n]+)>", info)
    if match is None:
        return None
    return [{"@id": match.group(1).strip()}]