def get_object(address):
    """Return the live Python object located at the memory address embedded in *address*.

    *address* is any string containing a hexadecimal address (e.g. the output of
    ``str(obj)`` / ``repr(obj)`` such as ``"<Process object at 0x7f3a...>"`` or a bare
    ``hex(id(obj))``).  The first hex literal found is parsed and cast back to the
    object via ``ctypes``.

    NOTE(review): this only works on CPython, where ``id()`` is the object's memory
    address, and only while the object is still alive — dereferencing a stale
    address is undefined behaviour.  The caller must guarantee the object has not
    been garbage-collected.

    Raises:
        IndexError: if *address* contains no hexadecimal literal.
    """
    # Match a genuine hex literal (0x/0X followed by hex digits).  The previous
    # pattern r"\dx\w+" accepted any digit before the 'x' and any word chars after,
    # e.g. it would happily match "3xyz".
    raw_address = int(re.findall(r"0[xX][0-9a-fA-F]+", address)[0], base=16)
    return ctypes.cast(raw_address, ctypes.py_object).value
def get_link_dico_processes(self):
    """Return the process-level link dictionary, delegating up the ownership chain.

    Most building blocks reach the workflow through ``self.origin``; top-level
    elements that have no ``origin`` fall back to ``self.nextflow_file``, which
    forwards to the workflow itself.

    Returns:
        dict: mapping of process ids to the list of processes they link to.
    """
    try:
        return self.origin.get_link_dico_processes()
    except AttributeError:
        # No `origin` on this element (or it lacks the accessor) — the bare
        # `except:` previously used here would also have hidden real errors
        # raised *inside* a successful delegation, e.g. KeyboardInterrupt.
        return self.nextflow_file.get_link_dico_processes()
def get_inputs_DBfile(self):
    """Return the process's input parameters as unique ``sf:``-prefixed TTL identifiers.

    Returns:
        list[str]: one ``"sf:<param>"`` entry per distinct input parameter, in
        first-seen order.

    The previous implementation mutated the list returned by
    ``self.get_input_parameters()`` in place (an aliasing hazard if that getter
    exposes internal state) and deduplicated through ``set``, whose iteration
    order is hash-randomized — making the generated DBfile nondeterministic
    between runs.  ``dict.fromkeys`` deduplicates while keeping a stable order
    and leaves the source list untouched.
    """
    unique_params = dict.fromkeys(self.get_input_parameters())
    return [f"sf:{param}" for param in unique_params]
text+= f" ;\n\tsf:inputVariable {', '.join(input_params)}" + + output_params = self.get_outputs_DBfile() + if(len(output_params)!=0): + text+= f" ;\n\tsf:outputVariable {', '.join(output_params)}" + + tools = self.get_tools() + for i in range(len(tools)): + tools[i] = f"sf:TOOL{tools[i]}" + if(len(tools)!=0): + text+= f" ;\n\tsf:use {', '.join(tools)}" + + isFollowedBy = self.get_link_dico_processes()[str(self)] + for i in range(len(isFollowedBy)): + isFollowedBy[i] = f'sf:step{get_object(isFollowedBy[i]).get_alias()}' + if(len(isFollowedBy)!=0): + text+= f" ;\n\tsf:isFollowedBy {', '.join(isFollowedBy)}" + + text+=" ." + + for t in tools: + name = t[7:]#To remove the sf:TOOL" + text+= f"""\n\n{t} rdf:type sf:Command ; +\tschema:name "{name}" ; +\tschema:url <https://bio.tools/t?page=1&q={name}&sort=score> .""" + + + + for input in input_params: + text+=f"\n\n{input} rdf:type sf:Variable ." + + for output in output_params: + text+=f"\n\n{output} rdf:type sf:Variable ." + + return text + + + + diff --git a/src/subworkflow.py b/src/subworkflow.py index fc16dd2381a903d70c9632f3985ebf163417f96b..23fd62ca8c0c9fc2f8b46c7aa90045a1faf11c3f 100644 --- a/src/subworkflow.py +++ b/src/subworkflow.py @@ -524,4 +524,52 @@ class Subworkflow(Main): dico_sub["isPartOf"].append({"@id":parent_key}) self.get_nextflow_file().add_to_has_part(dico, sub_key) + + def get_DBfile_description(self, workflow_name): + text = "" + text+= f"# {self.get_alias()} Subworkflow" + text+=f"""\nsf:subworkflow{self.get_alias()} rdf:type sf:Subworkflow ; +\tp-plan:isSubPlanOf sf:{workflow_name}""" + + processes_called, subworkflows_called = [], [] + all_calls = self.get_all_calls_in_subworkflow() + for c in all_calls: + if(c.get_first_element_called().get_type()=="Process"): + processes_called.append(c.get_first_element_called()) + if(c.get_first_element_called().get_type()=="Subworkflow"): + subworkflows_called.append(c.get_first_element_called()) + + for i in range(len(processes_called)): + 
processes_called[i] = f'sf:step{processes_called[i].get_alias()}' + if(len(processes_called)!=0): + text+= f" ;\n\tschema:step {', '.join(processes_called)}" + + input_params = [] + for input in self.get_takes(): + input_params.append(f"sf:{input.get_code(get_OG = True)}") + if(len(input_params)!=0): + text+= f" ;\n\tsf:inputVariable {', '.join(input_params)}" + + output_params = [] + for output in self.get_emit(): + val = output.get_code(get_OG = True) + val = val.replace(",", "\\,") + val = val.replace("'", "\\'") + val = val.replace('"', '\\"') + output_params.append(f"sf:{val}") + if(len(output_params)!=0): + text+= f" ;\n\tsf:outputVariable {', '.join(output_params)}" + text+=" ." + + for input in input_params: + text+=f"\n\n{input} rdf:type sf:Variable ." + + for output in output_params: + text+=f"\n\n{output} rdf:type sf:Variable ." + + for sub in subworkflows_called: + text+=f'\n\n{sub.get_DBfile_description(f"subworkflow{self.get_alias()}")}' + + return text + \ No newline at end of file diff --git a/src/workflow.py b/src/workflow.py index bb4bdaab995fc1c0ce666b3f9af6f3885db3a5e9..e8e0d711f77e942a71df99b7fa56bad567d45e82 100644 --- a/src/workflow.py +++ b/src/workflow.py @@ -180,7 +180,10 @@ George Marchment, Bryan Brancotte, Marie Schmit, Frédéric Lemoine, Sarah Cohen tools = p.get_tools() self.alias_2_tools[p.get_alias()] = tools self.scripts_2_tools[p.get_script_code()] = tools - + + def get_link_dico_processes(self): + self.graph.initialise() + return self.graph.link_dico_processes def iniatilise_tab_processes_2_remove(self): if(self.processes_2_remove==None):