From 269ca16c12eecc729261dd34248e344831de9728 Mon Sep 17 00:00:00 2001 From: George Marchment <georgemarchment@yahoo.fr> Date: Fri, 23 Aug 2024 10:48:54 +0200 Subject: [PATCH] add citations to output + added modifications to Ro-Crate --- README.md | 4 ++-- src/nextflow_file.py | 4 ++-- src/process.py | 6 +++--- src/ro_crate.py | 9 ++++++--- src/subworkflow.py | 4 ++-- src/workflow.py | 7 +++++++ 6 files changed, 22 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index 42dbd76..92f97da 100644 --- a/README.md +++ b/README.md @@ -122,10 +122,10 @@ The structure of this folder is organised as such : │  ├── specification_wo_orphan_operations_wo_labels.dot │  ├── specification_wo_orphan_operations_wo_labels.mmd │  └── specification_wo_orphan_operations_wo_labels.png -└── ro-crate-metadata-rnaseq-nf.json +└── ro-crate-metadata.json ``` -* The `ro-crate-metadata-rnaseq-nf.json` describes the workflow following an extended Workflow [RO-Crate](https://www.researchobject.org/ro-crate/) profile. The description of this extended profile can be found [here](https://gitlab.liris.cnrs.fr/sharefair/posters/swat4hcls-2024). +* The `ro-crate-metadata.json` describes the workflow following an extended Workflow [RO-Crate](https://www.researchobject.org/ro-crate/) profile. The description of this extended profile can be found [here](https://gitlab.liris.cnrs.fr/sharefair/posters/swat4hcls-2024). * the `debug` folder contains different intermediary files which are ussefull for debugging * the `graphs` folder contains the different graphs which are generated. For each of the 3 graphs described above, **BioFlow-Insight** generates : * A `json` file which describes the graph using **BioFlow-Insight** specific format diff --git a/src/nextflow_file.py b/src/nextflow_file.py index 4a43c44..c6588d6 100644 --- a/src/nextflow_file.py +++ b/src/nextflow_file.py @@ -706,7 +706,7 @@ class Nextflow_File(Nextflow_Building_Blocks): # self.graph.get_metadata_graph_wo_operations() def add_main_DSL1_2_rocrate(self, dico, file_dico, file_name): - main_key = f"{file_name}/main" + main_key = f"{file_name}#main" file_dico["hasPart"].append(main_key) dico_main = {} dico_main["@id"] = main_key @@ -719,7 +719,7 @@ class Nextflow_File(Nextflow_Building_Blocks): dico_main["output"] = [] dico_main["isPartOf"] = [{"@id": file_name}] dico_main["hasPart"] = [] - self.add_processes_2_rocrate(dico, dico_main, main_key) + self.add_processes_2_rocrate(dico, dico_main, main_key.split("#")[0]) dico["@graph"].append(dico_main) def add_processes_2_rocrate(self, dico, file_dico, file_name): diff --git a/src/process.py b/src/process.py index e5b25ac..c825e4c 100644 --- a/src/process.py +++ b/src/process.py @@ -434,7 +434,7 @@ class Process(Nextflow_Building_Blocks): dico_process = {} dico_process["@id"] = process_key dico_process["name"] = "Process" - dico_process["@type"] = ["SoftwareSourceCode", "Script"] + dico_process["@type"] = ["SoftwareSourceCode"] #ADD INPUTS dico_process["input"] = [] for input in self.get_inputs(): @@ -444,7 +444,7 @@ class Process(Nextflow_Building_Blocks): name_input = input.get_code() dico_input = get_dico_from_tab_from_id(dico, name_input) if(dico_input==None): - dico_input = {"@id":name_input, "@type": "FormalParameter"} + dico_input = {"@id":f"#{name_input}", "@name": name_input, "@type": "FormalParameter"} dico["@graph"].append(dico_input) dico_process["input"].append({"@id":dico_input["@id"]}) #ADD OUTPUTS @@ -456,7 +456,7 @@ class Process(Nextflow_Building_Blocks): name_output = output.get_code() dico_output = get_dico_from_tab_from_id(dico, name_output) if(dico_output==None): - dico_output = {"@id":name_output, "@type": "FormalParameter"} + dico_output = {"@id":f"#{name_output}", "@name": name_output, "@type": "FormalParameter"} dico["@graph"].append(dico_output) dico_process["output"].append({"@id":dico_output["@id"]}) #ADD isPartOf diff --git a/src/ro_crate.py b/src/ro_crate.py index 793a680..804ba34 100644 --- a/src/ro_crate.py +++ b/src/ro_crate.py @@ -47,9 +47,11 @@ class RO_Crate: tab_authors = [] for author in authors: try: - tab_authors.append({"@id":author["@id"], "email":author["email"]}) + #tab_authors.append({"@id":author["@id"], "email":author["email"]}) + tab_authors.append({"@id":f'#{"_".join(author["@id"].split())}', "@name":author["@id"],"email":author["email"]}) except: - tab_authors.append({"@id":author["@id"]}) + #tab_authors.append({"@id":author["@id"]}) + tab_authors.append({"@id":f'#{"_".join(author["@id"].split())}', "@name":author["@id"]}) root["author"] = tab_authors root["maintainer"] = tab_authors #Right now i'm assuming that all the authors are maintainers files = self.get_files() @@ -163,5 +165,6 @@ class RO_Crate: name = name.replace('github.com/', '') name = re.sub(r"^[ .]|[/<>:\"\\|?*]+|[ .]$", "-", name) - with open(f"{self.workflow.get_output_dir()}/ro-crate-metadata-{name}.json", 'w') as output_file : + #with open(f"{self.workflow.get_output_dir()}/ro-crate-metadata-{name}.json", 'w') as output_file : + with open(f"{self.workflow.get_output_dir()}/ro-crate-metadata.json", 'w') as output_file : json.dump(self.dico, output_file, indent=2) \ No newline at end of file diff --git a/src/subworkflow.py b/src/subworkflow.py index 363a7db..d468e06 100644 --- a/src/subworkflow.py +++ b/src/subworkflow.py @@ -256,7 +256,7 @@ class Subworkflow(Main_DSL2): name_input = input.get_code() dico_input = get_dico_from_tab_from_id(dico, name_input) if(dico_input==None): - dico_input = {"@id":name_input, "@type": "FormalParameter"} + dico_input = {"@id":f"#{name_input}", "@name": name_input, "@type": "FormalParameter"} dico["@graph"].append(dico_input) dico_sub["input"].append({"@id":dico_input["@id"]}) #ADD OUTPUTS @@ -268,7 +268,7 @@ class Subworkflow(Main_DSL2): name_output = output.get_code() dico_output = get_dico_from_tab_from_id(dico, name_output) if(dico_output==None): - dico_output = {"@id":name_output, "@type": "FormalParameter"} + dico_output = {"@id":f"#{name_output}", "@name": name_output, "@type": "FormalParameter"} dico["@graph"].append(dico_output) dico_sub["output"].append({"@id":dico_output["@id"]}) diff --git a/src/workflow.py b/src/workflow.py index 925193d..96b6b5e 100644 --- a/src/workflow.py +++ b/src/workflow.py @@ -71,6 +71,7 @@ class Workflow: self.workflow_directory = '/'.join(file.split('/')[:-1]) self.output_dir = Path(output_dir) self.rocrate = None + self.display_info = display_info self.name = name self.datePublished = datePublished self.description = description @@ -505,6 +506,12 @@ class Workflow: self.nextflow_file.initialise() if(create_rocrate): self.initialise_rocrate() + + if(self.display_info): + citation = """To cite BioFlow-Insight, please use the following publication: +George Marchment, Bryan Brancotte, Marie Schmit, Frédéric Lemoine, Sarah Cohen-Boulakia, BioFlow-Insight: facilitating reuse of Nextflow workflows with structure reconstruction and visualization, NAR Genomics and Bioinformatics, Volume 6, Issue 3, September 2024, lqae092, https://doi.org/10.1093/nargab/lqae092""" + print() + print(citation) def generate_all_graphs(self, render_graphs = True): """Method that generates all graphs representing the workflow -- GitLab