From 269ca16c12eecc729261dd34248e344831de9728 Mon Sep 17 00:00:00 2001
From: George Marchment <georgemarchment@yahoo.fr>
Date: Fri, 23 Aug 2024 10:48:54 +0200
Subject: [PATCH] Print citation after initialisation; update RO-Crate identifiers and output file name

---
 README.md            | 4 ++--
 src/nextflow_file.py | 4 ++--
 src/process.py       | 6 +++---
 src/ro_crate.py      | 9 ++++++---
 src/subworkflow.py   | 4 ++--
 src/workflow.py      | 7 +++++++
 6 files changed, 22 insertions(+), 12 deletions(-)

diff --git a/README.md b/README.md
index 42dbd76..92f97da 100644
--- a/README.md
+++ b/README.md
@@ -122,10 +122,10 @@ The structure of this folder is organised as such :
 │   ├── specification_wo_orphan_operations_wo_labels.dot
 │   ├── specification_wo_orphan_operations_wo_labels.mmd
 │   └── specification_wo_orphan_operations_wo_labels.png
-└── ro-crate-metadata-rnaseq-nf.json
+└── ro-crate-metadata.json
 ```
 
-* The `ro-crate-metadata-rnaseq-nf.json` describes the workflow following an extended Workflow [RO-Crate](https://www.researchobject.org/ro-crate/) profile. The description of this extended profile can be found [here](https://gitlab.liris.cnrs.fr/sharefair/posters/swat4hcls-2024).
+* The `ro-crate-metadata.json` describes the workflow following an extended Workflow [RO-Crate](https://www.researchobject.org/ro-crate/) profile. The description of this extended profile can be found [here](https://gitlab.liris.cnrs.fr/sharefair/posters/swat4hcls-2024).
 * the `debug` folder contains different intermediary files which are ussefull for debugging
 * the `graphs` folder contains the different graphs which are generated. For each of the 3 graphs described above, **BioFlow-Insight** generates :
   * A `json` file which describes the graph using **BioFlow-Insight** specific format
diff --git a/src/nextflow_file.py b/src/nextflow_file.py
index 4a43c44..c6588d6 100644
--- a/src/nextflow_file.py
+++ b/src/nextflow_file.py
@@ -706,7 +706,7 @@ class Nextflow_File(Nextflow_Building_Blocks):
     #    self.graph.get_metadata_graph_wo_operations()
     
     def add_main_DSL1_2_rocrate(self, dico, file_dico, file_name):
-        main_key = f"{file_name}/main"
+        main_key = f"{file_name}#main"
         file_dico["hasPart"].append(main_key)
         dico_main = {}
         dico_main["@id"] = main_key
@@ -719,7 +719,7 @@ class Nextflow_File(Nextflow_Building_Blocks):
         dico_main["output"] = []
         dico_main["isPartOf"] = [{"@id": file_name}]
         dico_main["hasPart"] = []
-        self.add_processes_2_rocrate(dico, dico_main, main_key)
+        self.add_processes_2_rocrate(dico, dico_main, main_key.split("#")[0])
         dico["@graph"].append(dico_main)
 
     def add_processes_2_rocrate(self, dico, file_dico, file_name):
diff --git a/src/process.py b/src/process.py
index e5b25ac..c825e4c 100644
--- a/src/process.py
+++ b/src/process.py
@@ -434,7 +434,7 @@ class Process(Nextflow_Building_Blocks):
             dico_process = {}
             dico_process["@id"] = process_key
             dico_process["name"] = "Process"
-            dico_process["@type"] = ["SoftwareSourceCode", "Script"]
+            dico_process["@type"] = ["SoftwareSourceCode"]
             #ADD INPUTS
             dico_process["input"] = []
             for input in self.get_inputs():
@@ -444,7 +444,7 @@ class Process(Nextflow_Building_Blocks):
                     name_input = input.get_code()
                 dico_input = get_dico_from_tab_from_id(dico, name_input)
                 if(dico_input==None):
-                    dico_input = {"@id":name_input, "@type": "FormalParameter"}
+                    dico_input = {"@id":f"#{name_input}", "@name": name_input, "@type": "FormalParameter"}
                     dico["@graph"].append(dico_input)
                 dico_process["input"].append({"@id":dico_input["@id"]})
             #ADD OUTPUTS
@@ -456,7 +456,7 @@ class Process(Nextflow_Building_Blocks):
                     name_output = output.get_code()
                 dico_output = get_dico_from_tab_from_id(dico, name_output)
                 if(dico_output==None):
-                    dico_output = {"@id":name_output, "@type": "FormalParameter"}
+                    dico_output = {"@id":f"#{name_output}", "@name": name_output, "@type": "FormalParameter"}
                     dico["@graph"].append(dico_output)
                 dico_process["output"].append({"@id":dico_output["@id"]})
             #ADD isPartOf
diff --git a/src/ro_crate.py b/src/ro_crate.py
index 793a680..804ba34 100644
--- a/src/ro_crate.py
+++ b/src/ro_crate.py
@@ -47,9 +47,11 @@ class RO_Crate:
         tab_authors = []
         for author in authors:
             try:
-                tab_authors.append({"@id":author["@id"], "email":author["email"]})
+                #tab_authors.append({"@id":author["@id"], "email":author["email"]})
+                tab_authors.append({"@id":f'#{"_".join(author["@id"].split())}', "@name":author["@id"],"email":author["email"]})
             except:
-                tab_authors.append({"@id":author["@id"]})
+                #tab_authors.append({"@id":author["@id"]})
+                tab_authors.append({"@id":f'#{"_".join(author["@id"].split())}', "@name":author["@id"]})
         root["author"] = tab_authors
         root["maintainer"] = tab_authors #Right now i'm assuming that all the authors are maintainers
         files = self.get_files()
@@ -163,5 +165,6 @@ class RO_Crate:
         name = name.replace('github.com/', '')
         name = re.sub(r"^[ .]|[/<>:\"\\|?*]+|[ .]$", "-", name)
 
-        with open(f"{self.workflow.get_output_dir()}/ro-crate-metadata-{name}.json", 'w') as output_file :
+        #with open(f"{self.workflow.get_output_dir()}/ro-crate-metadata-{name}.json", 'w') as output_file :
+        with open(f"{self.workflow.get_output_dir()}/ro-crate-metadata.json", 'w') as output_file :
             json.dump(self.dico, output_file, indent=2)
\ No newline at end of file
diff --git a/src/subworkflow.py b/src/subworkflow.py
index 363a7db..d468e06 100644
--- a/src/subworkflow.py
+++ b/src/subworkflow.py
@@ -256,7 +256,7 @@ class Subworkflow(Main_DSL2):
                     name_input = input.get_code()
                 dico_input = get_dico_from_tab_from_id(dico, name_input)
                 if(dico_input==None):
-                    dico_input = {"@id":name_input, "@type": "FormalParameter"}
+                    dico_input = {"@id":f"#{name_input}", "@name": name_input, "@type": "FormalParameter"}
                     dico["@graph"].append(dico_input)
                 dico_sub["input"].append({"@id":dico_input["@id"]})
             #ADD OUTPUTS
@@ -268,7 +268,7 @@ class Subworkflow(Main_DSL2):
                     name_output = output.get_code()
                 dico_output = get_dico_from_tab_from_id(dico, name_output)
                 if(dico_output==None):
-                    dico_output = {"@id":name_output, "@type": "FormalParameter"}
+                    dico_output = {"@id":f"#{name_output}", "@name": name_output, "@type": "FormalParameter"}
                     dico["@graph"].append(dico_output)
                 dico_sub["output"].append({"@id":dico_output["@id"]})
 
diff --git a/src/workflow.py b/src/workflow.py
index 925193d..96b6b5e 100644
--- a/src/workflow.py
+++ b/src/workflow.py
@@ -71,6 +71,7 @@ class Workflow:
         self.workflow_directory = '/'.join(file.split('/')[:-1])
         self.output_dir = Path(output_dir)
         self.rocrate = None
+        self.display_info = display_info
         self.name = name
         self.datePublished = datePublished
         self.description = description
@@ -505,6 +506,12 @@ class Workflow:
         self.nextflow_file.initialise()
         if(create_rocrate):
             self.initialise_rocrate()
+        
+        if(self.display_info):
+            citation = """To cite BioFlow-Insight, please use the following publication:
+George Marchment, Bryan Brancotte, Marie Schmit, Frédéric Lemoine, Sarah Cohen-Boulakia, BioFlow-Insight: facilitating reuse of Nextflow workflows with structure reconstruction and visualization, NAR Genomics and Bioinformatics, Volume 6, Issue 3, September 2024, lqae092, https://doi.org/10.1093/nargab/lqae092"""
+            print()
+            print(citation)
 
     def generate_all_graphs(self, render_graphs = True):
         """Method that generates all graphs representing the workflow
-- 
GitLab