Skip to content
Snippets Groups Projects
Commit 20cd8255 authored by George Marchment
Browse files

Update -> ro-crate

parent 9f1bffdc
No related branches found
No related tags found
No related merge requests found
import json
import glob
import os
class RO_Crate:
    """Build an RO-Crate metadata document (JSON-LD) describing a workflow.

    The crate is assembled in ``self.dico`` as a ``{"@context", "@graph"}``
    dictionary and written to
    ``<output_dir>/ro-crate-metadata-<workflow name>.json`` by ``initialise``.

    The ``workflow`` object must provide the getters used below
    (``get_file_address``, ``get_output_dir``, ``get_name``,
    ``get_datePublished``, ``get_description``, ``get_main_file``,
    ``get_license``, ``get_authors``, ``get_publisher``,
    ``get_creativeWorkStatus``, ``get_version``, ``get_keywords`` and
    ``get_producer``).
    """

    def __init__(self, workflow):
        """Remember the workflow and derive the directory that contains it.

        Args:
            workflow: object describing the workflow (see class docstring).
        """
        self.workflow = workflow
        # Everything before the last '/' of the workflow file address is the
        # crate directory. A bare file name has no '/' at all: fall back to
        # the current directory, otherwise the glob pattern built in
        # get_files would start at the filesystem root.
        head, separator, _ = workflow.get_file_address().rpartition('/')
        self.directory = head if separator else "."
        # Full paths of the files found by get_files.
        self.files = []
        # The JSON-LD document being built.
        self.dico = {}

    def _relative_id(self, file):
        """Return ``file`` with the ``<directory>/`` prefix stripped."""
        return file[len(self.directory) + 1:]

    def get_files(self):
        """Collect the files of the crate directory (recursively).

        Only paths whose last component contains a dot are considered
        (pattern ``*.*``). Directories that happen to match the pattern are
        discarded because every entry is later described as a ``File``. The
        result is sorted so the generated metadata is reproducible.

        Side effect: ``self.files`` is replaced by the list of full paths.

        Returns:
            list[dict]: one ``{"@id": <path relative to the directory>}``
            reference per file.
        """
        pattern = f'{self.directory}/**/*.*'
        matches = glob.glob(pattern, recursive=True)
        self.files = sorted(path for path in matches if os.path.isfile(path))
        return [{"@id": self._relative_id(path)} for path in self.files]

    def initialise_dico(self):
        """Reset ``self.dico`` and add the descriptor and root entities.

        Calls ``get_files``, so ``self.files`` is refreshed as a side effect.
        """
        self.dico["@context"] = "https://w3id.org/ro/crate/1.1/context"
        self.dico["@graph"] = []

        #GENERAL
        general = {}
        general["@id"] = f"ro-crate-metadata-{self.workflow.get_name()}.json"
        general["@type"] = "CreativeWork"
        general["about"] = {"@id": "./"}
        general["conformsTo"] = [
            {"@id": "https://w3id.org/ro/crate/1.1"}
            #, {"@id":"https://w3id.org/workflowhub/workflow-ro-crate/1.0"}#This description does not conform
        ]
        self.dico["@graph"].append(general)

        #ROOT
        root = {}
        root["@id"] = "./"
        root["@type"] = "Dataset"
        root["name"] = self.workflow.get_name()
        root["datePublished"] = self.workflow.get_datePublished()
        root["description"] = self.workflow.get_description()
        #We do not consider a File as a "ComputationalWorkflow" since multiple (sub)workflows can be defined in a same file
        root["mainEntity"] = {"@id": self.workflow.get_main_file(),
                              "@type": ["File", "SoftwareSourceCode"]}
        root["license"] = {"@id": self.workflow.get_license()}
        tab_authors = [{"@id": author["@id"]}
                       for author in self.workflow.get_authors()]
        root["author"] = tab_authors
        root["maintainer"] = tab_authors #Right now i'm assuming that all the authors are maintainers
        # get_files already returns fresh {"@id": ...} references.
        root["hasPart"] = self.get_files()
        root["publisher"] = {"@id": self.workflow.get_publisher()}
        #subjectOf TODO
        root["subjectOf"] = None
        root["creativeWorkStatus"] = self.workflow.get_creativeWorkStatus()
        # "version" is the schema.org property; "@version" is a JSON-LD
        # keyword that is only meaningful inside a context definition.
        root["version"] = self.workflow.get_version()
        root["keywords"] = self.workflow.get_keywords()
        root["producer"] = self.workflow.get_producer()
        self.dico["@graph"].append(root)

    #TODO
    def get_programming_language(self, file):
        """Return the language identifier of ``file``, or None if unknown.

        Only the ``.nf`` suffix (Nextflow) is recognised for now.
        """
        if file.endswith(".nf"):
            return "https://w3id.org/workflowhub/workflow-ro-crate#nextflow"
        return None

    def get_contentSize(self, file):
        """Return the size of ``file`` in kilobytes (``st_size / 1e3``)."""
        return os.stat(file).st_size / 1e3

    #TODO
    def get_dateCreated(self, file):
        """Placeholder: always returns the string "TODO"."""
        return "TODO"

    #TODO
    def get_dateModified(self, file):
        """Placeholder: always returns the string "TODO"."""
        return "TODO"

    #TODO
    def get_url(self, file):
        """Placeholder: always returns the string "TODO"."""
        return "TODO"

    #TODO
    def get_creators(self, file):
        """Placeholder: always returns a single hard-coded creator."""
        return [{"@id": "George"}]

    #TODO
    def get_types(self, file):
        """Return the ``@type`` list of ``file``.

        Every file is a "File"; ``.nf`` files are also "SoftwareSourceCode".
        """
        types = ["File"]
        if file.endswith(".nf"):
            types.append("SoftwareSourceCode")
        return types

    def initialise_file(self, file):
        """Append the entity describing ``file`` to the ``@graph``.

        Args:
            file: full path of a file located under ``self.directory``.
        """
        key = self._relative_id(file)
        dico = {}
        dico["@id"] = key
        dico["name"] = key
        dico["@type"] = self.get_types(file)
        # Emit the property only when the language is known: a
        # {"@id": null} reference is not a valid node reference.
        language = self.get_programming_language(file)
        if language is not None:
            dico["programmingLanguage"] = {"@id": language}
        dico["contentSize"] = self.get_contentSize(file)
        dico["dateCreated"] = self.get_dateCreated(file)
        dico["dateModified"] = self.get_dateModified(file)
        dico["url"] = self.get_url(file)
        dico["creator"] = [{"@id": creator["@id"]}
                           for creator in self.get_creators(file)]
        dico["isPartOf"] = []
        dico["hasPart"] = []
        self.dico["@graph"].append(dico)

    def initialise(self):
        """Build the whole crate and write it as JSON in the output directory.

        The file is written to
        ``<workflow output dir>/ro-crate-metadata-<workflow name>.json``.
        """
        # initialise_dico fills self.files (through get_files), so it has to
        # run before the per-file entities are generated.
        self.initialise_dico()
        for file in self.files:
            self.initialise_file(file)
        output_path = f"{self.workflow.get_output_dir()}/ro-crate-metadata-{self.workflow.get_name()}.json"
        with open(output_path, 'w', encoding="utf-8") as output_file:
            json.dump(self.dico, output_file, indent=2)
\ No newline at end of file
......@@ -8,6 +8,57 @@ class Workflow:
self.nextflow_file = Nextflow_File(file, duplicate = duplicate, display_info = display_info)
self.rocrate = None
#TODO
def get_name(self):
    """Return the workflow name.

    Placeholder for now: always the literal string "TODO".
    """
    placeholder = "TODO"
    return placeholder
#TODO
def get_datePublished(self):
    """Return the publication date of the workflow.

    Placeholder for now: always the literal string "TODO".
    """
    placeholder = "TODO"
    return placeholder
#TODO
def get_description(self):
    """Return the description of the workflow.

    Placeholder for now: always the literal string "TODO".
    """
    placeholder = "TODO"
    return placeholder
#TODO
def get_main_file(self):
    """Return the identifier of the workflow's main file.

    Placeholder for now: always the literal string "TODO".
    """
    placeholder = "TODO"
    return placeholder
#TODO
def get_license(self):
    """Return the identifier of the workflow's license.

    Placeholder for now: always the literal string "TODO".
    """
    placeholder = "TODO"
    return placeholder
#TODO
def get_main_license(self):
    """Return the main license of the workflow.

    Placeholder for now: always the literal string "TODO".
    """
    placeholder = "TODO"
    return placeholder
#TODO
def get_creativeWorkStatus(self):
    """Return the creative-work status of the workflow.

    Placeholder for now: always the literal string "TODO".
    """
    placeholder = "TODO"
    return placeholder
#TODO
def get_version(self):
    """Return the version of the workflow.

    Placeholder for now: always the literal string "TODO".
    """
    placeholder = "TODO"
    return placeholder
#TODO
def get_authors(self):
    """Return the workflow authors as a list of ``{"@id": ...}`` dicts.

    Placeholder for now: a single hard-coded author.
    """
    author = {"@id": "George"}
    return [author]
#TODO
#Need to follow this format : "rna-seq, nextflow, bioinformatics, reproducibility, workflow, reproducible-research, bioinformatics-pipeline"
def get_keywords(self):
    """Return the workflow keywords.

    The expected final format is one comma-separated string, e.g.
    "rna-seq, nextflow, bioinformatics". Placeholder for now: always the
    literal string "TODO".
    """
    placeholder = "TODO"
    return placeholder
#TODO
def get_producer(self):
    """Return the producer of the workflow.

    Placeholder for now: always the literal string "TODO".
    """
    placeholder = "TODO"
    return placeholder
#TODO
def get_publisher(self):
    """Return the identifier of the workflow's publisher.

    Placeholder for now: always the literal string "TODO".
    """
    placeholder = "TODO"
    return placeholder
def get_output_dir(self):
    """Return the output directory, as reported by the wrapped Nextflow file."""
    source_file = self.nextflow_file
    return source_file.get_output_dir()
def get_file_address(self):
    """Return the file address, as reported by the wrapped Nextflow file."""
    source_file = self.nextflow_file
    return source_file.get_file_address()
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment