diff --git a/src/constant.py b/src/constant.py index f41e6ea0ea54f8618091840202fb7a6ee7a84eeb..fee907b785cc66e74817dbf85988064ddfbbaa4c 100644 --- a/src/constant.py +++ b/src/constant.py @@ -144,4 +144,19 @@ WORKFLOW_HEADER = r"workflow\s*\{" WORKFLOW_HEADER_2 = r'[^\w](workflow\s*{)' - +# MONTHS +#-------------------------- +month_mapping = { + 'Jan': '01', + 'Feb': '02', + 'Mar': '03', + 'Apr': '04', + 'May': '05', + 'Jun': '06', + 'Jul': '07', + 'Aug': '08', + 'Sep': '09', + 'Oct': '10', + 'Nov': '11', + 'Dec': '12' +} \ No newline at end of file diff --git a/src/ro_crate.py b/src/ro_crate.py index 71d4a39aa8b01950006f7147b4c1b63e8def5b1d..450bfb4d9863acfadcada867036ba94c2add4237 100644 --- a/src/ro_crate.py +++ b/src/ro_crate.py @@ -42,7 +42,7 @@ class RO_Crate: authors = self.workflow.get_authors() tab_authors = [] for author in authors: - tab_authors.append({"@id":author["@id"]}) + tab_authors.append({"@id":author["@id"], "name":author["name"]}) root["author"] = tab_authors root["maintainer"] = tab_authors #Right now i'm assuming that all the authors are maintainers files = self.get_files() diff --git a/src/subworkflow.py b/src/subworkflow.py index 31fa72780ad3e299796c46419fb77874a1fb4137..363a7db835bbe5913349479acc4b13e4fdb15e3d 100644 --- a/src/subworkflow.py +++ b/src/subworkflow.py @@ -285,7 +285,6 @@ class Subworkflow(Main_DSL2): else: for o in exe.get_origins(): if(o.get_type()=="Call"): - print("works") called+=o.get_elements_called() for c in called: diff --git a/src/workflow.py b/src/workflow.py index 47ef7c757a7127abfa23ed09c6157ebca30e616a..6e0fcd3dea1509bee0a6958fd8fe9934bf801326 100644 --- a/src/workflow.py +++ b/src/workflow.py @@ -1,37 +1,76 @@ from .nextflow_file import Nextflow_File from .ro_crate import RO_Crate +from . 
import os
import re
import subprocess


class Workflow:
    """Describe a Nextflow workflow and the metadata exposed through its getters.

    Every metadata value can be supplied explicitly to the constructor.
    When a value is not supplied, the name, main file, publication date and
    authors are derived from the workflow file address and from the git
    history of the directory containing it; the remaining getters fall back
    to the ``"TODO"`` placeholder.
    """

    # Month abbreviations as printed in the "Date:" lines of `git log`,
    # mapped to their two-digit month number.
    _MONTH_NUMBERS = {
        "Jan": "01", "Feb": "02", "Mar": "03", "Apr": "04",
        "May": "05", "Jun": "06", "Jul": "07", "Aug": "08",
        "Sep": "09", "Oct": "10", "Nov": "11", "Dec": "12",
    }

    # Both patterns are anchored at the start of a line: `git log` indents
    # commit messages, so text such as "Author: ..." written inside a message
    # is not mistaken for a real header.
    _DATE_PATTERN = re.compile(
        r"^Date: +\w+ +(\w+) +(\d+) +\d+:\d+:\d+ +(\d+)", re.MULTILINE
    )
    _AUTHOR_PATTERN = re.compile(r"^Author: +(.+?) +<([^>]+)>", re.MULTILINE)

    def __init__(self, file, duplicate=False, display_info=True,
                 name = None, datePublished=None, description=None,
                 license = None, creativeWorkStatus = None, authors = None,
                 version = None, keywords = None, producer = None,
                 publisher = None):
        """Build the workflow from its main Nextflow file.

        Args:
            file: Address of the main Nextflow file, forwarded to Nextflow_File.
            duplicate: Forwarded to Nextflow_File.
            display_info: Forwarded to Nextflow_File.
            name, datePublished, description, license, creativeWorkStatus,
            authors, version, keywords, producer, publisher: Optional
                metadata overrides; ``None`` means "not provided".
        """
        self.nextflow_file = Nextflow_File(file, duplicate = duplicate, display_info = display_info)
        self.rocrate = None
        self.name = name
        self.datePublished = datePublished
        self.description = description
        self.license = license
        self.creativeWorkStatus = creativeWorkStatus
        self.authors = authors
        self.version = version
        self.keywords = keywords
        self.producer = producer
        self.publisher = publisher
        self.log = None
        self.fill_log()

    def fill_log(self):
        """Store the output of ``git log --reverse`` in ``self.log``.

        The command runs in the directory containing the workflow file.
        ``self.log`` is the empty string when git cannot be started, and is
        whatever git wrote to stdout otherwise (nothing when the directory
        is not a git repository).
        """
        # An address without a directory part refers to the current directory.
        directory = os.path.dirname(self.nextflow_file.get_file_address()) or "."
        try:
            # `cwd=` keeps the working directory of this process untouched and
            # no shell or temporary file is involved. stderr is not captured,
            # so git's own error messages stay visible to the user.
            completed = subprocess.run(
                ["git", "log", "--reverse"],
                cwd=directory,
                stdout=subprocess.PIPE,
                universal_newlines=True,
            )
        except OSError:
            # git is not installed, or the directory cannot be entered.
            self.log = ""
            return
        self.log = completed.stdout

    def get_name(self):
        """Return the provided name, else the name of the workflow's directory."""
        if self.name is not None:
            return self.name
        address = self.nextflow_file.get_file_address()
        # abspath makes this work for addresses such as "main.nf" or "./main.nf".
        return os.path.basename(os.path.dirname(os.path.abspath(address)))

    #Format yyyy-mm-dd
    def get_datePublished(self):
        """Return the provided date, else the date of the first commit in the log.

        Returns ``None`` when no date was provided and the log holds no
        usable "Date:" line.
        """
        if self.datePublished is not None:
            return self.datePublished
        # The log is in chronological order (--reverse), so the first usable
        # match is the oldest commit.
        for match in self._DATE_PATTERN.finditer(self.log or ""):
            month = self._MONTH_NUMBERS.get(match.group(1))
            if month is None:
                continue
            # git does not pad the day ("Jan 2"), the yyyy-mm-dd format does.
            day = match.group(2).zfill(2)
            year = match.group(3)
            return f"{year}-{month}-{day}"
        return None

    #TODO -> no automatic extraction yet
    def get_description(self):
        """Return the provided description, or the "TODO" placeholder."""
        return "TODO" if self.description is None else self.description

    def get_main_file(self):
        """Return the file name (without its directory) of the main Nextflow file."""
        return os.path.basename(self.nextflow_file.get_file_address())

    #TODO -> no automatic extraction yet
    def get_license(self):
        """Return the provided license, or the "TODO" placeholder."""
        return "TODO" if self.license is None else self.license

    #TODO -> no automatic extraction yet
    def get_creativeWorkStatus(self):
        """Return the provided creative work status, or the "TODO" placeholder."""
        return "TODO" if self.creativeWorkStatus is None else self.creativeWorkStatus

    #TODO -> no automatic extraction yet
    def get_version(self):
        """Return the provided version, or the "TODO" placeholder."""
        return "TODO" if self.version is None else self.version

    #TODO -> authors are only identified by the email found in the git log
    def get_authors(self):
        """Return the provided authors, else the authors found in the git log.

        Returns:
            A list of ``{"@id": email, "name": name}`` dictionaries, one per
            distinct email, in order of first appearance. When an email
            appears with several names, the last name seen is kept.
        """
        if self.authors is not None:
            return self.authors
        names_by_email = {}
        for match in self._AUTHOR_PATTERN.finditer(self.log or ""):
            names_by_email[match.group(2)] = match.group(1)
        return [{"@id": email, "name": name} for email, name in names_by_email.items()]

    #TODO
    #Need to follow this format : "rna-seq, nextflow, bioinformatics, reproducibility, workflow, reproducible-research, bioinformatics-pipeline"