Skip to content
Snippets Groups Projects
Commit 48d0b921 authored by George Marchment's avatar George Marchment
Browse files

Update RO-Crate -> complet version

parent 8a5e87ad
No related branches found
No related tags found
No related merge requests found
......@@ -679,7 +679,7 @@ class Nextflow_File(Nextflow_Building_Blocks):
def add_subworkflows_2_rocrate(self, dico, file_dico, file_name):
    """Register every subworkflow of this file in the RO-Crate description.

    For each entry of ``self.subworkflows`` the key returned by its
    ``get_rocrate_key(dico)`` is appended to ``file_dico["hasPart"]`` as an
    ``{"@id": key}`` reference, and the subworkflow is then asked to add its
    own entry through ``add_2_rocrate(dico, file_name)``.

    Args:
        dico: The RO-Crate dictionary being built (passed through to the
            subworkflows).
        file_dico: The entry describing this file; its ``"hasPart"`` list is
            extended in place.
        file_name: Name of the file, forwarded to each subworkflow.
    """
    for sub in self.subworkflows:
        sub_key = sub.get_rocrate_key(dico)
        # Append the reference exactly once, in the {"@id": ...} form; the
        # bare key must not be appended as well or every part is listed twice.
        file_dico["hasPart"].append({"@id": sub_key})
        sub.add_2_rocrate(dico, file_name)
def add_2_rocrate(self, dico):
......
import json
import glob
import os
import re
from . import constant
class RO_Crate:
def __init__(self, workflow):
......@@ -42,7 +45,7 @@ class RO_Crate:
authors = self.workflow.get_authors()
tab_authors = []
for author in authors:
tab_authors.append({"@id":author["@id"], "name":author["name"]})
tab_authors.append({"@id":author["@id"], "email":author["email"]})
root["author"] = tab_authors
root["maintainer"] = tab_authors #Right now i'm assuming that all the authors are maintainers
files = self.get_files()
......@@ -69,23 +72,53 @@ class RO_Crate:
file_stats = os.stat(file)
return file_stats.st_size/1e3
#TODO
def fill_log_file(self, file, reverse = True):
    """Return the output of ``git log`` restricted to ``file``.

    The command runs in the directory that contains the workflow's main
    Nextflow file. This is a best-effort lookup: when git is not available or
    the command produces no output, an empty string is returned.

    Args:
        file: Path of the file whose history is requested, as understood by
            git from the workflow directory.
        reverse: When true, ``--reverse`` is passed so the oldest commit is
            listed first.

    Returns:
        The text printed by ``git log`` on standard output ("" on failure).
    """
    import subprocess

    # Directory of the main file; "." when the address has no directory part.
    repo_dir = os.path.dirname(self.workflow.nextflow_file.get_file_address()) or "."
    command = ["git", "log"]
    if reverse:
        command.append("--reverse")
    if file:
        # "--" makes git treat the value as a path even if it starts with "-".
        command += ["--", file]
    try:
        # Argument list + cwd: no shell interpolation of the file name, no
        # process-wide chdir, and no temporary file left in the repository.
        completed = subprocess.run(
            command,
            cwd=repo_dir,
            capture_output=True,
            encoding="utf-8",
            errors="replace",
            check=False,
        )
    except (OSError, ValueError):
        return ""
    return completed.stdout
def get_dateCreated(self, file):
    """Return the date of the first commit touching ``file`` as yyyy-mm-dd.

    The date is read from the first ``Date:`` line of the file's git log
    listed oldest-first (``fill_log_file(file, reverse=True)``).

    Returns:
        The formatted date, or ``None`` when the log holds no ``Date:`` line.

    Raises:
        KeyError: If the month name found in the log is not a key of
            ``constant.month_mapping``.
    """
    info = self.fill_log_file(file, reverse = True)
    match = re.search(r"Date: +\w+ +(\w+) +(\d+) +\d+:\d+:\d+ +(\d+)", info)
    if match is None:
        return None
    # NOTE(review): month_mapping presumably maps month abbreviations to
    # two-digit month numbers -- confirm in the constant module.
    month = constant.month_mapping[match.group(1)]
    # git prints single-digit days without a leading zero; pad to keep the
    # yyyy-mm-dd shape.
    day = match.group(2).zfill(2)
    year = match.group(3)
    return f"{year}-{month}-{day}"
#TODO
def get_dateModified(self, file):
    """Return the date of the latest commit touching ``file`` as yyyy-mm-dd.

    The date is read from the first ``Date:`` line of the file's git log in
    git's default order (``fill_log_file(file, reverse=False)``).

    Returns:
        The formatted date, or ``None`` when the log holds no ``Date:`` line.

    Raises:
        KeyError: If the month name found in the log is not a key of
            ``constant.month_mapping``.
    """
    info = self.fill_log_file(file, reverse = False)
    match = re.search(r"Date: +\w+ +(\w+) +(\d+) +\d+:\d+:\d+ +(\d+)", info)
    if match is None:
        return None
    # NOTE(review): month_mapping presumably maps month abbreviations to
    # two-digit month numbers -- confirm in the constant module.
    month = constant.month_mapping[match.group(1)]
    # git prints single-digit days without a leading zero; pad to keep the
    # yyyy-mm-dd shape.
    day = match.group(2).zfill(2)
    year = match.group(3)
    return f"{year}-{month}-{day}"
#TODO
def get_url(self, file):
    """Return the GitHub "blob" URL of ``file``, or ``None`` if unavailable.

    A URL is only produced when the workflow holds repository metadata
    (``self.workflow.dico`` is non-empty). The branch is taken from the
    metadata's ``"default_branch"`` entry when present and falls back to
    ``"main"`` otherwise.

    Args:
        file: Path of the file relative to the repository root.
    """
    repo_info = self.workflow.dico
    if not repo_info:
        return None
    branch = "main"
    if isinstance(repo_info, dict):
        # Repositories whose default branch is not "main" would otherwise
        # get a dead link.
        branch = repo_info.get("default_branch") or "main"
    return f"https://github.com/{self.workflow.get_address()}/blob/{branch}/{file}"
#TODO
def get_creators(self, file):
    """Return the author of the first commit touching ``file``.

    The author is read from the first ``Author: name <email>`` line of the
    file's git log listed oldest-first.

    Returns:
        A one-element list ``[{"@id": name}]``, or an empty list when no
        author line is found, so callers can iterate over the result
        unconditionally.
    """
    info = self.fill_log_file(file, reverse = True)
    match = re.search(r"Author: ([ \w-]+) <([^>]+)>", info)
    if match is None:
        return []
    return [{"@id": match.group(1)}]
def get_types(self, file):
types = ["File"]
if(file[-3:]==".nf"):
......@@ -101,10 +134,10 @@ class RO_Crate:
dico["@type"] = self.get_types(file)
dico["programmingLanguage"] = {"@id":self.get_programming_language(file)}
dico["contentSize"] = self.get_contentSize(file)
dico["dateCreated"] = self.get_dateCreated(file)
dico["dateModified"] = self.get_dateModified(file)
dico["url"] = self.get_url(file)
creators = self.get_creators(file)
dico["dateCreated"] = self.get_dateCreated(key)
dico["dateModified"] = self.get_dateModified(key)
dico["url"] = self.get_url(key)
creators = self.get_creators(key)
dico["creator"] = []
for creator in creators:
dico["creator"].append({"@id": creator["@id"]})
......
......@@ -5,6 +5,7 @@ from . import constant
import os
import re
import json
class Workflow:
......@@ -25,17 +26,52 @@ class Workflow:
self.keywords = keywords
self.producer = producer
self.publisher = publisher
self.log = None
self.log = ""
self.fill_log()
self.address = ""
self.set_address()
self.dico = {}
self.get_dico()
def fill_log(self):
    """Store the repository's ``git log --reverse`` output in ``self.log``.

    The command runs in the directory that contains the main Nextflow file.
    This is a best-effort lookup: when git is unavailable or prints nothing,
    ``self.log`` is set to an empty string.
    """
    import subprocess

    # Directory of the main file; "." when the address has no directory part.
    repo_dir = os.path.dirname(self.nextflow_file.get_file_address()) or "."
    try:
        # Run git once, without a shell, a process-wide chdir or a temporary
        # file written into the repository.
        completed = subprocess.run(
            ["git", "log", "--reverse"],
            cwd=repo_dir,
            capture_output=True,
            encoding="utf-8",
            errors="replace",
            check=False,
        )
    except (OSError, ValueError):
        self.log = ""
        return
    self.log = completed.stdout
def get_address(self):
    """Return the value currently stored in ``self.address``."""
    stored_address = self.address
    return stored_address
def set_address(self):
    """Set ``self.address`` from the URL of the ``origin`` git remote.

    ``git ls-remote --get-url origin`` is run in the directory that contains
    the main Nextflow file. When the output is a GitHub remote (https or SSH
    form, with or without a ``.git`` suffix) the address becomes the
    ``owner/repository`` part; otherwise it is the stripped command output.
    If git cannot be run at all, ``self.address`` is left unchanged.
    """
    import subprocess

    # Directory of the main file; "." when the address has no directory part.
    repo_dir = os.path.dirname(self.nextflow_file.get_file_address()) or "."
    try:
        completed = subprocess.run(
            ["git", "ls-remote", "--get-url", "origin"],
            cwd=repo_dir,
            capture_output=True,
            encoding="utf-8",
            errors="replace",
            check=False,
        )
    except (OSError, ValueError):
        return
    remote_url = completed.stdout.strip()
    # The repository part is matched lazily so an optional trailing ".git"
    # is dropped while dots inside the repository name are kept.
    match = re.fullmatch(
        r"(?:https://github\.com/|ssh://git@github\.com/|git@github\.com:)"
        r"([^/\s]+/[^/\s]+?)(?:\.git)?/?",
        remote_url,
    )
    self.address = match.group(1) if match else remote_url
def get_dico(self):
    """Fetch the repository metadata from the GitHub API into ``self.dico``.

    Despite its name this method returns nothing: it requests
    ``https://api.github.com/repos/<self.address>`` and, when the answer is a
    JSON object, stores it in ``self.dico``. This is a best-effort lookup:
    on any network, HTTP or decoding failure ``self.dico`` is left untouched.
    No request is made when ``self.address`` is not an ``owner/repo`` slug.
    """
    import http.client
    import urllib.request

    slug = (self.address or "").strip()
    # The address comes from the git remote; only query when it has the
    # "owner/repo" shape so nothing unexpected ends up in the request URL.
    if re.fullmatch(r"[\w.-]+/[\w.-]+", slug) is None:
        return
    request = urllib.request.Request(
        f"https://api.github.com/repos/{slug}",
        headers={
            "Accept": "application/vnd.github+json",
            "User-Agent": "python-urllib",
        },
    )
    try:
        # A timeout keeps construction from hanging on an unreachable network.
        with urllib.request.urlopen(request, timeout=10) as response:
            payload = json.load(response)
    except (OSError, ValueError, http.client.HTTPException):
        return
    if isinstance(payload, dict):
        self.dico = payload
......@@ -47,6 +83,7 @@ class Workflow:
return self.name
#Format yyyy-mm-dd
#Here I return the date of the first commit
def get_datePublished(self):
if(self.datePublished==None):
for match in re.finditer(r"Date: +\w+ +(\w+) +(\d+) +\d+:\d+:\d+ +(\d+)",self.log):
......@@ -58,18 +95,32 @@ class Workflow:
return self.datePublished
#TODO
def get_description(self):
    """Return the workflow description.

    An explicitly provided ``self.description`` takes precedence; otherwise
    the ``"description"`` entry of the repository metadata ``self.dico`` is
    returned, or ``None`` when that entry is not available.
    """
    if self.description is not None:
        return self.description
    try:
        return self.dico["description"]
    except (KeyError, TypeError):
        # Missing entry or metadata that is not a mapping.
        return None
def get_main_file(self):
    """Return the last "/"-separated component of the main file's address."""
    full_address = self.nextflow_file.get_file_address()
    _, _, file_name = full_address.rpartition("/")
    return file_name
#TODO
def get_license(self):
    """Return the workflow license.

    An explicitly provided ``self.license`` takes precedence; otherwise the
    ``["license"]["key"]`` entry of the repository metadata ``self.dico`` is
    returned, or ``None`` when that entry is not available.
    """
    if self.license is not None:
        return self.license
    try:
        return self.dico["license"]["key"]
    except (KeyError, TypeError):
        # Missing entry, or a "license" value that is not a mapping
        # (for example None).
        return None
#TODO
def get_creativeWorkStatus(self):
......@@ -78,32 +129,51 @@ class Workflow:
#TODO
def get_version(self):
    """Return the workflow version; not implemented yet, always "TODO"."""
    placeholder = "TODO"
    return placeholder
#TODO -> this doesn't work perfectly
def get_authors(self):
    """Return the workflow authors.

    An explicitly provided ``self.authors`` takes precedence. Otherwise the
    authors are collected from the ``Author: name <email>`` lines of
    ``self.log``: one entry per distinct e-mail address, in order of first
    appearance, using the last name seen for that address.

    Returns:
        ``self.authors`` when set, else a list of
        ``{"@id": name, "email": email}`` dictionaries.
    """
    if self.authors is not None:
        return self.authors
    # Keyed by e-mail so a contributor with several commits is listed once.
    names_by_email = {}
    for match in re.finditer(r"Author: ([ \w-]+) <([^>]+)>", self.log):
        names_by_email[match.group(2)] = match.group(1)
    return [{"@id": name, "email": email} for email, name in names_by_email.items()]
#TODO
#Need to follow this format : "rna-seq, nextflow, bioinformatics, reproducibility, workflow, reproducible-research, bioinformatics-pipeline"
def get_keywords(self):
    """Return the workflow keywords as one comma-separated string.

    An explicitly provided ``self.keywords`` takes precedence; otherwise the
    ``"topics"`` entry of the repository metadata ``self.dico`` is joined
    with ``", "``. ``None`` is returned when the topics are not available.
    """
    if self.keywords is not None:
        return self.keywords
    try:
        return ", ".join(self.dico["topics"])
    except (KeyError, TypeError):
        # Missing entry, or topics that are not an iterable of strings.
        return None
#TODO
def get_producer(self):
    """Return the workflow producer.

    An explicitly provided ``self.producer`` takes precedence; otherwise an
    ``{"@id": login}`` reference is built from the ``["owner"]["login"]``
    entry of the repository metadata ``self.dico``. ``None`` is returned when
    that entry is not available.
    """
    if self.producer is not None:
        return self.producer
    try:
        return {"@id": self.dico["owner"]["login"]}
    except (KeyError, TypeError):
        # Missing entry, or an "owner" value that is not a mapping.
        return None
#TODO
def get_publisher(self):
    """Return the workflow publisher.

    An explicitly provided ``self.publisher`` takes precedence, as with the
    other metadata getters of this class. Otherwise the publisher is
    ``"https://github.com/"`` when repository metadata was retrieved
    (``self.dico`` is not empty) and ``None`` when it was not.
    """
    if self.publisher is not None:
        return self.publisher
    if self.dico != {}:
        return "https://github.com/"
    return None
def get_output_dir(self):
    """Return the output directory reported by the main Nextflow file object."""
    main_file = self.nextflow_file
    return main_file.get_output_dir()
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment