Skip to content
Snippets Groups Projects
Commit 48d0b921 authored by George Marchment's avatar George Marchment
Browse files

Update RO-Crate -> complete version

parent 8a5e87ad
No related branches found
No related tags found
No related merge requests found
...@@ -679,7 +679,7 @@ class Nextflow_File(Nextflow_Building_Blocks): ...@@ -679,7 +679,7 @@ class Nextflow_File(Nextflow_Building_Blocks):
def add_subworkflows_2_rocrate(self, dico, file_dico, file_name):
    """Reference every subworkflow from this file's RO-Crate entry.

    For each element of ``self.subworkflows`` the RO-Crate key is obtained
    with ``get_rocrate_key(dico)``, appended to ``file_dico["hasPart"]`` as
    an ``{"@id": key}`` reference, and the subworkflow is then asked to add
    its own entry through ``add_2_rocrate(dico, file_name)``.

    Args:
        dico: RO-Crate dictionary handed to each subworkflow.
        file_dico: Entry describing the current file; must already contain
            a ``"hasPart"`` list.
        file_name: Name of the file, forwarded to each subworkflow.
    """
    for sub in self.subworkflows:
        sub_key = sub.get_rocrate_key(dico)
        # Parts are referenced by identifier object, not by bare string.
        file_dico["hasPart"].append({"@id": sub_key})
        sub.add_2_rocrate(dico, file_name)
def add_2_rocrate(self, dico): def add_2_rocrate(self, dico):
......
import json import json
import glob import glob
import os import os
import re
from . import constant
class RO_Crate: class RO_Crate:
def __init__(self, workflow): def __init__(self, workflow):
...@@ -42,7 +45,7 @@ class RO_Crate: ...@@ -42,7 +45,7 @@ class RO_Crate:
authors = self.workflow.get_authors() authors = self.workflow.get_authors()
tab_authors = [] tab_authors = []
for author in authors: for author in authors:
tab_authors.append({"@id":author["@id"], "name":author["name"]}) tab_authors.append({"@id":author["@id"], "email":author["email"]})
root["author"] = tab_authors root["author"] = tab_authors
root["maintainer"] = tab_authors #Right now i'm assuming that all the authors are maintainers root["maintainer"] = tab_authors #Right now i'm assuming that all the authors are maintainers
files = self.get_files() files = self.get_files()
...@@ -69,23 +72,53 @@ class RO_Crate: ...@@ -69,23 +72,53 @@ class RO_Crate:
file_stats = os.stat(file) file_stats = os.stat(file)
return file_stats.st_size/1e3 return file_stats.st_size/1e3
def fill_log_file(self, file, reverse = True):
    """Return the ``git log`` output restricted to *file*.

    The command runs in the directory that contains the workflow's main
    Nextflow file (the current directory when that address has no
    directory part).

    Args:
        file: Path of the file whose history is requested, passed to git
            as a pathspec.
        reverse: When true, ``--reverse`` is added to the git command.

    Returns:
        The text git wrote to standard output, or ``""`` when git could
        not be started or its output could not be decoded.
    """
    # Function-scope import so the block does not rely on a file-level one.
    import subprocess

    repo_dir = os.path.dirname(self.workflow.nextflow_file.get_file_address()) or "."
    command = ["git", "log"]
    if reverse:
        command.append("--reverse")
    # "--" makes git treat `file` strictly as a path; the argument list
    # (no shell) keeps special characters in the name from being executed.
    command += ["--", file]
    try:
        completed = subprocess.run(
            command,
            cwd=repo_dir,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            universal_newlines=True,
        )
    except (OSError, UnicodeError):
        # Best effort: a missing git binary or directory yields no history.
        return ""
    return completed.stdout
def get_dateCreated(self, file):
    """Return the first ``Date:`` found in the oldest-first git log of *file*.

    Args:
        file: Path forwarded to ``self.fill_log_file``.

    Returns:
        A ``"yyyy-mm-dd"`` string, or ``None`` when the log contains no
        ``Date:`` line.
    """
    info = self.fill_log_file(file, reverse = True)
    match = re.search(r"Date: +\w+ +(\w+) +(\d+) +\d+:\d+:\d+ +(\d+)", info)
    if match is None:
        return None
    month_name, day, year = match.groups()
    # NOTE(review): constant.month_mapping presumably maps "Jan" -> "01";
    # zfill only pads and leaves an already two-character value untouched.
    month = str(constant.month_mapping[month_name]).zfill(2)
    # git prints single-digit days without a leading zero ("Jan 8").
    return f"{year}-{month}-{day.zfill(2)}"
#TODO
def get_dateModified(self, file):
    """Return the first ``Date:`` found in the newest-first git log of *file*.

    Args:
        file: Path forwarded to ``self.fill_log_file``.

    Returns:
        A ``"yyyy-mm-dd"`` string, or ``None`` when the log contains no
        ``Date:`` line.
    """
    info = self.fill_log_file(file, reverse = False)
    match = re.search(r"Date: +\w+ +(\w+) +(\d+) +\d+:\d+:\d+ +(\d+)", info)
    if match is None:
        return None
    month_name, day, year = match.groups()
    # NOTE(review): constant.month_mapping presumably maps "Jan" -> "01";
    # zfill only pads and leaves an already two-character value untouched.
    month = str(constant.month_mapping[month_name]).zfill(2)
    # git prints single-digit days without a leading zero ("Jan 8").
    return f"{year}-{month}-{day.zfill(2)}"
#TODO
def get_url(self, file):
    """Return the GitHub URL of *file*.

    Args:
        file: Path of the file, appended to the URL as given.

    Returns:
        ``https://github.com/<address>/blob/<branch>/<file>`` when
        ``self.workflow.dico`` is non-empty, otherwise ``None``.
    """
    repo_info = self.workflow.dico
    if not repo_info:
        return None
    # Not every repository calls its default branch "main"; use the value
    # from the repository metadata when it is present.
    branch = repo_info.get("default_branch") or "main"
    return f"https://github.com/{self.workflow.get_address()}/blob/{branch}/{file}"
#TODO
def get_creators(self, file):
    """Return the author of the first entry in the oldest-first log of *file*.

    Args:
        file: Path forwarded to ``self.fill_log_file``.

    Returns:
        A one-element list ``[{"@id": <author name>}]``, or an empty list
        when the log contains no ``Author:`` line, so the result can always
        be iterated.
    """
    info = self.fill_log_file(file, reverse = True)
    # Anchored at line start and permissive about the name, so names such
    # as "J. O'Neil" are not skipped.
    match = re.search(r"^Author: +(.+?) +<([^>]*)>", info, flags=re.MULTILINE)
    if match is None:
        return []
    return [{"@id": match.group(1)}]
def get_types(self, file): def get_types(self, file):
types = ["File"] types = ["File"]
if(file[-3:]==".nf"): if(file[-3:]==".nf"):
...@@ -101,10 +134,10 @@ class RO_Crate: ...@@ -101,10 +134,10 @@ class RO_Crate:
dico["@type"] = self.get_types(file) dico["@type"] = self.get_types(file)
dico["programmingLanguage"] = {"@id":self.get_programming_language(file)} dico["programmingLanguage"] = {"@id":self.get_programming_language(file)}
dico["contentSize"] = self.get_contentSize(file) dico["contentSize"] = self.get_contentSize(file)
dico["dateCreated"] = self.get_dateCreated(file) dico["dateCreated"] = self.get_dateCreated(key)
dico["dateModified"] = self.get_dateModified(file) dico["dateModified"] = self.get_dateModified(key)
dico["url"] = self.get_url(file) dico["url"] = self.get_url(key)
creators = self.get_creators(file) creators = self.get_creators(key)
dico["creator"] = [] dico["creator"] = []
for creator in creators: for creator in creators:
dico["creator"].append({"@id": creator["@id"]}) dico["creator"].append({"@id": creator["@id"]})
......
...@@ -5,6 +5,7 @@ from . import constant ...@@ -5,6 +5,7 @@ from . import constant
import os import os
import re import re
import json
class Workflow: class Workflow:
...@@ -25,17 +26,52 @@ class Workflow: ...@@ -25,17 +26,52 @@ class Workflow:
self.keywords = keywords self.keywords = keywords
self.producer = producer self.producer = producer
self.publisher = publisher self.publisher = publisher
self.log = None self.log = ""
self.fill_log() self.fill_log()
self.address = ""
self.set_address()
self.dico = {}
self.get_dico()
def fill_log(self):
    """Store the output of ``git log --reverse`` in ``self.log``.

    The command runs in the directory that contains the workflow's main
    Nextflow file (the current directory when that address has no
    directory part). If git cannot be started or its output cannot be
    decoded, ``self.log`` is left unchanged.
    """
    # Function-scope import so the block does not rely on a file-level one.
    import subprocess

    repo_dir = os.path.dirname(self.nextflow_file.get_file_address()) or "."
    try:
        # cwd= replaces the process-wide chdir and captured stdout replaces
        # the temporary file that used to be written into the repository.
        completed = subprocess.run(
            ["git", "log", "--reverse"],
            cwd=repo_dir,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            universal_newlines=True,
        )
    except (OSError, UnicodeError):
        return
    self.log = completed.stdout
def get_address(self):
    """Return the value currently stored in ``self.address``."""
    address = self.address
    return address
def set_address(self):
    """Set ``self.address`` from the ``origin`` remote of the repository.

    ``git ls-remote --get-url origin`` is run in the directory that
    contains the workflow's main Nextflow file. When the result is a
    GitHub URL (HTTPS or SSH form, with or without a ``.git`` suffix),
    ``self.address`` becomes the ``owner/repository`` part. Otherwise it
    becomes git's output with surrounding whitespace removed. If git cannot
    be started, ``self.address`` is left unchanged.
    """
    # Function-scope import so the block does not rely on a file-level one.
    import subprocess

    repo_dir = os.path.dirname(self.nextflow_file.get_file_address()) or "."
    try:
        completed = subprocess.run(
            ["git", "ls-remote", "--get-url", "origin"],
            cwd=repo_dir,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            universal_newlines=True,
        )
    except (OSError, UnicodeError):
        return
    # git terminates its answer with a newline, which must not end up in
    # URLs built from the address.
    remote_url = completed.stdout.strip()
    slug = re.search(
        r"^(?:(?:https?|ssh|git)://)?(?:[^@/\s]+@)?github\.com[:/]"
        r"([^/\s]+/[^/\s]+?)(?:\.git)?/?$",
        remote_url,
    )
    self.address = slug.group(1) if slug else remote_url
def get_dico(self):
    """Fill ``self.dico`` with the GitHub API description of the repository.

    ``https://api.github.com/repos/<self.address>`` is requested and the
    decoded JSON object is stored in ``self.dico``. Nothing is requested
    when ``self.address`` is not of the form ``owner/repository``, and
    ``self.dico`` is left unchanged when the request or the decoding fails.

    Returns:
        None. Despite its name, this method only updates ``self.dico``.
    """
    # Function-scope import so the block does not rely on a file-level one.
    import urllib.request

    # Only a plain "owner/repo" slug is interpolated into the URL.
    if not re.fullmatch(r"[\w.-]+/[\w.-]+", self.address or ""):
        return
    request = urllib.request.Request(
        f"https://api.github.com/repos/{self.address}",
        headers={"Accept": "application/vnd.github+json"},
    )
    try:
        with urllib.request.urlopen(request, timeout=10) as response:
            payload = json.load(response)
    except (OSError, ValueError):
        # Best effort: network errors (URLError/HTTPError are OSError) and
        # invalid JSON (ValueError) keep the previous value.
        return
    if isinstance(payload, dict):
        self.dico = payload
...@@ -47,6 +83,7 @@ class Workflow: ...@@ -47,6 +83,7 @@ class Workflow:
return self.name return self.name
#Format yyyy-mm-dd #Format yyyy-mm-dd
#Here i return the first commit date
def get_datePublished(self): def get_datePublished(self):
if(self.datePublished==None): if(self.datePublished==None):
for match in re.finditer(r"Date: +\w+ +(\w+) +(\d+) +\d+:\d+:\d+ +(\d+)",self.log): for match in re.finditer(r"Date: +\w+ +(\w+) +(\d+) +\d+:\d+:\d+ +(\d+)",self.log):
...@@ -58,18 +95,32 @@ class Workflow: ...@@ -58,18 +95,32 @@ class Workflow:
return self.datePublished return self.datePublished
#TODO
def get_description(self):
    """Return the workflow description.

    Returns:
        ``self.description`` when it is not ``None``; otherwise the
        ``"description"`` entry of ``self.dico``, or ``None`` when that
        entry is unavailable.
    """
    if self.description is not None:
        return self.description
    try:
        return self.dico["description"]
    except (KeyError, TypeError):
        return None
def get_main_file(self):
    """Return the file name (last path component) of the main Nextflow file."""
    return os.path.basename(self.nextflow_file.get_file_address())
#TODO
def get_license(self):
    """Return the workflow license.

    Returns:
        ``self.license`` when it is not ``None``; otherwise
        ``self.dico["license"]["key"]``, or ``None`` when that value is
        unavailable (missing entry, or a ``"license"`` entry that is not a
        mapping).
    """
    if self.license is not None:
        return self.license
    try:
        return self.dico["license"]["key"]
    except (KeyError, TypeError):
        return None
#TODO #TODO
def get_creativeWorkStatus(self): def get_creativeWorkStatus(self):
...@@ -78,32 +129,51 @@ class Workflow: ...@@ -78,32 +129,51 @@ class Workflow:
#TODO #TODO
def get_version(self):
    """Return the workflow version; currently only the placeholder ``"TODO"``."""
    return "TODO"
#TODO -> this doesn't work perfectly
def get_authors(self):
    """Return the workflow authors.

    Returns:
        ``self.authors`` when it is not ``None``. Otherwise one
        ``{"@id": <name>, "email": <email>}`` dictionary per distinct email
        address found on the ``Author:`` lines of ``self.log``, in order of
        first appearance. When an email occurs with several names, the
        last name seen is kept.
    """
    if self.authors is not None:
        return self.authors
    # Keyed by email so each address yields a single entry.
    name_by_email = {}
    # Anchored at line start and permissive about the name, so names such
    # as "J. O'Neil" are not skipped.
    for match in re.finditer(r"^Author: +(.+?) +<([^>]*)>", self.log, flags=re.MULTILINE):
        name_by_email[match.group(2)] = match.group(1)
    return [{"@id": name, "email": email} for email, name in name_by_email.items()]
#TODO
#Need to follow this format : "rna-seq, nextflow, bioinformatics, reproducibility, workflow, reproducible-research, bioinformatics-pipeline" #Need to follow this format : "rna-seq, nextflow, bioinformatics, reproducibility, workflow, reproducible-research, bioinformatics-pipeline"
def get_keywords(self):
    """Return the workflow keywords as one comma-separated string.

    Returns:
        ``self.keywords`` when it is not ``None``; otherwise the entries of
        ``self.dico["topics"]`` joined with ``", "``, or ``None`` when that
        entry is unavailable or is not a sequence of strings.
    """
    if self.keywords is not None:
        return self.keywords
    try:
        return ", ".join(self.dico["topics"])
    except (KeyError, TypeError):
        return None
#TODO
def get_producer(self):
    """Return the workflow producer.

    Returns:
        ``self.producer`` when it is not ``None``; otherwise
        ``{"@id": self.dico["owner"]["login"]}``, or ``None`` when that
        value is unavailable.
    """
    if self.producer is not None:
        return self.producer
    try:
        return {"@id": self.dico["owner"]["login"]}
    except (KeyError, TypeError):
        return None
#TODO
def get_publisher(self):
    """Return the publisher of the workflow.

    Returns:
        ``"https://github.com/"`` when ``self.dico`` differs from the empty
        dictionary, otherwise ``None``.
    """
    if self.dico == {}:
        return None
    return "https://github.com/"
def get_output_dir(self):
    """Return the output directory reported by ``self.nextflow_file``."""
    return self.nextflow_file.get_output_dir()
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment