diff --git a/src/workflow.py b/src/workflow.py index 41444ec7c537d6d0a498617872cda59147b065e2..7e0243eba7cfc3da9a45717834b5286cda80171f 100644 --- a/src/workflow.py +++ b/src/workflow.py @@ -1,9 +1,12 @@ - +#Import dependencies +#Local from .nextflow_file import Nextflow_File from .ro_crate import RO_Crate from . import constant from .outils_graph import flatten_dico, initia_link_dico_rec, get_number_cycles +from .bioflowinsighterror import BioFlowInsightError +#Outside packages import os import re import json @@ -11,11 +14,32 @@ from pathlib import Path import glob import ctypes -from .bioflowinsighterror import BioFlowInsightError - class Workflow: + """ + This is the main workflow class; from this class, workflow analysis can be done. + After analysis, workflow structure reconstruction can be done. + + Attributes: + file: A string indicating the address to the workflow main or the directory containing the workflow + duplicate: A boolean indicating if processes are to be duplicated in the structure + display_info: A boolean indicating if the analysis information should be printed + output_dir: A string indicating where the results will be saved + name: A string indicating the name of the workflow + datePublished: A string indicating the date of publication of the workflow + description: A string indicating the description of the workflow + license: A string indicating the license of the workflow + creativeWorkStatus: A string indicating the creative work status of the workflow + authors: A string indicating the authors of the workflow + version: A string indicating the version of the workflow + keywords: A string indicating the keywords of the workflow + producer: A string indicating the producer of the workflow + publisher: A string indicating the publisher of the workflow + processes_2_remove: A string indicating the processes to remove from the workflow + processes_annotation: A dictionary containing processes 2 annotations + """ + def __init__(self, file, 
duplicate=False, display_info=True, output_dir = './results', name = None, datePublished=None, description=None, license = None, creativeWorkStatus = None, authors = None, @@ -66,6 +90,11 @@ class Workflow: self.get_dico() def get_repo_adress(self): + """Function that returns the address of the workflow repository + + Keyword arguments: + + """ current_directory = os.getcwd() repo = "/".join(self.nextflow_file.get_file_address().split("/")[:-1]) if(repo==''): @@ -73,9 +102,19 @@ class Workflow: return repo def get_processes_annotation(self): + """Function that returns the dictionary of the process annotations + + Keyword arguments: + + """ return self.processes_annotation def fill_log(self): + """Function that reads the git log and saves it + + Keyword arguments: + + """ current_directory = os.getcwd() os.chdir(self.get_repo_adress()) try: @@ -88,9 +127,19 @@ class Workflow: os.chdir(current_directory) def get_address(self): + """Function that returns the address of the workflow main + + Keyword arguments: + + """ return self.address def set_address(self): + """Function that sets the address of the workflow main + + Keyword arguments: + + """ current_directory = os.getcwd() os.chdir(self.get_repo_adress()) try: @@ -105,6 +154,11 @@ class Workflow: self.address = match.group(1) def get_dico(self): + """Function that returns a dictionary containing information regarding the GitHub repository + + Keyword arguments: + + """ current_directory = os.getcwd() os.chdir(self.get_repo_adress()) try: @@ -120,6 +174,11 @@ class Workflow: def get_name(self): + """Function that returns the name of the workflow + + Keyword arguments: + + """ if(self.name==None): return self.nextflow_file.get_file_address().split("/")[-2] else: @@ -128,6 +187,11 @@ class Workflow: #Format yyyy-mm-dd #Here i return the first commit date def get_datePublished(self): + """Function that returns the date of publication + + Keyword arguments: + + """ if(self.datePublished==None): for match in re.finditer(r"Date: +\w+ 
+(\w+) +(\d+) +\d+:\d+:\d+ +(\d+)",self.log): month = constant.month_mapping[match.group(1)] @@ -139,6 +203,11 @@ class Workflow: def get_description(self): + """Function that returns the description + + Keyword arguments: + + """ if(self.description==None): try: res = self.dico["description"] @@ -151,10 +220,20 @@ class Workflow: def get_main_file(self): + """Function that returns the name of the main file + + Keyword arguments: + + """ return self.nextflow_file.get_file_address().split("/")[-1] def get_license(self): + """Function that returns the license + + Keyword arguments: + + """ if(self.license==None): try: res = self.dico["license"]["key"] @@ -175,6 +254,11 @@ class Workflow: def get_authors(self): + """Function that returns a list of the authors + + Keyword arguments: + + """ if(self.authors==None): authors = {} for match in re.finditer(r"Author: ([^>]+)<([^>]+)>",self.log): @@ -194,6 +278,11 @@ class Workflow: #Need to follow this format : "rna-seq, nextflow, bioinformatics, reproducibility, workflow, reproducible-research, bioinformatics-pipeline" def get_keywords(self): + """Function that returns the keywords + + Keyword arguments: + + """ if(self.keywords==None): try: res = ", ".join(self.dico["topics"]) @@ -206,6 +295,11 @@ class Workflow: def get_producer(self): + """Function that returns the producer + + Keyword arguments: + + """ if(self.producer==None): try: res = {"@id": self.dico["owner"]["login"]} @@ -217,28 +311,60 @@ class Workflow: def get_publisher(self): + """Function that returns the publisher + + Keyword arguments: + + """ if(self.dico!={}): return "https://github.com/" else: return None def get_output_dir(self): + """Function that returns the output directory + + Keyword arguments: + + """ return self.nextflow_file.get_output_dir() def get_file_address(self): + """Function that returns the address of the workflow main + + Keyword arguments: + + """ return self.nextflow_file.get_file_address() def add_2_rocrate(self, dico): + """TODO + 
""" self.nextflow_file.add_2_rocrate(dico) def get_processes_defined(self): + """Function that returns a list of the processes defined + + Keyword arguments: + + """ processes = self.nextflow_file.get_processes_defined(dict={}).keys() return list(processes) def get_processes_called(self): + """Function that returns a list of the processes called/used during the workflow execution + + Keyword arguments: + + """ return self.nextflow_file.get_processes_called() def get_tools(self): + """Function that returns a list of the tools used by the workflow + + Keyword arguments: + + """ processes = self.get_processes_called() tab = [] for p in processes: @@ -246,6 +372,11 @@ class Workflow: return list(set(tab)) def get_commands(self): + """Function that returns a list of the commands used by the workflow + + Keyword arguments: + + """ processes = self.get_processes_called() tab = [] for p in processes: @@ -253,6 +384,11 @@ class Workflow: return list(set(tab)) def get_modules(self): + """Function that returns a list of the modules used by the workflow + + Keyword arguments: + + """ processes = self.get_processes_called() tab = [] for p in processes: @@ -260,10 +396,17 @@ class Workflow: return list(set(tab)) def initialise_rocrate(self): + """Function that initialises the RO-Crate file + + Keyword arguments: + + """ self.rocrate = RO_Crate(self) self.rocrate.initialise() def get_layers(self): + """TODO + """ graph = self.nextflow_file.get_graph() if(not graph.is_initialised()): graph.initialise() @@ -338,11 +481,21 @@ class Workflow: def initialise(self, create_rocrate = True): + """Function that initialises the analysis of the workflow + + Keyword arguments: + + """ self.nextflow_file.initialise() if(create_rocrate): self.initialise_rocrate() def generate_all_graphs(self, render_graphs = True): + """Function that generates all graphs representing the workflow + + Keyword arguments: + + """ tab_processes_2_remove = [] if(self.processes_2_remove!=None): temp = 
self.processes_2_remove.split(",")