Skip to content
Snippets Groups Projects
Commit c62f3498 authored by George Marchment's avatar George Marchment
Browse files

starting to add comments

parent 307054a1
No related branches found
No related tags found
No related merge requests found
Pipeline #13443 failed with stage
in 39 seconds
#Import dependencies
#Local
from .nextflow_file import Nextflow_File
from .ro_crate import RO_Crate
from . import constant
from .outils_graph import flatten_dico, initia_link_dico_rec, get_number_cycles
from .bioflowinsighterror import BioFlowInsightError
#Outside packages
import os
import re
import json
......@@ -11,11 +14,32 @@ from pathlib import Path
import glob
import ctypes
from .bioflowinsighterror import BioFlowInsightError
class Workflow:
"""
This is the main workflow class, from this class, workflow analysis can be done.
After analysis, workflow structure reconstruction can be done.
Attributes:
file: A string indicating the address to the workflow main or the directory containing the workflow
duplicate: A boolean indicating if processes are to be duplicated in the structure
display_info: A boolean indicating if the analysis information should be printed
output_dir: A string indicating where the results will be saved
name: A string indicating the name of the workflow
datePublished: A string indicating the date of publication of the workflow
description: A string indicating the description of the workflow
license: A string indicating the license of the workflow
creativeWorkStatus: A string indicating the creative work status of the workflow
authors: A string indicating the authors of the workflow
version: A string indicating the version of the workflow
keywords: A string indicating the keywords of the workflow
producer: A string indicating the producer of the workflow
publisher: A string indicating the publisher of the workflow
processes_2_remove: A string indicating the processes to remove from the workflow
processes_annotation: A dictionary containing processes to annotations
"""
def __init__(self, file, duplicate=False, display_info=True, output_dir = './results',
name = None, datePublished=None, description=None,
license = None, creativeWorkStatus = None, authors = None,
......@@ -66,6 +90,11 @@ class Workflow:
self.get_dico()
def get_repo_adress(self):
"""Function that returns the adress of the workflow repository
Keyword arguments:
"""
current_directory = os.getcwd()
repo = "/".join(self.nextflow_file.get_file_address().split("/")[:-1])
if(repo==''):
......@@ -73,9 +102,19 @@ class Workflow:
return repo
def get_processes_annotation(self):
    """Return the process annotations held by this workflow.

    Returns:
        The object stored in the ``processes_annotation`` attribute,
        returned as-is (no copy is made).
    """
    annotations = self.processes_annotation
    return annotations
def fill_log(self):
"""Function that reads the git log and saves it
Keyword arguments:
"""
current_directory = os.getcwd()
os.chdir(self.get_repo_adress())
try:
......@@ -88,9 +127,19 @@ class Workflow:
os.chdir(current_directory)
def get_address(self):
    """Return the address of the workflow main.

    Returns:
        The value currently stored in the ``address`` attribute.
    """
    stored_address = self.address
    return stored_address
def set_address(self):
"""Function that sets the adress of the workflow main
Keyword arguments:
"""
current_directory = os.getcwd()
os.chdir(self.get_repo_adress())
try:
......@@ -105,6 +154,11 @@ class Workflow:
self.address = match.group(1)
def get_dico(self):
"""Function that returns a dictionnary containg information regarding the github repository
Keyword arguments:
"""
current_directory = os.getcwd()
os.chdir(self.get_repo_adress())
try:
......@@ -120,6 +174,11 @@ class Workflow:
def get_name(self):
"""Function that returns the name of the workflow
Keyword arguments:
"""
if(self.name==None):
return self.nextflow_file.get_file_address().split("/")[-2]
else:
......@@ -128,6 +187,11 @@ class Workflow:
#Format yyyy-mm-dd
#Here I return the first commit date
def get_datePublished(self):
"""Function that returns the date of publication
Keyword arguments:
"""
if(self.datePublished==None):
for match in re.finditer(r"Date: +\w+ +(\w+) +(\d+) +\d+:\d+:\d+ +(\d+)",self.log):
month = constant.month_mapping[match.group(1)]
......@@ -139,6 +203,11 @@ class Workflow:
def get_description(self):
"""Function that returns the description
Keyword arguments:
"""
if(self.description==None):
try:
res = self.dico["description"]
......@@ -151,10 +220,20 @@ class Workflow:
def get_main_file(self):
    """Return the name of the workflow's main file.

    Returns:
        The last "/"-separated component of the address reported by
        ``self.nextflow_file.get_file_address()``.
    """
    full_address = self.nextflow_file.get_file_address()
    # Only the part after the final "/" is wanted, so one split from the right suffices
    pieces = full_address.rsplit("/", 1)
    return pieces[-1]
def get_license(self):
"""Function that returns the license
Keyword arguments:
"""
if(self.license==None):
try:
res = self.dico["license"]["key"]
......@@ -175,6 +254,11 @@ class Workflow:
def get_authors(self):
"""Function that returns a list of the authors
Keyword arguments:
"""
if(self.authors==None):
authors = {}
for match in re.finditer(r"Author: ([^>]+)<([^>]+)>",self.log):
......@@ -194,6 +278,11 @@ class Workflow:
#Need to follow this format : "rna-seq, nextflow, bioinformatics, reproducibility, workflow, reproducible-research, bioinformatics-pipeline"
def get_keywords(self):
"""Function that returns the keywords
Keyword arguments:
"""
if(self.keywords==None):
try:
res = ", ".join(self.dico["topics"])
......@@ -206,6 +295,11 @@ class Workflow:
def get_producer(self):
"""Function that returns the producer
Keyword arguments:
"""
if(self.producer==None):
try:
res = {"@id": self.dico["owner"]["login"]}
......@@ -217,28 +311,60 @@ class Workflow:
def get_publisher(self):
    """Return the publisher of the workflow.

    Returns:
        The string "https://github.com/" when ``self.dico`` differs from an
        empty dictionary, otherwise None.
    """
    # NOTE(review): dico presumably holds the GitHub repository information,
    # so any content in it is taken to mean GitHub is the publisher -- confirm.
    has_repository_info = self.dico != {}
    return "https://github.com/" if has_repository_info else None
def get_output_dir(self):
    """Return the output directory of the workflow.

    Returns:
        Whatever ``self.nextflow_file.get_output_dir()`` returns.
    """
    main_file = self.nextflow_file
    return main_file.get_output_dir()
def get_file_address(self):
    """Return the address of the workflow main file.

    Returns:
        Whatever ``self.nextflow_file.get_file_address()`` returns.
    """
    main_file = self.nextflow_file
    return main_file.get_file_address()
def add_2_rocrate(self, dico):
    """Forward ``dico`` to the ``add_2_rocrate`` method of the Nextflow file.

    Keyword arguments:
        dico: object handed unchanged to ``self.nextflow_file.add_2_rocrate``
            (presumably the RO-Crate dictionary being filled -- TODO confirm)
    """
    main_file = self.nextflow_file
    main_file.add_2_rocrate(dico)
def get_processes_defined(self):
    """Return a list of the processes defined.

    Returns:
        A list built from the keys of the mapping returned by
        ``self.nextflow_file.get_processes_defined``, which is called with a
        fresh empty dictionary as its ``dict`` argument.
    """
    defined = self.nextflow_file.get_processes_defined(dict={})
    return [*defined.keys()]
def get_processes_called(self):
    """Return the processes called/used during the workflow execution.

    Returns:
        Whatever ``self.nextflow_file.get_processes_called()`` returns.
    """
    main_file = self.nextflow_file
    return main_file.get_processes_called()
def get_tools(self):
"""Function that returns a list of the tools used by the workflow
Keyword arguments:
"""
processes = self.get_processes_called()
tab = []
for p in processes:
......@@ -246,6 +372,11 @@ class Workflow:
return list(set(tab))
def get_commands(self):
"""Function that returns a list of the commands used by the workflow
Keyword arguments:
"""
processes = self.get_processes_called()
tab = []
for p in processes:
......@@ -253,6 +384,11 @@ class Workflow:
return list(set(tab))
def get_modules(self):
"""Function that returns a list of the modules used by the workflow
Keyword arguments:
"""
processes = self.get_processes_called()
tab = []
for p in processes:
......@@ -260,10 +396,17 @@ class Workflow:
return list(set(tab))
def initialise_rocrate(self):
    """Create the RO-Crate object for this workflow and initialise it.

    A ``RO_Crate`` is built from this workflow, stored in ``self.rocrate``,
    and its ``initialise`` method is then called.
    """
    crate = RO_Crate(self)
    # Store the crate on the workflow before initialising it, as the
    # initialisation may look it up through the workflow
    self.rocrate = crate
    crate.initialise()
def get_layers(self):
"""TODO
"""
graph = self.nextflow_file.get_graph()
if(not graph.is_initialised()):
graph.initialise()
......@@ -338,11 +481,21 @@ class Workflow:
def initialise(self, create_rocrate = True):
    """Initialise the analysis of the workflow.

    Keyword arguments:
        create_rocrate: when truthy, ``self.initialise_rocrate()`` is called
            after the Nextflow file has been initialised (default True)
    """
    self.nextflow_file.initialise()
    if not create_rocrate:
        return
    self.initialise_rocrate()
def generate_all_graphs(self, render_graphs = True):
"""Function that generates all graphs representing the workflow
Keyword arguments:
"""
tab_processes_2_remove = []
if(self.processes_2_remove!=None):
temp = self.processes_2_remove.split(",")
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment