Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
BioFlow-Insight
Manage
Activity
Members
Labels
Plan
Issues
0
Issue boards
Milestones
Wiki
Code
Merge requests
1
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package Registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
shareFAIR
BioFlow-Insight
Commits
c62f3498
Commit
c62f3498
authored
1 year ago
by
George Marchment
Browse files
Options
Downloads
Patches
Plain Diff
starting to add comments
parent
307054a1
No related branches found
No related tags found
No related merge requests found
Pipeline
#13443
failed with stage
in 39 seconds
Changes
1
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
src/workflow.py
+156
-3
156 additions, 3 deletions
src/workflow.py
with
156 additions
and
3 deletions
src/workflow.py
+
156
−
3
View file @
c62f3498
#Import dependencies
#Local
from
.nextflow_file
import
Nextflow_File
from
.ro_crate
import
RO_Crate
from
.
import
constant
from
.outils_graph
import
flatten_dico
,
initia_link_dico_rec
,
get_number_cycles
from
.bioflowinsighterror
import
BioFlowInsightError
#Outside packages
import
os
import
re
import
json
...
...
@@ -11,11 +14,32 @@ from pathlib import Path
import
glob
import
ctypes
from
.bioflowinsighterror
import
BioFlowInsightError
class
Workflow
:
"""
This is the main workflow class, from this class, workflow analysis can be done.
After analysis, workflow structure reconstruction can be done.
Attributes:
file: A string indicating the address to the workflow main or the directory containing the workflow
duplicate: A boolean indicating if processes are to be duplicated in the structure
display_info: A boolean indicating if the analysis information should be printed
output_dir: A string indicating where the results will be saved
name: A string indicating the name of the workflow
datePublished: A string indicating the date of publication of the workflow
description: A string indicating the description of the workflow
license: A string indicating the license of the workflow
creativeWorkStatus: A string indicating the creative work status of the workflow
authors: A string indicating the authors of the workflow
version: A string indicating the version of the workflow
keywords: A string indicating the keywords of the workflow
producer: A string indicating the producer of the workflow
publisher: A string indicating the publisher of the workflow
processes_2_remove: A string indicating the processes to remove from the workflow
processes_annotation: A dictionary mapping processes to annotations
"""
def
__init__
(
self
,
file
,
duplicate
=
False
,
display_info
=
True
,
output_dir
=
'
./results
'
,
name
=
None
,
datePublished
=
None
,
description
=
None
,
license
=
None
,
creativeWorkStatus
=
None
,
authors
=
None
,
...
...
@@ -66,6 +90,11 @@ class Workflow:
self
.
get_dico
()
def
get_repo_adress
(
self
):
"""
Function that returns the address of the workflow repository
Keyword arguments:
"""
current_directory
=
os
.
getcwd
()
repo
=
"
/
"
.
join
(
self
.
nextflow_file
.
get_file_address
().
split
(
"
/
"
)[:
-
1
])
if
(
repo
==
''
):
...
...
@@ -73,9 +102,19 @@ class Workflow:
return
repo
def get_processes_annotation(self):
    """
    Function that returns the dictionary of the process annotations

    Keyword arguments:
    """
    # Plain accessor: hands back the stored attribute without copying it.
    annotations = self.processes_annotation
    return annotations
def
fill_log
(
self
):
"""
Function that reads the git log and saves it
Keyword arguments:
"""
current_directory
=
os
.
getcwd
()
os
.
chdir
(
self
.
get_repo_adress
())
try
:
...
...
@@ -88,9 +127,19 @@ class Workflow:
os
.
chdir
(
current_directory
)
def get_address(self):
    """
    Function that returns the address of the workflow main

    Keyword arguments:
    """
    # Plain accessor for the 'address' attribute.
    workflow_address = self.address
    return workflow_address
def
set_address
(
self
):
"""
Function that sets the address of the workflow main
Keyword arguments:
"""
current_directory
=
os
.
getcwd
()
os
.
chdir
(
self
.
get_repo_adress
())
try
:
...
...
@@ -105,6 +154,11 @@ class Workflow:
self
.
address
=
match
.
group
(
1
)
def
get_dico
(
self
):
"""
Function that returns a dictionary containing information regarding the github repository
Keyword arguments:
"""
current_directory
=
os
.
getcwd
()
os
.
chdir
(
self
.
get_repo_adress
())
try
:
...
...
@@ -120,6 +174,11 @@ class Workflow:
def
get_name
(
self
):
"""
Function that returns the name of the workflow
Keyword arguments:
"""
if
(
self
.
name
==
None
):
return
self
.
nextflow_file
.
get_file_address
().
split
(
"
/
"
)[
-
2
]
else
:
...
...
@@ -128,6 +187,11 @@ class Workflow:
#Format yyyy-mm-dd
#Here i return the first commit date
def
get_datePublished
(
self
):
"""
Function that returns the date of publication
Keyword arguments:
"""
if
(
self
.
datePublished
==
None
):
for
match
in
re
.
finditer
(
r
"
Date: +\w+ +(\w+) +(\d+) +\d+:\d+:\d+ +(\d+)
"
,
self
.
log
):
month
=
constant
.
month_mapping
[
match
.
group
(
1
)]
...
...
@@ -139,6 +203,11 @@ class Workflow:
def
get_description
(
self
):
"""
Function that returns the description
Keyword arguments:
"""
if
(
self
.
description
==
None
):
try
:
res
=
self
.
dico
[
"
description
"
]
...
...
@@ -151,10 +220,20 @@ class Workflow:
def get_main_file(self):
    """
    Function that returns the name of the main file

    The name is the part of the Nextflow file address that follows the
    last "/" (the whole address when it contains no "/").

    Keyword arguments:
    """
    full_address = self.nextflow_file.get_file_address()
    # Only the last path component is needed, so split once from the right.
    file_name = full_address.rsplit("/", 1)[-1]
    return file_name
def
get_license
(
self
):
"""
Function that returns the license
Keyword arguments:
"""
if
(
self
.
license
==
None
):
try
:
res
=
self
.
dico
[
"
license
"
][
"
key
"
]
...
...
@@ -175,6 +254,11 @@ class Workflow:
def
get_authors
(
self
):
"""
Function that returns a list of the authors
Keyword arguments:
"""
if
(
self
.
authors
==
None
):
authors
=
{}
for
match
in
re
.
finditer
(
r
"
Author: ([^>]+)<([^>]+)>
"
,
self
.
log
):
...
...
@@ -194,6 +278,11 @@ class Workflow:
#Need to follow this format : "rna-seq, nextflow, bioinformatics, reproducibility, workflow, reproducible-research, bioinformatics-pipeline"
def
get_keywords
(
self
):
"""
Function that returns the keywords
Keyword arguments:
"""
if
(
self
.
keywords
==
None
):
try
:
res
=
"
,
"
.
join
(
self
.
dico
[
"
topics
"
])
...
...
@@ -206,6 +295,11 @@ class Workflow:
def
get_producer
(
self
):
"""
Function that returns the producer
Keyword arguments:
"""
if
(
self
.
producer
==
None
):
try
:
res
=
{
"
@id
"
:
self
.
dico
[
"
owner
"
][
"
login
"
]}
...
...
@@ -217,28 +311,60 @@ class Workflow:
def get_publisher(self):
    """
    Function that returns the publisher

    Returns "https://github.com/" when the 'dico' attribute is not an
    empty dictionary, and None otherwise.

    Keyword arguments:
    """
    # An empty 'dico' means there is nothing to attribute to a publisher.
    if self.dico == {}:
        return None
    return "https://github.com/"
def get_output_dir(self):
    """
    Function that returns the output directory

    The value is obtained from the main Nextflow file object.

    Keyword arguments:
    """
    main_file = self.nextflow_file
    return main_file.get_output_dir()
def get_file_address(self):
    """
    Function that returns the address of the workflow main

    The value is obtained from the main Nextflow file object.

    Keyword arguments:
    """
    main_file = self.nextflow_file
    return main_file.get_file_address()
def add_2_rocrate(self, dico):
    """
    Function that forwards 'dico' to the add_2_rocrate method of the main
    Nextflow file

    Keyword arguments:
    dico -- object handed over unchanged to the Nextflow file (presumably
            the RO-Crate dictionary being filled -- TODO confirm)
    """
    main_file = self.nextflow_file
    main_file.add_2_rocrate(dico)
def get_processes_defined(self):
    """
    Function that returns a list of the processes defined

    The main Nextflow file is asked for its defined processes, starting
    from an empty dictionary; the keys of the result are returned.

    Keyword arguments:
    """
    defined = self.nextflow_file.get_processes_defined(dict={})
    return [process for process in defined.keys()]
def get_processes_called(self):
    """
    Function that returns a list of the processes called/used during the workflow execution

    The value is obtained from the main Nextflow file object.

    Keyword arguments:
    """
    main_file = self.nextflow_file
    return main_file.get_processes_called()
def
get_tools
(
self
):
"""
Function that returns a list of the tools used by the workflow
Keyword arguments:
"""
processes
=
self
.
get_processes_called
()
tab
=
[]
for
p
in
processes
:
...
...
@@ -246,6 +372,11 @@ class Workflow:
return
list
(
set
(
tab
))
def
get_commands
(
self
):
"""
Function that returns a list of the commands used by the workflow
Keyword arguments:
"""
processes
=
self
.
get_processes_called
()
tab
=
[]
for
p
in
processes
:
...
...
@@ -253,6 +384,11 @@ class Workflow:
return
list
(
set
(
tab
))
def
get_modules
(
self
):
"""
Function that returns a list of the modules used by the workflow
Keyword arguments:
"""
processes
=
self
.
get_processes_called
()
tab
=
[]
for
p
in
processes
:
...
...
@@ -260,10 +396,17 @@ class Workflow:
return
list
(
set
(
tab
))
def initialise_rocrate(self):
    """
    Function that initialises the RO-Crate file

    Creates an RO_Crate object for this workflow, stores it in the
    'rocrate' attribute and then initialises it.

    Keyword arguments:
    """
    crate = RO_Crate(self)
    # Store the crate before initialising it, as the original code does.
    self.rocrate = crate
    crate.initialise()
def
get_layers
(
self
):
"""
TODO
"""
graph
=
self
.
nextflow_file
.
get_graph
()
if
(
not
graph
.
is_initialised
()):
graph
.
initialise
()
...
...
@@ -338,11 +481,21 @@ class Workflow:
def initialise(self, create_rocrate=True):
    """
    Function that initialises the analysis of the workflow

    Keyword arguments:
    create_rocrate -- when true, the RO-Crate is initialised after the
                      main Nextflow file (default True)
    """
    main_file = self.nextflow_file
    main_file.initialise()
    # The RO-Crate step is optional and always runs after the file analysis.
    if not create_rocrate:
        return
    self.initialise_rocrate()
def
generate_all_graphs
(
self
,
render_graphs
=
True
):
"""
Function that generates all graphs representing the workflow
Keyword arguments:
"""
tab_processes_2_remove
=
[]
if
(
self
.
processes_2_remove
!=
None
):
temp
=
self
.
processes_2_remove
.
split
(
"
,
"
)
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment