Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
BioFlow-Insight
Manage
Activity
Members
Labels
Plan
Issues
0
Issue boards
Milestones
Wiki
Code
Merge requests
1
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package Registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
shareFAIR
BioFlow-Insight
Commits
20cd8255
Commit
20cd8255
authored
1 year ago
by
George Marchment
Browse files
Options
Downloads
Patches
Plain Diff
Update -> ro-crate
parent
9f1bffdc
No related branches found
No related tags found
No related merge requests found
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
src/ro_crate.py
+111
-5
111 additions, 5 deletions
src/ro_crate.py
src/workflow.py
+51
-0
51 additions, 0 deletions
src/workflow.py
with
162 additions
and
5 deletions
src/ro_crate.py
+
111
−
5
View file @
20cd8255
import
json
import
glob
import
os
class RO_Crate:
    """Build an RO-Crate 1.1 metadata document (JSON-LD) for a workflow.

    The document describes the files found under the directory of the
    workflow's main file and is written to
    ``ro-crate-metadata-<workflow name>.json`` in the workflow's output
    directory.
    """

    def __init__(self, workflow):
        """Store the workflow and derive the directory that contains it.

        workflow: object exposing ``get_file_address()``,
        ``get_output_dir()`` and the metadata getters used by
        ``initialise_dico`` (``get_name()``, ``get_authors()``, ...).
        """
        self.workflow = workflow
        # Everything before the last '/' of the main file's address. A bare
        # file name contains no '/'; fall back to '.' so that get_files()
        # does not glob '/**/*.*', i.e. the whole filesystem.
        head, sep, _ = workflow.get_file_address().rpartition('/')
        self.directory = head if sep else '.'
        self.files = []
        self.dico = {}

    def _relative_id(self, file):
        """Return `file` without its leading '<directory>/' prefix."""
        return file[len(self.directory) + 1:]

    def get_files(self):
        """Collect the crate's files and return them as ``{"@id": path}``.

        Side effect: the full paths are stored in ``self.files``, which
        ``initialise`` iterates over afterwards.
        """
        pattern = f'{self.directory}/**/*.*'
        # '*.*' also matches directories whose name contains a dot; keep only
        # regular files because every entry is later typed as "File".
        # Sorted so that the generated metadata is reproducible.
        self.files = sorted(
            path
            for path in glob.glob(pattern, recursive=True)
            if os.path.isfile(path)
        )
        return [{"@id": self._relative_id(path)} for path in self.files]

    def initialise_dico(self):
        """Reset ``self.dico`` and add the metadata descriptor and root dataset."""
        self.dico["@context"] = "https://w3id.org/ro/crate/1.1/context"
        self.dico["@graph"] = []

        #GENERAL
        general = {
            "@id": f"ro-crate-metadata-{self.workflow.get_name()}.json",
            "@type": "CreativeWork",
            "about": {"@id": "./"},
            # "https://w3id.org/workflowhub/workflow-ro-crate/1.0" is left
            # out on purpose: this description does not conform to it.
            "conformsTo": [{"@id": "https://w3id.org/ro/crate/1.1"}],
        }
        self.dico["@graph"].append(general)

        #ROOT
        root = {}
        root["@id"] = "./"
        root["@type"] = "Dataset"
        root["name"] = self.workflow.get_name()
        root["datePublished"] = self.workflow.get_datePublished()
        root["description"] = self.workflow.get_description()
        # We do not consider a File as a "ComputationalWorkflow" since
        # multiple (sub)workflows can be defined in a same file.
        root["mainEntity"] = {
            "@id": self.workflow.get_main_file(),
            "@type": ["File", "SoftwareSourceCode"],
        }
        root["license"] = {"@id": self.workflow.get_license()}

        authors = self.workflow.get_authors()
        root["author"] = [{"@id": author["@id"]} for author in authors]
        # Right now all the authors are assumed to be maintainers. A separate
        # list is built so the two entries do not alias the same object.
        root["maintainer"] = [{"@id": author["@id"]} for author in authors]

        root["hasPart"] = self.get_files()
        root["publisher"] = {"@id": self.workflow.get_publisher()}
        #subjectOf TODO
        root["subjectOf"] = None
        root["creativeWorkStatus"] = self.workflow.get_creativeWorkStatus()
        root["@version"] = self.workflow.get_version()
        root["keywords"] = self.workflow.get_keywords()
        root["producer"] = self.workflow.get_producer()
        self.dico["@graph"].append(root)

    #TODO
    def get_programming_language(self, file):
        """Return the language identifier for `file`, or None when unknown.

        Only the ".nf" (Nextflow) extension is recognised so far.
        """
        if file.endswith(".nf"):
            return "https://w3id.org/workflowhub/workflow-ro-crate#nextflow"
        return None

    def get_contentSize(self, file):
        """Return the size of `file` in bytes divided by 1000, as a float."""
        return os.stat(file).st_size / 1e3

    #TODO
    def get_dateCreated(self, file):
        """Placeholder: currently returns the literal string "TODO"."""
        return "TODO"

    #TODO
    def get_dateModified(self, file):
        """Placeholder: currently returns the literal string "TODO"."""
        return "TODO"

    #TODO
    def get_url(self, file):
        """Placeholder: currently returns the literal string "TODO"."""
        return "TODO"

    #TODO
    def get_creators(self, file):
        """Placeholder: currently returns one hard-coded creator reference."""
        return [{"@id": "George"}]

    #TODO
    def get_types(self, file):
        """Return the ``@type`` list for `file`; ".nf" files are also source code."""
        types = ["File"]
        if file.endswith(".nf"):
            types.append("SoftwareSourceCode")
        return types

    def initialise_file(self, file):
        """Append the entity describing `file` (a full path) to the graph."""
        key = self._relative_id(file)
        dico = {}
        dico["@id"] = key
        dico["name"] = key
        dico["@type"] = self.get_types(file)
        # Only reference a language when one is known: {"@id": None} would be
        # serialised as a null identifier, which points at nothing.
        language = self.get_programming_language(file)
        if language is not None:
            dico["programmingLanguage"] = {"@id": language}
        dico["contentSize"] = self.get_contentSize(file)
        dico["dateCreated"] = self.get_dateCreated(file)
        dico["dateModified"] = self.get_dateModified(file)
        dico["url"] = self.get_url(file)
        dico["creator"] = [
            {"@id": creator["@id"]} for creator in self.get_creators(file)
        ]
        dico["isPartOf"] = []
        dico["hasPart"] = []
        self.dico["@graph"].append(dico)

    def initialise(self):
        """Build the whole metadata document and write it as indented JSON."""
        # initialise_dico() calls get_files(), which fills self.files.
        self.initialise_dico()
        for file in self.files:
            self.initialise_file(file)
        file_name = f"ro-crate-metadata-{self.workflow.get_name()}.json"
        output_path = os.path.join(self.workflow.get_output_dir(), file_name)
        with open(output_path, 'w', encoding='utf-8') as output_file:
            json.dump(self.dico, output_file, indent=2)
\ No newline at end of file
This diff is collapsed.
Click to expand it.
src/workflow.py
+
51
−
0
View file @
20cd8255
...
...
@@ -8,6 +8,57 @@ class Workflow:
self
.
nextflow_file
=
Nextflow_File
(
file
,
duplicate
=
duplicate
,
display_info
=
display_info
)
self
.
rocrate
=
None
#TODO
def get_name(self):
    """Placeholder for the workflow name; currently the literal "TODO"."""
    return "TODO"
#TODO
def get_datePublished(self):
    """Placeholder for the publication date; currently the literal "TODO"."""
    return "TODO"
#TODO
def get_description(self):
    """Placeholder for the workflow description; currently the literal "TODO"."""
    return "TODO"
#TODO
def get_main_file(self):
    """Placeholder for the main file identifier; currently the literal "TODO"."""
    return "TODO"
#TODO
def get_license(self):
    """Placeholder for the license identifier; currently the literal "TODO"."""
    return "TODO"
#TODO
def get_main_license(self):
    """Placeholder for the main license; currently the literal "TODO"."""
    return "TODO"
#TODO
def get_creativeWorkStatus(self):
    """Placeholder for the creative-work status; currently the literal "TODO"."""
    return "TODO"
#TODO
def get_version(self):
    """Placeholder for the workflow version; currently the literal "TODO"."""
    return "TODO"
#TODO
def get_authors(self):
    """Placeholder: returns a new list holding one hard-coded author reference."""
    author = {"@id": "George"}
    return [author]
#TODO
#Need to follow this format : "rna-seq, nextflow, bioinformatics, reproducibility, workflow, reproducible-research, bioinformatics-pipeline"
def get_keywords(self):
    """Placeholder for the keyword string; currently the literal "TODO"."""
    return "TODO"
#TODO
def get_producer(self):
    """Placeholder for the producer; currently the literal "TODO"."""
    return "TODO"
#TODO
def get_publisher(self):
    """Placeholder for the publisher identifier; currently the literal "TODO"."""
    return "TODO"
def get_output_dir(self):
    """Return the output directory reported by the wrapped ``nextflow_file``."""
    main_file = self.nextflow_file
    return main_file.get_output_dir()
def get_file_address(self):
    """Return the file address reported by the wrapped ``nextflow_file``."""
    main_file = self.nextflow_file
    return main_file.get_file_address()
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment