diff --git a/src/DBfile.py b/src/DBfile.py
new file mode 100644
index 0000000000000000000000000000000000000000..a416c3bebce8b668008df18d4632f5437ad5eb5d
--- /dev/null
+++ b/src/DBfile.py
@@ -0,0 +1,105 @@
+import json
+import glob
+import os
+import re
+from .ro_crate import RO_Crate
+
+from . import constant
+
+
+def _turtle_escape(value):
+    """Return ``str(value)`` escaped for a double-quoted Turtle string literal."""
+    text = str(value)
+    # Backslash goes first so the escapes added afterwards are not escaped again.
+    for raw, escaped in (("\\", "\\\\"), ('"', '\\"'), ("\n", "\\n"), ("\r", "\\r")):
+        text = text.replace(raw, escaped)
+    return text
+
+
+class DBfile(RO_Crate):
+    """Turtle (RDF) description of a workflow, accumulated in ``self.file_contents``.
+
+    ``self.workflow`` and ``self.info_dico_workflow`` are not set here; they are
+    presumably provided by ``RO_Crate.__init__`` (TODO confirm).
+    """
+
+    def __init__(self, workflow,
+                 personnal_acces_token = None,
+                 display_info=False):
+        """Initialise the RO_Crate base, then start the document with its prefixes.
+
+        All three arguments are forwarded unchanged to ``RO_Crate.__init__``.
+        """
+        RO_Crate.__init__(self, workflow, personnal_acces_token = personnal_acces_token, display_info=display_info)
+
+        self.file_contents="""@prefix sf: <http://sharefair.org/> .
+@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
+@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
+@prefix schema: <https://schema.org/> .
+@prefix edam: <http://edamontology.org/> .
+@prefix p-plan: <http://purl.org/net/p-plan#> .
+@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
+@prefix efo: <https://www.ebi.ac.uk/gwas/efotraits/> ."""
+
+        # NOTE(review): unconditional debug dump of the repository metadata;
+        # consider gating it on display_info.
+        print(self.info_dico_workflow)
+
+    def initialise(self):
+        """Append the workflow description to ``self.file_contents`` and print it.
+
+        Adds the ``sf:<name>Workflow`` resource, one ``schema:Person`` statement
+        per retained author and the Nextflow ``schema:ComputerLanguage``
+        statement, each separated by a blank line.
+        """
+        workflow_name = self.workflow.get_name()
+        workflow_long_name = _turtle_escape(self.info_dico_workflow["full_name"])
+        description = _turtle_escape(self.get_description())
+        creator = self.info_dico_workflow["owner"]["html_url"]
+        #These are filters to keep in "real" authors
+        authors_tab = [
+            author for author in self.get_authors()
+            if "@users.noreply.github.com" not in author['email']
+            and "@machine" not in author['email']
+            and " " not in author["@id"]
+        ]
+        # The filters may reject every author; fall back to the repository owner
+        # URL instead of raising IndexError on authors_tab[0].
+        if authors_tab:
+            maintainer = f"https://github.com/{authors_tab[0]['@id']}"
+        else:
+            maintainer = creator
+        date_created = self.get_datePublished()
+        keywords = _turtle_escape(self.get_keywords())
+        # A bare word is not a valid Turtle term, so steps are named in the sf: namespace.
+        steps_string = ", ".join(f"sf:step{p.get_alias()}" for p in self.workflow.get_processes_called())
+
+        #TODO update the input and output of the workflow
+        statements = [
+            "rdf:type sf:Workflow",
+            f'schema:name "{workflow_long_name}"',
+            f'schema:description "{description}"',
+            f"schema:creator <{creator}>",
+            f"schema:maintainer <{maintainer}>",
+            # Plain interpolation: the literal must hold only the date to be typed as xsd:date.
+            f'schema:dateCreated "{date_created}"^^xsd:date',
+            "schema:programmingLanguage <https://w3id.org/workflowhub/workflow-ro-crate#nextflow>",
+        ]
+        # A predicate without any object is a syntax error, so skip it when there are no steps.
+        if steps_string:
+            statements.append(f"schema:step {steps_string}")
+        statements += [
+            "sf:inputVariable sf:samplesheet, sf:ch_versions, sf:ch_fasta, sf:ch_fasta_index, sf:ch_bismark_index, sf:ch_bwameth_index",
+            "sf:outputVariable sf:output_bam, sf:output_bai, sf:output_qualimap, sf:output_preseq, sf:output_versions",
+            f'schema:keywords "{keywords}"',
+        ]
+        header = f"### Main workflow\nsf:{workflow_name}Workflow " + " ;\n    ".join(statements) + " ."
+
+        blocks = [self.file_contents, header]
+        blocks += [f"<https://github.com/{author['@id']}> rdf:type schema:Person ." for author in authors_tab]
+        blocks.append('<https://w3id.org/workflowhub/workflow-ro-crate#nextflow> rdf:type schema:ComputerLanguage ;\n    rdfs:label "Nextflow" .')
+        self.file_contents = "\n\n".join(blocks)
+        print(self.file_contents)
+
+        #TODO ADD Inputs values
+        #TODO ADD Outputs value
diff --git a/src/workflow.py b/src/workflow.py
index fc408509b47cf99901b8dd0a76baa7120e568155..bb4bdaab995fc1c0ce666b3f9af6f3885db3a5e9 100644
--- a/src/workflow.py
+++ b/src/workflow.py
@@ -2,6 +2,7 @@
 #Local
 from .nextflow_file import Nextflow_File
 from .ro_crate import RO_Crate
+from .DBfile import DBfile
 from . import constant
 from .outils import is_git_directory, format_with_tabs, replace_thing_by_call, replace_group1, group_together_ifs, extract_curly, remove_extra_jumps, get_channels_to_add_in_false_conditions, extract_conditions, remove_empty_conditions_place_anker
 from .outils_graph import get_flatten_dico, initia_link_dico_rec, get_number_cycles, generate_graph, enrich_json_with_positions
@@ -316,6 +317,12 @@ George Marchment, Bryan Brancotte, Marie Schmit, Frédéric Lemoine, Sarah Cohen
                                 producer = producer)
         self.rocrate.initialise()
 
+    def get_DBfile(self):
+        """Build the DBfile for this workflow, initialise it and return it."""
+        db_file = DBfile(self)
+        db_file.initialise()
+        return db_file
+
 
     #Returns a dico of number of processes called per each condition
     #For example : {condition1: 14, condition2: 10, condition:3}