From aa312abcb00628b86b03b519dcacfa7564a9fc77 Mon Sep 17 00:00:00 2001
From: George Marchment <georgemarchment@yahoo.fr>
Date: Wed, 31 Jul 2024 19:12:51 +0200
Subject: [PATCH] Started implementing external script extraction

---
 src/nextflow_building_blocks.py |  5 +++++
 src/nextflow_file.py            |  8 ++++++++
 src/process.py                  | 31 +++++++++++++++++++++++++++++++
 src/workflow.py                 | 10 ++++++++++
 4 files changed, 54 insertions(+)

diff --git a/src/nextflow_building_blocks.py b/src/nextflow_building_blocks.py
index 5f8de68..0c60005 100644
--- a/src/nextflow_building_blocks.py
+++ b/src/nextflow_building_blocks.py
@@ -80,6 +80,11 @@ class Nextflow_Building_Blocks:
     def get_rocrate_key(self, dico):
         return f"{self.get_file_address()[len(dico['temp_directory'])+1:]}#{self.get_name()}"
 
+    def get_address(self):  # Delegates to the origin: returns self.origin.get_address() as is
+        return self.origin.get_address()
+    
+    def get_workflow_address(self):  # Delegates to the origin; subclasses may end the chain
+        return self.origin.get_workflow_address()
 
     
 
diff --git a/src/nextflow_file.py b/src/nextflow_file.py
index 5eadc6e..4a43c44 100644
--- a/src/nextflow_file.py
+++ b/src/nextflow_file.py
@@ -115,6 +115,13 @@ class Nextflow_File(Nextflow_Building_Blocks):
                 return self.display_info 
             else:
                 return self.origin.get_display_info()
+            
+
+    def get_workflow_address(self):
+        """Return the workflow directory: from self.workflow if there is no origin, else from the origin."""
+        if self.origin is None:  # identity test: an origin overriding __eq__ cannot be mistaken for None
+            return self.workflow.get_workflow_directory()
+        return self.origin.get_workflow_address()
                 
 
 
@@ -259,6 +266,7 @@ class Nextflow_File(Nextflow_Building_Blocks):
     def get_DSL(self):
         return self.DSL
     
+    
     #Method which returns the DSL of the workflow -> by default it's DSL2
     #I use the presence of include, subworkflows and into/from in processes as a proxy
     def which_DSL(self):
diff --git a/src/process.py b/src/process.py
index ef3d222..78f39e4 100644
--- a/src/process.py
+++ b/src/process.py
@@ -1,4 +1,5 @@
 import re
+import glob
 
 from .code_ import Code
 from .nextflow_building_blocks import Nextflow_Building_Blocks
@@ -53,6 +54,36 @@ class Process(Nextflow_Building_Blocks):
             return self.tools
     
 
+    def get_external_scripts_call(self):
+        """Return the unique script-like names (.sh/.py/.R/.r/.pl) found in the script code."""
+        # (?!\w) instead of a consumed [^\w]: a name sitting at the very end of the code is kept
+        pattern = r"([^\s\\\*]+\.(sh|py|R|r|pl))(?!\w)"
+        names = (match.group(1) for match in re.finditer(pattern, self.get_script_code()))
+        return list(dict.fromkeys(names))  # de-duplicate, keeping first-seen order (deterministic)
+    
+    def get_external_scripts_code(self):
+        """Locate, inside the workflow directory, the files named by the external script calls.
+
+        Every name from get_external_scripts_call() is searched recursively under
+        '<workflow address>/bin' first and, when nothing is found there, recursively
+        under the whole workflow address. Ambiguous lookups are reported on stdout.
+        NOTE(review): the located paths are not returned or stored yet.
+        """
+        # Resolved once, it is the same for every call. The directory and the script
+        # names are escaped so that glob metacharacters ('[', '?') are matched literally.
+        root = glob.escape(self.get_workflow_address())
+        for call in self.get_external_scripts_call():
+            name = glob.escape(call)
+            #Check first if the file is in the bin
+            found = glob.glob(f'{root}/bin/**/{name}', recursive=True)
+            if not found:
+                #If not we search again
+                found = glob.glob(f'{root}/**/{name}', recursive=True)
+            if len(found) > 1:
+                print(found)
+                print("More than one file found!")
+
+
     #def get_source(self):
     #    return [self]
     
diff --git a/src/workflow.py b/src/workflow.py
index c3b6dae..5565d67 100644
--- a/src/workflow.py
+++ b/src/workflow.py
@@ -67,6 +67,7 @@ class Workflow:
             output_dir=output_dir,
             workflow = self
         )
+        self.workflow_directory = '/'.join(file.split('/')[:-1])
         self.output_dir = Path(output_dir)
         self.rocrate = None
         self.name = name
@@ -133,6 +134,15 @@ class Workflow:
         
         """
         return self.address
+    
+    def get_workflow_directory(self):
+        """Method that returns the workflow directory
+        (the 'workflow_directory' attribute, returned unchanged)
+
+        Keyword arguments:
+        """
+        return self.workflow_directory
+    
 
     def set_address(self):
         """Method that sets the adress of the workflow main
-- 
GitLab