From 3a0525106ae3d66714ade916df2305862d75650e Mon Sep 17 00:00:00 2001 From: George Marchment <georgemarchment@yahoo.fr> Date: Mon, 14 Apr 2025 10:18:51 +0200 Subject: [PATCH] Added the extraction of the tools funnction to the process object --- ressources/empty.sif | Bin 0 -> 40960 bytes src/outils.py | 245 +++++++++++++++++++++++++++++++++++++++++++ src/process.py | 5 +- 3 files changed, 249 insertions(+), 1 deletion(-) create mode 100755 ressources/empty.sif diff --git a/ressources/empty.sif b/ressources/empty.sif new file mode 100755 index 0000000000000000000000000000000000000000..4195d8ae42fdba5b066423fdb7d4d6c2f8ee9b9c GIT binary patch literal 40960 zcmeI5dpMNc+Q2E2R8C2udJG{kPNgC_8$t<DQOpU$n1?wyBtkjFD@sBRZ%WdpR8-Ol zQN1DZBBYW-QYn>m&bOW!BX7IC_xtT{f8VvQZ?Cy7vz|5US!><*Z{7F3*6p9Aq^QMV zF||lEh8C3(gfKY_O%{#e&7l*SG<N6=0Vf+P4|`LBjk$ofj)1nVfJz>s(|l=t>tV#o z@l#K;!J^?_ZvpV(ml{|Fe6)FmfPe(=;k5sF8O|#maQqiw2?;;~kN_kA2|xmn03-kj zKmw2eBmfCO0{`0t1Wg141O>q?;BOe4?DTkou7*{>hhOp&e16Gmh2M|>kH9n&U_(>D zGvAQf#|^1Voc991<Qovb<h6ePrZHFnR5DxBpUNguh-@O>mxVIEH^Zm@=LEQhG!-bx zH>9#8p2h*=it|gp0rATpxu9g4CK+XT(Y&Dv{mAA&p)O<sY)A-b$~Po|lyS8scx9I) zujCsLzZ`$--=+Za3t*8I{QDh#AOT1K5`Y9C0Z0H6fCL}`NB|Om1Rw!O01|)%AOT1K z5`Y9C0Z0H6fCL}`NB|Om1Rw!O01|)%AOT1K5`Y9C0Z0H6fCL}`NB|Om1Rw!O01|)% zAOT1K5`Y9C0Z0H6fCL}`NB|Om1Rw!O01|)%AOT1K5`Y9C0Z0H6fCL}`NB|Om1Rw!O z01|)%AOT1K5`Y9C0Z0H6fCL}`NB|Om1Rw!O01|)%AOT1K5`YB$NdhDkWwY2!Vt@fc zLCJnp<_s?;>TiIMy}XG`3KiVq?jHc|ZJ!Z_!|u+)8HC}8Y&Mfd;;^YKtOhQ5r!i56 zKb678s#^F-x4q*j*d6d%I@+4Np>-#>v3zg_I3-2yP4O%rcLp^i0A;cf2UBNjBX=D4 z5_vjGCepQ7;4XQCN%e{9_Xh4yyefEf$Kf=7D#$8r%}L_of0RH8F-HSJnKW-7Hljve z0%{DNrYMtzuu+8VLq$vj0@y?vgUUn#n5ZwfR!jq-ve-ltoyPK^QV=4`ouPyvlU^et zPE;n1%0f^t#LfzF;Pql5c6R1?-n&*P6A7l0STr^jVN;p@+*dg)ss;j<g+lcwGX0PM zlujduYJf>;K}2%scpV`!C<qF~V)8~B7m3%3&B>xt5g#@?fMuYiMfVzSX5mq$w-$Fq z@DA4Vk80~`>#Wq&)z;O~K%7G9L=p?JVUY2NDV>gR*{~2Ml|^L+Q7K#noPB6uBGii= zOk`3K@C+oSGFSixhd}|Hh=`shnN9=>A!be%{8l{TK&KMHZ{R{L7KhHp#NbQnO=J6T zNO&^puSMkJp7cbbqa-bVun1aqHs+S=oGkJF6d)32t~7=>5{h!TIL2iK>xNAY7*lA# z&bfvQR&tWeKnpGe4vUTYBa;jS0ZR&$CZiN8ctkmL3gU&*>1Z%6TmuKdQ0ZI`05tL3 znkI|v1C|R<;?U?6O(K(wSvb+3qQ4B-<KK7-I29GV<w~V6s4NOG)I(R_!yfDE0ajqS zhmM|sj=q7Oj)%FW^Unt1&8QhfWwK}}0}P~t*Z$di92U`=il3l~pWHzY|5N#RT_$@h zh3e%+qf_w{Ap^|EbL_u8<<I(K0S0vaV}!w&GC2$unMn%(kz{gYSZ{9QVY$xPakImE zn|01cs;ce`>?d=33nNt!DS*dofdGO-+;N&@ZsCU4!d4F5>W%~NVvFnQLuDXNHtPr* z?MxkQoHu){Tkm1%VrmCG1b8rJAYeqW(*6NNHjPB5A|$F0F^C4NhV4USgUHBp6b8yh zs6j+J2do<A1t=YO0RjX;w2<!-f{Ljl!3lI_q8x7@ZrtFSk_P6mm^j#Y2sW5MN}+j$ zA|UpGPy&Vu<e+TKh$p58QG>|zW&s<ec_D6yCIe`x{1Zih-dhc^nSi<={vb+|<v|(r zP(%{}c!7DdnV|tFjlo6&=o}Vza9$7I_}j7Z$LV5YBJLb*ZC!od_k;j{T#+B=#^&dZ zi9Jz6sAOzV6L%c%M=u(-CRh`QJOJG!>9|!Uf&%ko(ty_mgGlgKB61P@JrO5#_`W1R z6OpU1vJ#?64Mep6NDmVZ`vB`eG&K>`34>Cd(1hQlN&f5K#!d2j!*?2(^1QMC0h#Zz zbE7;)hPQ%~8n}3{79tA)&PfDubP_G4A2<Rl=k8iTfGpUlfK~BC`3D^MivDXD@b&+L zRq!lA32|Zr{{Ufp()Kyw2z<9={VjLH{0cZQhz7ugQO1P#@q##s>V<;s37eMdg}{3Q zIW*wH2zEIrjY#La9@jlNU?)bgt%(dAeSAFNi2${o%{ogD%)5a{POgF@3ufO+V27p# zU>hM2#UI<A>EN({!RBBmBo^2}nEVI=gmh<Mb<m2?xK<5(#1I6B?<VrY6v|<fe+Wn1 z-GO6)?pTRUPeB<}Ucj4VC6oN?=idLX^ZW@@@@D_cr6$KM%z+&@t^?eZ!~Pgf!jT`; z#5BWv2JE*$JHGe)$-(%YF&pFC8G!gFqYe+vzarxNlW6|1-h5sDJP6P|oZo9Qfl20p z87T(jzY$&jOl0{x`u@q4`<WSh*Zj{9GQL%CIq)sOlD~=bq664}K`6kEP{2cJOcn^E zfC?7*zI!f_Oh&mUOnRsW=IGd7%Z(=>69SzWAcXLJ9)x4wC?JA-&lbpF29V}}nZYK4 zjjbH^6D7D}PKu7;Xf=NB0*9iBlNUc(12+AWYz_1QISG^fv%}Pnr+t{@{NE)xn9X3O zGbwMu4%Cw}6AJIh1KtBE6^98-2Fn|{=?Xu&`=4YwxNvaFW`_n~DaV9u;=;plmW&{r zfg6sSd{0WXzh~IIRGgb<b6CttIXU;IiPRgnRRd@4Pr=3-KLi0=)FgwiiQ@xq>-at1 z|KvX`V7Gs30zRxjmMV~tclYrW?5<?+Cn-<~zOlQLR|@mQml(grefoGgHvYkh@!Ids z<du=LdF7|^GNg0JZ4B+So;!B&i_EL+!VV*%bmxPI3yO;OE-teqOrKJ*@!V!bvUqpm z>vFx`{IX|b5w9BBZ_Kl{apbI5|EO4%o7wnT_tkUl%z#t*Qj9KvU&HR%ITu80$#vU% zpBmomOitXSxiszQwN@2V=hU~iCCfrC	uH>K1uL*pYBxy2_p&v*@0~Pi7tpEbc-` zQ%F@~b*ncDtE3r<PF>}_rd(>nVePsYMQOp*%HvJpqT!|1W*6==M_SU|D>bujndms! zX^hlcoK;6&t#K>wks~df_V8?i@-F#%ZUv3qR%-$%YkdwMSfpn?XE9mZRsCSn7R~H} z>&=fzE_F}n@n2q)eOv#zW3Y6)Ol)0Kj`dTE+^G%PLPj>xp8NO6KaqZaG&GX(!TCW& zv|xml^IV%2bE%J8J`9|ftx%iwMSpdp)q<yS^|iqZlPOZ_%}b*>h9`s2_Mo|I>~^I@ zEwVluzWdUw7WX2LHx?mQSqbspW@1)Zau#}<dS+VKUaZ}!^sKN~CVO+{nIUI&CxwH# z(@DanLdyxbby5{L2iuz^XPy>}1{3b=G}Zgd;atUx>P4RGs|@rLH9owRCE=Toj`SAa z7`a4qcpX}!E;AYxx^fhK?#U4;`t0QrXDf3^tX=ZXhh;BDwp|(27s<JMDX{d>tL0jq zvp$!mMN`Y_qs2X2^JVS)7z%Zrr_OmLSRXmCeUBD-M7&Zb_lRQk0u7l1K_2**o3<!x zbj+yEVx8W8x!dQX+~FaM-<x0ERU+FN8gjOJsyjF|1{FC<l~SbKENyE$SGC6PPkb#b z*%b8`Io%DER$Ba7mjlf!rpatxv-^zsywA4Po9>pzmfU}EO89)y1Ft%V4iEj~^LF9O zt;4j-6_#}$OY9u^jeu(wKeU8Vu(ZnCX=osIakjgEb<QS-v3G-|YpZVF%W(^9s99Xw zS3H>J{V_h`-lDd~;xuywvc&x5hF3;Ci^;#Wwhon~T=cGw(M8IN`$~6DKffUGF;y|I z{tn*D(Ir7BNNBfXh7Bjf{`IQPZ8b793tzwAdX8DQ#<=D9K9_L4`o5fLk=JujP&BI3 zLjAVoTl3GN1-4Iv@8qpBp_rK-kSG~Qzmh6Bvr{(x+J&X}1buHZwvJl$h3zy=>D*s= zPUv&Na8I9kfl)}!g=IZxi%wGlL0?R+M=`v5TiJz@PmaMRseSjJio9fq+?Jw?D+%vi zktL9Hvgq46`sHK70{6`c>)Q{yAPZL26prnh7h^I%OYaxU-)dfeh^BWGo*t%2y|M9+ z`qeKcax`O@U1l#hX42vM<ilK5mv=!!&DGN?8_%1D#uNvh*)1mWEp__4eS5afyYH|q zUrnj3LNM!$^|8Fnsw}l~;uL!?8*~f4LP2&zw~Hj9L3q6v>uJS>Xv>AZxqJNPo?SfR zE;D><R-jLHOi}!~MHJn;F1tFV>dAACOExDR%(ot0Bf48uDEd*k;<V{iva!P6Pb*g? zrkTIJUv601pFI1;!II!r)kfFnclyL;-tBsJwJ$bi^_>Q<gq=RdE8>ZPXH1L`OQB`U z&Pn&NwgtA><`1L<Dvo{7amOvCm`bfyd1`)J@#Qo*^PYyJhx$5CZguTGGu^8?NzK)9 z$IJti$L#(cQts;s3l`p;XJ+5tb!@)Gr6^Q2P)4jWK76>^qUY*HMTzJK0ph!+t?6P) ze!39VsQdnKp;bLa9rw_O6(IJd=DLh9Yc#|zH}^)t$|~)?4H?Kr;z92@T63C})$g5M zYp3L1FErz<^+23eT)bS|OWmEDmmlh178WJAt!{1O8iGi3*ddWSWp&S15K0=0x8N)_ zEnZh2s(Eeoc-2n#Sx?2ENbX#)H{;rXkKG$p$GqRkTjxz@WH%{zgeVwU+zFN1FB_|P z%_!rxVxX~lpi)hWj=#dmx^I_E2U?$Rc&K=>f@8LgWSBp)`?6<5%PPI+$;_A9Mb9)j zi}Ls3{I91yT+kp4{%01M^5kn8Cuo=2$J6Q&dh*P}M}0OZ7X4nq9#c#8F}K&VySTJI z@!99Kdn*e|2<_2RH?^nc$M3dauF`R}%FsWzuh4Gk#!a5Nhw?ibZxUXLIDHjumAItm zdUs&?XT`ZXQ^{ANJ34!Byl@<vms8xCJJcDtu*q}bU{BKRsdiO`Q5unouT-cwk6NeO z_BD=2pS`+AJs4I)EPA{DMx1v{f7^}W?l(;KdGRRHEAN$BNO56*X>)aVU1icxIK4MR zqWLJ}XuHa^_rh)Zn_L_Od!OTEhTQL!Yv&m$Cpx~}(GZ?y-EVD^{C>Y>y8e>hixt%i z+~;JChKr`R9iZI}W_^CPbf!vv$;*%B_dohKDO{G9ZrimnF5tehTGg*ZiLEQ~C(GJA zo#eY}ziczuDRA>xe=_;asRrEvX6>QEa-ApT^jD>-27!hr?@QbD?#)IQmK-C{#cPXJ zhaP!%rK<JDiI%VPKjJJRYSMg10=Im+cVer<M^~pdl^0*xqsaMDOLf1~3InXH2h8~P z<`YR*)sK3u&#d;{m^Y`oVXt{@4k01Vyu$qC&XxIw3e5{<55C-K?W(h<Ux}k}tY_DZ zkLSA1Y#usrHN8rgF!$>Ijtmccl-zwmwKH?tpNT)0FD~{_%zKXebg1^c+}cO-tNP+z ztuoGUJ8+@Dcg6Kl&m9jVItI@*-u!BM>Cwv^wDU`9i|@75Pgqm^4}6=e87!iyL4B!` zCiw6od(qnW`@Lc^yB}tz#y*&GePh39WsbtjLr27Fdfp4Btxr>(yQOGX&@D5!?W~b$ z-us2}&`Z@>ZnI8z#f<f+7FC2DN$yv+-%<A^-saKPy}w?W=h5^==g`N--cN6>zFPK% zhIjcUpYP2uKELkPYFu#zt0LSN_h3GY8OCXATOAxx;n%vQjO-N7SoYhuW4k2kjX7Pi z(M*k#+s3pcir$3EjN;vL0xzwQyZqcEX#3I9ROzn;oWO`65_JWs>a5Y1cX3Wfl{nvy zbIb@Y(YqmSdvmv^_l~}5z{f89C9y9&emCQ4ypAnVza{gnib=XN%XLtU8GoeY7V+Ia zhyLWpF>&*@2@IOga@q0NoHgv5anJZur>IUU^~#ruoHf6@IX;SdJ$v6nRri5c*(+sC zNGdte>2yhwQIUeo=EssM8?zWTB)^k2x=#dMq;;O`oaJ7=*KD(Tt4dGR@5kp?agL$- zq+_;nn^|egT?ku@h1ZNICB&+z7PdFJvewpH+|T$}E~2;n<7>(G7e?8Pq4`U6+x4^i zrZGt&cOKr~HfZU)Ni`<1@K{?|dUs6tVIk{id6_xW%0`xvPMxY?=PYr(vBAdH=eR-E zqi*?Y`K38#J7+~X5nZGArx3KA)npC)@)B;@uXrncvCcTa=#zZ2?r~P;g$%S>W#)6U z0?|Wnw1xz<7au7RYB$KgYWDKL>5;37oW3Or93|O-t-stO<DNJttoJ@>S?j$e8z(D< zlRP4j-$KbV%pP1jU8p7aazKhNso5kVGcls=OtP`eug2HYIKPOMq!e7d&e0MH-_3gB zrQ_sQe`FqEI(4{!P+T%KB6h#F8^d)-gd%fimHPX5(d2j$@%>|x%{|R$Gm;Yo!a~Lv zl8Y*?Ts$2%wb)`k^^R2jYl<D?>3L`OCRsN#^^>9r%a^*=T{~Ir)wry3!}hyLkL3*1 zZtZAkFloCgGm>HwvqMH>$~NN=+f3Dt*<-!I<!AjmPmBqE?IjtU6b$K%Inc27x`{-j zs`1HI-`2KYM+oxg&PAVJl3R=(4w1f_80Tm%UwA^9b~m+~tQL~8@LkV?QbLvMaJQxD zu;8<T;)WLmi^a^REPC{!o_<zpL<m{>`R#^ak1p#}wT{@$=!&9)kKNPml=(L3Y$-T7 zn7-ZKEWtXh#i!Oct39P~-+ce}Sywvd<QWE-Ri2t9h1((iVctyxT%A(<Bk57~(j5wc zIm(8QKM3_`ZHO$dd85{Y>O`J;G5YkiHuYuMZ0lR@U!87UkQPXjs3XT4s|ygajg!h# zgxdQ){hC)ym^;(=^#_eN7j6yiwA{Rq;J!qlka$p4;?a^RU2ExMsfA;z?MJ_c4H(9K zd9Pb}qVm(*Tj|~RR$LSA^Sdt)CwWi{7g1`<Or%F->b^I;r1DYhYxrf<Mf97F!h@iI z+saN%+is@<pWE5S#~49nYcE<ajBV9z<ov#*W7g_Z^-aAaADSih`q>`sT(;%@wQD77 z6tqM>Y%aKP>1y&txk}gAfYn(W4diQ$<(Zl7%j?QLy{^nX?SYRhVXba_y}I9V?8w{C z!nYszbxY?8zn}4GYE18o;B2s5_8V;1UDuXauye}{$JXis%^q<xx>VXD+zQXCy)t#n zmMf>+*f-=dGR-}2MfdcPX$nyfBbAyKq}r(a4%+l)z7A{Dle%3Rm7%n9x&x}dMc{hI zuKC`!GnCsm9it`hdDCOoJ4IzM%Bih&=@Avu?HNbxpL%TH)ujIDhMr3kr~B-yS)7r- z)Th~>9SD>=RAYZ;jdzTdeDu}hXKhGqeaFn})zKq|B)UR~x%U$oHq}`YU5<10jJ7S+ zt6Hnu{cgYR@US#~{s|kOgtoNb<w#Z@52NFDu1NMtGhI-9Yq3FT@{V0rQRfRTTQ<;E zI-GdtQGCbia?(aYn(k@V)ueta9bNfM<zTU^=3kc;>DH<b8P&uI$We<=mU={QDB0k0 zT-vy~!>M(~K)i=&Uv<(Om$YpgMDvFe)3&Bc3$BrKC`*_7P*%2Bc9UO^zyH(gxla!y zC%$`-h!>sNsw`tvk5+Z!B5D|`gk+cZMGVS?zKdCn+})JilCwI0<qCs@e#u%ZzxVj$ zJq-KLPPq~}Ih|-|#ivadf~*uz4T_5e#?!<FLON|dBO}e-CFBHVdI@Tx<9T8LXjtB0 zG%HeDBKGl{bR^2a(>rhWJZ%R-<(cy;1?N5!5N#2ZDx9j^l=S%FrT*X2FKK!IQXeVy z#BnCYBYt1@$ZXB9xZH2+W_RA)Jzw+vy0A}+lvK*nR`$kf<%W>m5{=fC_e9CAX<yz$ dSooW9;HEh<;>Tweio`P3DdTx9_WYmp`7d1O`MLlA literal 0 HcmV?d00001 diff --git a/src/outils.py b/src/outils.py index ea78ff6..7116f09 100644 --- a/src/outils.py +++ b/src/outils.py @@ -1391,5 +1391,250 @@ def remove_empty_conditions_place_anker(code, workflow): code = code.replace(OG_anker, new_anker) code = remove_empty_conditions(code) return code + + +def extract_single_quote(text, start): + end = start + code= text + quote_single = True + + while(quote_single): + if(code[end]=="'" and quote_single): + if(code[end-1]!="\\" or (code[end-1]=="\\" and code[end-2]=="\\")): + quote_single=False + end+=1 + if(end>=len(code)): + raise Exception('Unable to extract') + return end + +def extract_double_quote(text, start): + temp_start = start + end = start + code= text + quote_double = True + + while(quote_double): + if(code[end]=='"' and quote_double): + if(code[end-1]!="\\" or (code[end-1]=="\\" and code[end-2]=="\\")): + quote_double=False + end+=1 + if(end>=len(code)): + raise Exception('Unable to extract') + return end + +#This function extracts the tools used in a script by running each line in the bash script +#in an empty bash envrionment using a singularity image (by doing this with parse the errors +#and extract the tools) +def extract_tools(script, extract_general_tools = False): + #If we want to extract the general tools we define a list of the general tools 'to remove' from the tools extracted + if(extract_general_tools): + general_tools = [] + else: + general_tools = ['cd', 'cat', 'sed', 'echo', 'mv', 'mkdir', 'cp', 'awk', 'touch', 'tabix', + 'gzip', 'rm', 'bgzip', 'set', 'grep', 'egrep', 'pigz', 'head', 'tar', 'tail', + 'gunzip', 'wc', 'ls', 'find', "sort", "uniq", "printf", "ln", "zcat", "which", + "eval", "paste", "tr", "gawk", "date", "tee", "trap","base64", 'parallel', 'time', + "pwd", "sleep", "ssh", "cpu", "fgrep", "bc", "chmod", "whereis", "conda", "wait", + "split", "git", "join", "unzip", "wget", "print", "rev", 'rmdir'] + + OG_script = script + script = " "+script+" " + + #Detecting cases of none bash environments + python = ["#!/usr/bin/env python"] + for p in python: + if(p in script): + return [] + rscript = ["#!/usr/bin/env Rscript"] + for r in rscript: + if(r in script): + return [] + perl_script = ['#!/usr/bin/env perl'] + for p in perl_script: + if(p in script): + return [] + + + tools = [] + + #---------------------------- + #"CLEANING" the script + #---------------------------- + #Removing the curlies and the elements inside them -> to avoid the errors not recognising the variables + searching = True + while(searching): + searching = False + for match in re.finditer(r'\{.+\}', script): + start, _ = match.span(0) + end = extract_curly(script+"\n\n\n\n", start+1) + if(end!=-1): + inside_curly = script[start:end] + script = script.replace(inside_curly, "") + searching = True + break + #Removing the triple quotes from the script + script = re.sub(r"\"\"\"", "\n", script) + script = re.sub(r"\'\'\'", "\n", script) + + #Removing elements inside the single quotes + searching = True + while(searching): + searching = False + for match in re.finditer(r'\'', script): + start, end = match.span(0) + end = extract_single_quote(script+"\n\n\n\n", start+1) + inside_single_quote = script[start:end] + script = script.replace(inside_single_quote, "") + searching = True + break + + #Removing elements inside the doucle quotes + searching = True + while(searching): + searching = False + for match in re.finditer(r'\"', script): + start, end = match.span(0) + end = extract_double_quote(script+"\n\n\n\n", start+1) + inside_double_quote = script[start:end] + script = script.replace(inside_double_quote, "") + searching = True + break + script = re.sub(r"\\\$", "", script) + script = re.sub(r"\$", "", script) + script = re.sub(r"\(", "", script) + script = re.sub(r"\)", "", script) + script = re.sub(r'\(', "", script) + script = re.sub(r'\)', "", script) + script = re.sub(r"\n *\<[^\>.]+\>", " ", script) + script = re.sub(r"\<", " ", script) + script = re.sub(r"\>", " ", script) + script = re.sub(r"\&", " ", script) + script = re.sub(r"\n\s*\\", " ", script) + script = re.sub(r"\s*\\", " ", script) + script = re.sub(r" then ", " ", script) + #Repalcing xargs by nothing + #"xargs" -> is not really a tool in a traditional sense + temp = script + def replacer(match): + return match.group(0).replace(match.group(1), '') + for tool in ["xargs"]: + script = re.sub(fr"[^\w]({tool})\s", replacer, script) + + #Removing the pipe operators + searching = True + while(searching): + searching = False + to_replace = [] + for command in script.split('\n'): + if('|' in command): + left, right = command.split('|')[0], '|'.join(command.split('|')[1:]) + if(left.count('(')==left.count(')') and right.count('(')==right.count(')')): + searching = True + to_replace.append([command, f"{left}\n{right}"]) + for r in to_replace: + script = script.replace(r[0], r[1], 1) + + OG_path = os.getcwd() + #Change working directory to the one of the file + os.chdir("/".join((str(__file__).split("/")[:-1]))) + + #Get list of files which already exist in folder + OG_files = os.listdir() + + #Create empty output.txt file + os.system(f"> output.txt") + for command in script.split('\n'): + command = command.strip() + os.system(f"> output.txt") + if(command!=""): + if(command[-1]==";"): + command = command[:-1] + if(command[0]=="&"): + command = command[1:] + test_apptainer = True + #In the case the command is "var = ..." we don't run it + for match in re.finditer(r"\w+\s*=", command): + if(match.span(0)[0]==0): + test_apptainer = False + #Running the command in the empty environment + if(test_apptainer): + apptainer_command = f"apptainer exec ../ressources/empty.sif {command} >> output.txt 2>&1" + f = open("apptainer_script.sh", "w") + f.write(apptainer_command) + f.close() + os.system(f"chmod +x apptainer_script.sh") + #apptainer pull empty.sif docker://cfgarden/empty + os.system(f"./apptainer_script.sh >> .out 2>&1 && rm -rf .out") + + + #Parsing the error to extarct the tool + results = open("output.txt").read() + #print("*", f"'{results}'") + for pattern in [r'FATAL: +\"([^"]+)"', r'FATAL: +stat +([^:]+):']: + for match in re.finditer(pattern, results): + extarcted = match.group(1).split("/")[-1].strip() + #List of things to ignore -> these can be detected for tools -> obviously they are not tools + random_things = ['if', 'elif', "else", "done", "fi", 'do', 'for', 'module','then', + "def", "{", "}", "end_versions", ":", "stub:", "stub :", "__pycache__", + "cut", "source", "export", "[", "]", "$", ",", "case", "esac", "exit", + "cli", "e0f", "gnu", "env", "!", "function", "readme.md", "false", "while"] + to_add = True + for match2 in re.finditer(r"\w+\s*=", extarcted): + if(match2.span(0)[0]==0): + to_add = False + extarcted = extarcted.lower() + if(to_add and extarcted not in random_things): + #If it's a parameter + if(extarcted[0]=="-"): + None + #If it's a script -> we get of which kind + elif(extarcted[-3:]==".py" or extarcted=="python3" or extarcted=="python2"): + tools.append("python") + elif(extarcted[-2:]==".R" or extarcted[-2:]==".r"): + tools.append("r") + elif(extarcted[-3:]==".pl"): + tools.append("perl") + elif(extarcted[-3:]==".jl"): + tools.append("julia") + elif(extarcted[-3:]==".sh"): + #For now the bash script is not considered + #tools.append("bash") + None + else: + ex = extarcted.lower().strip() + if(ex=="rscript"): + tools.append("r") + elif(ex=="bash"): + None + #If the tool extarcted is "template" -> we search for the script used + elif(ex=="template"): + for extension_search in re.finditer(r'template *[^\/\s]+(\.\w+)', OG_script): + extension = extension_search.group(1) + if(extension==".py"): + tools.append("python") + elif(extension==".R" or extension==".r"): + tools.append("r") + elif(extension==".pl"): + tools.append("perl") + elif(extension==".jl"): + tools.append("julia") + elif (ex!="" and len(ex)>1 and ex not in general_tools and ex[-1]!=":" and re.fullmatch(r"\w", ex[0])): + tools.append(ex) + #If the tool is java -> we search for the jar file in the command + if(ex=="java"): + for java_search in re.finditer(r'([^\/\s]+)\.jar', command): + tools.append(java_search.group(1).lower()) + tools.remove('java') + + #We remove the remaining files which have been created in the meantime + for file in os.listdir(): + if(file not in OG_files): + os.system(f'rm {file}') + + #Change working directory back to the OG one + os.chdir(OG_path) + + #Return the tools extarcted + return list(set(tools)) diff --git a/src/process.py b/src/process.py index d73f94f..8d4c181 100644 --- a/src/process.py +++ b/src/process.py @@ -5,7 +5,7 @@ import copy from .code_ import Code from .condition import Condition from .nextflow_building_blocks import Nextflow_Building_Blocks -from .outils import remove_jumps_inbetween_parentheses, remove_jumps_inbetween_curlies, sort_and_filter, get_dico_from_tab_from_id, check_if_element_in_tab_rocrate, get_python_packages, get_R_libraries, get_perl_modules, process_2_DSL2 +from .outils import remove_jumps_inbetween_parentheses, remove_jumps_inbetween_curlies, sort_and_filter, get_dico_from_tab_from_id, check_if_element_in_tab_rocrate, get_python_packages, get_R_libraries, get_perl_modules, process_2_DSL2, extract_tools from .bioflowinsighterror import BioFlowInsightError from . import constant @@ -578,6 +578,9 @@ class Process(Nextflow_Building_Blocks): # call.append(f"{o.get_code()} = {self.get_name()}.out.{o.get_code()}") call = "\n".join(call) return code, call + + def get_tools(self, extract_general_tools = False): + return extract_tools(self.get_script_code(), extract_general_tools = extract_general_tools) -- GitLab