From 842647de89255b65eb05b69f21b1dda5f7d80c0d Mon Sep 17 00:00:00 2001
From: Mathieu Loiseau <mathieu.loiseau@liris.cnrs.fr>
Date: Mon, 19 Jun 2023 13:30:24 +0200
Subject: [PATCH] =?UTF-8?q?Pb=20sons=20ignor=C3=A9s=20EN=20corrig=C3=A9=20?=
 =?UTF-8?q?=3F?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 parsers/en_en.py   |  14 ++++++--------
 wikstraktor.py     |  22 +++++++++++++---------
 wikstraktor.sqlite | Bin 20480 -> 28672 bytes
 3 files changed, 19 insertions(+), 17 deletions(-)

diff --git a/parsers/en_en.py b/parsers/en_en.py
index a721c9b..c52dcfc 100644
--- a/parsers/en_en.py
+++ b/parsers/en_en.py
@@ -24,18 +24,16 @@ class En_en_straktor(Wikstraktor):
 			acc = None
 			for j, t in enumerate(templates):
 				if (t.normal_name() == self.constants['t_acc'] and templates[j+1].normal_name()!= self.constants['t_acc']):
-					acc = t.arguments
+					for a in t.arguments:
+						p.set_accent(a.value)
 				elif t.normal_name() == self.constants['t_ipa']:
 					p.set_transcription(t.arguments[1].value)
-					if acc != None:
-						for a in acc:
-							p.set_accent(a.value)
 				elif t.normal_name() == self.constants['t_snd']:
 					p.add_sound(self.get_file_url(t.arguments[1].value), t.arguments[2].value)
-				if j==len(templates)-1 or templates[j+1].normal_name()== self.constants['t_acc'] :
-					if p.ipa != None or p.has_accents():
-						pronunciations.append(p)
-						p = Pronunciation()
+			if p.ipa != None or p.has_accents() or p.has_sounds():
+				pronunciations.append(p)
+			else:
+				self.log.add_log("En_en_straktor.process_pronunciation", f"“{l.fullitems[i]}” processed as empty → {p}")
 			i += 1
 		return pronunciations
 
diff --git a/wikstraktor.py b/wikstraktor.py
index ff1c57e..ad85137 100755
--- a/wikstraktor.py
+++ b/wikstraktor.py
@@ -26,19 +26,20 @@ class SubInfo:
 		cls.next_id = 0
 
 	def __init__(self, prefix = None):
-		self.id = None
+		self.id = self.__class__.next_id
+		self.__class__.inc_n_id()
+		self.label = None
 		self.set_id(prefix)
 
-	def set_id(self, prefix):
-		if self.id == None and prefix != None:
-			self.id = f"{prefix}_{self.__class__.prfx}{self.__class__.next_id}"
-			self.__class__.inc_n_id()
-		return self.id
+	def set_id(self, prefix, force = False):
+		if (self.label == None or force) and prefix != None:
+			self.label = f"{prefix}_{self.__class__.prfx}{self.id}"
+		return self.label
 
 	def serializable(self, prefix = None):
 		res = {}
-		if self.set_id(prefix) != None:
-			res["id"] = self.id
+		if prefix != None:
+			res["id"] = self.set_id(prefix)
 		return res
 
 
@@ -82,6 +83,9 @@ class Pronunciation(SubInfo):
 	def add_sound(self, url, accent=None):
 		self.sounds.append(Sound(url,accent))
 
+	def has_sounds(self):
+		return len(self.sounds) > 0
+
 	def serializable(self, prefix = None):
 		snds = []
 		for s in self.sounds:
@@ -365,7 +369,7 @@ class Entry:
 
 	def add_pronunciation(self, p):
 		if p not in self.pronunciations:
-			p.set_id(self.get_id())
+			p.set_id(self.get_id(), True) #pro often parsed without context
 			self.pronunciations.append(p)
 
 	def set_senses(self, senses):
diff --git a/wikstraktor.sqlite b/wikstraktor.sqlite
index b60d5703a8684738f7a25ed782234ade2c97f3c3..242a322cf854b7548f263467d6032ed6a7ffdb17 100644
GIT binary patch
literal 28672
zcmeHPTWllM8Fn_#&AGAA<<b^7%@&eul4Zu@>nvNyW(^JOF0e`1rop6VJjeDZ;~6tE
z-Vkua+MueHP{l=RUwV<+he{QqQW5X;rBX}#-iyS;N^M{Ig4BzuN>%?e6Ca#OX0c<&
zYs!p>*K;%9|DFF_{_p>1R-U?`SOn#Dql7Iagl-A-^@ScrC=?1k4Sz%M*Vy{tOSrKE
z|I+=B+nx@czVg^OT-7)6UI_jRwt#_vfq;R4fq;R4fq;R4fq;R4fq;R4fq;P{F;G9-
zH+09zlYOr&T3Axa6=ln`47{c4g{H5^mM&)&S2JjJ@!W+Bnrb>Tg=QinQ|GZorbf=g
zC9BI9Gb86Sk1VcTSVc=~moH~7t!~nTE31napPYwl8#i0Jv3ZpkrlM<8BdeLOt)fd$
zz~9=13-(pIA#bW!D^zd+e)`~fWlOo?d3rw$%DwI_6A0KfqFI}YJT<a>X*Kg`=JGxW
zOITHju~{NkQHM(?1d41)*DScrIS`p00M^5kf1}(FP5x){pOYUtJ4b4a;HH3qfPsL4
zfPsL4fPsL4fPsL4fPsL4fPsL4fPr3MVDJ*47<dpq!gKJ^e>=_1-yV7=G<jy?>iDn5
z?;Kkh{Ym5>k<7^VhW|R89{Tp+p9bd!cEi65-`lSr`{l9YeL1-J=5Dox+ap5u`0?Aq
z%eB!c7Znz`_=3P8o)fvW$R{u-=aR`pD$Pk8FAHfRkwhXUMC0(m(fzm-OC*yq0`iGL
z<Z$-sYfYlg>9V2ch-q%hB(G?S1^UkQYu`sNzNku7qE)exD`F+Ly9-B%wot~Vh4g$C
zeuwPtUcdGOw0Ap^5jGJim93rH+zEzv$b~m0#-l#r#i~UN4O_}pg3K~eRYfaMs9*%3
zl|9iee2$@Yv+E<;4%oeo;XTN1wCC8pm7zVzu5V~NVD~t~+srO>w0K(NxORQ_nOEO!
zrLP^?cW+^6o7wdlZ3pa5GQ4hfQz9qy8oLt=t(#pR(RRS@IK%5^Hz^7|$L<(I>t@$S
zv>mWJ%J90`O^96Ev#{SILxiDqv+E<;4%i)Gc-`#A#8|r5MuuUA*3GVuXggqch~agy
z8|B1!&-v~kL+fJKPqas8_uK%(8*f_g!Yu4R-erS8+EuQuJzkXv<YyuMYE~7^u5VWD
zRXb0Yzept|UBfq8w1=4y`&=X?#*$(*)@vQGpP}97BL1T7z(tNRy!%|F8+bo47dhAG
zX+%tn#(i=ThxAX%2pa#N2z?Nmygc#a@xPCsAN%g;Uq(}rZ;kwM<kWC==(j`n3~B@K
z4croz`hU?MJ{EyXKSf)8cFu016XE5T`<jL#J}$;mBHvaoR`shSoW1S$t;8>yt}PT)
z-89ihDnfP5bVLEGNYKVc4o*Wvp3Tl9iNNqgkaOHr3J|nc3_{Q7d4%Cwl~@qV(*pz|
z`J4ik9XoKBwM~eI3c4<%yrB?HHj!?iJT574x<pD6l&8RN7)aW&e|$%;Xob_Bf&ccY
z23PgFyU3N}&pBOH_2(*JT6IY;!PvY_AP6klBle+eE+U|$L>Y2y2a~ZZ8=&0GBZCyM
zA*=A+K2HeMK1^@R=Vn<)ZXWA-0U}&jY^6DZbFo|^DaX<hmSU->Or$slj^uO<gddU=
z=;2(Og3^(L+!=<pd93#t?a{~jxoL*?5O$-z#_s(L?IG;?g|-8B?_+omvMUI^#_pFH
z+Jo%+hPDHCPcpm**^TnO#_kkDdyrk<(00J?ml)oC>_)|ST;zI=UBuAtW7l7_9k6>Z
z!|P@jCbnd+`R+Xot(#pR(RRS@-3;$Ob~#x2i_t`{vHL}ab|1U`qV0g)FEG4rcIl$5
z-O#GV6yK3oWS?hf-R$~^wgYzWVt8HbLJqa}5|Vc^v@Ul2MB4$ocQCvzcKu9GAJKPd
z{C_<3T4-`<;?40tkI#&4jlLcEaO7XZ?+;H5zBO<re69aS$EtlV!6jYUYU+bFdl0eL
z_t;A;@M#3p2{FhyN;wjbCpbQiQ@l(#o`+n1TIS-3crwS!NiLQbganp28jHyU8(~~)
zeq_TlyASbtb<ueZf{L~=1k5@zEo@k5M$f|o69~hnfu^TRJJZv%jZl-O)J?raBwgM?
zx<-(yY!N#OcS2ATZ7G_p?s$iuFPvfKo^*oln{93xWD7;2;q=E^3~!8}qJ=~Xil#Z`
z1)x_rYQ;QNAaq2moTu@1Br@am1I7!9cG`Noh^;W57;8nHak}<JcsfT_%wlx~kkc=Y
z)&PExDV2-ZR9=9Vq~=*h#VXoTj0Rc^HcevCs*r3Jl|VqIjsj{9JXK?0S;<*&7;A`J
zRb+U|23X8v)g(|0VpgCQ1a3n`qRI_L2L89;Tg33BsP=5SK~Zh1ixkoBu&jYSP(K1b
zj!~gvt0@M;WvFm@UV*oTfHAhZgi#q+z(yd#GVG3HkQ_)<Xis*h7kV3*N-+cP?XrQ3
z&`!^nsboak1(YPSVw4R9OaaYIR!j+WBa-)oUe}Th=&$TeOig{)Kli0=V{3_fx*_my
z5a_jbtrAvIxoBXgIiUQAIx^jX?qQEqR*|-+0-lYxU{q5yXf<$N6WrCXo0OI<VU)95
z6lgFAHg)X`s+bj9b%chN;V!tRBq_AI2$<Vepnpu$&uKJc5~^;}?r0y+W@qQ2L&&I9
z$rWK}07(vR1$_^?hJBw7UCd~<z+0!QZzdXqzg+6Fuk4KpradOOx!l;$bc<~3hE?2w
z{tvWmY#5}3jV=02A{+t2o&)|A3uXy$EmANDnWskykOt;M|Dq}`oRhFjr=K%0wor)|
zc=zBt2cr;%UvT)72~|TC7)n6?Y!Zp5@$|gIx-*MS)7fg;)1_--yuono;B>^dH6q)3
z`8TYew=yEb;>DhOX>ppW=SkH%3w~KPF!=5~GNE{_s>t*Z{puxP*Joo5@C~uQsXzJ$
z{!n?^?x4_>=mgS}V!zPfsv-7wg)3+3y{Tfq7_}`w3Du(~8iNRzMtnl#p{Na3ktCiV
zI7Si@)Qbv<R7yzXIG&3q2`@#HDV&$&Xfm0i3rzAn^g0dtwMoLu{Ie~3x>r>u`?VLm
zOG!ok2H^R4kAvqt!KXx_*O`CdaPR^oh@j|L&P5>sgh?u%N+eSGWKtrrn8b5wkTK;L
ziOUII5Tuk{bj%vMYi=`l+6GT6rdd!$eO?fS1jJSCfTtDHn?GIc(ARHVfxN)bvUQdJ
zXpbK7i}N;K2w5AGrc;mZbCUTJnKWtqe{1N!p~=~ajqx|f-WiLGu0?(p=^I%eerxFU
z!FLD#5dL@gu70ua1Gwn_ZmVT4?KRz6E!&t3Ky3hFDHerZeUu0^qiqsx(|!vDyd#1M
z4iGhI6%BItbjb`)6r6$w2#n%R3CfdT(es;Et*UvJ-`$NI{tN-S{>?Ygi&OBf2N6we
zmJN;PrsmNU#GIO$GnBFg$F+*8!Z8!F{IUuA8`<6Z`o)HQFRvZ6FYdE_MZKh#i=d0x
zHeSJ&Ro%8fbjuF5?KN>;Ax4#ki*`%^E824gL^5V^9xYZVi>iVl&b&mn(bu6Q07408
zXI(e8&=Ory;~Zq(Ez=1nob}c=tc0&$dvhCt9RfOpQJ~cmt&^K<h1=RQ7aH8wHoDL4
zN51pxH`?@_L`&vnA7|#dx`qG~p3s)-K(H8I4_A)VH7f7FR%)+r2(8)on5TVF5$5hT
zgvRae+Q+wK-}*AM&(-yz3Nse%9s459<63KNxBgWx+h7gPwIve!+ji~SR=@MnhHdqg
z{lf~M5~C@f+im&DMLHinuuZ$&12AOLc?wr#MW=`1iMNWmKq)(|7eFwl$I;B%%Iw~S
z9}P-Pz1Cw&YoD6hsYh&T=rVSoi5i+4h(k4BJJf)%8OGK1mX@)SWPqOQXkE?&bZ|E@
z%8IID!%K~*-Ip2JdrEQlW@F9U>h|F)i`xC?8#rIO**Hl-H+DP;ompb7zgNr2B#W-M
zJ_d0QYo1V}=NMF`G3+I=x(`*|v6LJYd7&I0+}0EH{fj;02)JBPVTtJBvDt?iyY8Hb
zkL)_}p-ErrcB86PSh5mgAq|%xjgYtXKiQ}hKm??3VuOJL)}*i^&St$u*vv!BzBAqH
zl6%rz5mFxNPF2Gd@EYtqGGP`$+NJ_6oddvx1$}w}@_l3h6lm4-Y^TS+V%tIE;*84<
zKU2AmAu<fn@Q^diUankLBgr$5$`C!+87#>4*lDe+SoLm|)w9f+D_yk4PE13h2Es?^
z#cgG#ArA>DIASOehI%y;jsJ&+-wsXuaC~9R7=1mWjeG}ogDqenU?5;1U?5;1U?5<i
zM;NGO*Y*}+F1?-?qiHe5wW9#|EBqV~>{}DIva6o<(G=;Y-M%_nX)L1a%@u#0<Xc*i
zaGa3fQ&RDFxk1ssia)<8az#mgR{TB13~a9W>xO|HSNuKMfWEKduN%-mqZNNoGzi~U
N@z)K)KN}T){{xO$04V?f

delta 77
zcmZp8z}T>Wae_3fF#`hw>qG^6M&peMOZa)1_`d-K7+Cp#^MBi{C~%H{VuSSNpYq%a
hLhM{08Tg&~Ci6b!)!@nIUdQ!uW8rPC%^$t^Spiwr7ry`i

-- 
GitLab