From 7c707d79f7c212ce4bcddff8e495f525d4138896 Mon Sep 17 00:00:00 2001
From: Mathieu Loiseau <mathieu.loiseau@liris.cnrs.fr>
Date: Mon, 19 Jun 2023 13:39:13 +0200
Subject: [PATCH] Reporting manually 842647de89255b65eb05b69f21b1dda5f7d80c0d

---
 parsers/en_en.py   |  14 ++++++--------
 wikstraktor.py     |  36 +++++++++++++++++++-----------------
 wikstraktor.sqlite | Bin 20480 -> 20480 bytes
 3 files changed, 25 insertions(+), 25 deletions(-)

diff --git a/parsers/en_en.py b/parsers/en_en.py
index a721c9b..c52dcfc 100644
--- a/parsers/en_en.py
+++ b/parsers/en_en.py
@@ -24,18 +24,16 @@ class En_en_straktor(Wikstraktor):
 			acc = None
 			for j, t in enumerate(templates):
 				if (t.normal_name() == self.constants['t_acc'] and templates[j+1].normal_name()!= self.constants['t_acc']):
-					acc = t.arguments
+					for a in t.arguments:
+						p.set_accent(a.value)
 				elif t.normal_name() == self.constants['t_ipa']:
 					p.set_transcription(t.arguments[1].value)
-					if acc != None:
-						for a in acc:
-							p.set_accent(a.value)
 				elif t.normal_name() == self.constants['t_snd']:
 					p.add_sound(self.get_file_url(t.arguments[1].value), t.arguments[2].value)
-				if j==len(templates)-1 or templates[j+1].normal_name()== self.constants['t_acc'] :
-					if p.ipa != None or p.has_accents():
-						pronunciations.append(p)
-						p = Pronunciation()
+			if p.ipa != None or p.has_accents() or p.has_sounds():
+				pronunciations.append(p)
+			else:
+				self.log.add_log("En_en_straktor.process_pronunciation", f"“{l.fullitems[i]}” processed as empty → {p}")
 			i += 1
 		return pronunciations
 
diff --git a/wikstraktor.py b/wikstraktor.py
index a7cd883..63e2ea2 100755
--- a/wikstraktor.py
+++ b/wikstraktor.py
@@ -27,27 +27,28 @@ class SubInfo:
 		cls.next_id = 0
 
 	def __init__(self, prefix = None):
-		self.id = None
+		self.id = self.__class__.next_id
+		self.__class__.inc_n_id()
+		self.label = None
 		self.set_id(prefix)
 
-	def set_id(self, prefix):
-		if self.id == None and prefix != None:
-			self.id = f"{prefix}_{self.__class__.prfx}{self.__class__.next_id}"
-			self.__class__.inc_n_id()
-		return self.id
+	def set_id(self, prefix, force = False):
+		if (self.label == None or force) and prefix != None:
+			self.label = f"{prefix}_{self.__class__.prfx}{self.id}"
+		return self.label
 
 	def replace_src_in_id(self, former_src, new_src):
 		##Attention si on nettoie en mettant des sources partout, il faudra changer
 		res = None
-		if self.id != None and former_src != None and new_src != None :
-			self.id = re.sub(r'^([\w\.]+)-('+str(former_src)+')',r"\1-"+str(new_src), self.id)
-			res = self.id
+		if self.label != None and former_src != None and new_src != None :
+			self.label = re.sub(r'^([\w\.]+)-('+str(former_src)+')',r"\1-"+str(new_src), self.label)
+			res = self.label
 		return res
 
 	def get_src_from_id(self):
 		res = None
-		if self.id != None:
-			gp = re.match(r'^[\w\.]+-(\d{1,2})', self.id)
+		if self.label != None:
+			gp = re.match(r'^[\w\.]+-(\d{1,2})', self.label)
 			if gp:
 				res = int(gp.group(1))
 		return res
@@ -96,6 +97,9 @@ class Pronunciation(SubInfo):
 	def has_accents(self):
 		return len(self.accents) > 0
 
+	def has_sounds(self):
+		return len(self.sounds) > 0
+
 	def add_sound(self, url, accent=None):
 		self.sounds.append(Sound(url,accent))
 
@@ -249,7 +253,6 @@ class Sense(SubInfo):
 		else:
 			theDef = Definition(lang, definition)
 		if theDef != None and theDef not in self.definitions:
-			print("def set id", self.get_id())##
 			theDef.set_id(self.get_id())
 			self.definitions.append(theDef)
 
@@ -394,7 +397,7 @@ class Entry:
 
 	def add_pronunciation(self, p):
 		if p not in self.pronunciations:
-			p.set_id(self.get_prefix())
+			p.set_id(self.get_prefix(), True) #pro often parsed without context
 			self.pronunciations.append(p)
 
 	def set_senses(self, senses):
@@ -835,10 +838,9 @@ if __name__ == "__main__":
 	\033[1m\033[32mex :\033[0m
 	‣\033[0m\033[32m./wikstraktor.py -m blue\033[0m
 	‣\033[0m\033[32m./wikstraktor.py -m blue -f blue.json -A -C\033[0m
-	‣\033[0m\033[32m./wikstraktor.py -l en -w fr -m blue -f blue.json -n -A -C\033[0m
-	‣\033[0m\033[32m./wikstraktor.py -l en -w fr+en -m particular -f particular.json\033[0m""")
-	parser.add_argument("-l", "--language",  help="la ou les langue(s) du mot (séparées par des “+”)", type=str, default = "en")
-	parser.add_argument("-w", "--wiki_language",  help="la ou les langue(s) du wiki (séparées par des “+”)", type=str, default = "en")
+	‣\033[0m\033[32m./wikstraktor.py -l en -w fr -m blue -f blue.json -n -A -C\033[0m""")
+	parser.add_argument("-l", "--language",  help="la langue du mot", type=str, default = "en")
+	parser.add_argument("-w", "--wiki_language",  help="la langue du wiki", type=str, default = "en")
 	parser.add_argument("-m", "--mot",  help="le mot à chercher", type=str, default=None)
 	parser.add_argument("-f", "--destination_file", help="le fichier dans lequel stocker le résultat", type=str, default=None)
 	parser.add_argument("-A", "--force_ascii", help="json avec que des caractères ascii", action="store_true")
diff --git a/wikstraktor.sqlite b/wikstraktor.sqlite
index b60d5703a8684738f7a25ed782234ade2c97f3c3..03e32cc16f9aa765795c30f9feed594c88275095 100644
GIT binary patch
delta 258
zcmZozz}T>Wae_3X$wV1vMw5*ROYB8BxC9yao%ts7KIPTm$>v_i^^r?(W8qKE`o>se
zF5_51K~a{_##kc*BV%0yGhIVV1w%tCV{<E0gQVnCQ&TeoL(@bH!<1A5L&GE!Gs_eM
zQ!`WZWWyA51CumkW3$90gVem#yz>0qB$h~)QrEor)V%oOlA^@ylKdjQf};H7)Z*fJ
zAe~p5mz<ecl9`|Ps9}zlLUpxhQATBMYH>|!UQOxg3Ar`JwuZH}T$3+qh?yxoYMZ1`
zt(2LfWTl`4)SPFmq@$n&G&HX`xhS)s1Sp(Wnv(+*E6y*?ODP8Oqho6~fAr#K1ptfw
BQ{(^u

delta 47
zcmV+~0MP${paFoO0gxL3Gm#ua0W+~+q(2e|3h@9BLk^P-<qaqdYYVOl@v(8+3bXM_
F4+Uae51ar1

-- 
GitLab