diff --git a/parsers/en_en.py b/parsers/en_en.py index a721c9b407e3d2f84ec11cd313b3189eff68ff3a..c52dcfc5a91146d2352e464d2b6f653da7d3b241 100644 --- a/parsers/en_en.py +++ b/parsers/en_en.py @@ -24,18 +24,16 @@ class En_en_straktor(Wikstraktor): acc = None for j, t in enumerate(templates): if (t.normal_name() == self.constants['t_acc'] and templates[j+1].normal_name()!= self.constants['t_acc']): - acc = t.arguments + for a in t.arguments: + p.set_accent(a.value) elif t.normal_name() == self.constants['t_ipa']: p.set_transcription(t.arguments[1].value) - if acc != None: - for a in acc: - p.set_accent(a.value) elif t.normal_name() == self.constants['t_snd']: p.add_sound(self.get_file_url(t.arguments[1].value), t.arguments[2].value) - if j==len(templates)-1 or templates[j+1].normal_name()== self.constants['t_acc'] : - if p.ipa != None or p.has_accents(): - pronunciations.append(p) - p = Pronunciation() + if p.ipa != None or p.has_accents() or p.has_sounds(): + pronunciations.append(p) + else: + self.log.add_log("En_en_straktor.process_pronunciation", f"“{l.fullitems[i]}†processed as empty → {p}") i += 1 return pronunciations diff --git a/wikstraktor.py b/wikstraktor.py index ff1c57eb91557f276f4892a1c5f3df209ddbcee5..ad8513732dfcbfc7d9e942242b26a83379d14f8f 100755 --- a/wikstraktor.py +++ b/wikstraktor.py @@ -26,19 +26,20 @@ class SubInfo: cls.next_id = 0 def __init__(self, prefix = None): - self.id = None + self.id = self.__class__.next_id + self.__class__.inc_n_id() + self.label = None self.set_id(prefix) - def set_id(self, prefix): - if self.id == None and prefix != None: - self.id = f"{prefix}_{self.__class__.prfx}{self.__class__.next_id}" - self.__class__.inc_n_id() - return self.id + def set_id(self, prefix, force = False): + if (self.label == None or force) and prefix != None: + self.label = f"{prefix}_{self.__class__.prfx}{self.id}" + return self.label def serializable(self, prefix = None): res = {} - if self.set_id(prefix) != None: - res["id"] = self.id + if prefix != None: + res["id"] = self.set_id(prefix) return res @@ -82,6 +83,9 @@ class Pronunciation(SubInfo): def add_sound(self, url, accent=None): self.sounds.append(Sound(url,accent)) + def has_sounds(self): + return len(self.sounds) > 0 + def serializable(self, prefix = None): snds = [] for s in self.sounds: @@ -365,7 +369,7 @@ class Entry: def add_pronunciation(self, p): if p not in self.pronunciations: - p.set_id(self.get_id()) + p.set_id(self.get_id(), True) #pro often parsed without context self.pronunciations.append(p) def set_senses(self, senses): diff --git a/wikstraktor.sqlite b/wikstraktor.sqlite index b60d5703a8684738f7a25ed782234ade2c97f3c3..242a322cf854b7548f263467d6032ed6a7ffdb17 100644 Binary files a/wikstraktor.sqlite and b/wikstraktor.sqlite differ