diff --git a/parsers/en_en.py b/parsers/en_en.py index a721c9b407e3d2f84ec11cd313b3189eff68ff3a..c52dcfc5a91146d2352e464d2b6f653da7d3b241 100644 --- a/parsers/en_en.py +++ b/parsers/en_en.py @@ -24,18 +24,16 @@ class En_en_straktor(Wikstraktor): acc = None for j, t in enumerate(templates): if (t.normal_name() == self.constants['t_acc'] and templates[j+1].normal_name()!= self.constants['t_acc']): - acc = t.arguments + for a in t.arguments: + p.set_accent(a.value) elif t.normal_name() == self.constants['t_ipa']: p.set_transcription(t.arguments[1].value) - if acc != None: - for a in acc: - p.set_accent(a.value) elif t.normal_name() == self.constants['t_snd']: p.add_sound(self.get_file_url(t.arguments[1].value), t.arguments[2].value) - if j==len(templates)-1 or templates[j+1].normal_name()== self.constants['t_acc'] : - if p.ipa != None or p.has_accents(): - pronunciations.append(p) - p = Pronunciation() + if p.ipa != None or p.has_accents() or p.has_sounds(): + pronunciations.append(p) + else: + self.log.add_log("En_en_straktor.process_pronunciation", f"“{l.fullitems[i]}” processed as empty → {p}") i += 1 return pronunciations diff --git a/wikstraktor.py b/wikstraktor.py index a7cd883d2dc1396461b7ef959a70350439835662..63e2ea2082094242f7b37b7f439ec07f47f45b4a 100755 --- a/wikstraktor.py +++ b/wikstraktor.py @@ -27,27 +27,28 @@ class SubInfo: cls.next_id = 0 def __init__(self, prefix = None): - self.id = None + self.id = self.__class__.next_id + self.__class__.inc_n_id() + self.label = None self.set_id(prefix) - def set_id(self, prefix): - if self.id == None and prefix != None: - self.id = f"{prefix}_{self.__class__.prfx}{self.__class__.next_id}" - self.__class__.inc_n_id() - return self.id + def set_id(self, prefix, force = False): + if (self.label == None or force) and prefix != None: + self.label = f"{prefix}_{self.__class__.prfx}{self.id}" + return self.label def replace_src_in_id(self, former_src, new_src): ##Attention si on nettoie en mettant des sources partout, il
faudra changer res = None - if self.id != None and former_src != None and new_src != None : - self.id = re.sub(r'^([\w\.]+)-('+str(former_src)+')',r"\1-"+str(new_src), self.id) - res = self.id + if self.label != None and former_src != None and new_src != None : + self.label = re.sub(r'^([\w\.]+)-('+str(former_src)+')',r"\1-"+str(new_src), self.label) + res = self.label return res def get_src_from_id(self): res = None - if self.id != None: - gp = re.match(r'^[\w\.]+-(\d{1,2})', self.id) + if self.label != None: + gp = re.match(r'^[\w\.]+-(\d{1,2})', self.label) if gp: res = int(gp.group(1)) return res @@ -96,6 +97,9 @@ class Pronunciation(SubInfo): def has_accents(self): return len(self.accents) > 0 + def has_sounds(self): + return len(self.sounds) > 0 + def add_sound(self, url, accent=None): self.sounds.append(Sound(url,accent)) @@ -249,7 +253,6 @@ class Sense(SubInfo): else: theDef = Definition(lang, definition) if theDef != None and theDef not in self.definitions: - print("def set id", self.get_id())## theDef.set_id(self.get_id()) self.definitions.append(theDef) @@ -394,7 +397,7 @@ class Entry: def add_pronunciation(self, p): if p not in self.pronunciations: - p.set_id(self.get_prefix()) + p.set_id(self.get_prefix(), True) #pro often parsed without context self.pronunciations.append(p) def set_senses(self, senses): @@ -835,10 +838,9 @@ if __name__ == "__main__": \033[1m\033[32mex :\033[0m ‣\033[0m\033[32m./wikstraktor.py -m blue\033[0m ‣\033[0m\033[32m./wikstraktor.py -m blue -f blue.json -A -C\033[0m - ‣\033[0m\033[32m./wikstraktor.py -l en -w fr -m blue -f blue.json -n -A -C\033[0m - ‣\033[0m\033[32m./wikstraktor.py -l en -w fr+en -m particular -f particular.json\033[0m""") - parser.add_argument("-l", "--language", help="la ou les langue(s) du mot (séparées par des “+”)", type=str, default = "en") - parser.add_argument("-w", "--wiki_language", help="la ou les langue(s) du wiki (séparées par des “+”)", type=str, default = "en") +
‣\033[0m\033[32m./wikstraktor.py -l en -w fr -m blue -f blue.json -n -A -C\033[0m""") + parser.add_argument("-l", "--language", help="la langue du mot", type=str, default = "en") + parser.add_argument("-w", "--wiki_language", help="la langue du wiki", type=str, default = "en") parser.add_argument("-m", "--mot", help="le mot à chercher", type=str, default=None) parser.add_argument("-f", "--destination_file", help="le fichier dans lequel stocker le résultat", type=str, default=None) parser.add_argument("-A", "--force_ascii", help="json avec que des caractères ascii", action="store_true") diff --git a/wikstraktor.sqlite b/wikstraktor.sqlite index b60d5703a8684738f7a25ed782234ade2c97f3c3..03e32cc16f9aa765795c30f9feed594c88275095 100644 Binary files a/wikstraktor.sqlite and b/wikstraktor.sqlite differ