From eaf3a4e857928eb9aaf018465f17c83845606735 Mon Sep 17 00:00:00 2001
From: Mathieu Loiseau <mathieu.loiseau@liris.cnrs.fr>
Date: Thu, 11 Jan 2024 11:45:31 +0100
Subject: [PATCH] {{audio|en|en-au-platypus.ogg}} sans 3e param

---
Reviewer note: this copy differs from commit eaf3a4e. Line breaks and
indentation were rebuilt from a whitespace-collapsed copy (context-line
whitespace is a best guess), `acc` is now initialised before it is read, and
the new method is documented. The blob ids on the "index" line are the
original ones.

 parsers/en_en.py | 28 +++++++++++++++++++++++++++-
 1 file changed, 27 insertions(+), 1 deletion(-)

diff --git a/parsers/en_en.py b/parsers/en_en.py
index 960944a..febb56a 100644
--- a/parsers/en_en.py
+++ b/parsers/en_en.py
@@ -2,6 +2,7 @@
 
 from wikstraktor import Wikstraktor, Pronunciation, Sense, SubSense, Definition
 from parsers.en_constants import string_values
+import re
 
 debugEty = 0
 
@@ -13,6 +14,24 @@ class En_en_straktor(Wikstraktor):
         self.constants = string_values
         self.site = self.pwb.Site(f'wiktionary:en')
 
+    def process_audio_accent(self, audio, file_name=False):
+        """Return the accent label described by an audio template argument.
+
+        audio is either the template caption (e.g. "Audio (AU)") or, when
+        file_name is true, the sound file name (e.g. "en-au-platypus.ogg").
+        Returns None when no accent can be extracted.
+        """
+        if file_name:
+            # File names are expected to look like en-<xx>-<word>.ogg.
+            match = re.search(r'(e|E)(n|N)-(\w\w)-(.*)\.ogg', audio)
+            res = match.group(3).upper() if match else None
+        else:
+            # Drop the "Audio (" prefix and the closing ")" of the caption.
+            res = re.sub(r'((a|A)udio ?\(?)|\)?', '', audio)
+            if res == "":
+                res = None
+        return res
+
     def process_pronunciation(self, proContent):
         # TODO: ne marche que pour les listes à 2 niveaux, voir water pour 3 niveaux
         l = proContent.get_lists()[0]
@@ -29,7 +48,14 @@ class En_en_straktor(Wikstraktor):
                 elif t.normal_name() == self.constants['t_ipa']:
                     p.set_transcription(t.arguments[1].value)
                 elif t.normal_name() == self.constants['t_snd']:
-                    p.add_sound(self.get_file_url(t.arguments[1].value), t.arguments[2].value)
+                    # The accent is the optional 3rd argument; when it is
+                    # missing or empty, fall back to the file name.
+                    acc = None
+                    if len(t.arguments) > 2:
+                        acc = self.process_audio_accent(t.arguments[2].value)
+                    if acc is None:
+                        acc = self.process_audio_accent(t.arguments[1].value, True)
+                    p.add_sound(self.get_file_url(t.arguments[1].value), acc)
             if p.ipa != None or p.has_accents() or p.has_sounds():
                 pronunciations.append(p)
             else:
-- 
GitLab