diff --git a/parsers/en_en.py b/parsers/en_en.py
index a19e7a97ecb7a3ae74028931d924a10d477e05a0..bb8ba7a9668f2f48892311d01055e67f52fb2387 100644
--- a/parsers/en_en.py
+++ b/parsers/en_en.py
@@ -17,7 +17,7 @@ class En_en_straktor(Wikstraktor):
         self.wiki_language = "en"
         self.entry_language = "en"
         self.constants = string_values
-        self.site = self.pwb.Site(f'wiktionary:{self.wiki_language}')
+        self.site = self.pwb.Site('wiktionary:en')
 
     def process_pronunciation(self, proContent):
         l = proContent.get_lists()[0]
@@ -36,7 +36,7 @@ class En_en_straktor(Wikstraktor):
                     p.set_transcription(t.arguments[1].value)
                     print(t, t.arguments, t.arguments[0].value)
                 elif t.normal_name() == self.constants['t_snd']:
-                    p.add_sound(t.arguments[1].value)
+                    p.add_sound(self.get_file_url(t.arguments[1].value))
                     print(t, t.arguments, t.arguments[1].value)
             pronunciations.append(p)
             i += 1
diff --git a/pronunciation.py b/pronunciation.py
index 11c6051dceef735f95f97f4d1538e1a8d4940da9..42b3796f5a476b6e320ae585d5e7b56ca042de9a 100644
--- a/pronunciation.py
+++ b/pronunciation.py
@@ -1,17 +1,42 @@
 #!/usr/bin/env python3
+class Sound:
+    """An audio file for a pronunciation, optionally tagged with an accent."""
+
+    def __init__(self, url, accent):
+        self.url = url
+        self.accent = accent
+
+    def serializable(self):
+        """Return a plain dict; the "accent" key is omitted when accent is None."""
+        if self.accent is None:
+            res = {"url":self.url}
+        else:
+            res = {"accent":self.accent, "url":self.url}
+        return res
+
 class Pronunciation:
     def __init__(self):
         self.ipa = None
         self.sounds = []
+        # NOTE(review): stored via set_accent() but not emitted by serializable().
+        self.accent = None
 
     def set_transcription(self, tscpt):
         self.ipa = tscpt
 
-    def add_sound(self, url):
-        self.sounds.append(url)
+    def set_accent(self, accent):
+        """Set the accent of this pronunciation."""
+        self.accent = accent
+
+    def add_sound(self, url, accent=None):
+        """Attach a sound file; a missing URL (None) is skipped."""
+        if url is not None:
+            self.sounds.append(Sound(url, accent))
 
     def serializable(self):
-        return {"transcript":self.ipa, "sounds":self.sounds}
+        """Return a plain dict holding the transcript and the serialized sounds."""
+        snds = [s.serializable() for s in self.sounds]
+        return {"transcript":self.ipa, "sounds":snds}
 
     def __str__(self):
         return f"{self.serializable()}"
diff --git a/wikstraktor.py b/wikstraktor.py
index 0fad41a14030dc66ad9540355900cc4dd19350d0..972a51e5eea558ea659756984eb78cc3e0a95bd0 100755
--- a/wikstraktor.py
+++ b/wikstraktor.py
@@ -21,6 +21,20 @@ class Wikstraktor:
             instance = None
         return instance
 
+    def get_file_url(self, file_page_name):
+        """Return the URL of the file behind *file_page_name*.
+
+        The file page is looked up on ``self.site``. When the page does not
+        exist, a message is printed and ``None`` is returned.
+        """
+        res = None
+        try:
+            f = self.pwb.FilePage(self.site, file_page_name)
+            res = f.get_file_url()
+        except pywikibot.exceptions.NoPageError:
+            print(f"{file_page_name} does not exist in {self.site}.")
+        return res
+
     def __init__(self):
         self.entries = []
         self.pwb = pywikibot
@@ -58,6 +72,15 @@ class Wikstraktor:
 
 if __name__ == "__main__":
     e = Wikstraktor.get_instance('en', "en")
+    print(e.get_file_url("File:LL-Q1860 (eng)-Nattes à chat----parent.wav"))
+    print(e.get_file_url("File:LL-Q1860 (eng)-Nattes à chat-parent.wav"))
     print(e.fetch("test"), "entries added")
+    # site = pywikibot.Site(f'wiktionary:en')
+    # p = pywikibot.FilePage(site, "File:LL-Q1860 (eng)-Nattes à chat----parent.wav")
+    # print(p)
+    # if not p.exists():
+    #     site = pywikibot.Site('commons')
+    #     p = pywikibot.FilePage(site, "File:LL-Q1860 (eng)-Nattes à chat-parent.wav")
+    # print(p.get_file_url())
     #print(e) #Entry("test", wtp.parse(page.text)))
 