diff --git a/parsers/en_en.py b/parsers/en_en.py index b49265bf5501819f62322cb786ade97f111b1687..6f741b6330efe2fa741412069fcbfe295658e009 100644 --- a/parsers/en_en.py +++ b/parsers/en_en.py @@ -18,23 +18,22 @@ class En_en_straktor(Wikstraktor): l = proContent.get_lists()[0] i = 0 pronunciations = [] - while i < len(l.items): + while i < len(l.fullitems): p = Pronunciation() - templates = self.wtp.parse(l.items[i]).templates - if(len(l.sublists(i))>0): - for li in l.sublists(i)[0].items: - for t in self.wtp.parse(li).templates: - templates.append(t) + templates = self.wtp.parse(l.fullitems[i]).templates a = None - for t in templates: - if t.normal_name() == self.constants['t_acc']: + for j, t in enumerate(templates): + if (t.normal_name() == self.constants['t_acc'] and templates[j+1].normal_name()!= self.constants['t_acc']): a = t.arguments[0].value elif t.normal_name() == self.constants['t_ipa']: p.set_transcription(t.arguments[1].value) p.set_accent(a) elif t.normal_name() == self.constants['t_snd']: p.add_sound(self.get_file_url(t.arguments[1].value), a) - pronunciations.append(p) + if j==len(templates)-1 or templates[j+1].normal_name()== self.constants['t_acc'] : + if p.ipa != None or p.accent != None: + pronunciations.append(p) + p = Pronunciation() i += 1 return pronunciations @@ -49,19 +48,18 @@ class En_en_straktor(Wikstraktor): l = sensesContent.get_lists()[0] i = 0 senses = [] - while i < len(l.items): + while i < len(l.fullitems): newSense = Sense(f"{baseId}{i}") - li = self.wtp.parse(l.items[i]) + li = self.wtp.parse(l.fullitems[i]) for t in li.templates: if t.normal_name() == self.constants['t_deflabel']: newSense.set_domain(t.arguments[1].value)#We could use the second parameter for a comment - newSense.add_def(self.wiki_language, li.plain_text().strip()) + newSense.add_def(self.wiki_language, li.plain_text().strip()) # TODO: process examples i += 1 senses.append(newSense) return senses - if __name__ == "__main__": ensk = En_en_straktor() print(ensk.fetch("test"), "entries added")