def process_senses(self, entry, pos, sensesContent):
    """Extract the senses listed in a definitions section.

    The section is split into wiki lists using four patterns: ``# ``
    (definition), ``#:`` (example of a definition), ``## ``
    (sub-definition) and ``##:`` (example of a sub-definition).  Every
    definition or sub-definition list yields one ``Sense``; example lists
    are attached to the most recent sense of the matching depth.

    Args:
        entry: Headword, used as the first part of each sense id.
        pos: Part of speech, used as the second part of each sense id.
        sensesContent: Parsed wikitext of the section; must provide
            ``get_lists(patterns)``.

    Returns:
        The list of ``Sense`` objects, in order of appearance.  Each sense
        appears exactly once.
    """
    sense_pat, example_pat = '\\# ', '\\#:'
    subsense_pat, subexample_pat = '\\## ', '\\##:'
    base_id = f"{entry}_{pos}_"
    wiki_lists = sensesContent.get_lists(
        (sense_pat, example_pat, subsense_pat, subexample_pat)
    )

    def build_sense(sense_id, wiki_list):
        # Create a sense from the first item of a definition list.
        # NOTE(review): only items[0] is read; if one list can hold several
        # definitions the later ones are ignored -- confirm against the
        # parser's list-splitting behaviour.
        sense = Sense(sense_id)
        templates = wiki_list.templates
        # The last argument of the last template is used as the domain.
        # We could use the second parameter for a comment.
        if templates and templates[-1].arguments:
            sense.set_domain(templates[-1].arguments[-1].value)
        definition = self.wtp.parse(wiki_list.items[0]).plain_text().strip()
        sense.add_def(self.wiki_language, definition)
        return sense

    def attach_examples(sense, wiki_list):
        # Add one example per item: the last argument of the item's first
        # template when there is one, otherwise the item's plain text.
        for item in wiki_list.items:
            parsed = self.wtp.parse(item)  # parse once, reuse below
            templates = parsed.templates
            if templates and templates[0].arguments:
                sense.add_example(templates[0].arguments[-1].value)
            else:
                sense.add_example(parsed.plain_text().strip())

    senses = []
    current_sense = None
    current_subsense = None
    # 1-based position inside the current run of consecutive sub-level
    # lists; it is the second numeric component of a sub-sense id.
    run_position = 0
    for index, wiki_list in enumerate(wiki_lists):
        pattern = wiki_list.pattern
        if pattern in (subsense_pat, subexample_pat):
            run_position += 1
        else:
            run_position = 0

        if pattern == sense_pat:
            current_sense = build_sense(f"{base_id}{index}", wiki_list)
            # Sub-level examples must not leak onto a previous sub-sense.
            current_subsense = None
            senses.append(current_sense)
        elif pattern == example_pat:
            # Examples with no preceding definition have nothing to attach
            # to and are skipped.
            if current_sense is not None:
                attach_examples(current_sense, wiki_list)
        elif pattern == subsense_pat:
            current_subsense = build_sense(
                f"{base_id}{index}{run_position}", wiki_list
            )
            senses.append(current_subsense)
        elif pattern == subexample_pat:
            if current_subsense is not None:
                attach_examples(current_subsense, wiki_list)
    return senses