From bd4c7d91cbac6fabf39470cb08e55ca519039002 Mon Sep 17 00:00:00 2001 From: Enzo Simonnet <enzosim@laposte.net> Date: Tue, 25 Oct 2022 14:18:16 +0000 Subject: [PATCH] =?UTF-8?q?def=20+=20exemples=20OK=20exemples=20pas=20imbr?= =?UTF-8?q?iqu=C3=A9s=20comme=20il=20faut=20N=C2=B0=20des=20defs=20pas=20b?= =?UTF-8?q?on?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- parsers/en_en.py | 58 +++++++++++++++++++++++++++--------------------- 1 file changed, 33 insertions(+), 25 deletions(-) diff --git a/parsers/en_en.py b/parsers/en_en.py index 32c9392..8271662 100644 --- a/parsers/en_en.py +++ b/parsers/en_en.py @@ -44,34 +44,42 @@ class En_en_straktor(Wikstraktor): def process_senses(self, entry, pos, sensesContent): baseId = f"{entry}_{pos}_" - #here we don't look at - l = sensesContent.get_lists()[0] + l = sensesContent.get_lists(('\\# ', '\\#:','\\## ', '\\##:' )) i = 0 senses = [] - while i < len(l.fullitems): - newSense = Sense(f"{baseId}{i}") - li = self.wtp.parse(l.fullitems[i]) - j = 0 - while j < len(li.templates) and li.templates[j].normal_name() != self.constants['t_deflabel']: - j += 1 - if j < len(li.templates): - newSense.set_domain(li.templates[j].arguments[-1].value)#We could use the second parameter for a comment - newSense.add_def(self.wiki_language, self.wtp.parse(li.get_lists()[0].items[0]).plain_text().strip()) - while j < len(li.templates)-1 and li.templates[j+1].normal_name() == self.constants['t_ex']: - newSense.add_example(li.templates[j+1].arguments[1].value) - j += 1 + while i < len(l): + if l[i].pattern == '\\# ': + newSense = Sense(f"{baseId}{i}") + if len(l[i].templates) > 0: + newSense.set_domain(l[i].templates[-1].arguments[-1].value)#We could use the second parameter for a comment + newSense.add_def(self.wiki_language, self.wtp.parse(l[i].items[0]).plain_text().strip()) + elif l[i].pattern == '\\#:': + for j in l[i].items: + if len(self.wtp.parse(j).templates) > 0: + newSense.add_example(self.wtp.parse(j).templates[0].arguments[-1].value) + else: + newSense.add_example(self.wtp.parse(j).plain_text().strip()) + if i == len(l)-1 or l[i+1].pattern == '\\# ' or l[i+1].pattern == '\\## ': senses.append(newSense) - if len(li.get_lists(pattern = '##')) > 0 : - for cnt, k in enumerate (li.get_lists(pattern = '##')[0].items): - if self.wtp.parse(k).templates[0].normal_name() == self.constants['t_deflabel']: - newSense2 = Sense(f"{baseId}{i}{cnt}") - newSense2.set_domain(self.wtp.parse(k).templates[0].arguments[-1].value)#We could use the second parameter for a comment - newSense2.add_def(self.wiki_language, self.wtp.parse(k).plain_text().strip()) - for a in self.wtp.parse(li.get_lists(pattern = '##')[0].fullitems[cnt]).templates: - if a.normal_name() == self.constants['t_ex']: - newSense2.add_example(a.arguments[-1].value) - senses.append(newSense2) - # TODO: process examples + cnt = 0 + while l[i].level == 3 and i < len(l) : + cnt +=1 + if l[i].pattern == '\\## ': + newSense2 = Sense(f"{baseId}{i}{cnt}") + if len(l[i].templates) > 0: + newSense2.set_domain(l[i].templates[-1].arguments[-1].value)#We could use the second parameter for a comment + newSense2.add_def(self.wiki_language, self.wtp.parse(l[i].items[0]).plain_text().strip()) + elif l[i].pattern == '\\##:': + for j in l[i].items: + if len(self.wtp.parse(j).templates) > 0: + newSense2.add_example(self.wtp.parse(j).templates[0].arguments[-1].value) + else: + newSense2.add_example(self.wtp.parse(j).plain_text().strip()) + if i == len(l)-1 or l[i+1].pattern == '\\# ' or l[i+1].pattern == '\\## ': + senses.append(newSense2) + i += 1 + if cnt > 0: + i -= 1 i += 1 return senses -- GitLab