diff --git a/parsers/en_en.py b/parsers/en_en.py index feedf89b80cb811cf07d3ba655dcd3df5da4f700..b31790825d55566011c8c7ed01217b2b85123b81 100644 --- a/parsers/en_en.py +++ b/parsers/en_en.py @@ -41,6 +41,12 @@ class En_en_straktor(Wikstraktor): global debugEty debugEty += 1 return "Etymology" + str(debugEty) + + def process_POS(self,parsedwikitext): + pos = None + if parsedwikitext in self.constants['POS'].keys(): + pos = self.constants['POS'][parsedwikitext] + return pos def process_senses(self, entry, pos, sensesContent): baseId = f"{entry}_{pos}_" @@ -52,32 +58,38 @@ class En_en_straktor(Wikstraktor): if l[i].pattern == '\\# ': nombreDef += 1 newSense = Sense(f"{baseId}{nombreDef}") - if len(l[i].templates) > 0: - newSense.set_domain(l[i].templates[-1].arguments[-1].value)#We could use the second parameter for a comment newSense.add_def(self.wiki_language, self.wtp.parse(l[i].items[0]).plain_text().strip()) elif l[i].pattern == '\\#:': for j in l[i].items: - if len(self.wtp.parse(j).templates) > 0: - newSense.add_example(self.wtp.parse(j).templates[0].arguments[-1].value) - else: + k = 0 + isEx = 0 + while k < len(self.wtp.parse(j).templates) and isEx == 0 : + if (self.wtp.parse(j).templates[k].normal_name() in self.constants['t_ex']): + newSense.add_example(self.wtp.parse(j).templates[0].arguments[-1].value) + isEx = 1 + k += 1 + if isEx == 0: newSense.add_example(self.wtp.parse(j).plain_text().strip()) if i == len(l)-1 or l[i+1].pattern == '\\# ' or l[i+1].pattern == '\\## ': senses.append(newSense) cnt = 0 nombreSousDef = 0 - while l[i].level == 3 and i < len(l) : + while i < len(l) and l[i].level == 3 : cnt +=1 if l[i].pattern == '\\## ': nombreSousDef += 1 - newSense2 = Sense(f"{baseId}{nombreDef}{nombreSousDef}") - if len(l[i].templates) > 0: - newSense2.set_domain(l[i].templates[-1].arguments[-1].value)#We could use the second parameter for a comment + newSense2 = Sense(f"{baseId}{nombreDef}_{nombreSousDef}") newSense2.add_def(self.wiki_language, self.wtp.parse(l[i].items[0]).plain_text().strip()) elif l[i].pattern == '\\##:': for j in l[i].items: - if len(self.wtp.parse(j).templates) > 0: - newSense2.add_example(self.wtp.parse(j).templates[0].arguments[-1].value) - else: + k = 0 + isEx = 0 + while k < len(self.wtp.parse(j).templates) and isEx == 0 : + if (self.wtp.parse(j).templates[k].normal_name() in self.constants['t_ex']): + newSense2.add_example(self.wtp.parse(j).templates[0].arguments[-1].value) + isEx = 1 + k += 1 + if isEx == 0: newSense2.add_example(self.wtp.parse(j).plain_text().strip()) if i == len(l)-1 or l[i+1].pattern == '\\# ' or l[i+1].pattern == '\\## ': senses.append(newSense2)