Skip to content
Snippets Groups Projects
Commit c8795383 authored by Mathieu Loiseau's avatar Mathieu Loiseau
Browse files

Mutualisation en_en/fr_en

parent da8f8f5c
No related branches found
No related tags found
No related merge requests found
...@@ -8,6 +8,9 @@ string_values = { ...@@ -8,6 +8,9 @@ string_values = {
"t_acc":"a", #template for accents "t_acc":"a", #template for accents
"t_deflabel":"lb", "t_deflabel":"lb",
"t_ex":["ux", "usex"], "t_ex":["ux", "usex"],
"sense_pattern":[ ## structure(s) for sense patterns add_subdef is to be added to def patterns
{"def":"\\#", "ex":"\\#:", "add_subdef":"\\#"}
],
"POS": { #https://en.wiktionary.org/wiki/Wiktionary:POS "POS": { #https://en.wiktionary.org/wiki/Wiktionary:POS
"Adjective":"Adj", "Adjective":"Adj",
"Adverb":"Adv", "Adverb":"Adv",
......
...@@ -48,54 +48,6 @@ class En_en_straktor(Wikstraktor): ...@@ -48,54 +48,6 @@ class En_en_straktor(Wikstraktor):
pos = self.constants['POS'][parsedwikitext] pos = self.constants['POS'][parsedwikitext]
return pos return pos
#Build the list of senses found in sensesContent and return it.
#Four list patterns are requested: '\\# ' (definitions), '\\#:' (examples of a definition),
#'\\## ' (sub-definitions) and '\\##:' (examples of a sub-definition).
#NOTE(review): leading indentation is not visible in this view, so the nesting described
#in the comments below is inferred from the statement order - confirm against the real file.
def process_senses(self, sensesContent):
l = sensesContent.get_lists(('\\# ', '\\#:','\\## ', '\\##:' ))
#i walks through the lists returned above; senses accumulates the result
i = 0
senses = []
while i < len(l):
#Definition list: only its first item is read, and a Sense is created when its plain text is not empty
if l[i].pattern == '\\# ':
theDef = self.wtp.parse(l[i].items[0]).plain_text().strip()
if theDef != "":
newSense = Sense(self.entry_language, theDef, self.wiki_language)
#newSence.add_translation()
#Example list: every item becomes an example of the current sense
elif l[i].pattern == '\\#:':
for j in l[i].items:
#k scans the templates of the item, isEx is set to 1 once an example template is found
k = 0
isEx = 0
while k < len(self.wtp.parse(j).templates) and isEx == 0 :
#an example template is one whose normal_name() is listed in constants['t_ex']
if (self.wtp.parse(j).templates[k].normal_name() in self.constants['t_ex']):
#NOTE(review): the test above is on templates[k] but the value is read from templates[0] - confirm this is intended
newSense.add_example(self.wtp.parse(j).templates[0].arguments[-1].value)
isEx = 1
k += 1
#no example template found: the stripped plain text of the item is used instead
if isEx == 0:
newSense.add_example(self.wtp.parse(j).plain_text().strip())
#The sense is stored when this is the last list or when the next list starts a definition or a sub-definition
#NOTE(review): newSense is only assigned when theDef is not empty, yet it is used here and in the example branch - verify it is always bound
if i == len(l)-1 or l[i+1].pattern == '\\# ' or l[i+1].pattern == '\\## ':
senses.append(newSense)
#cnt counts how many consecutive lists with level == 3 are consumed by the loop below
#(presumably the level reported for the '\\## ' and '\\##:' lists - TODO confirm)
cnt = 0
while i < len(l) and l[i].level == 3 :
cnt +=1
#Sub-definition list: only its first item is read to create the SubSense
if l[i].pattern == '\\## ':
newSense2 = SubSense(self.entry_language, self.wtp.parse(l[i].items[0]).plain_text().strip(), self.wiki_language)
#newSense2.add_translation()
#Example list of a sub-definition: same template lookup as for the '\\#:' lists above
elif l[i].pattern == '\\##:':
for j in l[i].items:
k = 0
isEx = 0
while k < len(self.wtp.parse(j).templates) and isEx == 0 :
if (self.wtp.parse(j).templates[k].normal_name() in self.constants['t_ex']):
#NOTE(review): same templates[k] / templates[0] mismatch as above
newSense2.add_example(self.wtp.parse(j).templates[0].arguments[-1].value)
isEx = 1
k += 1
if isEx == 0:
newSense2.add_example(self.wtp.parse(j).plain_text().strip())
#The sub-sense is attached to the current sense under the same "last list or next list starts a (sub-)definition" condition
if i == len(l)-1 or l[i+1].pattern == '\\# ' or l[i+1].pattern == '\\## ':
newSense.add_subsense(newSense2)
i += 1
#cnt > 0 means the loop above already advanced i; presumably i is stepped back once here
#so that the increment that follows does not skip a list - TODO confirm
if cnt > 0:
i -= 1
i += 1
return senses
if __name__ == "__main__": if __name__ == "__main__":
ensk = En_en_straktor() ensk = En_en_straktor()
print(ensk.fetch("test"), "entries added") print(ensk.fetch("test"), "entries added")
...@@ -55,57 +55,6 @@ class Fr_en_straktor(Wikstraktor): ...@@ -55,57 +55,6 @@ class Fr_en_straktor(Wikstraktor):
ik += 1 ik += 1
return pos return pos
def process_example(self, example_wiki_text):
    """Return the example text carried by one wikitext list item.

    The item is parsed once. The first template whose normalised name is
    listed in ``self.constants['t_ex']`` and that has at least one argument
    supplies the example: the value of its last argument is returned as is.
    When no such template exists, the stripped plain text of the item is
    returned instead.

    :param example_wiki_text: wikitext of a single example item
    :return: the text of the example
    """
    parsed = self.wtp.parse(example_wiki_text)
    example_templates = self.constants['t_ex']
    for template in parsed.templates:
        # Read the value from the template that matched: the previous code
        # tested templates[k] but always read templates[0], which returned
        # the wrong text whenever another template came first.
        # A matching template without arguments is skipped rather than
        # raising IndexError on arguments[-1].
        if template.normal_name() in example_templates and template.arguments:
            return template.arguments[-1].value
    return parsed.plain_text().strip()
def process_definition(self, definition, sub_items, def_level = True):
    """Build the Sense (or SubSense) described by one definition item.

    :param definition: wikitext of the definition item
    :param sub_items: the lists nested under that item (examples and, at
        definition level, sub-definitions)
    :param def_level: True for a top-level definition (a Sense is built and
        sub-definitions are processed), False for a sub-definition (a
        SubSense is built and only its examples are processed)
    :return: the Sense or SubSense that was built
    """
    patterns = self.constants['sense_pattern'][0]
    # The original sub-definition branch parsed the undefined name `item`;
    # the text to parse is the `definition` argument in both cases.
    text = self.wtp.parse(definition).plain_text().strip()
    if def_level:
        newSense = Sense(self.entry_language, text, self.wiki_language)
        pattern_ex = patterns["ex"]
        pattern_subdef = patterns["add_subdef"] + patterns["def"]
    else:
        newSense = SubSense(self.entry_language, text, self.wiki_language)
        pattern_ex = patterns["add_subdef"] + patterns["ex"]
        pattern_subdef = None
    for item_list in sub_items:
        #Process examples
        if item_list.pattern == pattern_ex:
            for item in item_list.items:
                newSense.add_example(self.process_example(item))
                # To process what is nested under an example (e.g.
                # translations), use item_list.sublists(<index of the item>)
        #Process sub-definitions (top-level definitions only)
        if def_level and item_list.pattern == pattern_subdef:
            # sublists() is given the position of the item inside its own
            # list; the previous counter was initialised once before the
            # outer loop and therefore was not tied to that position.
            for index, item in enumerate(item_list.items):
                newSense.add_subsense(self.process_definition(item, item_list.sublists(index), False))
    return newSense
def process_senses(self, sensesContent):
    """Return the senses found in the definition list of a section.

    Only the first list matching the definition pattern is processed; when
    several lists are found, the remaining ones are reported through
    ``self.log`` and ignored. A section without any definition list yields
    an empty list instead of raising IndexError.

    :param sensesContent: parsed section exposing ``get_lists``
    :return: list of the objects returned by ``process_definition``
    """
    def_pattern = self.constants['sense_pattern'][0]["def"]
    found = sensesContent.get_lists(def_pattern)
    senses = []
    if not found:
        # Nothing to extract: avoid indexing an empty result
        return senses
    if len(found) > 1:
        self.log.add_log("Wikstraktor.process_senses", f"===== WARNING ======\nmore than one sense list, make sure we don't forget anything\nignored lists : \n{found[1:]}\n===================")
    sense_list = found[0]
    if sense_list.pattern == def_pattern:
        # sublists(index) gives the lists nested under the item at that position
        for index, item in enumerate(sense_list.items):
            senses.append(self.process_definition(item, sense_list.sublists(index)))
    return senses
if __name__ == "__main__": if __name__ == "__main__":
ensk = Fr_en_straktor() ensk = Fr_en_straktor()
print(ensk.fetch("test"), "entries added") print(ensk.fetch("test"), "entries added")
...@@ -572,14 +572,58 @@ class Wikstraktor: ...@@ -572,14 +572,58 @@ class Wikstraktor:
def process_etymology(self, parsedwikitext): def process_etymology(self, parsedwikitext):
pass#in subclass pass#in subclass
#can be overloaded
def process_example(self, example_wiki_text):
    """Return the example text carried by one wikitext list item.

    The item is parsed once. The first template whose normalised name is
    listed in ``self.constants['t_ex']`` and that has at least one argument
    supplies the example: the value of its last argument is returned as is.
    When no such template exists, the stripped plain text of the item is
    returned instead.

    :param example_wiki_text: wikitext of a single example item
    :return: the text of the example
    """
    parsed = self.wtp.parse(example_wiki_text)
    example_templates = self.constants['t_ex']
    #process templates
    for template in parsed.templates:
        # Read the value from the template that matched: the previous code
        # tested templates[k] but always read templates[0], which returned
        # the wrong text whenever another template came first.
        # A matching template without arguments is skipped rather than
        # raising IndexError on arguments[-1].
        if template.normal_name() in example_templates and template.arguments:
            return template.arguments[-1].value
    return parsed.plain_text().strip()
#can be overloaded
def process_definition(self, definition, sub_items, def_level = True):
    """Build the Sense (or SubSense) described by one definition item.

    :param definition: wikitext of the definition item
    :param sub_items: the lists nested under that item (examples and, at
        definition level, sub-definitions)
    :param def_level: True for a top-level definition (a Sense is built and
        sub-definitions are processed), False for a sub-definition (a
        SubSense is built and only its examples are processed)
    :return: the Sense or SubSense that was built
    """
    patterns = self.constants['sense_pattern'][0]
    text = self.wtp.parse(definition).plain_text().strip()
    if def_level:
        newSense = Sense(self.entry_language, text, self.wiki_language)
        pattern_ex = patterns["ex"]
        pattern_subdef = patterns["add_subdef"] + patterns["def"]
    else:
        newSense = SubSense(self.entry_language, text, self.wiki_language)
        pattern_ex = patterns["add_subdef"] + patterns["ex"]
        pattern_subdef = None
    for item_list in sub_items:
        #Process examples
        if item_list.pattern == pattern_ex:
            for item in item_list.items:
                newSense.add_example(self.process_example(item))
                # To process what is nested under an example (e.g.
                # translations), use item_list.sublists(<index of the item>)
        #Process sub-definitions (top-level definitions only)
        if def_level and item_list.pattern == pattern_subdef:
            # sublists() is given the position of the item inside its own
            # list; the previous counter was initialised once before the
            # outer loop and therefore was not tied to that position.
            for index, item in enumerate(item_list.items):
                newSense.add_subsense(self.process_definition(item, item_list.sublists(index), False))
    return newSense
def process_senses(self, sensesContent):
    """Return the senses found in the definition list of a section.

    Only the first list matching the definition pattern is processed; when
    several lists are found, the remaining ones are reported through
    ``self.log`` and ignored. A section without any definition list yields
    an empty list instead of raising IndexError.

    :param sensesContent: parsed section exposing ``get_lists``
    :return: list of the objects returned by ``process_definition``
    """
    def_pattern = self.constants['sense_pattern'][0]["def"]
    found = sensesContent.get_lists(def_pattern)
    senses = []
    if not found:
        # Nothing to extract: avoid indexing an empty result
        return senses
    if len(found) > 1:
        self.log.add_log("Wikstraktor.process_senses", f"===== WARNING ======\nmore than one sense list, make sure we don't forget anything\nignored lists : \n{found[1:]}\n===================")
    sense_list = found[0]
    if sense_list.pattern == def_pattern:
        # sublists(index) gives the lists nested under the item at that position
        for index, item in enumerate(sense_list.items):
            senses.append(self.process_definition(item, sense_list.sublists(index)))
    return senses
def __str__(self): def __str__(self):
return self.export() return self.export()
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment