Skip to content
Snippets Groups Projects
Commit f2c4e375 authored by Enzo Simonnet
Browse files

Upload New File

parent e892e30c
No related branches found
No related tags found
No related merge requests found
#!/usr/bin/env python3
from wikstraktor import Wikstraktor, Pronunciation, Sense
from parsers.fr_constants import string_values
# Module-level call counter: Fr_en_straktor.process_etymology increments it on
# every call and appends the new value to the placeholder string it returns.
debugEty = 0
class Fr_en_straktor(Wikstraktor):
    """Wikstraktor set up with wiki language "fr" and entry language "en".

    Pages are read from the ``wiktionary:fr`` site, and template names are
    looked up in ``parsers.fr_constants.string_values``.
    """

    # Wikitext list markers handed to ``get_lists`` in ``process_senses``:
    # definitions and their examples, then sub-definitions and their examples.
    _DEF_MARKER = '\\# '
    _EXAMPLE_MARKER = '\\#:'
    _SUBDEF_MARKER = '\\## '
    _SUBEXAMPLE_MARKER = '\\##:'

    def __init__(self):
        """Configure languages, constants and the site handle."""
        super().__init__()
        self.wiki_language = "fr"
        self.entry_language = "en"
        self.constants = string_values
        self.site = self.pwb.Site('wiktionary:fr')

    def process_pronunciation(self, proContent):
        """Build ``Pronunciation`` objects from the first list in *proContent*.

        For every template named ``constants['t_snd']`` the last argument is
        resolved to a file URL and stored as a sound, and the first argument
        whose text contains no ``=`` is stored as the accent. A pronunciation
        is kept once it has both an accent and at least one sound.

        Returns:
            A list of ``Pronunciation`` objects; empty when *proContent*
            contains no list.
        """
        # TODO: only works for two-level lists; see "water" for a three-level
        # case.
        # TODO: transcription extraction from the 't_acc' template is disabled.
        lists = proContent.get_lists()
        if not lists:
            return []

        sound_template = self.constants['t_snd']
        pronunciations = []
        for full_item in lists[0].fullitems:
            p = Pronunciation()
            for t in self.wtp.parse(full_item).templates:
                # A sound template without arguments carries no file name.
                if t.normal_name() == sound_template and t.arguments:
                    p.add_sound(self.get_file_url(t.arguments[-1].value))
                    # Skip "name=value" arguments; leave the accent unset when
                    # every argument is named.
                    accent = next(
                        (arg for arg in t.arguments if "=" not in arg), None
                    )
                    if accent is not None:
                        # NOTE(review): the argument object itself is passed,
                        # not its ``.value`` -- confirm what set_accent expects.
                        p.set_accent(accent)
                if p.accent is not None and p.sounds != []:
                    pronunciations.append(p)
                    p = Pronunciation()
        return pronunciations

    def process_etymology(self, etyContent):
        """Return a placeholder label; *etyContent* is not inspected.

        Each call increments the module-level ``debugEty`` counter and returns
        ``"Etymology"`` followed by the new counter value.
        """
        global debugEty
        debugEty += 1
        return f"Etymology{debugEty}"

    def process_senses(self, entry, pos, sensesContent):
        """Extract senses and sub-senses from the lists in *sensesContent*.

        Every item of a definition list becomes a ``Sense`` with id
        ``{entry}_{pos}_{n}``; every item of a sub-definition list becomes a
        ``Sense`` with id ``{entry}_{pos}_{n}_{m}``, where ``m`` restarts at 1
        for each new definition. Example lists are attached to the most recent
        sense of the matching level.

        Returns:
            The senses in document order, each one exactly once.
        """
        baseId = f"{entry}_{pos}_"
        lists = sensesContent.get_lists((
            self._DEF_MARKER,
            self._EXAMPLE_MARKER,
            self._SUBDEF_MARKER,
            self._SUBEXAMPLE_MARKER,
        ))

        senses = []
        sense = None
        sub_sense = None
        def_count = 0
        sub_count = 0
        for wiki_list in lists:
            marker = wiki_list.pattern
            if marker == self._DEF_MARKER:
                for item in wiki_list.items:
                    def_count += 1
                    sub_count = 0
                    sense = self._make_sense(f"{baseId}{def_count}", item)
                    # Registered on creation; examples added later mutate the
                    # same object, so nothing is appended twice or forgotten.
                    senses.append(sense)
            elif marker == self._SUBDEF_MARKER:
                for item in wiki_list.items:
                    sub_count += 1
                    sub_sense = self._make_sense(
                        f"{baseId}{def_count}_{sub_count}", item
                    )
                    senses.append(sub_sense)
            elif marker == self._EXAMPLE_MARKER:
                # Examples that precede any definition have no owner; skip.
                if sense is not None:
                    self._add_examples(sense, wiki_list.items)
            elif marker == self._SUBEXAMPLE_MARKER:
                if sub_sense is not None:
                    self._add_examples(sub_sense, wiki_list.items)
        return senses

    def _make_sense(self, sense_id, item):
        """Create a ``Sense`` whose definition is the plain text of *item*."""
        sense = Sense(sense_id)
        sense.add_def(
            self.wiki_language, self.wtp.parse(item).plain_text().strip()
        )
        return sense

    def _add_examples(self, sense, items):
        """Add one example to *sense* for each wikitext string in *items*.

        When an item contains a template listed in ``constants['t_ex']`` and
        that template has arguments, the value of its last argument is the
        example; otherwise the item's stripped plain text is used.
        """
        example_templates = self.constants['t_ex']
        for item in items:
            parsed = self.wtp.parse(item)  # parse once per item
            template = next(
                (t for t in parsed.templates
                 if t.normal_name() in example_templates),
                None,
            )
            if template is not None and template.arguments:
                sense.add_example(template.arguments[-1].value)
            else:
                sense.add_example(parsed.plain_text().strip())
if __name__ == "__main__":
    # Manual smoke run: fetch the page "test" and print the value returned by
    # fetch, followed by the label "entries added".
    ensk = Fr_en_straktor()
    print(ensk.fetch("test"), "entries added")
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment