Skip to content
Snippets Groups Projects
Commit dbf61662 authored by Enzo Simonnet
Browse files

def + exemples ok

parent 613ce653
No related branches found
No related tags found
No related merge requests found
# NOTE(review): in the recovered text, two sources shared the same physical
# lines (a constants table and the extractor class) and all indentation was
# lost. They are separated again below; places where the nesting had to be
# inferred are marked with NOTE(review).

# --- Constants table (presumably parsers/en_constants.py, judging by the
# --- import further down) -------------------------------------------------
# NOTE(review): process_senses reads self.constants['t_ex'], but no 't_ex'
# key is visible in this table -- confirm it exists in the real module.
string_values = {
    "ety": "Etymology",
    "pro": "Pronunciation",
    "en": "English",
    "fr": "French",
    "t_ipa": "IPA",  # template for transcription
    "t_snd": "audio",  # template for audio
    "t_acc": "a",  # template for accents
    "t_deflabel": "lb",
    "POS": {  # https://en.wiktionary.org/wiki/Wiktionary:POS
        "Adjective": "Adj",
        "Adverb": "Adv",
        "Ambiposition": "Ambip",
        "Article": "Art",
        "Circumposition": "Circump",
        "Classifier": "Class",
        "Conjunction": "Conj",
        "Contraction": "Cont",
        "Counter": "Count",
        "Determiner": "Det",
        "Ideophone": "Ideophone",
        "Interjection": "Interj",
        "Noun": "N",
        "Numeral": "Num",
        "Participle": "Part",
        "Particle": "Particle",
        "Postposition": "Postp",
        "Preposition": "Prep",
        "Pronoun": "Pro",
        "Proper noun": "NP",
        "Verb": "V"  # TODO: complete this table
    }
}

# --- Extractor module -------------------------------------------------------
#!/usr/bin/env python3
from wikstraktor import Wikstraktor, Pronunciation, Sense

from parsers.en_constants import string_values

# Call counter used by the placeholder process_etymology below.
debugEty = 0


class En_en_straktor(Wikstraktor):
    """Extractor for English entries read from the English Wiktionary.

    Relies on attributes and helpers that are not defined in this class
    (``self.pwb``, ``self.wtp``, ``self.get_file_url``, ``fetch``); they are
    presumably provided by ``Wikstraktor`` -- confirm against the base class.
    """

    def __init__(self):
        super().__init__()
        self.wiki_language = "en"
        self.entry_language = "en"
        self.constants = string_values
        self.site = self.pwb.Site('wiktionary:en')

    def process_pronunciation(self, proContent):
        """Build the list of pronunciations found in a pronunciation section.

        Only the first list returned by ``proContent.get_lists()`` is read.
        Within each list item, templates are scanned in order: an accent
        template sets the current accent, a transcription template fills the
        current ``Pronunciation``, and an audio template adds a sound to it.

        Returns a list of ``Pronunciation`` objects; empty when the section
        contains no list.
        """
        # TODO: only works for 2-level lists; see "water" for a 3-level case
        lists = proContent.get_lists()
        if not lists:
            return []
        acc_name = self.constants['t_acc']
        ipa_name = self.constants['t_ipa']
        snd_name = self.constants['t_snd']
        pronunciations = []
        for fullitem in lists[0].fullitems:
            p = Pronunciation()
            templates = self.wtp.parse(fullitem).templates
            last = len(templates) - 1
            a = None
            for j, t in enumerate(templates):
                name = t.normal_name()
                # Bounds-checked look-ahead: the original indexed
                # templates[j+1] directly and raised IndexError when an
                # accent template was the last template of the item.
                next_is_accent = (
                    j < last and templates[j + 1].normal_name() == acc_name
                )
                if name == acc_name and not next_is_accent:
                    # In a run of accent templates only the last one is kept.
                    a = t.arguments[0].value
                elif name == ipa_name:
                    p.set_transcription(t.arguments[1].value)
                    p.set_accent(a)
                elif name == snd_name:
                    p.add_sound(self.get_file_url(t.arguments[1].value), a)
                # A group ends at the last template or just before the next
                # accent template.
                if j == last or next_is_accent:
                    if p.ipa is not None or p.accent is not None:
                        pronunciations.append(p)
                        # NOTE(review): nesting of this reset was inferred;
                        # placed here so data on an unstored object is kept.
                        p = Pronunciation()
        return pronunciations

    def process_etymology(self, etyContent):
        """Placeholder: return "Etymology" followed by a running call count.

        ``etyContent`` is not read. The count lives in the module-level
        ``debugEty`` variable, so it is shared by all instances.
        """
        global debugEty
        debugEty += 1
        return "Etymology" + str(debugEty)

    def process_senses(self, entry, pos, sensesContent):
        """Build the list of senses found in a definitions section.

        Only the first list returned by ``sensesContent.get_lists()`` is
        read. Each item yields one ``Sense`` with id ``{entry}_{pos}_{i}``;
        labelled sub-items (``##``) yield further senses with id
        ``{entry}_{pos}_{i}{cnt}``.

        Returns a list of ``Sense`` objects; empty when the section contains
        no list.
        """
        baseId = f"{entry}_{pos}_"
        lists = sensesContent.get_lists()
        if not lists:
            return []
        label_name = self.constants['t_deflabel']
        senses = []
        for i, fullitem in enumerate(lists[0].fullitems):
            newSense = Sense(f"{baseId}{i}")
            li = self.wtp.parse(fullitem)
            templates = li.templates
            # Find the first label template of the item, if any.
            j = 0
            while j < len(templates) and templates[j].normal_name() != label_name:
                j += 1
            if j < len(templates):
                # We could use the second parameter for a comment
                newSense.set_domain(templates[j].arguments[-1].value)
            newSense.add_def(
                self.wiki_language,
                self.wtp.parse(li.get_lists()[0].items[0]).plain_text().strip(),
            )
            # Examples are taken from the templates that directly follow the
            # label; when no label was found, j is already past the end and
            # this loop does not run.
            while (
                j < len(templates) - 1
                and templates[j + 1].normal_name() == self.constants['t_ex']
            ):
                newSense.add_example(templates[j + 1].arguments[1].value)
                j += 1
            senses.append(newSense)

            sublists = li.get_lists(pattern='##')
            if sublists:
                sublist = sublists[0]
                for cnt, k in enumerate(sublist.items):
                    sub_templates = self.wtp.parse(k).templates
                    # NOTE(review): nesting inferred -- a sub-item is turned
                    # into a sense only when its first template is a label.
                    # Sub-items without any template are skipped instead of
                    # raising IndexError.
                    if not sub_templates or sub_templates[0].normal_name() != label_name:
                        continue
                    # NOTE(review): "{i}{cnt}" has no separator, so ids can
                    # collide (item 1 / sub-item 0 vs. item 10) -- confirm
                    # whether consumers depend on this format before changing.
                    newSense2 = Sense(f"{baseId}{i}{cnt}")
                    # We could use the second parameter for a comment
                    newSense2.set_domain(sub_templates[0].arguments[-1].value)
                    newSense2.add_def(
                        self.wiki_language,
                        self.wtp.parse(k).plain_text().strip(),
                    )
                    for a in self.wtp.parse(sublist.fullitems[cnt]).templates:
                        if a.normal_name() == self.constants['t_ex']:
                            newSense2.add_example(a.arguments[-1].value)
                    senses.append(newSense2)
            # TODO: process examples
        return senses
if __name__ == "__main__":
    # Script entry point: fetch the entry "test" and print the value
    # returned by fetch() followed by "entries added".
    ensk = En_en_straktor()
    print(ensk.fetch("test"), "entries added")
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment