diff --git a/parsers/en_constants.py b/parsers/en_constants.py index 9e2dcf233f665996487db9b55fe7a06c7875dbd1..4b5402f83de183d38cdafb22a779a4ab5cae973f 100644 --- a/parsers/en_constants.py +++ b/parsers/en_constants.py @@ -1,9 +1,32 @@ string_values = { "ety":"Etymology", - "ipa":"Pronunciation", + "pro":"Pronunciation", "en":"English", "fr":"French", "t_ipa":"IPA", #template for transcription "t_snd":"audio", #template for audio - "t_acc":"a" #template for accents + "t_acc":"a", #template for accents + "POS": { # full part-of-speech name to abbreviation, names from https://en.wiktionary.org/wiki/Wiktionary:POS + "Adjective":"Adj", + "Adverb":"Adv", + "Ambiposition":"Ambip", + "Article":"Art", + "Circumposition":"Circump", + "Classifier":"Class", + "Conjunction":"Conj", + "Contraction":"Cont", + "Counter":"Count", + "Determiner":"Det", + "Ideophone":"Ideophone", + "Interjection":"Interj", + "Noun":"N", + "Numeral":"Num", + "Participle":"Part", + "Particle":"Particle", + "Postposition":"Postp", + "Preposition":"Prep", + "Pronoun":"Pro", + "Proper noun":"NP", + "Verb":"V" # TODO: complete the list + } } diff --git a/parsers/en_en.py b/parsers/en_en.py index 1c9f2d8085cf4219580b29065f0f7bbab5d4f92b..785b470f9a5c789b9e560a5ea7261566dc9eb874 100644 --- a/parsers/en_en.py +++ b/parsers/en_en.py @@ -4,6 +4,8 @@ from pronunciation import Pronunciation from parsers.en_constants import string_values +debugEty = 0 # debug counter: numbers the placeholder etymologies + class En_en_straktor(Wikstraktor): def __init__(self): super().__init__() @@ -40,6 +42,15 @@ class En_en_straktor(Wikstraktor): print(pronunciations[0], pronunciations[1]) return pronunciations + def process_etymology(self, etyContent): + global debugEty + debugEty += 1 + return "Etymology" + str(debugEty) # placeholder: etyContent is not parsed yet + + def process_senses(self, sensesContent): + import random as r + return "Cool"+r.choice(['a', 'b', 'c', 'd', 'e', 'f', 'g']) # placeholder: random label, sensesContent is not parsed yet + if __name__ == "__main__": ensk = En_en_straktor() print(ensk.fetch("test"), "entries added") diff --git a/wikstraktor.py b/wikstraktor.py index 
7c831be0200a49f59f507f97e0126fbd43f3ab6a..6303389a652c67a26471e94459cfa7fe694cbe0c 100755 --- a/wikstraktor.py +++ b/wikstraktor.py @@ -9,14 +9,20 @@ class Entry: def __init__(self, lemma): self.lemma = lemma - def set_pronunciation(self, pron): + def set_pronunciations(self, pron): if isinstance(pron, Pronunciation): - self.pronunciation = pron + self.pronunciations = pron else: raise ValueError(f"Entry.set_pronunciation: {pron} is not a Pronunciation object ({pron.__class__.__name__}).") + def set_POS(self, pos): + self.pos = pos + def __str__(self): - res = f"{self.lemma} ({self.cat})" + res = f"{self.lemma} ({self.pos})\n" + for p in self.pronunciations: + res += f"{str(p)}\n" + return res class ParserContext: def __init__(self, entry): @@ -30,8 +36,8 @@ class ParserContext: res = self.context[-1]["wiki"].level return res - def push(self, wiki_context, entry_context=None): - self.context.append({"wiki":wiki_context, "entry_info":entry_context}) + def push(self, wiki_context): + self.context.append({"wiki":wiki_context}) def pop(self): return self.context.pop() @@ -42,22 +48,22 @@ class ParserContext: else: self.context[-1]['wiki'] = wiki_context - def set_top_entry_info(self, entry_context): + def set_top_entry_info(self, key, entry_context): if len(self.context) == 0: raise ValueError(f"Trying to set up entry info ({entry_context}), in an empty parserContext.") else: - self.context[-1]['entry_info'] = entry_context + self.context[-1][key] = entry_context # stored under the caller-chosen key def create_entry(self): res = Entry(self.lemma) for l in self.context: - if l['entry_info'] == None: - pass - elif l['entry_info'].__class__.__name__ == "Pronunciation": - res.set_pronunciation(l['entry_info']) - else: - # TODO: Ajouter les autres types - pass + if l.get('pro') is not None: # .get(): push() only stores 'wiki', other keys exist only once set + res.set_pronunciations(l['pro']) + if l.get('ety') is not None: + pass # etymology is ignored for now + if l.get('POS') is not None: + res.set_POS(l['POS']) + # TODO: add the other entry types return res def debug_top(self): @@ 
-65,7 +71,13 @@ class ParserContext: if len(self.context) == 0 : res += "0" else: - res += f"{len(self.context)}, {self.context[-1]['wiki'].level*'#'} {self.context[-1]['wiki'].title} / {str(self.context[-1]['entry_info'])}" + info = "" + for k,v in self.context[-1].items(): + if k != 'wiki': + if info != "": + info += "\n\t\t\t" + info += f"{k} → {str(v)}" + res += f"{len(self.context)*'='} {self.context[-1]['wiki'].level*'#'} {self.context[-1]['wiki'].title} / {info}" return res @@ -125,8 +137,13 @@ class Wikstraktor: while self.parserContext.get_level() > s.level: self.parserContext.pop() self.parserContext.set_top_wiki(s) - if s.title == self.constants['ipa']: - self.parserContext.set_top_entry_info(self.process_pronunciation(self.wtp.parse(s.contents))) + if s.title == self.constants['pro']: + self.parserContext.set_top_entry_info('pro', self.process_pronunciation(self.wtp.parse(s.contents))) + elif self.constants['ety'] in s.title: + self.parserContext.set_top_entry_info('ety', self.process_etymology(self.wtp.parse(s.contents))) + elif s.title in self.constants['POS'].keys(): + self.parserContext.set_top_entry_info('POS', self.constants['POS'][s.title]) + self.parserContext.set_top_entry_info('senses', self.process_senses(self.wtp.parse(s.contents))) print(self.parserContext.debug_top()) print("ok") @@ -139,8 +156,8 @@ class Wikstraktor: if __name__ == "__main__": e = Wikstraktor.get_instance('en', "en") - print(e.get_file_url("File:LL-Q1860 (eng)-Nattes à chat----parent.wav")) - print(e.get_file_url("File:LL-Q1860 (eng)-Nattes à chat-parent.wav")) + # print(e.get_file_url("File:LL-Q1860 (eng)-Nattes à chat----parent.wav")) + # print(e.get_file_url("File:LL-Q1860 (eng)-Nattes à chat-parent.wav")) print(e.fetch("test"), "entries added") # site = pywikibot.Site(f'wiktionary:en') # p = pywikibot.FilePage(site, "File:LL-Q1860 (eng)-Nattes à chat----parent.wav")