Skip to content
Snippets Groups Projects
Commit 46ab0653 authored by Mathieu Loiseau
Browse files

Restructuration factorisation de la pile (ParserContext)

parent a3c5d9a3
No related branches found
No related tags found
No related merge requests found
#!/usr/bin/env python3
from wikstraktor import Wikstraktor
from parsers.en_constants import string_values
from pronunciation import Pronunciation
def debugC(c, e):
    """Return a one-line debug summary of the two context stacks.

    ``c`` is the wiki context stack: its last item is read through its
    ``level`` and ``title`` attributes. ``e`` is the entry context stack: its
    last item is formatted as-is. Each half shows the stack size followed by a
    description of the top item, or just ``0`` when that stack is empty.
    """
    if len(c) == 0:
        wiki_part = "0"
    else:
        top_section = c[-1]
        wiki_part = f"{len(c)}, {top_section.level*'#'} {top_section.title}"

    if len(e) == 0:
        entry_part = "0"
    else:
        # One '=' per item below the top of the entry stack.
        entry_part = f"{len(e)}, {(len(e) - 1)*'='} {e[-1]}"

    return "Context: " + wiki_part + " / " + entry_part
from parsers.en_constants import string_values
class En_en_straktor(Wikstraktor):
def __init__(self):
......@@ -52,24 +40,6 @@ class En_en_straktor(Wikstraktor):
print(pronunciations[0], pronunciations[1])
return pronunciations
def parse(self, entry, sections):
    """Walk ``sections`` while tracking the stack of currently open sections.

    Sections whose ``title`` is None are ignored. For every other section the
    wiki context stack is updated, the pronunciation is extracted when the
    title equals ``self.constants['ipa']``, and a debug line is printed.

    ``entry`` is accepted but not used by this implementation.
    """
    wiki_context = []
    # TODO: collect the information section by section and create entries
    # with the information coming from the upper levels.
    entry_context = []
    for s in sections:
        if s.title is not None:
            # Close every section nested deeper than the new one.
            while wiki_context and s.level < wiki_context[-1].level:
                wiki_context.pop()
            if not wiki_context or s.level > wiki_context[-1].level:
                # Empty stack, or the new section is nested in the current
                # top. Appending here also avoids the IndexError that an
                # assignment to wiki_context[-1] raises on an empty list.
                wiki_context.append(s)
            else:
                # Sibling of the current top: take its place.
                wiki_context[-1] = s
            if s.title == self.constants['ipa']:
                entry_context.append(self.process_pronunciation(self.wtp.parse(s.contents)))
            print(s.level, debugC(wiki_context, entry_context))
    print("ok")
if __name__ == "__main__":
    # Manual smoke run: fetch the page "test" and report what fetch() returns.
    ensk = En_en_straktor()
    added = ensk.fetch("test")
    print(added, "entries added")
......@@ -2,14 +2,73 @@
import pywikibot
import wikitextparser
import importlib
from pronunciation import Pronunciation
class Entry:
    """A single entry built for a lemma."""

    def __init__(self, lemma):
        """Create an entry for ``lemma``."""
        self.lemma = lemma
        # Read by __str__; nothing else in this class assigns it, so give it
        # a default to keep str(entry) from raising AttributeError.
        # NOTE(review): presumably the entry's category — confirm.
        self.cat = None

    def set_pronunciation(self, pron):
        """Attach ``pron`` to the entry.

        Raises:
            ValueError: if ``pron`` is not a ``Pronunciation`` instance.
        """
        if isinstance(pron, Pronunciation):
            self.pronunciation = pron
        else:
            raise ValueError(f"Entry.set_pronunciation: {pron} is not a Pronunciation object ({pron.__class__.__name__}).")

    def __str__(self):
        """Return ``"<lemma> (<cat>)"``."""
        # __str__ must return a str; falling off the end returns None and
        # makes str(entry) raise TypeError.
        return f"{self.lemma} ({self.cat})"
class ParserContext:
    """Stack pairing each open wiki section with the entry info found for it.

    Every stack element is a dict with the keys ``"wiki"`` (a section object;
    its ``level`` and ``title`` attributes are read here) and ``"entry_info"``
    (the data attached to that level, or ``None``).
    """

    def __init__(self, entry):
        """Start an empty context; ``entry`` is stored as the lemma."""
        self.lemma = entry
        self.context = []

    def get_level(self):
        """Return the ``level`` of the top section, or -1 for an empty stack."""
        if not self.context:
            return -1
        return self.context[-1]["wiki"].level

    def push(self, wiki_context, entry_context=None):
        """Push a new element made of ``wiki_context`` and ``entry_context``."""
        self.context.append({"wiki": wiki_context, "entry_info": entry_context})

    def pop(self):
        """Remove and return the top element (a ``wiki``/``entry_info`` dict)."""
        return self.context.pop()

    def set_top_wiki(self, wiki_context):
        """Replace the section of the top element, keeping its entry info.

        On an empty stack the section is pushed instead.
        """
        if not self.context:
            self.push(wiki_context)
        else:
            self.context[-1]["wiki"] = wiki_context

    def set_top_entry_info(self, entry_context):
        """Replace the entry info of the top element.

        Raises:
            ValueError: if the stack is empty.
        """
        if not self.context:
            raise ValueError(f"Trying to set up entry info ({entry_context}), in an empty parserContext.")
        self.context[-1]["entry_info"] = entry_context

    def create_entry(self):
        """Build an ``Entry`` for the lemma from the info held in the stack."""
        res = Entry(self.lemma)
        for frame in self.context:
            info = frame["entry_info"]
            # Same isinstance test as Entry.set_pronunciation, so subclasses
            # are accepted too; None and unknown kinds are skipped.
            if isinstance(info, Pronunciation):
                res.set_pronunciation(info)
            # TODO: add the other types
        return res

    def debug_top(self):
        """Return a one-line description of the stack size and top element."""
        if not self.context:
            return "Context: 0"
        top = self.context[-1]
        section = top["wiki"]
        return f"Context: {len(self.context)}, {section.level*'#'} {section.title} / {str(top['entry_info'])}"
class Wikstraktor:
@classmethod
def get_instance(cls, wiki_language, entry_language):
......@@ -21,6 +80,12 @@ class Wikstraktor:
instance = None
return instance
def __init__(self):
    """Set up an extractor with no entries and no parser context yet."""
    # Keep handles on the two third-party modules used by the methods.
    self.pwb, self.wtp = pywikibot, wikitextparser
    self.parserContext = None
    self.entries = []
def get_file_url(self, file_page_name):
res = None
try:
......@@ -30,11 +95,6 @@ class Wikstraktor:
print(f"{file_page_name} does not exist in {self.site}.")
return res
def __init__(self):
    """Set up an extractor with no entries and no parser context yet."""
    self.entries = []
    self.pwb = pywikibot
    self.wtp = wikitextparser
    # A later definition replaces an earlier one of the same name in a class
    # body, so this initializer sets parserContext like the other __init__ of
    # this class does.
    self.parserContext = None
#retrieves the content of a page and processes it (adding the entries to the list of entries)
#returns the number of entries added
def fetch(self, graphy):
......@@ -54,9 +114,21 @@ class Wikstraktor:
nb_entries_added = self.parse(page.title(), sections[i].sections)#self.wtp.parse(s.contents).sections)
return nb_entries_added
def parse(self, entry=None, sections=None):
    """Placeholder: the actual parsing is handled by a subclass.

    ``entry`` and ``sections`` are accepted (and ignored) so that the
    two-argument call made by ``fetch`` reaches this stub instead of raising
    TypeError.

    Returns:
        -1, always.
    """
    return -1
def parse(self, entry, sections):
    """Walk ``sections`` and keep ``self.parserContext`` in step with them.

    A fresh ``ParserContext`` is created for ``entry``. Sections whose
    ``title`` is None are ignored. For every other section the context stack
    is updated, the pronunciation is stored on the top element when the title
    equals ``self.constants['ipa']``, and a debug line is printed.

    NOTE(review): nothing is returned, while ``fetch`` returns the result of
    this call as its number of added entries — confirm the intended value.
    """
    self.parserContext = ParserContext(entry)
    for s in sections:
        if s.title is not None:
            # Close every section nested deeper than the new one.
            while self.parserContext.get_level() > s.level:
                self.parserContext.pop()
            if self.parserContext.get_level() < s.level:
                # Empty stack (level -1) or a section nested in the current
                # top: open a new level rather than overwriting the parent.
                self.parserContext.push(s)
            else:
                # Sibling of the current top: replace the section only, the
                # entry info stored at this level is kept.
                self.parserContext.set_top_wiki(s)
            if s.title == self.constants['ipa']:
                self.parserContext.set_top_entry_info(self.process_pronunciation(self.wtp.parse(s.contents)))
            print(self.parserContext.debug_top())
    print("ok")
def __str__(self):
res = ""
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment