Skip to content
Snippets Groups Projects
Commit b6546143 authored by Mathieu Loiseau's avatar Mathieu Loiseau
Browse files

could be useful for debugging purposes/understanding wtp.sections

parent f1d71a8f
No related branches found
No related tags found
No related merge requests found
#!/usr/bin/env python3 #!/usr/bin/env python3
from wikstraktor import Wikstraktor from wikstraktor import Wikstraktor
#from en_constants import string_values from parsers.en_constants import string_values
def debugC(c):
    """Return a one-line debug summary of the section stack ``c``.

    An empty stack yields ``"Context: 0"``. Otherwise the summary holds the
    stack depth, one ``#`` per heading level of the last element, and that
    element's title.
    """
    depth = len(c)
    if depth == 0:
        summary = "0"
    else:
        innermost = c[-1]
        summary = f"{depth}, {innermost.level * '#'} {innermost.title}"
    return "Context: " + summary
class En_en_straktor(Wikstraktor):
    """Extractor for English entries of the English-language Wiktionary.

    The constructor sets the language codes, the string constants and the
    site handle; ``parse`` currently only traces the heading hierarchy of
    the sections it is given.
    """

    def __init__(self):
        """Set the wiki/entry language codes, the constants and the site."""
        super().__init__()
        self.wiki_language = "en"
        self.entry_language = "en"
        self.constants = string_values
        self.site = self.pwb.Site(f'wiktionary:{self.wiki_language}')

    def parse(self, entry, sections):
        """Print the heading context of every section (debugging aid).

        Args:
            entry: title of the page being processed (not used yet).
            sections: iterable of section objects exposing ``title`` and
                ``level``.

        Returns:
            0, because no entry is created here yet; this keeps the
            "number of entries added" reported by ``fetch`` numeric.
        """
        context = []
        for s in sections:
            if s.title is not None:
                # Drop every open heading that is not an ancestor of ``s``
                # (same level or deeper), then push ``s``. Unlike replacing
                # ``context[-1]`` after popping, this can neither index an
                # emptied stack nor overwrite a shallower ancestor when a
                # heading level is skipped.
                while context and context[-1].level >= s.level:
                    context.pop()
                context.append(s)
            # NOTE(review): the source lost its indentation; the trace is
            # assumed to run for every section, titled or not -- confirm.
            print(s.level, debugC(context))
        print("ok")
        return 0
# Manual smoke run: fetch the page "test" and report what fetch() returned.
if __name__ == "__main__":
    extractor = En_en_straktor()
    added = extractor.fetch("test")
    print(added, "entries added")
#!/usr/bin/env python3 #!/usr/bin/env python3
import pywikibot as pwb import pywikibot
import wikitextparser as wtp import wikitextparser
import importlib import importlib
class Entry: class Entry:
...@@ -23,22 +23,26 @@ class Wikstraktor: ...@@ -23,22 +23,26 @@ class Wikstraktor:
def __init__(self):
    """Create an empty entry list and keep handles on the wiki libraries."""
    # The library modules are stored on the instance so that other methods
    # reach them through ``self.pwb`` / ``self.wtp``.
    self.wtp = wikitextparser
    self.pwb = pywikibot
    self.entries = []
#retrieves the content of a page and processes it (adding the entries to the list of entries) #retrieves the content of a page and processes it (adding the entries to the list of entries)
#returns the number of entries added #returns the number of entries added
def fetch(self, graphy):
    """Retrieve the page ``graphy`` and process its entry-language section.

    The page is parsed into sections; the first section whose capitalized
    title equals ``self.constants[self.entry_language]`` is handed to
    ``self.parse`` together with the page title.

    Args:
        graphy: title of the page to retrieve.

    Returns:
        The value returned by ``self.parse`` (the number of entries added),
        or 0 when the page is empty, no matching section exists, or
        ``self.parse`` returns nothing.
    """
    nb_entries_added = 0
    page = self.pwb.Page(self.site, graphy)
    if page.text != "":
        for section in self.wtp.parse(page.text).sections:
            title = section.title
            # Sections without a title (title is None) can never match.
            if title is not None and title.capitalize() == self.constants[self.entry_language]:
                # Only the first matching section is processed.
                # ``or 0`` keeps the result numeric if parse() returns None.
                nb_entries_added = self.parse(page.title(), section.sections) or 0
                break
    return nb_entries_added
def parse(self): def parse(self):
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment