Skip to content
Snippets Groups Projects
Commit 339c6f4f authored by Mathieu Loiseau's avatar Mathieu Loiseau
Browse files

begin parsing

parent a999d488
No related branches found
No related tags found
No related merge requests found
#!/usr/bin/env python3
import pywikibot as pwb
import wikitextparser as wtp
class Entry:
    """A single dictionary entry: a lemma and, optionally, its category.

    Attributes:
        lemma: The headword of the entry.
        cat: The category shown in parentheses by ``__str__``; ``None``
            until one is supplied or assigned.
    """

    def __init__(self, lemma, *, cat=None):
        """Store the lemma and an optional category.

        Args:
            lemma: The headword of the entry.
            cat: Optional category label. Keyword-only so that existing
                ``Entry(lemma)`` calls keep working unchanged.
        """
        self.lemma = lemma
        # Always define ``cat`` so ``__str__`` cannot hit a missing attribute.
        self.cat = cat

    def __str__(self):
        """Return ``"lemma (cat)"``, or just the lemma when no category is set."""
        if self.cat is None:
            return f"{self.lemma}"
        return f"{self.lemma} ({self.cat})"
class Entries:
    """Entries of one written form, read from a Wiktionary page.

    Constructing an instance fetches the page named ``graphy`` from the
    ``wiktionary:<languagewiki>`` site and, when the page has text, hands
    the parsed wikitext to :meth:`parse`.

    Attributes:
        language: Heading of the language section to look for.
        site: The pywikibot site the page is read from.
        entries: List of collected entries (nothing in this class fills it
            yet).
        graphy: The title reported by the fetched page, or the ``graphy``
            argument as given when the page has no text.
    """

    def __init__(self, graphy, languagewiki, languageInWiki):
        """Fetch the page and parse it if it is not empty.

        Args:
            graphy: Title of the page to fetch.
            languagewiki: Code of the Wiktionary edition (used to build
                ``wiktionary:<languagewiki>``).
            languageInWiki: Heading of the language section inside the page.
        """
        self.language = languageInWiki
        self.site = pwb.Site(f'wiktionary:{languagewiki}')
        self.entries = []
        # Define the attribute up front so it exists even for an empty page.
        self.graphy = graphy
        page = pwb.Page(self.site, graphy)
        if page.text:
            self.graphy = page.title()
            self.parse(wtp.parse(page.text))

    @staticmethod
    def _heading(section):
        """Return the section title without surrounding whitespace.

        A missing or empty title is returned unchanged so it never matches
        a real heading.
        """
        title = section.title
        return title.strip() if title else title

    def parse(self, wp_content):
        """Print the pronunciation section that directly follows the language.

        Looks for the first section whose heading equals ``self.language``
        and inspects the section immediately after it. If that one is titled
        "Pronunciation" its contents are printed. Nothing happens when the
        language section is absent or is the last section of the page.

        Args:
            wp_content: Parsed wikitext exposing ``sections``, each section
                having ``title`` and ``contents``.
        """
        sections = wp_content.sections
        # Position of the language section, or None when the page lacks it.
        language_index = next(
            (
                index
                for index, section in enumerate(sections)
                if self._heading(section) == self.language
            ),
            None,
        )
        if language_index is None:
            return
        # The section right after the language heading is its first subsection.
        first_sub = language_index + 1
        if first_sub >= len(sections):
            return
        if self._heading(sections[first_sub]) == "Pronunciation":
            print(sections[first_sub].contents)

    def __str__(self):
        """Return every entry on its own line, each followed by a newline."""
        return "".join(f"{entry}\n" for entry in self.entries)
if __name__ == "__main__":
    # Manual smoke test: dump the raw wikitext of the English Wiktionary
    # page "test", then build an Entries object from the same page.
    site = pwb.Site('wiktionary:en')
    page = pwb.Page(site, "test")
    raw_wikitext = page.text
    print(raw_wikitext)
    e = Entries("test", 'en', "English")
    # Printing the collected entries is left disabled for now:
    #print(e)
    #Entry("test", wtp.parse(page.text)))
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment