Skip to content
Snippets Groups Projects
Commit 7f220e33 authored by Mathieu Loiseau
Browse files

see also redirections + cas d'un mot EN qui n'existe qu'en majuscules

parent 2763bfd2
No related branches found
No related tags found
No related merge requests found
......@@ -12,6 +12,7 @@ string_values = {
"t_infl":{"past participle of":1, "infl of":1},
"t_ex":["ux", "usex"],
"t_lbl":["lb","lbl", "label"], #template for labels
"t_sa":["also","see also"],
"regions":{
"UK":"United Kingdom",
"United Kingdom":"United Kingdom",
......
......@@ -119,8 +119,18 @@ class En_en_straktor(Wikstraktor):
pos = self.constants['POS'][parsedwikitext]
return pos
def check_see_also(self, parsedwikitext):
    """Register the targets of "see also" templates as redirects.

    Scans the templates of ``parsedwikitext``; for each one whose
    normalised name is listed in ``self.constants['t_sa']``, its arguments
    are handed to ``self.add_redirect``.

    Only positional arguments are used: named parameters
    (``name=value``) are presumed to be template options rather than page
    titles. Values are stripped because positional wikitext arguments keep
    their surrounding whitespace, and empty values are ignored.

    :param parsedwikitext: parsed page exposing a ``templates`` sequence
    """
    see_also_names = self.constants['t_sa']
    for template in parsedwikitext.templates:
        if template.normal_name() not in see_also_names:
            continue
        for argument in template.arguments:
            if not argument.positional:
                continue
            target = argument.value.strip()
            if target:
                self.add_redirect(target)
def fetch(self, graphy, follow_redirections=False):
    """Fetch ``graphy``, trying its lower-cased spelling first.

    The parent ``fetch`` is called with the lower-cased form; if that
    returns 0, it is called again with the spelling exactly as given, so
    that words existing only in upper case are still found.

    :param graphy: written form of the word to fetch
    :param follow_redirections: forwarded unchanged to the parent ``fetch``
    :return: the value returned by the parent ``fetch``
    """
    lowered = graphy.lower()
    res = super().fetch(lowered, follow_redirections)
    # Retry with the original casing only when it differs from the
    # lower-cased form; otherwise the second call would repeat the first.
    if res == 0 and lowered != graphy:
        res = super().fetch(graphy, follow_redirections)
    return res
if __name__ == "__main__":
ensk = En_en_straktor()
......
......@@ -12,6 +12,7 @@ string_values = {
"t_ipa":"pron", #template for transcription
"t_snd":"écouter", #template for audio
"t_acc":["US", "UK"], #template for accents (inutile utilise régions)
"t_sa":["voir"],
"regions":{
"UK":"Royaume-Uni",
"United Kingdom":"Royaume-Uni",
......
......@@ -61,6 +61,15 @@ class Fr_en_straktor(Wikstraktor):
ik += 1
return pos
def check_see_also(self, parsedwikitext):
    """Register the targets of "see also" templates as redirects.

    For each template of ``parsedwikitext``:

    * if its normalised name is listed in ``self.constants['t_sa']``, its
      positional, non-empty arguments (stripped of surrounding whitespace)
      are handed to ``self.add_redirect``;
    * otherwise, if its name contains one of those names followed by
      ``/``, the page ``Modèle:<name>`` is loaded, its text is parsed and
      this method is applied to the result.

    :param parsedwikitext: parsed page exposing a ``templates`` sequence
    """
    see_also_names = self.constants['t_sa']
    for template in parsedwikitext.templates:
        name = template.normal_name()
        if name in see_also_names:
            for argument in template.arguments:
                # named parameters are presumed to be template options,
                # not page titles
                if not argument.positional:
                    continue
                target = argument.value.strip()
                if target:
                    self.add_redirect(target)
        elif any(prefix + "/" in name for prefix in see_also_names):
            # The parser expects wikitext, so pass the page's text and
            # not the page object itself.
            template_page = self.pwb.Page(self.site, "Modèle:" + name)
            self.check_see_also(self.wtp.parse(template_page.text))
if __name__ == "__main__":
    # Manual run: fetch the entry "test" and report the value returned by fetch.
    ensk = Fr_en_straktor()
    nb_added = ensk.fetch("test")
    print(nb_added, "entries added")
......@@ -20,6 +20,8 @@ class Wikstraklog:
self.wx_v = wikstraktor_version
self.w_l = word_language
self.wk_l = wiki_language
self.cur_w = None
self.cur_pid = -1
def set_context(self, word, permanentId):
self.cur_w = word
......
......@@ -569,9 +569,9 @@ class Wikstraktor:
def fetch(self, graphy, follow_redirections=False):
nb_entries_added = 0
page = self.pwb.Page(self.site, graphy)
to_parse = []
if page.text != "":
sections = self.wtp.parse(page.text).sections
parsedText = self.wtp.parse(page.text)
sections = parsedText.sections
found = False
i = 0
### find language
......@@ -582,7 +582,8 @@ class Wikstraktor:
if found:
nb_entries_added = self.parse(page.title(), page.latest_revision_id, sections[i].sections)
else:
self.log.add_log("Wikstraktor.fetch", f"{graphy}” page not found")
self.check_see_also(parsedText)
self.log.add_log("Wikstraktor.fetch", f"{graphy}” page not found (checked see also — {len(self.redirects)} results)", graphy, -1) #no permanentId better set to null, but database used
if len(self.redirects) > 0 and follow_redirections:
for e,p in self.redirects.items():
if not p:
......@@ -647,6 +648,11 @@ class Wikstraktor:
def process_etymology(self, parsedwikitext):
    """Do nothing here; the actual processing is left to subclasses.

    :param parsedwikitext: parsed wikitext (presumably the etymology
        section; unused by this base implementation)
    """
    return None
def check_see_also(self, parsedwikitext):
    """Parse the text for "see also" redirections and add the redirects.

    This base implementation does nothing; the actual work is left to
    subclasses.

    :param parsedwikitext: parsed wikitext of the page (unused here)
    """
    return None
def add_redirect(self, redirect):
if redirect not in self.redirects.keys():
self.redirects[redirect] = False
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment