diff --git a/parsers/en_constants.py b/parsers/en_constants.py index d3888eb44d0d31051df88e02f87a3ff3f2ad13cd..aa7392c4a7bc8855a0cc4da6b984bd2b9037a95c 100644 --- a/parsers/en_constants.py +++ b/parsers/en_constants.py @@ -12,6 +12,7 @@ string_values = { "t_infl":{"past participle of":1, "infl of":1}, "t_ex":["ux", "usex"], "t_lbl":["lb","lbl", "label"], #template for labels + "t_sa":["also","see also"], "regions":{ "UK":"United Kingdom", "United Kingdom":"United Kingdom", diff --git a/parsers/en_en.py b/parsers/en_en.py index febb56a627bd994c8b5924948acc8e45925ffea0..bdc3ce4aa10b57af19fa2bf38c2267ceb5c0b0ec 100644 --- a/parsers/en_en.py +++ b/parsers/en_en.py @@ -119,8 +119,18 @@ class En_en_straktor(Wikstraktor): pos = self.constants['POS'][parsedwikitext] return pos + def check_see_also(self, parsedwikitext): + templates = parsedwikitext.templates + for t in templates: + if t.normal_name() in self.constants['t_sa']: + for p in t.arguments: + self.add_redirect(p.value) + def fetch(self, graphy, follow_redirections): - return super().fetch(graphy.lower(), follow_redirections) + res = super().fetch(graphy.lower(), follow_redirections) + if res == 0: + res = super().fetch(graphy, follow_redirections) + return res if __name__ == "__main__": ensk = En_en_straktor() diff --git a/parsers/fr_constants.py b/parsers/fr_constants.py index 5a97c53661b9f5851aa9b1a0679bd37b01b94acd..b99999b51d475e7dc7a9acf154484b9a4b7a690f 100644 --- a/parsers/fr_constants.py +++ b/parsers/fr_constants.py @@ -12,6 +12,7 @@ string_values = { "t_ipa":"pron", #template for transcription "t_snd":"écouter", #template for audio "t_acc":["US", "UK"], #template for accents (inutile utilise régions) +"t_sa":["voir"], "regions":{ "UK":"Royaume-Uni", "United Kingdom":"Royaume-Uni", diff --git a/parsers/fr_en.py b/parsers/fr_en.py index a5caab06612e5c37b60c690d5662bfd1ab49e021..eba66f0f2c4390d17e4acb5e4c9f2ab1a5780886 100644 --- a/parsers/fr_en.py +++ b/parsers/fr_en.py @@ -61,6 +61,15 @@ class Fr_en_straktor(Wikstraktor): ik += 1 return pos + def check_see_also(self, parsedwikitext): + templates = parsedwikitext.templates + for t in templates: + if t.normal_name() in self.constants['t_sa']: + for p in t.arguments: + self.add_redirect(p.value) + elif any(s+"/" in t.normal_name() for s in self.constants['t_sa']): + self.check_see_also(self.wtp.parse(self.pwb.Page(self.site, "Modèle:"+t.normal_name()))) + if __name__ == "__main__": ensk = Fr_en_straktor() print(ensk.fetch("test"), "entries added") diff --git a/wikstraklog.py b/wikstraklog.py index 74b912b5fd1dc128d7b302342431de1e37809e0a..ba03fb1de810c9e4f6939720440f7f610ad7b349 100755 --- a/wikstraklog.py +++ b/wikstraklog.py @@ -20,6 +20,8 @@ class Wikstraklog: self.wx_v = wikstraktor_version self.w_l = word_language self.wk_l = wiki_language + self.cur_w = None + self.cur_pid = -1 def set_context(self, word, permanentId): self.cur_w = word diff --git a/wikstraktor.py b/wikstraktor.py index a6830540e42c764cd5ea2942fd0e48f6f0506e03..07853b23cf87097e0d6c8598eb9cbc31ddc2d186 100755 --- a/wikstraktor.py +++ b/wikstraktor.py @@ -569,9 +569,9 @@ class Wikstraktor: def fetch(self, graphy, follow_redirections=False): nb_entries_added = 0 page = self.pwb.Page(self.site, graphy) - to_parse = [] if page.text != "": - sections = self.wtp.parse(page.text).sections + parsedText = self.wtp.parse(page.text) + sections = parsedText.sections found = False i = 0 ### find language @@ -582,7 +582,8 @@ class Wikstraktor: if found: nb_entries_added = self.parse(page.title(), page.latest_revision_id, sections[i].sections) else: - self.log.add_log("Wikstraktor.fetch", f"“{graphy}†page not found") + self.check_see_also(parsedText) + self.log.add_log("Wikstraktor.fetch", f"“{graphy}†page not found (checked see also — {len(self.redirects)} results)", graphy, -1) #no permanentId better set to null, but database used if len(self.redirects) > 0 and follow_redirections: for e,p in self.redirects.items(): if not p: @@ -647,6 +648,11 @@ class Wikstraktor: def process_etymology(self, parsedwikitext): pass#in subclass + def check_see_also(self, parsedwikitext): + #parses the text for see also redirections + #adds the corresponding redirects + pass#insubclass + def add_redirect(self, redirect): if redirect not in self.redirects.keys(): self.redirects[redirect] = False