diff --git a/parsers/en_constants.py b/parsers/en_constants.py index 76eca3e234cadaf30e83ceae3d84706d54bbc888..d3888eb44d0d31051df88e02f87a3ff3f2ad13cd 100644 --- a/parsers/en_constants.py +++ b/parsers/en_constants.py @@ -9,6 +9,7 @@ string_values = { "t_deflabel":"lb", "t_alt":"alternative spelling of", "t_alt_param":1, #number of the parameter of t_alt containing the other spelling + "t_infl":{"past participle of":1, "infl of":1}, "t_ex":["ux", "usex"], "t_lbl":["lb","lbl", "label"], #template for labels "regions":{ diff --git a/parsers/en_en.py b/parsers/en_en.py index c52dcfc5a91146d2352e464d2b6f653da7d3b241..6353a0279ecca7f82c77c2645e3175be4e33d61f 100644 --- a/parsers/en_en.py +++ b/parsers/en_en.py @@ -79,6 +79,7 @@ class En_en_straktor(Wikstraktor): def_text = parsed_def.plain_text().strip() templates = parsed_def.templates the_def = self.parse_template_1(templates) + self.try_inflected_forms(templates) if the_def == None: the_def = self.parse_alt_spell(templates) if the_def == None: diff --git a/parsers/fr_constants.py b/parsers/fr_constants.py index ffcde47bdc2acfe4cb90d3d510ff2566fefd06f0..5a97c53661b9f5851aa9b1a0679bd37b01b94acd 100644 --- a/parsers/fr_constants.py +++ b/parsers/fr_constants.py @@ -6,6 +6,7 @@ string_values = { "t_deflabel":["lexique", "info lex"], "t_alt":"variante de", "t_alt_param":0, #number of the parameter of t_alt containing the other spelling +"t_infl":{"en-conj-irrég":"inf", "en-conj-aux":"1", "en-conj-rég":"inf", "lien":0, "l":0}, "t_ex":"exemple", #Inexistants "t_ipa":"pron", #template for transcription diff --git a/wikstraktor.py b/wikstraktor.py index dae797951cc2f4c20b1316b06f56d2546c7fa9b5..7e1871d74df5ded168cfb6ca2e02707ffae1faff 100755 --- a/wikstraktor.py +++ b/wikstraktor.py @@ -547,6 +547,7 @@ class Wikstraktor: def __init__(self): self.entries = [] + self.redirects = {} self.pwb = pywikibot self.wtp = wikitextparser self.parserContext = None @@ -579,6 +580,10 @@ class Wikstraktor: nb_entries_added = self.parse(page.title(), page.latest_revision_id, sections[i].sections) else: self.log.add_log("Wikstraktor.fetch", f"“{graphy}†page not found") + if len(self.redirects) > 0: + for e,p in self.redirects.items(): + if not p: + nb_entries_added += self.fetch(self.process_redirect(e)) return nb_entries_added def parse(self, entry, v_id, sections): @@ -639,14 +644,29 @@ class Wikstraktor: def process_etymology(self, parsedwikitext): pass#in subclass + def add_redirect(self, redirect): + if redirect not in self.redirects.keys(): + self.redirects[redirect] = False + + def process_redirect(self, redirect): + if redirect in self.redirects.keys(): + self.redirects[redirect] = True + return redirect + def parse_alt_spell(self, templates): the_def = None for t in templates: if t.normal_name() == self.constants['t_alt']: the_def = Definition(self.entry_language, f"Alternate spelling of “{t.arguments[self.constants['t_alt_param']].value}â€") + self.add_redirect(t.arguments[self.constants['t_alt_param']].value) break return the_def + def try_inflected_forms(self, templates): + for t in templates: + if t.normal_name() in self.constants['t_infl'].keys(): + self.add_redirect(t.arguments[self.constants['t_infl'][t.normal_name()]].value) + #can be overloaded def process_example(self, example_wiki_text): k = 0