From 948508cceee4c8486cd97d3e9588da6626ff6a04 Mon Sep 17 00:00:00 2001
From: Mathieu Loiseau <mathieu.loiseau@liris.cnrs.fr>
Date: Mon, 11 Sep 2023 18:36:51 +0200
Subject: [PATCH] follow redirects

---
 parsers/en_constants.py |  1 +
 parsers/en_en.py        |  1 +
 parsers/fr_constants.py |  1 +
 wikstraktor.py          | 20 ++++++++++++++++++++
 4 files changed, 23 insertions(+)

diff --git a/parsers/en_constants.py b/parsers/en_constants.py
index 76eca3e..d3888eb 100644
--- a/parsers/en_constants.py
+++ b/parsers/en_constants.py
@@ -9,6 +9,7 @@ string_values = {
 	"t_deflabel":"lb",
 	"t_alt":"alternative spelling of",
 	"t_alt_param":1, #number of the parameter of t_alt containing the other spelling
+	"t_infl":{"past participle of":1, "infl of":1},
 	"t_ex":["ux", "usex"],
 	"t_lbl":["lb","lbl", "label"], #template for labels
 	"regions":{
diff --git a/parsers/en_en.py b/parsers/en_en.py
index c52dcfc..6353a02 100644
--- a/parsers/en_en.py
+++ b/parsers/en_en.py
@@ -79,6 +79,7 @@ class En_en_straktor(Wikstraktor):
 		def_text = parsed_def.plain_text().strip()
 		templates = parsed_def.templates
 		the_def = self.parse_template_1(templates)
+		self.try_inflected_forms(templates)
 		if the_def == None:
 			the_def = self.parse_alt_spell(templates)
 		if the_def == None:
diff --git a/parsers/fr_constants.py b/parsers/fr_constants.py
index ffcde47..5a97c53 100644
--- a/parsers/fr_constants.py
+++ b/parsers/fr_constants.py
@@ -6,6 +6,7 @@ string_values = {
 "t_deflabel":["lexique", "info lex"],
 "t_alt":"variante de",
 "t_alt_param":0, #number of the parameter of t_alt containing the other spelling
+"t_infl":{"en-conj-irrég":"inf", "en-conj-aux":"1", "en-conj-rég":"inf", "lien":0, "l":0},
 "t_ex":"exemple",
 	#Inexistants
 "t_ipa":"pron", #template for transcription
diff --git a/wikstraktor.py b/wikstraktor.py
index dae7979..7e1871d 100755
--- a/wikstraktor.py
+++ b/wikstraktor.py
@@ -547,6 +547,7 @@ class Wikstraktor:
 
 	def __init__(self):
 		self.entries = []
+		self.redirects = {}
 		self.pwb = pywikibot
 		self.wtp = wikitextparser
 		self.parserContext = None
@@ -579,6 +580,10 @@ class Wikstraktor:
 				nb_entries_added = self.parse(page.title(), page.latest_revision_id, sections[i].sections)
 			else:
 				self.log.add_log("Wikstraktor.fetch", f"“{graphy}” page not found")
+		#follow the redirects still pending; a nested fetch() may register new ones, so loop over a snapshot of the keys and not over the live dict
+		for e in list(self.redirects):
+			if not self.redirects[e]: #read the live flag: a nested fetch() may already have followed e
+				nb_entries_added += self.fetch(self.process_redirect(e))
 		return nb_entries_added
 
 	def parse(self, entry, v_id, sections):
@@ -639,14 +644,29 @@ class Wikstraktor:
 	def process_etymology(self, parsedwikitext):
 		pass#in subclass
 
+	def add_redirect(self, redirect): #register a title to be followed later by fetch()
+		#False = not followed yet; setdefault leaves an already registered title (and its flag) untouched
+		self.redirects.setdefault(redirect, False)
+
+	def process_redirect(self, redirect): #flag a registered title as followed and hand it back unchanged
+		if redirect in self.redirects: #a title that was never registered is not added
+			self.redirects[redirect] = True
+		return redirect
+
 	def parse_alt_spell(self, templates):
 		the_def = None
 		for t in templates:
 			if t.normal_name() == self.constants['t_alt']:
 				the_def = Definition(self.entry_language, f"Alternate spelling of “{t.arguments[self.constants['t_alt_param']].value}”")
+				self.add_redirect(t.arguments[self.constants['t_alt_param']].value) #register the other spelling as a redirect to follow
 				break
 		return the_def
 
+	def try_inflected_forms(self, templates): #queue as redirect the form named by each inflection template listed in constants['t_infl']
+		for t, p in ((t, self.constants['t_infl'].get(t.normal_name())) for t in templates): #p is None when t is not an inflection template
+			if p is not None: #p is an argument name (str, read with get_arg) or an index in t.arguments (int); t.arguments is a list, a str index raised TypeError
+				self.add_redirect((t.get_arg(p) if isinstance(p, str) else t.arguments[p]).value)
+
 	#can be overloaded
 	def process_example(self, example_wiki_text):
 		k = 0
-- 
GitLab