From 7f220e33f5ce7603e91934a1cf5c67d14c4b2f7d Mon Sep 17 00:00:00 2001
From: Mathieu Loiseau <mathieu.loiseau@liris.cnrs.fr>
Date: Thu, 11 Jan 2024 15:02:32 +0100
Subject: [PATCH] see also redirections + cas d'un mot EN qui n'existe qu'en
 majuscules

---
 parsers/en_constants.py |  1 +
 parsers/en_en.py        | 12 +++++++++++-
 parsers/fr_constants.py |  1 +
 parsers/fr_en.py        |  9 +++++++++
 wikstraklog.py          |  2 ++
 wikstraktor.py          | 12 +++++++++---
 6 files changed, 33 insertions(+), 4 deletions(-)

diff --git a/parsers/en_constants.py b/parsers/en_constants.py
index d3888eb..aa7392c 100644
--- a/parsers/en_constants.py
+++ b/parsers/en_constants.py
@@ -12,6 +12,7 @@ string_values = {
 	"t_infl":{"past participle of":1, "infl of":1},
 	"t_ex":["ux", "usex"],
 	"t_lbl":["lb","lbl", "label"], #template for labels
+	"t_sa":["also","see also"],
 	"regions":{
 		"UK":"United Kingdom",
 		"United Kingdom":"United Kingdom",
diff --git a/parsers/en_en.py b/parsers/en_en.py
index febb56a..bdc3ce4 100644
--- a/parsers/en_en.py
+++ b/parsers/en_en.py
@@ -119,8 +119,18 @@ class En_en_straktor(Wikstraktor):
 			pos = self.constants['POS'][parsedwikitext]
 		return pos
 
+	def check_see_also(self, parsedwikitext): #queue every page named by a “see also” template as a redirect
+		for t in parsedwikitext.templates:
+			if t.normal_name() in self.constants['t_sa']: #t_sa holds the see-also template names
+				for p in t.arguments:
+					if p.positional: #named arguments are template options, not page titles
+						self.add_redirect(p.value.strip())
+
 	def fetch(self, graphy, follow_redirections):
-		return super().fetch(graphy.lower(), follow_redirections)
+		res = super().fetch(graphy.lower(), follow_redirections) #try the lowercase spelling first
+		if res == 0 and graphy != graphy.lower(): #nothing added: retry as typed, unless that is the very same page
+			res = super().fetch(graphy, follow_redirections) #the word may only exist with capitals
+		return res
 
 if __name__ == "__main__":
 	ensk = En_en_straktor()
diff --git a/parsers/fr_constants.py b/parsers/fr_constants.py
index 5a97c53..b99999b 100644
--- a/parsers/fr_constants.py
+++ b/parsers/fr_constants.py
@@ -12,6 +12,7 @@ string_values = {
 "t_ipa":"pron", #template for transcription
 "t_snd":"écouter", #template for audio
 "t_acc":["US", "UK"], #template for accents (inutile utilise régions)
+"t_sa":["voir"],
 "regions":{
 		"UK":"Royaume-Uni",
 		"United Kingdom":"Royaume-Uni",
diff --git a/parsers/fr_en.py b/parsers/fr_en.py
index a5caab0..eba66f0 100644
--- a/parsers/fr_en.py
+++ b/parsers/fr_en.py
@@ -61,6 +61,15 @@ class Fr_en_straktor(Wikstraktor):
 			ik += 1
 		return pos
 
+	def check_see_also(self, parsedwikitext): #queue every page named by a “voir” template as a redirect
+		templates = parsedwikitext.templates
+		for t in templates:
+			if t.normal_name() in self.constants['t_sa']: #t_sa holds the see-also template names
+				for p in t.arguments:
+					self.add_redirect(p.value)
+			elif any(s+"/" in t.normal_name() for s in self.constants['t_sa']): #sub-template (“voir/…”): recurse into the template's own page
+				self.check_see_also(self.wtp.parse(self.pwb.Page(self.site, "Modèle:"+t.normal_name()).text)) #parse the page's wikitext, not the Page object
+
 if __name__ == "__main__":
 	ensk = Fr_en_straktor()
 	print(ensk.fetch("test"), "entries added")
diff --git a/wikstraklog.py b/wikstraklog.py
index 74b912b..ba03fb1 100755
--- a/wikstraklog.py
+++ b/wikstraklog.py
@@ -20,6 +20,8 @@ class Wikstraklog:
         self.wx_v = wikstraktor_version
         self.w_l = word_language
         self.wk_l = wiki_language
+        self.cur_w = None
+        self.cur_pid = -1
 
     def set_context(self, word, permanentId):
         self.cur_w = word
diff --git a/wikstraktor.py b/wikstraktor.py
index a683054..07853b2 100755
--- a/wikstraktor.py
+++ b/wikstraktor.py
@@ -569,9 +569,9 @@ class Wikstraktor:
 	def fetch(self, graphy, follow_redirections=False):
 		nb_entries_added = 0
 		page = self.pwb.Page(self.site, graphy)
-		to_parse = []
 		if page.text != "":
-			sections = self.wtp.parse(page.text).sections
+			parsedText = self.wtp.parse(page.text)
+			sections = parsedText.sections
 			found = False
 			i = 0
 			### find language
@@ -582,7 +582,8 @@ class Wikstraktor:
 			if found:
 				nb_entries_added = self.parse(page.title(), page.latest_revision_id, sections[i].sections)
 			else:
-				self.log.add_log("Wikstraktor.fetch", f"“{graphy}” page not found")
+				self.check_see_also(parsedText)
+				self.log.add_log("Wikstraktor.fetch", f"“{graphy}” page not found (checked see also — {len(self.redirects)} results)", graphy, -1) #no permanentId better set to null, but database used
 		if len(self.redirects) > 0 and follow_redirections:
 			for e,p in self.redirects.items():
 				if not p:
@@ -647,6 +648,11 @@ class Wikstraktor:
 	def process_etymology(self, parsedwikitext):
 		pass#in subclass
 
+	def check_see_also(self, parsedwikitext):
+		#hook called by fetch when the language section is not found in the page
+		#subclasses parse the “see also” templates and register the targets with add_redirect
+		pass#in subclass
+
 	def add_redirect(self, redirect):
 		if redirect not in self.redirects.keys():
 			self.redirects[redirect] = False
-- 
GitLab