From 2827aaf28c8886e20d78f7ce41573e5383d58764 Mon Sep 17 00:00:00 2001
From: Mathieu Loiseau <mathieu.loiseau@liris.cnrs.fr>
Date: Thu, 16 Mar 2023 09:50:57 +0100
Subject: [PATCH] Create Sense with definition

---
 .gitignore       |  4 ++--
 parsers/en_en.py |  9 ++++----
 parsers/fr_en.py |  6 ++----
 wikstraktor.py   | 53 ++++++++++++++++++++++++++++++++----------------
 4 files changed, 44 insertions(+), 28 deletions(-)

diff --git a/.gitignore b/.gitignore
index a240499..69e9892 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,6 +1,5 @@
 user-config.py
 user-password.py
-pywikibot.lwp
 __pycache__
 apicache-py3
 logs
@@ -8,5 +7,6 @@ throttle.ctrl
 user_list.py
 KNM.csv
 .~lock*
-test.json
+*.json
+*.lwp
 wikstraktorenv
diff --git a/parsers/en_en.py b/parsers/en_en.py
index 8761a51..cf93078 100644
--- a/parsers/en_en.py
+++ b/parsers/en_en.py
@@ -54,8 +54,10 @@ class En_en_straktor(Wikstraktor):
 		senses = []
 		while i < len(l):
 			if l[i].pattern == '\\# ':
-				newSense = Sense(lang=self.entry_language)
-				newSense.add_def(self.wiki_language, self.wtp.parse(l[i].items[0]).plain_text().strip())
+				theDef = self.wtp.parse(l[i].items[0]).plain_text().strip()
+				if theDef != "":
+					print(theDef)# DEBUG:
+					newSense = Sense(self.entry_language, theDef, self.wiki_language)
 				#newSence.add_translation()
 			elif l[i].pattern == '\\#:':
 				for j in l[i].items:
@@ -74,8 +76,7 @@ class En_en_straktor(Wikstraktor):
 			while i < len(l) and l[i].level == 3 :
 				cnt +=1
 				if l[i].pattern == '\\## ':
-					newSense2 = SubSense(lang=self.entry_language)
-					newSense2.add_def(self.wiki_language, self.wtp.parse(l[i].items[0]).plain_text().strip())
+					newSense2 = SubSense(self.entry_language, self.wtp.parse(l[i].items[0]).plain_text().strip(), self.wiki_language)
 					#newSense2.add_translation()
 				elif l[i].pattern == '\\##:':
 					for j in l[i].items:
diff --git a/parsers/fr_en.py b/parsers/fr_en.py
index 49f9896..fcaa931 100644
--- a/parsers/fr_en.py
+++ b/parsers/fr_en.py
@@ -61,8 +61,7 @@ class Fr_en_straktor(Wikstraktor):
 		senses = []
 		while i < len(l):
 			if l[i].pattern == '\\# ':
-				newSense = Sense(lang=self.entry_language)
-				newSense.add_def(self.wiki_language, self.wtp.parse(l[i].items[0]).plain_text().strip())
+				newSense = Sense(self.entry_language, self.wtp.parse(l[i].items[0]).plain_text().strip(), self.wiki_language)
 				#la version d'enzo ajoute +ieurs defs (for i in l[i].items)
 			elif l[i].pattern == '\\#:':
 				for j in l[i].items:
@@ -81,8 +80,7 @@ class Fr_en_straktor(Wikstraktor):
 			while i < len(l) and l[i].level == 3 :
 				cnt +=1
 				if l[i].pattern == '\\## ':
-					newSense2 = SubSense(lang=self.entry_language)
-					newSense2.add_def(self.wiki_language, self.wtp.parse(l[i].items[0]).plain_text().strip())
+					newSense2 = SubSense(self.entry_language, self.wtp.parse(l[i].items[0]).plain_text().strip(), self.wiki_language)
 				elif l[i].pattern == '\\##:':
 					for j in l[i].items:
 						k = 0
diff --git a/wikstraktor.py b/wikstraktor.py
index 0a38474..c1e98fb 100755
--- a/wikstraktor.py
+++ b/wikstraktor.py
@@ -4,6 +4,8 @@ import wikitextparser
 import importlib
 import json
 
+#ICITE : fr marche pas, en prend des trucs vides à virer (cf. yellow… def & example)
+
 
 class SubInfo:
 	next_id = 1
@@ -44,7 +46,7 @@ class Sound:
 		self.accent = accent
 
 	def __eq__(self, other):
-		return self.url == other.url and self.accent == other.accent
+		return isinstance(other, self.__class__) and self.url == other.url and self.accent == other.accent
 
 	def serializable(self):
 		if self.accent == None:
@@ -86,7 +88,7 @@ class Pronunciation(SubInfo):
 		return json.dumps(self.serializable(''))
 
 	def __eq__(self, other):
-		res = self.ipa == other.ipa and self.accent == other.accent and len(self.sounds)==len(other.sounds)
+		res = isinstance(other, self.__class__) and self.ipa == other.ipa and self.accent == other.accent and len(self.sounds)==len(other.sounds)
 		i = 0
 		while res and i<len(self.sounds):
 			res = self.sounds[i] == other.sounds[i]
@@ -110,11 +112,14 @@ class Definition(SubInfo):
 
 	def __init__(self, lang, text, prefix=None):
 		super().__init__(prefix)
-		self.lang = lang
-		self.text = text
+		if text != "":
+			self.lang = lang
+			self.text = text
+		else:
+			raise ValueError(f"Definition.__init__: “{text}” empty definition.")
 
 	def __eq__(self, other):
-		return self.lang == other.lang and self.text == other.text
+		return isinstance(other, self.__class__) and self.lang == other.lang and self.text == other.text
 
 	def serializable(self, prefix = None):
 		res = super().serializable(prefix)
@@ -131,12 +136,16 @@ class Example(SubInfo):
 
 	def __init__(self, transcript, source=None, url=None, prefix=None):
 		super().__init__(prefix)
-		self.text = transcript
-		self.source = source
-		self.url = url
+		if transcript != "":
+			self.text = transcript
+			self.source = source
+			self.url = url
+		else:
+			raise ValueError(f"Example.__init__: “{transcript}” empty example.")
+
 
 	def __eq__(self, other):
-		return self.text==other.text and self.source==other.source and self.url==other.url
+		return isinstance(other, self.__class__) and self.text==other.text and self.source==other.source and self.url==other.url
 
 	def serializable(self, prefix = None):
 		res = super().serializable(prefix)
@@ -150,7 +159,7 @@ class Example(SubInfo):
 class Sense(SubInfo):
 	prfx = ""
 
-	def __init__(self, prefix=None, lang=None):
+	def __init__(self, lang=None, definition=None, wiki_lang=None, prefix=None):
 		self.lang = lang
 		self.label = None
 		self.set_id(prefix)
@@ -166,6 +175,11 @@ class Sense(SubInfo):
 		self.examples = [] #liste des exemples (un texte obligatoire, source et url sont optionnels)
 		self.translations = [] #liste des traductions dans d'autres langues
 		self.domain = None #domaine d'usage du mot dans ce sens
+		if definition != None:
+			try:
+				self.add_def(wiki_lang, definition)
+			except ValueError as err:
+				raise ValueError(f"Sense.__init__() with empty definition\n{err}")
 
 	def set_id(self, prefix=None):
 		if prefix != None and self.label == None:
@@ -181,19 +195,22 @@ class Sense(SubInfo):
 
 	def add_def(self, lang, definition):
 		theDef = Definition(lang, definition)
-		if theDef not in self.definitions:
+		if theDef != None and theDef not in self.definitions:
 			theDef.set_id(self.set_id())
 			self.definitions.append(theDef)
 
 	def add_example(self, transcript, src=None, url=None, prefix=None):
-		theEx = Example(transcript, src, url, prefix)
-		if theEx not in self.examples:
-			theEx.set_id(self.set_id())
-			self.examples.append(theEx)
+		try:
+			theEx = Example(transcript, src, url, prefix)
+			if theEx != None and theEx not in self.examples:
+				theEx.set_id(self.set_id())
+				self.examples.append(theEx)
+		except ValueError as e:
+			print(f"Skipped empty example")
 
 	def add_translation(self, lang=None, translation=None):
 		theTranslation = Translation(lang, translation)
-		if theTranslation not in self.translations:
+		if theTranslation != None and theTranslation not in self.translations:
 			theTranslation.set_id(self.set_id())
 			self.translations.append(theTranslation)
 
@@ -204,7 +221,7 @@ class Sense(SubInfo):
 			self.subsenses.append(subsense)
 
 	def __eq__(self, other):
-		res = self.label == other.label and len(self.definitions) == len(other.definitions) and len(self.examples) == len(other.examples) and len(self.translations) == len(other.translations) and self.domain == other.domain
+		res = isinstance(other, self.__class__) and self.label == other.label and len(self.definitions) == len(other.definitions) and len(self.examples) == len(other.examples) and len(self.translations) == len(other.translations) and self.domain == other.domain
 		i = 0
 		while res and i < len(self.examples):
 			res = self.examples[i] in other.examples
@@ -314,7 +331,7 @@ class Entry:
 		return self.lemma != None and len(self.pronunciations) > 0 and self.pos != None and len(self.senses) > 0
 
 	def __eq__(self, other):
-		res = self.lemma == other.lemma and self.lang == other.lang and self.pos ==other.pos and len(self.pronunciations) == len(other.pronunciations) and len(self.senses) == len(other.senses)
+		res = isinstance(other, self.__class__) and self.lemma == other.lemma and self.lang == other.lang and self.pos ==other.pos and len(self.pronunciations) == len(other.pronunciations) and len(self.senses) == len(other.senses)
 		i = 0
 		while res and i < len(self.senses):
 			res = self.senses[i] == other.senses[i]
-- 
GitLab