From 73c27df30a39e9a9962e6ef5e1f1fc9c20abd353 Mon Sep 17 00:00:00 2001
From: Mathieu Loiseau <mathieu.loiseau@liris.cnrs.fr>
Date: Mon, 3 Oct 2022 21:12:09 +0200
Subject: [PATCH] Correction bug doublon

---
 parsers/en_en.py |  7 +----
 pronunciation.py | 11 ++++++++
 wikstraktor.py   | 68 +++++++++++++++++++++++++++++++++++++++---------
 3 files changed, 68 insertions(+), 18 deletions(-)

diff --git a/parsers/en_en.py b/parsers/en_en.py
index 785b470..2f86b7b 100644
--- a/parsers/en_en.py
+++ b/parsers/en_en.py
@@ -27,19 +27,15 @@ class En_en_straktor(Wikstraktor):
 						templates.append(t)
 			a = None
 			for t in templates:
-				print(t.normal_name())
 				if t.normal_name() == self.constants['t_acc']:
 					a = t.arguments[0].value
 				elif t.normal_name() == self.constants['t_ipa']:
 					p.set_transcription(t.arguments[1].value)
 					p.set_accent(a)
-					print(t, t.arguments, t.arguments[0].value)
 				elif t.normal_name() == self.constants['t_snd']:
 					p.add_sound(self.get_file_url(t.arguments[1].value), a)
-					print(t, t.arguments, t.arguments[1].value)
 			pronunciations.append(p)
 			i += 1
-		print(pronunciations[0], pronunciations[1])
 		return pronunciations
 
 	def process_etymology(self, etyContent):
@@ -48,8 +44,7 @@ class En_en_straktor(Wikstraktor):
 		return "Etymology" + str(debugEty)
 
 	def process_senses(self, sensesContent):
-		import random as r
-		return "Cool"+r.choice(['a', 'b', 'c', 'd', 'e', 'f', 'g'])
+		return ["Cool."+sensesContent.plain_text()[3:15]]
 
 if __name__ == "__main__":
 	ensk = En_en_straktor()
diff --git a/pronunciation.py b/pronunciation.py
index 6cc9b67..9525292 100644
--- a/pronunciation.py
+++ b/pronunciation.py
@@ -4,6 +4,9 @@ class Sound:
 		self.url = url
 		self.accent = accent
 
+	def __eq__(self, other):
+		return self.url == other.url and self.accent == other.accent
+
 	def serializable(self):
 		if self.accent == None:
 			res = {"url":self.url}
@@ -38,3 +41,11 @@ class Pronunciation:
 
 	def __str__(self):
 		return f"{self.serializable()}"
+
+	def __eq__(self, other):
+		res = self.ipa == other.ipa and self.accent == other.accent and len(self.sounds)==len(other.sounds)
+		i = 0
+		while res and i<len(self.sounds):
+			res = self.sounds[i] == other.sounds[i]
+			i += 1
+		return res
diff --git a/wikstraktor.py b/wikstraktor.py
index 6303389..5be3681 100755
--- a/wikstraktor.py
+++ b/wikstraktor.py
@@ -8,26 +8,56 @@ from pronunciation import Pronunciation
 class Entry:
 	def __init__(self, lemma):
 		self.lemma = lemma
+		self.pronunciations = []
+		self.pos = None
+		self.senses = []
 
 	def set_pronunciations(self, pron):
 		if isinstance(pron, Pronunciation):
-			self.pronunciations = pron
+			self.pronunciations.append(pron)
+		elif type(pron) == list:
+			for p in pron:
+				if isinstance(p, Pronunciation):
+					self.pronunciations.append(p)
+				else:
+					raise ValueError(f"Entry.set_pronunciation: {p} is not a Pronunciation object ({p.__class__.__name__}).")
 		else:
 			raise ValueError(f"Entry.set_pronunciation: {pron} is not a Pronunciation object ({pron.__class__.__name__}).")
 
-	def set_POS(self, pos):
+	def set_pos(self, pos):
 		self.pos = pos
 
+	def set_senses(self, senses):
+		self.senses = senses
+
+	def is_valid(self):
+		return self.lemma != None and len(self.pronunciations) > 0 and self.pos != None and len(self.senses) > 0
+
+	def __eq__(self, other):
+		res = self.lemma == other.lemma and self.pos ==other.pos and len(self.pronunciations) == len(other.pronunciations) and len(self.senses) == len(other.senses)
+		i = 0
+		while res and i < len(self.senses):
+			res = self.senses[i] == other.senses[i]
+			i += 1
+		i = 0
+		while res and i < len(self.pronunciations):
+			res = self.pronunciations[i] == other.pronunciations[i]
+			i += 1
+		return res
+
 	def __str__(self):
 		res = f"{self.lemma} ({self.pos})\n"
 		for p in self.pronunciations:
 			res += f"{str(p)}\n"
+		for s in self.senses:
+			res += f"{str(s)}\n"
 		return res
 
 class ParserContext:
 	def __init__(self, entry):
 		self.lemma = entry
 		self.context = []
+		self.entries = []
 
 	def get_level(self):
 		if len(self.context) == 0:
@@ -39,7 +69,9 @@ class ParserContext:
 	def push(self, wiki_context):
 		self.context.append({"wiki":wiki_context})
 
-	def pop(self):
+	def pop(self, testNewEntry = True):
+		if testNewEntry:
+			self.create_entry()
 		return self.context.pop()
 
 	def set_top_wiki(self, wiki_context):
@@ -48,22 +80,30 @@ class ParserContext:
 		else:
 			self.context[-1]['wiki'] = wiki_context
 
-	def set_top_entry_info(self, key, entry_context):
+	def set_top_entry_info(self, key, entry_context, testNewEntry=True):
 		if len(self.context) == 0:
 			raise ValueError(f"Trying to set up entry info ({entry_context}), in an empty parserContext.")
 		else:
 			self.context[-1][key] = entry_context
+			if testNewEntry:
+				self.create_entry()
 
 	def create_entry(self):
 		res = Entry(self.lemma)
 		for l in self.context:
-			if l['pro'] != None:
-				res.set_pronunciations(l['entry_info'])
-			if l['ety'] != None:
+			if "pro" in l.keys():
+				res.set_pronunciations(l['pro'])
+			if "ety" in l.keys():
 				pass #On ignore l'étymologie pour le moment
-			if l['POS'] != None:
+			if "POS" in l.keys():
 				res.set_pos(l['POS'])
+			if "senses" in l.keys():
+				res.set_senses(l['senses'])
 			# TODO: Ajouter les autres types
+		if res.is_valid() and res not in self.entries:
+			self.entries.append(res)
+		else:
+			res = None
 		return res
 
 	def debug_top(self):
@@ -142,15 +182,18 @@ class Wikstraktor:
 				elif self.constants['ety'] in s.title:
 					self.parserContext.set_top_entry_info('ety', self.process_etymology(self.wtp.parse(s.contents)))
 				elif s.title in self.constants['POS'].keys():
-					self.parserContext.set_top_entry_info('POS', self.constants['POS'][s.title])
+					self.parserContext.set_top_entry_info('POS', self.constants['POS'][s.title], False)
 					self.parserContext.set_top_entry_info('senses', self.process_senses(self.wtp.parse(s.contents)))
-			print(self.parserContext.debug_top())
-		print("ok")
+		res = len(self.parserContext.entries)
+		if res > 0:
+			for e in self.parserContext.entries:
+				self.entries.append(e)
+		return res
 
 	def __str__(self):
 		res = ""
 		for e in self.entries:
-			res += f"{e}\n"
+			res += f"{str(e)}\n"
 		return res
 
 
@@ -159,6 +202,7 @@ if __name__ == "__main__":
 	# print(e.get_file_url("File:LL-Q1860 (eng)-Nattes à chat----parent.wav"))
 	# print(e.get_file_url("File:LL-Q1860 (eng)-Nattes à chat-parent.wav"))
 	print(e.fetch("test"), "entries added")
+	print(e)
 	# site = pywikibot.Site(f'wiktionary:en')
 	# p = pywikibot.FilePage(site, "File:LL-Q1860 (eng)-Nattes à chat----parent.wav")
 	# print(p)
-- 
GitLab