From 5bf2f6f7161bfd64b2de8d86c9020833678f4068 Mon Sep 17 00:00:00 2001
From: Mathieu Loiseau <mathieu.loiseau@liris.cnrs.fr>
Date: Thu, 16 Mar 2023 01:00:04 +0100
Subject: [PATCH] v1 avec identifiants

---
 parsers/en_en.py |  4 +---
 parsers/fr_en.py |  1 -
 wikstraktor.py   | 31 ++++++++++++++++---------------
 3 files changed, 17 insertions(+), 19 deletions(-)

diff --git a/parsers/en_en.py b/parsers/en_en.py
index 1e1a5de..8761a51 100644
--- a/parsers/en_en.py
+++ b/parsers/en_en.py
@@ -19,7 +19,7 @@ class En_en_straktor(Wikstraktor):
 		i = 0
 		pronunciations = []
 		while i < len(l.fullitems):
-			p = Pronunciation(len(pronunciations))
+			p = Pronunciation()
 			templates = self.wtp.parse(l.fullitems[i]).templates
 			a = None
 			for j, t in enumerate(templates):
@@ -49,7 +49,6 @@ class En_en_straktor(Wikstraktor):
 		return pos
 
 	def process_senses(self, sensesContent):
-		print("process_senses")
 		l = sensesContent.get_lists(('\\# ', '\\#:','\\## ', '\\##:' ))
 		i = 0
 		senses = []
@@ -95,7 +94,6 @@ class En_en_straktor(Wikstraktor):
 			if cnt > 0:
 				i -= 1
 			i += 1
-		print(f"process_senses done {senses[-1].serializable()}")
 		return senses
 
 if __name__ == "__main__":
diff --git a/parsers/fr_en.py b/parsers/fr_en.py
index a44ff5d..49f9896 100644
--- a/parsers/fr_en.py
+++ b/parsers/fr_en.py
@@ -53,7 +53,6 @@ class Fr_en_straktor(Wikstraktor):
 				keys = list(self.constants['POS'].keys())
 				pos = keys[ik]
 			ik += 1
-		print(pos)
 		return pos
 
 	def process_senses(self, sensesContent):
diff --git a/wikstraktor.py b/wikstraktor.py
index 23a81f4..0a38474 100755
--- a/wikstraktor.py
+++ b/wikstraktor.py
@@ -83,7 +83,7 @@ class Pronunciation(SubInfo):
 		return res
 
 	def __str__(self):
-		return f"{self.serializable('')}"
+		return json.dumps(self.serializable(''))
 
 	def __eq__(self, other):
 		res = self.ipa == other.ipa and self.accent == other.accent and len(self.sounds)==len(other.sounds)
@@ -167,12 +167,15 @@ class Sense(SubInfo):
 		self.translations = [] #liste des traductions dans d'autres langues
 		self.domain = None #domaine d'usage du mot dans ce sens
 
-	def set_id(self, prefix):
+	def set_id(self, prefix=None):
 		if prefix != None and self.label == None:
-			self.label = f"{self.lang}.{prefix}_{self.__class__.next_id}"  #l'identifiant du sens
+			self.label = f"{prefix}_{self.__class__.next_id}"  #l'identifiant du sens
 			self.__class__.inc_n_id()
 		return self.label
 
+	def get_id(self):
+		return f"{self.lang}.{self.label}"
+
 	def set_domain(self, d):
 		self.domain = d
 
@@ -242,8 +245,11 @@ class Sense(SubInfo):
 				res["Translations"].append(t.serializable(prefix))
 		return res
 
+	def __str__(self):
+		return json.dumps(self.serializable())
+
 class SubSense(Sense):
-	def set_id(self, prefix):
+	def set_id(self, prefix=None):
 		if prefix != None and self.label == None:
 			self.label = f"{prefix}.{self.__class__.next_id}"  #l'identifiant du sens
 			self.__class__.inc_n_id()
@@ -268,8 +274,9 @@ class Entry:
 		self.pos = pos
 
 	def get_id(self, source_id=0):
-		if self.pos !=None:
-			pos = "_"+self.pos
+		#TODO: eventually replace source_id with the identifier of the actual source
+		if self.pos != None:
+			pos = self.pos
 		else:
 			pos = ""
 		return f"{self.lang}-{source_id}.{self.lemma}{pos}"
@@ -292,7 +299,7 @@ class Entry:
 			self.pronunciations.append(p)
 
 	def set_senses(self, senses):
-		for s in self.senses:
+		for s in senses:
 			if isinstance(s, Sense):
 				self.add_sense(s)
 			else:
@@ -301,7 +308,7 @@ class Entry:
 	def add_sense(self, s):
 		if s not in self.senses:
 			s.set_id(self.get_id())
-			self.senses.append(s)##ICITE
+			self.senses.append(s)
 
 	def is_valid(self):
 		return self.lemma != None and len(self.pronunciations) > 0 and self.pos != None and len(self.senses) > 0
@@ -333,7 +340,6 @@ class Entry:
 		res[self.lemma]["senses"] = {}
 		for s in self.senses:
 			res[self.lemma]["senses"][s.get_id()]=s.serializable(id)
-		print(f"Entry:{res}")##
 		return res
 
 	def __str__(self):
@@ -387,7 +393,6 @@ class ParserContext:
 		#Dans le dictionnaire de keys, il n'y a jamais de senses ou de POS
 		res = Entry(self.lemma, self.lang, self.wiki_lang, self.version_id)
 		for l in self.context:
-			#print(l.keys())
 			if "pro" in l.keys():
 				res.set_pronunciations(l['pro'])
 			if "ety" in l.keys():
@@ -397,7 +402,6 @@ class ParserContext:
 			if "senses" in l.keys():
 				res.set_senses(l['senses'])
 			# TODO: Ajouter les autres types
-		print(res)
 		if res.is_valid() and res not in self.entries:
 			self.entries.append(res)
 		else:
@@ -491,6 +495,7 @@ class Wikstraktor:
 						self.parserContext.set_top_entry_info('POS', pos, False)
 						self.parserContext.set_top_entry_info('senses', self.process_senses(self.wtp.parse(s.contents))) #cette ligne le prob
 		# self.parserContext.entries augmente pas même lorsque qu'on entre dans le if au dessus.
+
 		res = len(self.parserContext.entries)
 		if res > 0:
 			for e in self.parserContext.entries:
@@ -502,7 +507,6 @@ class Wikstraktor:
 			res = title == self.constants['pro']
 		else:
 			res = title in self.constants['pro']
-		#print(title, res)
 		return res
 
 	def isEty(self, title):
@@ -531,7 +535,6 @@ class Wikstraktor:
 		res = []
 		for e in self.entries:
 			res.append(e.serializable(id))
-		print(f"Export{res}")##
 		if compact:
 			return json.dumps(res, ensure_ascii=ascii)
 		else:
@@ -558,8 +561,6 @@ if __name__ == "__main__":
 		resp = None
 		if w.fetch(args.mot) > 0:
 			resp = w.export(not args.no_id, args.force_ascii, args.compact)
-			print(w)##
-			print(resp)##
 		if args.destination_file != None:
 			f = open(args.destination_file, "w")
 			f.write(resp)
-- 
GitLab