Structure json mise à jour

af6061d5 · Enzo Simonnet · b02d9677 · af6061d5 · af6061d5 · af6061d5
Commit af6061d5 authored 2 years ago by Enzo Simonnet
--- a/parsers/en_en.py
+++ b/parsers/en_en.py
@@ -17,6 +17,7 @@ class En_en_straktor(Wikstraktor):
 		# TODO: ne marche que pour les listes à 2 niveaux, voir water pour 3 niveaux
 		l = proContent.get_lists()[0]
 		i = 0
+		cpt = 0
 		pronunciations = []
 		while i < len(l.fullitems):
 			p = Pronunciation()
@@ -32,6 +33,8 @@ class En_en_straktor(Wikstraktor):
 					p.add_sound(self.get_file_url(t.arguments[1].value), a)
 				if j==len(templates)-1 or templates[j+1].normal_name()== self.constants['t_acc'] :
 					if p.ipa != None or p.accent != None:
+						cpt += 1
+						p.id= f"p_{cpt}"
 						pronunciations.append(p)
 						p = Pronunciation()
 			i += 1
@@ -58,18 +61,21 @@ class En_en_straktor(Wikstraktor):
 			if l[i].pattern == '\\# ':
 				nombreDef += 1
 				newSense = Sense(f"{baseId}{nombreDef}")
-				newSense.add_def(self.wiki_language, self.wtp.parse(l[i].items[0]).plain_text().strip())
+				newSense.add_def(self.wiki_language, self.wtp.parse(l[i].items[0]).plain_text().strip(), f"d_{nombreDef}")
+				newSense.add_translation(f"t_{nombreDef}_0")
 			elif l[i].pattern == '\\#:':
+				cptEx=0
 				for j in l[i].items:
 					k = 0
 					isEx = 0
 					while k < len(self.wtp.parse(j).templates) and isEx == 0 :
 						if (self.wtp.parse(j).templates[k].normal_name() in self.constants['t_ex']):
-							newSense.add_example(self.wtp.parse(j).templates[0].arguments[-1].value)
+							cptEx +=1
+							newSense.add_example(self.wtp.parse(j).templates[0].arguments[-1].value, f"e_{nombreDef}_{cptEx}")
 							isEx = 1
 						k += 1
 					if isEx == 0:
-						newSense.add_example(self.wtp.parse(j).plain_text().strip())
+						newSense.add_example(self.wtp.parse(j).plain_text().strip(), f"e_{nombreDef}_{cptEx}")
 			if i == len(l)-1 or l[i+1].pattern == '\\# ' or l[i+1].pattern == '\\## ':
 				senses.append(newSense)
 			cnt = 0
@@ -79,18 +85,21 @@ class En_en_straktor(Wikstraktor):
 				if l[i].pattern == '\\## ':
 					nombreSousDef += 1
 					newSense2 = Sense(f"{baseId}{nombreDef}_{nombreSousDef}")
-					newSense2.add_def(self.wiki_language, self.wtp.parse(l[i].items[0]).plain_text().strip())
+					newSense2.add_def(self.wiki_language, self.wtp.parse(l[i].items[0]).plain_text().strip(), f"sd_{nombreDef}_{nombreSousDef}")
+					newSense2.add_translation(f"t_{nombreDef}_{nombreSousDef}_0")
 				elif l[i].pattern == '\\##:':
+					cptex2 = 0
 					for j in l[i].items:
 						k = 0
 						isEx = 0
 						while k < len(self.wtp.parse(j).templates) and isEx == 0 :
 							if (self.wtp.parse(j).templates[k].normal_name() in self.constants['t_ex']):
-								newSense2.add_example(self.wtp.parse(j).templates[0].arguments[-1].value)
+								cptex2 +=1
+								newSense2.add_example(self.wtp.parse(j).templates[0].arguments[-1].value, f"se_{nombreDef}_{nombreSousDef}_{cptex2}")
 								isEx = 1
 							k += 1
 						if isEx == 0:
-							newSense2.add_example(self.wtp.parse(j).plain_text().strip())
+							newSense2.add_example(self.wtp.parse(j).plain_text().strip(), f"se_{nombreDef}_{nombreSousDef}_{cptex2}")
 				if i == len(l)-1 or l[i+1].pattern == '\\# ' or l[i+1].pattern == '\\## ':
 					newSense.add_subsense(newSense2)
 				i += 1

--- a/parsers/fr_en.py
+++ b/parsers/fr_en.py
@@ -53,7 +53,7 @@ class Fr_en_straktor(Wikstraktor):
 				keys = list(self.constants['POS'].keys())
 				pos = keys[ik]
 			ik += 1
-# 		print(pos)
+		print(pos)
 		return pos
 	def process_senses(self, entry, pos, sensesContent):
@@ -64,9 +64,11 @@ class Fr_en_straktor(Wikstraktor):
 		nombreDef = 0
 		while i < len(l):
 			if l[i].pattern == '\\# ':
-				nombreDef += 1
+				# TODO: revisit this, very dubious
-				newSense = Sense(f"{baseId}{nombreDef}")
+				for nbDef in l[i].items :
-				newSense.add_def(self.wiki_language, self.wtp.parse(l[i].items[0]).plain_text().strip())
+					nombreDef += 1
+					newSense = Sense(f"{baseId}{nombreDef}")
+					newSense.add_def(self.wiki_language, self.wtp.parse(nbDef).plain_text().strip())
 			elif l[i].pattern == '\\#:':
 				for j in l[i].items:
 					k = 0

--- a/test_wikstraktor.py
+++ b/test_wikstraktor.py
@@ -5,7 +5,7 @@ if __name__ == "__main__":
 	# print(e.get_file_url("File:LL-Q1860 (eng)-Nattes à chat----parent.wav"))
 	# print(e.get_file_url("File:LL-Q1860 (eng)-Nattes à chat-parent.wav"))
 	#e.fetch("water")
-	f.fetch("blue")
+	f.fetch("water")
 	# print(e.fetch("test"), "entries added")
 	#print(e)
 	file_path = 'test.json'
@@ -22,3 +22,5 @@ if __name__ == "__main__":
 	# print(p.get_file_url())
 	#print(e)
 	#Entry("test", wtp.parse(page.text)))
+	# KNOWN ISSUE: the audio file is not picked up for "live" (en_en)
--- a/wikstraktor.py
+++ b/wikstraktor.py
@@ -19,7 +19,7 @@ class Sound:
 		if self.accent == None:
 			res = {"url":self.url}
 		else:
-			res = {"accent":self.accent, "url":self.url}
+			res = { "accent":self.accent, "url":self.url}
 		return res
 class Pronunciation:
@@ -27,6 +27,7 @@ class Pronunciation:
 		self.ipa = None
 		self.sounds = []
 		self.accent = None
+		self.id = None
 	def set_transcription(self, tscpt):
 		self.ipa = tscpt
@@ -42,9 +43,9 @@ class Pronunciation:
 		for s in self.sounds:
 			snds.append(s.serializable())
 		if self.accent == None:
-			res = {"transcript":self.ipa, "sounds":snds}
+			res = {"ID":self.id, "transcript":self.ipa, "sounds":snds}
 		else:
-			res = {"accent":self.accent, "transcript":self.ipa, "sounds":snds}
+			res = {"ID":self.id,"accent":self.accent, "transcript":self.ipa, "sounds":snds}
 		return res
 	def __str__(self):
@@ -70,7 +71,8 @@ class Pronunciation:
 #######
 class Definition:
-	def __init__(self, lang, text):
+	def __init__(self, lang, text, id):
+		self.id = id
 		self.lang = lang
 		self.text = text
@@ -78,23 +80,28 @@ class Definition:
 		return self.lang == other.lang and self.text == other.text
 	def serializable(self):
-		return {"lang":self.lang, "definition":self.text}
+		return {"ID":self.id, "lang":self.lang, "definition":self.text}
-class Translation(Definition):
+class Translation():
+	# A translation attached to a sense: an identifier plus an optional
+	# language code and translated text (both may be None).
+	def __init__(self, id, lang=None, text=None):
+		self.id = id
+		self.lang = lang
+		self.text = text
+	def __eq__(self, other):
+		# This class no longer derives from Definition, so it needs its own
+		# value equality on (lang, text); without it, the
+		# "not in self.translations" check in Sense.add_translation would
+		# compare by identity and never detect a duplicate.
+		if not isinstance(other, Translation):
+			return NotImplemented
+		return self.lang == other.lang and self.text == other.text
 	def serializable(self):
-		return {"lang":self.lang, "translation":self.text}
+		# Key is "ID" (no trailing colon), as in the other serializable() methods.
+		return {"ID":self.id, "lang":self.lang, "translation":self.text}
 class Example:
-	def __init__(self, transcript, source=None, url=None):
+	def __init__(self, transcript, id, source=None, url=None):
 		self.text = transcript
 		self.source = source
 		self.url = url
+		self.id = id
 	def __eq__(self, other):
 		return self.text==other.text and self.source==other.source and self.url==other.url
 	def serializable(self):
-		res = {"example":self.text}
+		res = {"ID":self.id, "example":self.text}
 		if self.source != None:
 			res["source"] = self.source
 		if self.url != None:
@@ -113,20 +120,20 @@ class Sense:
 	def set_domain(self, d):
 		self.domain = d
-	def add_def(self, lang, definition):
+	def add_def(self, lang, definition, id):
-		theDef = Definition(lang, definition)
+		theDef = Definition(lang, definition, id)
 		if self.definition == None:
 			self.definition = theDef
 		elif self.definition != theDef:
 			raise ValueError(f"Superposition de deux définitions:\n\t{self.definition}\nremplacée par\n\t{theDef}")
-	def add_example(self, transcript, src=None, url=None):
+	def add_example(self, transcript, id, src=None, url=None):
-		theEx = Example(transcript, src, url)
+		theEx = Example(transcript, id, src, url)
 		if theEx not in self.examples:
 			self.examples.append(theEx)
-	def add_translation(self, lang, translation):
+	def add_translation(self, id, lang=None, translation=None):
-		theTranslation = Translation(lang, translation)
+		theTranslation = Translation(id, lang, translation)
 		if theTranslation not in self.translations:
 			self.translations.append(theTranslation)
@@ -165,10 +172,10 @@ class Sense:
 				res[self.label]["Examples"] = []
 				for e in self.examples:
 					res[self.label]["Examples"].append(e.serializable())
-			if len(self.translations) > 0:
+			#if len(self.translations) > 0:
-				res[self.label]["Translations"] = []
+			res[self.label]["Translations"] = []
-				for t in self.translations:
+			for t in self.translations:
-					res[self.label]["Translations"].append(t.serializable())
+				res[self.label]["Translations"].append(t.serializable())
 		else:
 			if self.domain != None:
 				res["Domain"] = self.domain
@@ -181,10 +188,10 @@ class Sense:
 				res["Examples"] = []
 				for e in self.examples:
 					res["Examples"].append(e.serializable())
-			if len(self.translations) > 0:
+			#if len(self.translations) > 0:
-				res["Translations"] = []
+			res["Translations"] = []
-				for t in self.translations:
+			for t in self.translations:
-					res["Translations"].append(t.serializable())
+				res["Translations"].append(t.serializable())
 		return res
@@ -282,8 +289,8 @@ class ParserContext:
 			if testNewEntry:
 				self.create_entry()
+# FIXME: there is a problem somewhere in here
 	def create_entry(self):
-		#Dans le dictionnaire de keys, il n'y a jamais de senses ou de POS
 		res = Entry(self.lemma)
 		for l in self.context:
 			#print(l.keys())
@@ -387,7 +394,8 @@ class Wikstraktor:
 					pos = self.process_POS(stitle)
 					if pos != None :
 						self.parserContext.set_top_entry_info('POS', pos, False)
-						self.parserContext.set_top_entry_info('senses', self.process_senses(entry, pos+str(len(self.parserContext.entries)), self.wtp.parse(s.contents)))
+						self.parserContext.set_top_entry_info('senses', self.process_senses(entry, pos+str(len(self.parserContext.entries)), self.wtp.parse(s.contents))) #cette ligne le prob
+		# NOTE: self.parserContext.entries does not grow even when the if above is entered.
 		res = len(self.parserContext.entries)
 		if res > 0:
 			for e in self.parserContext.entries: