Replace en_constants.py

1f3789ad · Enzo Simonnet · 86daa52a · 1f3789ad
Commit 1f3789ad authored 2 years ago by Enzo Simonnet
--- a/parsers/en_constants.py
+++ b/parsers/en_constants.py
-#!/usr/bin/env python3
-from wikstraktor import Wikstraktor, Pronunciation, Sense
-
-from parsers.en_constants import string_values
-
-debugEty = 0
-
-class En_en_straktor(Wikstraktor):
-	def __init__(self):
-		super().__init__()
-		self.wiki_language = "en"
-		self.entry_language = "en"
-		self.constants = string_values
-		self.site = self.pwb.Site(f'wiktionary:en')
-
-	def process_pronunciation(self, proContent):
-		# TODO: ne marche que pour les listes à 2 niveaux, voir water pour 3 niveaux
-		l = proContent.get_lists()[0]
-		i = 0
-		pronunciations = []
-		while i < len(l.fullitems):
-			p = Pronunciation()
-			templates = self.wtp.parse(l.fullitems[i]).templates
-			a = None
-			for j, t in enumerate(templates):
-				if (t.normal_name() == self.constants['t_acc'] and templates[j+1].normal_name()!= self.constants['t_acc']):
-					a = t.arguments[0].value
-				elif t.normal_name() == self.constants['t_ipa']:
-					p.set_transcription(t.arguments[1].value)
-					p.set_accent(a)
-				elif t.normal_name() == self.constants['t_snd']:
-					p.add_sound(self.get_file_url(t.arguments[1].value), a)
-				if j==len(templates)-1 or templates[j+1].normal_name()== self.constants['t_acc'] :
-					if p.ipa != None or p.accent != None:
-						pronunciations.append(p)
-						p = Pronunciation()
-			i += 1
-		return pronunciations
-
-	def process_etymology(self, etyContent):
-		global debugEty
-		debugEty += 1
-		return "Etymology" + str(debugEty)
-
-	def process_senses(self, entry, pos, sensesContent):
-		baseId = f"{entry}_{pos}_"
-		#here we don't look at
-		l = sensesContent.get_lists()[0]
-		i = 0
-		senses = []
-		while i < len(l.fullitems):
-			newSense = Sense(f"{baseId}{i}")
-			li = self.wtp.parse(l.fullitems[i])
-			j = 0
-			while j < len(li.templates) and li.templates[j].normal_name() != self.constants['t_deflabel']:
-				j += 1
-			if j < len(li.templates):
-				newSense.set_domain(li.templates[j].arguments[-1].value)#We could use the second parameter for a comment
-				newSense.add_def(self.wiki_language, self.wtp.parse(li.get_lists()[0].items[0]).plain_text().strip())
-				while j < len(li.templates)-1 and li.templates[j+1].normal_name() == self.constants['t_ex']:
-					newSense.add_example(li.templates[j+1].arguments[1].value)
-					j += 1
-				senses.append(newSense)
-				if len(li.get_lists(pattern = '##')) > 0 :
-					for cnt, k in enumerate (li.get_lists(pattern = '##')[0].items):
-							if self.wtp.parse(k).templates[0].normal_name() == self.constants['t_deflabel']:
-								newSense2 = Sense(f"{baseId}{i}{cnt}")
-								newSense2.set_domain(self.wtp.parse(k).templates[0].arguments[-1].value)#We could use the second parameter for a comment
-								newSense2.add_def(self.wiki_language, self.wtp.parse(k).plain_text().strip())
-							for a in self.wtp.parse(li.get_lists(pattern = '##')[0].fullitems[cnt]).templates:
-								if a.normal_name() == self.constants['t_ex']:
-									newSense2.add_example(a.arguments[-1].value)
-							senses.append(newSense2)
-			# TODO: process examples
-			i += 1
-		return senses
-
-if __name__ == "__main__":
-	ensk = En_en_straktor()
-	print(ensk.fetch("test"), "entries added")
+string_values = { #string constants for the English Wiktionary parser, looked up by key
+	"ety":"Etymology",
+	"pro":"Pronunciation",
+	"en":"English",
+	"fr":"French",
+	"t_ipa":"IPA", #name of the template holding the IPA transcription
+	"t_snd":"audio", #name of the template holding an audio file
+	"t_acc":"a", #name of the template holding the accent
+	"t_deflabel":"lb", #name of the label template; the parser takes a sense's domain from its last argument
+	"t_ex":"ux", #name of the template whose argument the parser stores as an example
+	"POS": { #part-of-speech names mapped to abbreviations, see https://en.wiktionary.org/wiki/Wiktionary:POS
+		"Adjective":"Adj",
+		"Adverb":"Adv",
+		"Ambiposition":"Ambip",
+		"Article":"Art",
+		"Circumposition":"Circump",
+		"Classifier":"Class",
+		"Conjunction":"Conj",
+		"Contraction":"Cont",
+		"Counter":"Count",
+		"Determiner":"Det",
+		"Ideophone":"Ideophone",
+		"Interjection":"Interj",
+		"Noun":"N",
+		"Numeral":"Num",
+		"Participle":"Part",
+		"Particle":"Particle",
+		"Postposition":"Postp",
+		"Preposition":"Prep",
+		"Pronoun":"Pro",
+		"Proper noun":"NP",
+		"Verb":"V" # TODO: complete this list
+	}
+}