#!/usr/bin/env python3
import pywikibot
import wikitextparser
import importlib
import json
from wikstraktor_version import version as the_version
from wikstraklog import Wikstraklog

# TODO(resume here): the fr parser does not work; the en parser picks up empty items that must be removed (cf. yellow… def & example)


class SubInfo:
	"""Base class for sub-elements that carry an auto-numbered identifier.

	Identifiers have the form ``{prefix}_{prfx}{n}``: ``prfx`` is the
	class-level tag and ``n`` is the value of the class-level counter
	``next_id`` at the moment the identifier is assigned.
	"""
	# Number used for the next identifier handed out by this class.
	# NOTE(review): starts at 1 here whereas reset() restarts at 0 — confirm intent.
	next_id = 1
	# Tag inserted in identifiers; subclasses override it.
	prfx = "err"

	@classmethod
	def inc_n_id(cls):
		"""Advance the counter of ``cls`` by one."""
		cls.next_id = cls.next_id + 1

	@classmethod
	def reset(cls):
		"""Restart the counter of ``cls`` at 0."""
		cls.next_id = 0

	def __init__(self, prefix = None):
		"""Create the element; an identifier is assigned at once when ``prefix`` is given."""
		self.id = None
		self.set_id(prefix)

	def set_id(self, prefix):
		"""Assign an identifier (only once) and return the current one.

		Nothing is assigned when ``prefix`` is None or when the element
		already has an identifier; the counter only advances on assignment.
		"""
		if self.id is None and prefix is not None:
			owner = self.__class__
			self.id = f"{prefix}_{owner.prfx}{owner.next_id}"
			owner.inc_n_id()
		return self.id

	def serializable(self, prefix = None):
		"""Return ``{"id": ...}``, or an empty dict when no identifier exists.

		``prefix`` is forwarded to set_id(), so serialising may assign the identifier.
		"""
		if self.set_id(prefix) is None:
			return {}
		return {"id": self.id}



#######
# Oral
#######
class Sound:
	"""An audio file reference: a URL plus an optional accent label."""

	def __init__(self, url, accent):
		"""Store the file ``url`` and its ``accent`` (may be None)."""
		self.url = url
		self.accent = accent

	def __eq__(self, other):
		"""Two sounds are equal when both URL and accent match."""
		if not isinstance(other, self.__class__):
			return False
		return self.url == other.url and self.accent == other.accent

	def serializable(self):
		"""Return a dict with ``url``, preceded by ``accent`` when one is set."""
		res = {}
		if self.accent is not None:
			res["accent"] = self.accent
		res["url"] = self.url
		return res

class Pronunciation(SubInfo):
	"""A pronunciation: a transcription, an optional accent and a list of sounds."""
	prfx = "prn"

	def __init__(self, prefix = None):
		"""Create an empty pronunciation; ``prefix`` is handed to SubInfo for the identifier."""
		super().__init__(prefix)
		self.sounds = []
		self.ipa = None
		self.accent = None

	def set_transcription(self, tscpt):
		"""Set the transcription (stored in ``ipa``)."""
		self.ipa = tscpt

	def set_accent(self, accent):
		"""Set the accent label."""
		self.accent = accent

	def add_sound(self, url, accent=None):
		"""Append a Sound built from ``url`` and ``accent``."""
		self.sounds.append(Sound(url,accent))

	def serializable(self, prefix = None):
		"""Return a dict with the id (if any), ``transcript``, optional ``accent`` and ``sounds``."""
		res = super().serializable(prefix)
		res['transcript'] = self.ipa
		if self.accent is not None:
			res['accent'] = self.accent
		res['sounds'] = [snd.serializable() for snd in self.sounds]
		return res

	def __str__(self):
		"""Return the JSON form of the pronunciation.

		NOTE(review): the empty string passed here is a non-None prefix, so
		calling str() on a pronunciation without an id assigns one.
		"""
		return json.dumps(self.serializable(''))

	def __eq__(self, other):
		"""Equal when transcription, accent and the sounds (pairwise, in order) match."""
		if not isinstance(other, self.__class__):
			return False
		if self.ipa != other.ipa or self.accent != other.accent:
			return False
		if len(self.sounds) != len(other.sounds):
			return False
		return all(mine == theirs for mine, theirs in zip(self.sounds, other.sounds))

#######
# Metadata
## TODO:
#  * POS: create a POS class holding the dependent features (e.g. masculine gender in French)
#######

#######
# Senses
# TODO: create a Translations class
#######

class Definition(SubInfo):
	"""A non-empty text in a given language, serialised under the class-level ``key``."""
	prfx = "def"
	key = "definition"

	def __init__(self, lang, text, prefix=None):
		"""Store ``lang`` and ``text``.

		Raises:
			ValueError: when ``text`` is the empty string (raised after the
				SubInfo initialisation, so an id may already have been consumed).
		"""
		super().__init__(prefix)
		if text == "":
			raise ValueError(f"Definition.__init__: “{text}” empty definition.")
		self.lang = lang
		self.text = text

	def __eq__(self, other):
		"""Equal when language and text match."""
		if not isinstance(other, self.__class__):
			return False
		return self.lang == other.lang and self.text == other.text

	def serializable(self, prefix = None):
		"""Return a dict with the id (if any), ``lang`` and the text under ``key``."""
		res = super().serializable(prefix)
		res.update({"lang": self.lang, self.__class__.key: self.text})
		return res

class Translation(Definition):
	"""Definition variant: identifiers use the "trad" tag and the text is serialised under "translation"."""
	prfx = "trad"
	key = "translation"

class Example(SubInfo):
	"""A usage example: mandatory text, optional source and URL."""
	prfx = "ex"

	def __init__(self, transcript, source=None, url=None, prefix=None):
		"""Store the example text with its optional ``source`` and ``url``.

		Raises:
			ValueError: when ``transcript`` is the empty string (raised after
				the SubInfo initialisation).
		"""
		super().__init__(prefix)
		if transcript == "":
			raise ValueError(f"Example.__init__: “{transcript}” empty example.")
		self.text = transcript
		self.source = source
		self.url = url

	def __eq__(self, other):
		"""Equal when text, source and URL all match."""
		if not isinstance(other, self.__class__):
			return False
		return self.text==other.text and self.source==other.source and self.url==other.url

	def serializable(self, prefix = None):
		"""Return a dict with the id (if any), ``example`` and the optional ``source`` / ``url``."""
		res = super().serializable(prefix)
		res["example"] = self.text
		for field in ("source", "url"):
			value = getattr(self, field)
			if value is not None:
				res[field] = value
		return res

class Sense(SubInfo):
	"""A sense of an entry: definitions, examples, translations, subsenses and a domain.

	A sense is identified by ``label`` (not by SubInfo's ``id``); SubInfo's
	initialiser is not called here.
	"""
	prfx = ""

	def __init__(self, lang=None, definition=None, wiki_lang=None, prefix=None):
		"""Create a sense, optionally with a first definition written in ``wiki_lang``.

		Raises:
			ValueError: when ``definition`` is the empty string.
		"""
		self.lang = lang
		self.label = None
		self.set_id(prefix)
		# A top-level sense restarts the numbering of its sub-elements.
		if not isinstance(self, SubSense):
			for sub_element_class in (Definition, Example, Translation, SubSense):
				sub_element_class.reset()

		self.definitions = [] # definitions (each has a language and a text)
		self.subsenses = [] # sub-definitions (recursive)
		self.examples = [] # examples (text is mandatory, source and url are optional)
		self.translations = [] # translations into other languages
		self.domain = None # usage domain of the word for this sense
		if definition is not None:
			try:
				self.add_def(wiki_lang, definition)
			except ValueError as err:
				raise ValueError(f"Sense.__init__() with empty definition\n{err}")

	def set_id(self, prefix=None):
		"""Assign the label ``{prefix}_{n}`` once and return the current label."""
		if prefix is None or self.label is not None:
			return self.label
		owner = self.__class__
		self.label = f"{prefix}_{owner.next_id}"  # the identifier of the sense
		owner.inc_n_id()
		return self.label

	def get_id(self):
		"""Return ``{lang}.{label}``."""
		return f"{self.lang}.{self.label}"

	def set_domain(self, d):
		"""Set the usage domain."""
		self.domain = d

	def add_def(self, lang, definition):
		"""Add a definition unless an equal one is already present.

		Raises:
			ValueError: propagated from Definition when ``definition`` is empty.
		"""
		new_def = Definition(lang, definition)
		if new_def not in self.definitions:
			new_def.set_id(self.set_id())
			self.definitions.append(new_def)

	def add_example(self, transcript, src=None, url=None, prefix=None):
		"""Add an example unless an equal one is already present; empty examples are skipped with a message."""
		try:
			new_example = Example(transcript, src, url, prefix)
			if new_example not in self.examples:
				new_example.set_id(self.set_id())
				self.examples.append(new_example)
		except ValueError:
			print("Skipped empty example")

	def add_translation(self, lang=None, translation=None):
		"""Add a translation unless an equal one is already present."""
		new_translation = Translation(lang, translation)
		if new_translation not in self.translations:
			new_translation.set_id(self.set_id())
			self.translations.append(new_translation)

	def add_subsense(self, subsense):
		"""Attach ``subsense``, labelling it from this sense's label when that label exists."""
		if self.label is not None:
			subsense.set_id(self.set_id())
		if subsense not in self.subsenses:
			self.subsenses.append(subsense)

	def __eq__(self, other):
		"""Compare label, domain and contents.

		Examples, translations, definitions and subsenses of ``self`` must all
		be found in ``other``. The number of subsenses is not compared.
		"""
		if not isinstance(other, self.__class__):
			return False
		same_shape = (
			self.label == other.label
			and len(self.definitions) == len(other.definitions)
			and len(self.examples) == len(other.examples)
			and len(self.translations) == len(other.translations)
			and self.domain == other.domain
		)
		if not same_shape:
			return False
		return (
			all(ex in other.examples for ex in self.examples)
			and all(tr in other.translations for tr in self.translations)
			and all(df in other.definitions for df in self.definitions)
			and all(sub in other.subsenses for sub in self.subsenses)
		)

	def serializable(self, prefix = None):
		"""Return a dict holding only the non-empty parts of the sense.

		Keys, in order: ``Domain``, ``Definitions``, ``Subsenses`` (keyed by
		subsense label), ``Examples``, ``Translations``.
		"""
		res = {}
		if self.domain is not None:
			res["Domain"] = self.domain
		if self.definitions:
			res["Definitions"] = [d.serializable(prefix) for d in self.definitions]
		if self.subsenses:
			by_label = {}
			res["Subsenses"] = by_label
			for sub in self.subsenses:
				# The subsense is serialised before its label is (possibly) assigned.
				payload = sub.serializable(prefix)
				by_label[sub.set_id(self.label)] = payload
		if self.examples:
			res["Examples"] = [e.serializable(prefix) for e in self.examples]
		if self.translations:
			res["Translations"] = [t.serializable(prefix) for t in self.translations]
		return res

	def __str__(self):
		"""Return the JSON form of serializable()."""
		return json.dumps(self.serializable())

class SubSense(Sense):
	"""A sense nested under another sense; its label is ``{prefix}.{n}``."""

	def set_id(self, prefix=None):
		"""Assign the label once (dot-separated from ``prefix``) and return the current label."""
		if prefix is None or self.label is not None:
			return self.label
		owner = self.__class__
		self.label = f"{prefix}.{owner.next_id}"  # the identifier of the sense
		owner.inc_n_id()
		return self.label

class Entry:
	"""A lexical entry: a lemma in a language, with a part of speech, pronunciations and senses."""

	def __init__(self, lemma, lang, wiki_lang, version_id, wkskt_version):
		"""Create an empty entry and restart the numbering of senses.

		Args:
			lemma: the headword.
			lang: language of the headword.
			wiki_lang: language of the Wiktionary the data comes from.
			version_id: unique identifier of the Wiktionary page revision
				(pywikibot.Page.latest_revision_id).
			wkskt_version: version of the extractor.
		"""
		self.lemma = lemma
		self.lang = lang
		# Kept as a list: useful if data from several Wiktionaries gets merged one day.
		self.sources = [{"wiktionary_language":wiki_lang,"permanentId":version_id,"wikstraktor_version":wkskt_version}]
		self.current_source = 0
		self.pronunciations = []
		self.pos = None
		self.senses = []
		Sense.reset()

	def set_pos(self, pos):
		"""Set the part of speech."""
		self.pos = pos

	def get_id(self, source_id=0):
		"""Return ``{lang}-{source_id}.{lemma}{pos}`` (``pos`` omitted while unset)."""
		#TODO: some day, replace the source id with the right source
		pos = self.pos if self.pos is not None else ""
		return f"{self.lang}-{source_id}.{self.lemma}{pos}"

	def set_pronunciations(self, pron):
		"""Add one Pronunciation or a list of them.

		Raises:
			ValueError: when ``pron`` (or an item of the list) is not a Pronunciation.
		"""
		if isinstance(pron, Pronunciation):
			self.add_pronunciation(pron)
		elif isinstance(pron, list):
			for p in pron:
				if not isinstance(p, Pronunciation):
					raise ValueError(f"Entry.set_pronunciations: {p} is not a Pronunciation object ({p.__class__.__name__}).")
				self.add_pronunciation(p)
		else:
			raise ValueError(f"Entry.set_pronunciations: {pron} is not a Pronunciation object ({pron.__class__.__name__}).")

	def add_pronunciation(self, p):
		"""Add ``p`` unless an equal pronunciation is present, giving it an id prefixed by the entry id."""
		if p not in self.pronunciations:
			p.set_id(self.get_id())
			self.pronunciations.append(p)

	def set_senses(self, senses):
		"""Add every item of ``senses``.

		Raises:
			ValueError: when an item is not a Sense.
		"""
		for s in senses:
			if not isinstance(s, Sense):
				# The message describes the offending item itself.
				raise ValueError(f"Entry.set_senses: {s} is not a Sense object ({s.__class__.__name__}).")
			self.add_sense(s)

	def add_sense(self, s):
		"""Add ``s`` unless an equal sense is present, labelling it from the entry id."""
		if s not in self.senses:
			s.set_id(self.get_id())
			self.senses.append(s)

	def is_valid(self):
		"""Return True when lemma and pos are set and there is at least one pronunciation and one sense."""
		return self.lemma is not None and len(self.pronunciations) > 0 and self.pos is not None and len(self.senses) > 0

	def __eq__(self, other):
		"""Equal when lemma, language, pos match and senses / pronunciations match pairwise, in order."""
		if not isinstance(other, self.__class__):
			return False
		if self.lemma != other.lemma or self.lang != other.lang or self.pos != other.pos:
			return False
		if len(self.pronunciations) != len(other.pronunciations) or len(self.senses) != len(other.senses):
			return False
		return (
			all(mine == theirs for mine, theirs in zip(self.senses, other.senses))
			and all(mine == theirs for mine, theirs in zip(self.pronunciations, other.pronunciations))
		)

	def serializable(self, id=True):
		"""Return the entry as a JSON-ready dict.

		Args:
			id: when truthy, the entry id is emitted under ``id`` and used as
				prefix for sub-elements that have no identifier yet; when
				falsy, no prefix is passed down (None), so no identifier is
				created while serialising.
		"""
		# A falsy ``id`` must become None: sub-elements treat any non-None
		# value (including False) as a prefix to build identifiers from.
		prefix = self.get_id() if id else None
		res = {'sources': self.sources}
		if prefix is not None:
			res['id'] = prefix
		content = {"pos":self.pos}
		res[self.lemma] = content
		content["pronunciations"] = [p.serializable(prefix) for p in self.pronunciations]
		content["senses"] = {}
		for s in self.senses:
			content["senses"][s.get_id()] = s.serializable(prefix)
		return res

	def __str__(self):
		"""Return a header line followed by one line per pronunciation and per sense."""
		lines = [f"{self.lemma}_{self.lang} ({self.pos})\n"]
		lines.extend(f"{str(p)}\n" for p in self.pronunciations)
		lines.extend(f"{str(s)}\n" for s in self.senses)
		return "".join(lines)

class ParserContext:
	"""Stack of section contexts for one page, plus the entries built from it.

	Each stack frame is a dict holding the wiki section under ``"wiki"`` and
	the information gathered for it under other keys (``"pro"``, ``"ety"``,
	or a part of speech mapped to its senses).
	"""

	def __init__(self, entry, lang, wiki_lang, wversion_id, version_id):
		"""Store the page description and start with an empty stack and no entries."""
		self.lemma = entry
		self.lang = lang
		self.wiki_lang = wiki_lang
		self.page_version_id = wversion_id
		self.wikstraktor_version = version_id
		self.context = []
		self.entries = []

	def get_level(self):
		"""Return the level of the top wiki section, or -1 when the stack is empty."""
		if not self.context:
			return -1
		return self.context[-1]["wiki"].level

	def push(self, wiki_context):
		"""Push a new frame holding only ``wiki_context``."""
		self.context.append({"wiki":wiki_context})

	def pop(self, testNewEntry = True):
		"""Remove and return the top frame; entries are built first when ``testNewEntry`` is true."""
		if testNewEntry:
			self.create_entries()
		return self.context.pop()

	def flush(self):
		"""Pop every frame, building entries before each pop."""
		while self.context:
			self.pop(True)

	def set_top_wiki(self, wiki_context):
		"""Replace the wiki section of the top frame, or push a frame when the stack is empty."""
		if self.context:
			self.context[-1]['wiki'] = wiki_context
		else:
			self.push(wiki_context)

	def set_top_entry_info(self, key, entry_context, testNewEntry=True):
		"""Store ``entry_context`` under ``key`` in the top frame, then optionally build entries.

		Raises:
			ValueError: when the stack is empty.
		"""
		if not self.context:
			raise ValueError(f"Trying to set up entry info ({entry_context}), in an empty parserContext.")
		self.context[-1][key] = entry_context
		if testNewEntry:
			self.create_entries()

	def create_entries(self):
		"""Build entries from the whole stack and return how many new ones were stored.

		Frames hold traits shared by everything (ety, pro) and distinct
		entities (POS mapped to senses). Nothing is built without a
		pronunciation and at least one POS; invalid or already known entries
		are not stored.
		"""
		pro = None
		senses_by_pos = {}
		for frame in self.context:
			for key, value in frame.items():
				if key == "pro":
					pro = value
				elif key not in ("ety", "wiki"):
					# the wiki context is not needed and etymology is ignored for now
					senses_by_pos[key] = value
		added = 0
		if pro is None or not senses_by_pos:
			return added
		for pos, senses in senses_by_pos.items():
			candidate = Entry(self.lemma, self.lang, self.wiki_lang, self.page_version_id, self.wikstraktor_version)
			candidate.set_pronunciations(pro)
			candidate.set_pos(pos)
			candidate.set_senses(senses)
			#an improvement would be to remove that sense from context, but we test not to add doubles
			if candidate.is_valid() and candidate not in self.entries:
				added += 1
				self.entries.append(candidate)
		return added

	def debug_top(self):
		"""Return a one-frame summary: stack depth, section level and title, then the frame's info."""
		if not self.context:
			return "Context: 0"
		top = self.context[-1]
		info = "\n\t\t\t".join(f"{k} → {str(v)}" for k, v in top.items() if k != 'wiki')
		return f"Context: {len(self.context)*'='} {top['wiki'].level*'#'} {top['wiki'].title} / {info}"

	def __str__(self):
		"""Return every frame (wiki sections shown by their length) and the number of entries."""
		chunks = []
		for position, frame in enumerate(self.context):
			chunks.append(f"====={position}======\n")
			for k, v in frame.items():
				shown = len(v) if k == "wiki" else v
				chunks.append(f"  {k}→{shown}\n")
		chunks.append(f"nb of entries: {len(self.entries)}")
		return "".join(chunks)



class Wikstraktor:
	"""Base extractor: fetches a Wiktionary page and turns its sections into entries.

	The ``process_*`` hooks are empty here and are meant to be provided by a
	subclass. Methods also read ``site``, ``constants``, ``entry_language``
	and ``wiki_language``, which are not set in this class (presumably the
	subclass defines them — confirm against the parsers package).
	"""

	@classmethod
	def get_instance(cls, wiki_language, entry_language):
		"""Instantiate the extractor for a (wiki language, entry language) pair.

		The class ``{Wl_el}_straktor`` is looked up in the module
		``parsers.{wl_el}``. Returns None (after printing a message) when the
		module cannot be imported.
		NOTE(review): a missing class raises AttributeError, which is not
		caught here although the message mentions that case — confirm intent.
		"""
		try:
			m_name = f"{wiki_language}_{entry_language}".capitalize()
			parser_module = importlib.import_module(f"parsers.{m_name.lower()}")
			straktor_class = getattr(parser_module, f"{m_name}_straktor")
			instance = straktor_class()
			instance.version = the_version
			instance.log = Wikstraklog(the_version, entry_language, wiki_language)
		except ModuleNotFoundError:
			print(f"parsers.{m_name.lower()} module not found or {m_name}_straktor not found in module")
			instance = None
		return instance

	def __init__(self):
		"""Start with no entries and keep handles on the pywikibot and wikitextparser modules."""
		self.entries = []
		self.pwb = pywikibot
		self.wtp = wikitextparser
		self.parserContext = None

	def get_file_url(self, file_page_name):
		"""Return the URL of a file page, or None (with a message) when the page does not exist."""
		try:
			return self.pwb.FilePage(self.site, file_page_name).get_file_url()
		except pywikibot.exceptions.NoPageError:
			print(f"{file_page_name} does not exist in {self.site}.")
			return None

	def fetch(self, graphy):
		"""Retrieve the page ``graphy`` and process the section of the entry language.

		The entries found are added to ``self.entries``; the number of
		entries added is returned (0 when the page is empty or has no section
		for the language).
		"""
		page = self.pwb.Page(self.site, graphy)
		if page.text == "":
			return 0
		# find the language section: first titled section matching the language name
		for section in self.wtp.parse(page.text).sections:
			if section.title != None and section.title.capitalize() == self.constants[self.entry_language]:
				return self.parse(page.title(), page.latest_revision_id, section.sections)
		return 0

	def parse(self, entry, v_id, sections):
		"""Walk ``sections``, feed a fresh ParserContext and collect the resulting entries.

		Returns the number of entries held by the context once flushed; those
		entries are appended to ``self.entries``.
		"""
		self.parserContext = ParserContext(entry, self.entry_language, self.wiki_language, v_id, self.version)
		self.log.set_context(entry, v_id)
		for section in sections:
			if section.title is None:
				continue
			# keep the context stack in step with the section level
			if self.parserContext.get_level() < section.level:
				self.parserContext.push(section)
			else:
				while self.parserContext.get_level() > section.level:
					self.parserContext.pop(True)
				self.parserContext.set_top_wiki(section)
			# section title: first argument of the first template, or the raw title
			title_templates = self.wtp.parse(section.title).templates
			if title_templates:
				stitle = title_templates[0].arguments[0].value
			else:
				stitle = section.title
			if self.isPro(stitle):
				self.parserContext.set_top_entry_info('pro', self.process_pronunciation(self.wtp.parse(section.contents)))
			elif self.isEty(stitle):
				self.parserContext.set_top_entry_info('ety', self.process_etymology(self.wtp.parse(section.contents)))
			else:
				#Edit to process other types of sections
				pos = self.process_POS(stitle)
				if pos is not None:
					self.parserContext.set_top_entry_info(pos, self.process_senses(self.wtp.parse(section.contents)))
		self.parserContext.flush()
		found = self.parserContext.entries
		self.entries.extend(found)
		return len(found)

	def _title_matches(self, key, title):
		"""Tell whether ``title`` equals, or belongs to, the constant stored under ``key``."""
		expected = self.constants[key]
		if type(expected) is str:
			return title == expected
		return title in expected

	def isPro(self, title):
		"""Tell whether ``title`` is a pronunciation section title."""
		return self._title_matches('pro', title)

	def isEty(self, title):
		"""Tell whether ``title`` is an etymology section title."""
		return self._title_matches('ety', title)

	def process_POS(self, parsedwikitext):
		"""Recognise the part of speech, returning None when it cannot (implemented in subclasses)."""
		pass

	def process_pronunciation(self, parsedwikitext):
		"""Hook implemented in subclasses."""
		pass

	def process_etymology(self, parsedwikitext):
		"""Hook implemented in subclasses."""
		pass

	def process_example(self, example_wiki_text):
		"""Hook implemented in subclasses."""
		pass

	def process_definition(self, definition, sub_items, def_level = True):
		"""Hook implemented in subclasses."""
		pass

	def process_senses(self, parsedwikitext):
		"""Hook implemented in subclasses."""
		pass

	def __str__(self):
		"""Return export() with its default options."""
		return self.export()

	def export(self, id=True, ascii=False, compact=False):
		"""Return the entries as a JSON string.

		Args:
			id: forwarded to each entry's serializable().
			ascii: escape non-ASCII characters when true.
			compact: no indentation when true, 4-space indentation otherwise.
		"""
		payload = [e.serializable(id) for e in self.entries]
		return json.dumps(payload, ensure_ascii=ascii, indent=None if compact else 4)

# Command-line entry point: look a word up and print or store the JSON export.
if __name__ == "__main__":
	import argparse
	import sys
	from argparse import RawTextHelpFormatter # for the formatting of the help text
	parser = argparse.ArgumentParser(formatter_class=RawTextHelpFormatter, description="""Interroger un wiktionnaire
	\033[1m\033[32mex :\033[0m
	‣\033[0m\033[32m./wikstraktor.py -m blue\033[0m
	‣\033[0m\033[32m./wikstraktor.py -m blue -f blue.json -A -C\033[0m
	‣\033[0m\033[32m./wikstraktor.py -l en -w fr -m blue -f blue.json -n -A -C\033[0m""")
	parser.add_argument("-l", "--language",  help="la langue du mot", type=str, default = "en")
	parser.add_argument("-w", "--wiki_language",  help="la langue du wiki", type=str, default = "en")
	parser.add_argument("-m", "--mot",  help="le mot à chercher", type=str, default=None)
	parser.add_argument("-f", "--destination_file", help="le fichier dans lequel stocker le résultat", type=str, default=None)
	parser.add_argument("-A", "--force_ascii", help="json avec que des caractères ascii", action="store_true")
	parser.add_argument("-C", "--compact", help="json sans indentation", action="store_true")
	parser.add_argument("-n", "--no_id", help="json sans id", action="store_true")
	args = parser.parse_args()
	if args.mot is None:
		raise NameError("Pas de mot demandé")
	w = Wikstraktor.get_instance(args.wiki_language, args.language)
	if w is None:
		# get_instance() has already printed why no extractor is available.
		sys.exit(1)
	# resp stays None when no entry could be extracted.
	resp = None
	if w.fetch(args.mot) > 0:
		resp = w.export(not args.no_id, args.force_ascii, args.compact)
	if args.destination_file is None:
		print(resp)
	elif resp is None:
		# Nothing to store: leave the destination file untouched instead of failing on write(None).
		print(f"Aucune entrée extraite pour « {args.mot} » : fichier non écrit.", file=sys.stderr)
	else:
		# The context manager closes the file; JSON is written as UTF-8
		# regardless of the locale's default encoding.
		with open(args.destination_file, "w", encoding="utf-8") as f:
			f.write(resp)