Skip to content
Snippets Groups Projects
Commit bb67b48b authored by Mathieu Loiseau
Browse files

improving pron id

parent 7c707d79
No related branches found
No related tags found
No related merge requests found
...@@ -5,6 +5,7 @@ import importlib ...@@ -5,6 +5,7 @@ import importlib
import json import json
from wikstraktor_version import version as the_version from wikstraktor_version import version as the_version
from wikstraklog import Wikstraklog from wikstraklog import Wikstraklog
from copy import deepcopy as dc
import re import re
def get_list_string_level(wikitext): def get_list_string_level(wikitext):
...@@ -37,12 +38,17 @@ class SubInfo: ...@@ -37,12 +38,17 @@ class SubInfo:
self.label = f"{prefix}_{self.__class__.prfx}{self.id}" self.label = f"{prefix}_{self.__class__.prfx}{self.id}"
return self.label return self.label
def replace_src_in_id(self, former_src, new_src): def replace_src_in_id(self, former_src, new_src, copy=False):
##Attention si on nettoie en mettant des sources partout, il faudra changer ##Attention si on nettoie en mettant des sources partout, il faudra changer
res = None res = None
if self.label != None and former_src != None and new_src != None : if self.label != None and former_src != None and new_src != None :
self.label = re.sub(r'^([\w\.]+)-('+str(former_src)+')',r"\1-"+str(new_src), self.label) if not copy:
res = self.label self.label = re.sub(r'^([\w\.]+)-('+str(former_src)+')',r"\1-"+str(new_src), self.label)
res = self.label
else:
new_obj = dc(self)
new_obj.label = re.sub(r'^([\w\.]+)-('+str(former_src)+')',r"\1-"+str(new_src), self.label)
res = new_obj
return res return res
def get_src_from_id(self): def get_src_from_id(self):
...@@ -242,7 +248,7 @@ class Sense(SubInfo): ...@@ -242,7 +248,7 @@ class Sense(SubInfo):
return key in self.metadata.keys() return key in self.metadata.keys()
def get_id(self): def get_id(self):
return self.id return self.label
def set_domain(self, d): def set_domain(self, d):
self.domain = d self.domain = d
...@@ -272,13 +278,13 @@ class Sense(SubInfo): ...@@ -272,13 +278,13 @@ class Sense(SubInfo):
self.translations.append(theTranslation) self.translations.append(theTranslation)
def add_subsense(self, subsense): def add_subsense(self, subsense):
if self.id!=None: if self.label!=None:
subsense.set_id(self.set_id()) subsense.set_id(self.set_id())
if subsense not in self.subsenses: if subsense not in self.subsenses:
self.subsenses.append(subsense) self.subsenses.append(subsense)
def __eq__(self, other): def __eq__(self, other):
res = isinstance(other, self.__class__) and self.id == other.id and len(self.definitions) == len(other.definitions) and len(self.examples) == len(other.examples) and len(self.translations) == len(other.translations) and self.domain == other.domain and len(other.metadata) == len(self.metadata) and other.regions == self.regions res = isinstance(other, self.__class__) and len(self.definitions) == len(other.definitions) and len(self.examples) == len(other.examples) and len(self.translations) == len(other.translations) and self.domain == other.domain and len(other.metadata) == len(self.metadata) and other.regions == self.regions #and self.id == other.id
i = 0 i = 0
while res and i < len(self.examples): while res and i < len(self.examples):
res = self.examples[i] in other.examples res = self.examples[i] in other.examples
...@@ -334,18 +340,17 @@ class Sense(SubInfo): ...@@ -334,18 +340,17 @@ class Sense(SubInfo):
if len(self.subsenses) > 0: if len(self.subsenses) > 0:
res["Subsenses"] = {} res["Subsenses"] = {}
for t in self.subsenses: for t in self.subsenses:
res["Subsenses"][t.set_id(self.id)]= t.serializable(prefix) res["Subsenses"][t.set_id(self.label)]= t.serializable(prefix)
return res return res
def __str__(self): def __str__(self):
return json.dumps(self.serializable()) return json.dumps(self.serializable())
class SubSense(Sense): class SubSense(Sense):
def set_id(self, prefix=None): def set_id(self, prefix, force = False):
if prefix != None and self.id == None: if (self.label == None or force) and prefix != None:
self.id = f"{prefix}.{self.__class__.next_id}" #l'identifiant du sens self.label = f"{prefix}.{self.id}"
self.__class__.inc_n_id() return self.label
return self.id
class Entry: class Entry:
#version_id : l'identifiant unique de la version de la page du wiktionnaire (pywikibot.Page.latest_revision_id) #version_id : l'identifiant unique de la version de la page du wiktionnaire (pywikibot.Page.latest_revision_id)
...@@ -433,10 +438,10 @@ class Entry: ...@@ -433,10 +438,10 @@ class Entry:
i += 1 i += 1
for p in other.pronunciations: for p in other.pronunciations:
src = p.get_src_from_id() src = p.get_src_from_id()
if src != None and src <= max_id and src_map[src] != src: if src != None and src <= max_id and src < len(src_map) and src_map[src] != src:
#max_id, c'est parce qu'un même objet peut être #max_id, c'est parce qu'un même objet peut être
#à plusieurs endroits et avoir déjà été modifié #à plusieurs endroits et avoir déjà été modifié
p.replace_src_in_id(src, src_map[src]) p=p.replace_src_in_id(src, src_map[src], True)
self.add_pronunciation(p) self.add_pronunciation(p)
for s in other.senses: for s in other.senses:
src = s.get_src_from_id() src = s.get_src_from_id()
...@@ -509,6 +514,10 @@ class ParserContext: ...@@ -509,6 +514,10 @@ class ParserContext:
self.wikstraktor_version = version_id self.wikstraktor_version = version_id
self.context = [] self.context = []
self.entries = [] self.entries = []
#reset counters
Sense.reset()
Sense.reset_sub_counters()
Pronunciation.reset()
def get_level(self): def get_level(self):
if len(self.context) == 0: if len(self.context) == 0:
......
No preview for this file type
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment