Skip to content
Snippets Groups Projects
Commit 8dbb5366 authored by Mathieu Loiseau's avatar Mathieu Loiseau
Browse files

Avec gestion des sous-sens (attention parser français ne prend qu'1 POS)

parent 8eafa539
No related branches found
No related tags found
No related merge requests found
......@@ -15,8 +15,8 @@
"url1":"https://upload.wikimedia.org/wikipedia/commons/1/19/LL-Q1860_%28eng%29-Back_ache-water.wav"
}
],
"Senses":[
{
"Senses":{
"v1":{
"Translations":[
"translation1",
"...",
......@@ -26,16 +26,16 @@
"Stilles Mineralwasser.jpg",
"..."
],
"Definition":"blabla",
"Definition":{"lang":"fr", "definition" : "blabla"},
"Examples":[
"blabla",
"blabli",
"blablou"
],
"subSense":[
"SubSenses":[
{
"subdef":"blabla",
"subex":[
"Definition":{"lang":"en", "definition" : "whatnot"},
"Examples":[
"subexa",
"subexb",
"subexz"
......@@ -43,7 +43,7 @@
}
]
}
]
}
}
]
}
......@@ -61,4 +61,3 @@
\"Supplementary field for devs 5\"
...
\"Supplementary field for devs 10\" */
......@@ -41,7 +41,7 @@ class En_en_straktor(Wikstraktor):
global debugEty
debugEty += 1
return "Etymology" + str(debugEty)
def process_POS(self,parsedwikitext):
pos = None
if parsedwikitext in self.constants['POS'].keys():
......@@ -92,7 +92,7 @@ class En_en_straktor(Wikstraktor):
if isEx == 0:
newSense2.add_example(self.wtp.parse(j).plain_text().strip())
if i == len(l)-1 or l[i+1].pattern == '\\# ' or l[i+1].pattern == '\\## ':
senses.append(newSense2)
newSense.add_subsense(newSense2)
i += 1
if cnt > 0:
i -= 1
......
......@@ -43,7 +43,7 @@ class Fr_en_straktor(Wikstraktor):
global debugEty
debugEty += 1
return "Etymology" + str(debugEty)
def process_POS(self,parsedwikitext):
pos = None
ik = 0
......@@ -100,7 +100,7 @@ class Fr_en_straktor(Wikstraktor):
if isEx == 0:
newSense2.add_example(self.wtp.parse(j).plain_text().strip())
if i == len(l)-1 or l[i+1].pattern == '\\# ' or l[i+1].pattern == '\\## ':
senses.append(newSense2)
newSense.add_subsense(newSense2)
i += 1
if cnt > 0:
i -= 1
......@@ -110,5 +110,3 @@ class Fr_en_straktor(Wikstraktor):
if __name__ == "__main__":
    # Manual smoke run: fetch the entry "test" and report what fetch() returned.
    extractor = Fr_en_straktor()
    added = extractor.fetch("test")
    print(added, "entries added")
......@@ -104,7 +104,8 @@ class Example:
class Sense:
def __init__(self, label):
self.label = label #l'identifiant du sens
self.definitions = [] #liste des définitions (elles auront une langue et un texte)
self.definition = None #liste des définitions (elles auront une langue et un texte)
self.subsenses = [] #liste des sous-définitions (récursif…)
self.examples = [] #liste des exemples (un texte obligatoire, source et url sont optionnels)
self.translations = [] #liste des traductions dans d'autres langues
self.domain = None #domaine d'usage du mot dans ce sens
......@@ -114,8 +115,10 @@ class Sense:
def add_def(self, lang, definition):
    """Set the single definition of this sense.

    A ``Definition`` is built from *lang* and *definition*. If the sense
    has no definition yet, it is stored. If an equal definition is already
    stored, the call does nothing.

    Raises:
        ValueError: a different definition is already stored; the stored
            one is left untouched.
    """
    theDef = Definition(lang, definition)
    # Identity test on purpose: `== None` would go through the stored
    # object's own equality method (if it defines one) with None as operand.
    if self.definition is None:
        self.definition = theDef
    elif self.definition != theDef:
        raise ValueError(f"Superposition de deux définitions:\n\t{self.definition}\nremplacée par\n\t{theDef}")
def add_example(self, transcript, src=None, url=None):
theEx = Example(transcript, src, url)
......@@ -127,8 +130,12 @@ class Sense:
if theTranslation not in self.translations:
self.translations.append(theTranslation)
def add_subsense(self, subsense):
    """Append *subsense* to this sense's sub-senses unless an equal one is already listed."""
    already_listed = subsense in self.subsenses
    if already_listed:
        return
    self.subsenses.append(subsense)
def __eq__(self, other):
res = self.label == other.label and len(self.definitions) == len(other.definitions) and len(self.examples) == len(other.examples) and len(self.translations) == len(other.translations) and self.domain == other.domain
res = self.label == other.label and self.definition == other.definition and len(self.examples) == len(other.examples) and len(self.translations) == len(other.translations) and self.domain == other.domain
i = 0
while res and i < len(self.examples):
res = self.examples[i] in other.examples
......@@ -138,25 +145,46 @@ class Sense:
res = self.translations[i] in other.translations
i+=1
i = 0
while res and i < len(self.definitions):
res = self.definitions[i] in other.definitions
while res and i < len(self.subsenses):
res = self.subsenses[i] in other.subsenses
i+=1
return res
def serializable(self, key = False):
    """Return a plain-dict view of this sense.

    Fields are emitted in the order "Domain", "Definition", "Subsenses",
    "Examples", "Translations". A field is left out when it is unset or
    empty.

    Args:
        key: when truthy, the fields are wrapped as ``{self.label: fields}``
            and sub-senses are emitted as a list. When falsy, the fields
            are returned directly and sub-senses are emitted as a dict
            keyed by each sub-sense's label.

    Returns:
        dict: the fields, wrapped under ``self.label`` when *key* is truthy.
    """
    fields = {}
    if self.domain is not None:
        fields["Domain"] = self.domain
    # A sense may be serialized before any definition was attached; omit the
    # field in that case instead of failing on None.
    if self.definition is not None:
        fields["Definition"] = self.definition.serializable()
    if self.subsenses:
        if key:
            # NOTE(review): sub-senses are serialized without their label in
            # this mode, so the label does not appear in the list items;
            # confirm this is intended.
            fields["Subsenses"] = [sub.serializable() for sub in self.subsenses]
        else:
            fields["Subsenses"] = {sub.label: sub.serializable(key) for sub in self.subsenses}
    if self.examples:
        fields["Examples"] = [ex.serializable() for ex in self.examples]
    if self.translations:
        fields["Translations"] = [tr.serializable() for tr in self.translations]
    if key:
        return {self.label: fields}
    return fields
......@@ -206,9 +234,9 @@ class Entry:
res[self.lemma]["pronunciations"] = []
for p in self.pronunciations:
res[self.lemma]["pronunciations"].append(p.serializable())
res[self.lemma]["senses"] = []
res[self.lemma]["senses"] = {}
for s in self.senses:
res[self.lemma]["senses"].append(s.serializable())
res[self.lemma]["senses"][s.label]=s.serializable(False)
return res
def __str__(self):
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment