Skip to content
Snippets Groups Projects
Commit af6061d5 authored by Enzo Simonnet
Browse files

Structure json mise à jour

parent b02d9677
No related branches found
No related tags found
No related merge requests found
...@@ -17,6 +17,7 @@ class En_en_straktor(Wikstraktor): ...@@ -17,6 +17,7 @@ class En_en_straktor(Wikstraktor):
# TODO: ne marche que pour les listes à 2 niveaux, voir water pour 3 niveaux # TODO: ne marche que pour les listes à 2 niveaux, voir water pour 3 niveaux
l = proContent.get_lists()[0] l = proContent.get_lists()[0]
i = 0 i = 0
cpt = 0
pronunciations = [] pronunciations = []
while i < len(l.fullitems): while i < len(l.fullitems):
p = Pronunciation() p = Pronunciation()
...@@ -32,6 +33,8 @@ class En_en_straktor(Wikstraktor): ...@@ -32,6 +33,8 @@ class En_en_straktor(Wikstraktor):
p.add_sound(self.get_file_url(t.arguments[1].value), a) p.add_sound(self.get_file_url(t.arguments[1].value), a)
if j==len(templates)-1 or templates[j+1].normal_name()== self.constants['t_acc'] : if j==len(templates)-1 or templates[j+1].normal_name()== self.constants['t_acc'] :
if p.ipa != None or p.accent != None: if p.ipa != None or p.accent != None:
cpt += 1
p.id= f"p_{cpt}"
pronunciations.append(p) pronunciations.append(p)
p = Pronunciation() p = Pronunciation()
i += 1 i += 1
...@@ -58,18 +61,21 @@ class En_en_straktor(Wikstraktor): ...@@ -58,18 +61,21 @@ class En_en_straktor(Wikstraktor):
if l[i].pattern == '\\# ': if l[i].pattern == '\\# ':
nombreDef += 1 nombreDef += 1
newSense = Sense(f"{baseId}{nombreDef}") newSense = Sense(f"{baseId}{nombreDef}")
newSense.add_def(self.wiki_language, self.wtp.parse(l[i].items[0]).plain_text().strip()) newSense.add_def(self.wiki_language, self.wtp.parse(l[i].items[0]).plain_text().strip(), f"d_{nombreDef}")
newSense.add_translation(f"t_{nombreDef}_0")
elif l[i].pattern == '\\#:': elif l[i].pattern == '\\#:':
cptEx=0
for j in l[i].items: for j in l[i].items:
k = 0 k = 0
isEx = 0 isEx = 0
while k < len(self.wtp.parse(j).templates) and isEx == 0 : while k < len(self.wtp.parse(j).templates) and isEx == 0 :
if (self.wtp.parse(j).templates[k].normal_name() in self.constants['t_ex']): if (self.wtp.parse(j).templates[k].normal_name() in self.constants['t_ex']):
newSense.add_example(self.wtp.parse(j).templates[0].arguments[-1].value) cptEx +=1
newSense.add_example(self.wtp.parse(j).templates[0].arguments[-1].value, f"e_{nombreDef}_{cptEx}")
isEx = 1 isEx = 1
k += 1 k += 1
if isEx == 0: if isEx == 0:
newSense.add_example(self.wtp.parse(j).plain_text().strip()) newSense.add_example(self.wtp.parse(j).plain_text().strip(), f"e_{nombreDef}_{cptEx}")
if i == len(l)-1 or l[i+1].pattern == '\\# ' or l[i+1].pattern == '\\## ': if i == len(l)-1 or l[i+1].pattern == '\\# ' or l[i+1].pattern == '\\## ':
senses.append(newSense) senses.append(newSense)
cnt = 0 cnt = 0
...@@ -79,18 +85,21 @@ class En_en_straktor(Wikstraktor): ...@@ -79,18 +85,21 @@ class En_en_straktor(Wikstraktor):
if l[i].pattern == '\\## ': if l[i].pattern == '\\## ':
nombreSousDef += 1 nombreSousDef += 1
newSense2 = Sense(f"{baseId}{nombreDef}_{nombreSousDef}") newSense2 = Sense(f"{baseId}{nombreDef}_{nombreSousDef}")
newSense2.add_def(self.wiki_language, self.wtp.parse(l[i].items[0]).plain_text().strip()) newSense2.add_def(self.wiki_language, self.wtp.parse(l[i].items[0]).plain_text().strip(), f"sd_{nombreDef}_{nombreSousDef}")
newSense2.add_translation(f"t_{nombreDef}_{nombreSousDef}_0")
elif l[i].pattern == '\\##:': elif l[i].pattern == '\\##:':
cptex2 = 0
for j in l[i].items: for j in l[i].items:
k = 0 k = 0
isEx = 0 isEx = 0
while k < len(self.wtp.parse(j).templates) and isEx == 0 : while k < len(self.wtp.parse(j).templates) and isEx == 0 :
if (self.wtp.parse(j).templates[k].normal_name() in self.constants['t_ex']): if (self.wtp.parse(j).templates[k].normal_name() in self.constants['t_ex']):
newSense2.add_example(self.wtp.parse(j).templates[0].arguments[-1].value) cptex2 +=1
newSense2.add_example(self.wtp.parse(j).templates[0].arguments[-1].value, f"se_{nombreDef}_{nombreSousDef}_{cptex2}")
isEx = 1 isEx = 1
k += 1 k += 1
if isEx == 0: if isEx == 0:
newSense2.add_example(self.wtp.parse(j).plain_text().strip()) newSense2.add_example(self.wtp.parse(j).plain_text().strip(), f"se_{nombreDef}_{nombreSousDef}_{cptex2}")
if i == len(l)-1 or l[i+1].pattern == '\\# ' or l[i+1].pattern == '\\## ': if i == len(l)-1 or l[i+1].pattern == '\\# ' or l[i+1].pattern == '\\## ':
newSense.add_subsense(newSense2) newSense.add_subsense(newSense2)
i += 1 i += 1
......
...@@ -53,7 +53,7 @@ class Fr_en_straktor(Wikstraktor): ...@@ -53,7 +53,7 @@ class Fr_en_straktor(Wikstraktor):
keys = list(self.constants['POS'].keys()) keys = list(self.constants['POS'].keys())
pos = keys[ik] pos = keys[ik]
ik += 1 ik += 1
# print(pos) print(pos)
return pos return pos
def process_senses(self, entry, pos, sensesContent): def process_senses(self, entry, pos, sensesContent):
...@@ -64,9 +64,11 @@ class Fr_en_straktor(Wikstraktor): ...@@ -64,9 +64,11 @@ class Fr_en_straktor(Wikstraktor):
nombreDef = 0 nombreDef = 0
while i < len(l): while i < len(l):
if l[i].pattern == '\\# ': if l[i].pattern == '\\# ':
nombreDef += 1 #A revoir ça, très douteux
newSense = Sense(f"{baseId}{nombreDef}") for nbDef in l[i].items :
newSense.add_def(self.wiki_language, self.wtp.parse(l[i].items[0]).plain_text().strip()) nombreDef += 1
newSense = Sense(f"{baseId}{nombreDef}")
newSense.add_def(self.wiki_language, self.wtp.parse(nbDef).plain_text().strip())
elif l[i].pattern == '\\#:': elif l[i].pattern == '\\#:':
for j in l[i].items: for j in l[i].items:
k = 0 k = 0
......
...@@ -5,7 +5,7 @@ if __name__ == "__main__": ...@@ -5,7 +5,7 @@ if __name__ == "__main__":
# print(e.get_file_url("File:LL-Q1860 (eng)-Nattes à chat----parent.wav")) # print(e.get_file_url("File:LL-Q1860 (eng)-Nattes à chat----parent.wav"))
# print(e.get_file_url("File:LL-Q1860 (eng)-Nattes à chat-parent.wav")) # print(e.get_file_url("File:LL-Q1860 (eng)-Nattes à chat-parent.wav"))
#e.fetch("water") #e.fetch("water")
f.fetch("blue") f.fetch("water")
# print(e.fetch("test"), "entries added") # print(e.fetch("test"), "entries added")
#print(e) #print(e)
file_path = 'test.json' file_path = 'test.json'
...@@ -22,3 +22,5 @@ if __name__ == "__main__": ...@@ -22,3 +22,5 @@ if __name__ == "__main__":
# print(p.get_file_url()) # print(p.get_file_url())
#print(e) #print(e)
#Entry("test", wtp.parse(page.text))) #Entry("test", wtp.parse(page.text)))
# PRENDS PAS LE FICHIER AUDIO POUR "LIVE" EN_EN
...@@ -19,7 +19,7 @@ class Sound: ...@@ -19,7 +19,7 @@ class Sound:
if self.accent == None: if self.accent == None:
res = {"url":self.url} res = {"url":self.url}
else: else:
res = {"accent":self.accent, "url":self.url} res = { "accent":self.accent, "url":self.url}
return res return res
class Pronunciation: class Pronunciation:
...@@ -27,6 +27,7 @@ class Pronunciation: ...@@ -27,6 +27,7 @@ class Pronunciation:
self.ipa = None self.ipa = None
self.sounds = [] self.sounds = []
self.accent = None self.accent = None
self.id = None
def set_transcription(self, tscpt): def set_transcription(self, tscpt):
self.ipa = tscpt self.ipa = tscpt
...@@ -42,9 +43,9 @@ class Pronunciation: ...@@ -42,9 +43,9 @@ class Pronunciation:
for s in self.sounds: for s in self.sounds:
snds.append(s.serializable()) snds.append(s.serializable())
if self.accent == None: if self.accent == None:
res = {"transcript":self.ipa, "sounds":snds} res = {"ID":self.id, "transcript":self.ipa, "sounds":snds}
else: else:
res = {"accent":self.accent, "transcript":self.ipa, "sounds":snds} res = {"ID":self.id,"accent":self.accent, "transcript":self.ipa, "sounds":snds}
return res return res
def __str__(self): def __str__(self):
...@@ -70,7 +71,8 @@ class Pronunciation: ...@@ -70,7 +71,8 @@ class Pronunciation:
####### #######
class Definition: class Definition:
def __init__(self, lang, text): def __init__(self, lang, text, id):
self.id = id
self.lang = lang self.lang = lang
self.text = text self.text = text
...@@ -78,23 +80,28 @@ class Definition: ...@@ -78,23 +80,28 @@ class Definition:
return self.lang == other.lang and self.text == other.text return self.lang == other.lang and self.text == other.text
def serializable(self): def serializable(self):
return {"lang":self.lang, "definition":self.text} return {"ID":self.id, "lang":self.lang, "definition":self.text}
class Translation:
    """A translation attached to a sense, carrying its own identifier.

    Attributes are set directly from the constructor arguments:
    ``id`` (identifier string), ``lang`` (target language, may be None)
    and ``text`` (translated text, may be None for a placeholder).
    """

    def __init__(self, id, lang=None, text=None):
        self.id = id
        self.lang = lang
        self.text = text

    def __eq__(self, other):
        """Compare by language and text; the identifier is not compared.

        Value equality is what lets a ``translation not in translations``
        membership test detect duplicates; without it, two equal
        translations are only "the same" when they are the same object.
        """
        if not isinstance(other, Translation):
            return NotImplemented
        return self.lang == other.lang and self.text == other.text

    # Mutable objects compared by value must not be hashable.
    __hash__ = None

    def serializable(self):
        """Return a JSON-ready dict with the keys ID, lang and translation."""
        # The key is "ID" (no trailing colon), matching the other serializers.
        return {"ID": self.id, "lang": self.lang, "translation": self.text}
class Example: class Example:
def __init__(self, transcript, source=None, url=None): def __init__(self, transcript, id, source=None, url=None):
self.text = transcript self.text = transcript
self.source = source self.source = source
self.url = url self.url = url
self.id = id
def __eq__(self, other): def __eq__(self, other):
return self.text==other.text and self.source==other.source and self.url==other.url return self.text==other.text and self.source==other.source and self.url==other.url
def serializable(self): def serializable(self):
res = {"example":self.text} res = {"ID":self.id, "example":self.text}
if self.source != None: if self.source != None:
res["source"] = self.source res["source"] = self.source
if self.url != None: if self.url != None:
...@@ -113,20 +120,20 @@ class Sense: ...@@ -113,20 +120,20 @@ class Sense:
def set_domain(self, d): def set_domain(self, d):
self.domain = d self.domain = d
def add_def(self, lang, definition, id=None):
    """Set the definition of this sense.

    Args:
        lang: language of the definition text.
        definition: the definition text.
        id: identifier stored on the Definition. Optional, so that callers
            which do not generate identifiers keep working.

    Raises:
        ValueError: if a different definition is already set on this sense.
    """
    theDef = Definition(lang, definition, id)
    # Identity test: `== None` would go through the existing definition's
    # own equality method instead of simply checking for "not set yet".
    if self.definition is None:
        self.definition = theDef
    elif self.definition != theDef:
        raise ValueError(f"Superposition de deux définitions:\n\t{self.definition}\nremplacée par\n\t{theDef}")
def add_example(self, transcript, id=None, src=None, url=None):
    """Add an example to this sense unless an equal one is already stored.

    Args:
        transcript: text of the example.
        id: identifier stored on the Example. Optional, so that callers
            passing only the transcript keep working.
        src: optional source of the example.
        url: optional URL of the example.
    """
    theEx = Example(transcript, id, src, url)
    # Membership relies on Example equality to skip duplicates.
    if theEx not in self.examples:
        self.examples.append(theEx)
def add_translation(self, id, lang=None, translation=None):
    """Append a Translation built from the arguments, unless already present.

    Args:
        id: identifier given to the Translation.
        lang: optional language of the translation.
        translation: optional translated text.
    """
    candidate = Translation(id, lang, translation)
    if candidate in self.translations:
        return
    self.translations.append(candidate)
...@@ -165,10 +172,10 @@ class Sense: ...@@ -165,10 +172,10 @@ class Sense:
res[self.label]["Examples"] = [] res[self.label]["Examples"] = []
for e in self.examples: for e in self.examples:
res[self.label]["Examples"].append(e.serializable()) res[self.label]["Examples"].append(e.serializable())
if len(self.translations) > 0: #if len(self.translations) > 0:
res[self.label]["Translations"] = [] res[self.label]["Translations"] = []
for t in self.translations: for t in self.translations:
res[self.label]["Translations"].append(t.serializable()) res[self.label]["Translations"].append(t.serializable())
else: else:
if self.domain != None: if self.domain != None:
res["Domain"] = self.domain res["Domain"] = self.domain
...@@ -181,10 +188,10 @@ class Sense: ...@@ -181,10 +188,10 @@ class Sense:
res["Examples"] = [] res["Examples"] = []
for e in self.examples: for e in self.examples:
res["Examples"].append(e.serializable()) res["Examples"].append(e.serializable())
if len(self.translations) > 0: #if len(self.translations) > 0:
res["Translations"] = [] res["Translations"] = []
for t in self.translations: for t in self.translations:
res["Translations"].append(t.serializable()) res["Translations"].append(t.serializable())
return res return res
...@@ -282,8 +289,8 @@ class ParserContext: ...@@ -282,8 +289,8 @@ class ParserContext:
if testNewEntry: if testNewEntry:
self.create_entry() self.create_entry()
#Pb là dedans
def create_entry(self): def create_entry(self):
#Dans le dictionnaire de keys, il n'y a jamais de senses ou de POS
res = Entry(self.lemma) res = Entry(self.lemma)
for l in self.context: for l in self.context:
#print(l.keys()) #print(l.keys())
...@@ -387,7 +394,8 @@ class Wikstraktor: ...@@ -387,7 +394,8 @@ class Wikstraktor:
pos = self.process_POS(stitle) pos = self.process_POS(stitle)
if pos != None : if pos != None :
self.parserContext.set_top_entry_info('POS', pos, False) self.parserContext.set_top_entry_info('POS', pos, False)
self.parserContext.set_top_entry_info('senses', self.process_senses(entry, pos+str(len(self.parserContext.entries)), self.wtp.parse(s.contents))) self.parserContext.set_top_entry_info('senses', self.process_senses(entry, pos+str(len(self.parserContext.entries)), self.wtp.parse(s.contents))) #cette ligne le prob
# self.parserContext.entries augmente pas même lorsque qu'on entre dans le if au dessus.
res = len(self.parserContext.entries) res = len(self.parserContext.entries)
if res > 0: if res > 0:
for e in self.parserContext.entries: for e in self.parserContext.entries:
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment