Skip to content
Snippets Groups Projects
Commit 33cfdfc9 authored by Enzo Simonnet
Browse files

Replace wikstraktor.py

parent e05cda9c
No related branches found
No related tags found
No related merge requests found
...@@ -258,6 +258,7 @@ class ParserContext: ...@@ -258,6 +258,7 @@ class ParserContext:
#Dans le dictionnaire de keys, il n'y a jamais de senses ou de POS #Dans le dictionnaire de keys, il n'y a jamais de senses ou de POS
res = Entry(self.lemma) res = Entry(self.lemma)
for l in self.context: for l in self.context:
print(l.keys())
if "pro" in l.keys(): if "pro" in l.keys():
res.set_pronunciations(l['pro']) res.set_pronunciations(l['pro'])
if "ety" in l.keys(): if "ety" in l.keys():
...@@ -344,17 +345,19 @@ class Wikstraktor: ...@@ -344,17 +345,19 @@ class Wikstraktor:
while self.parserContext.get_level() > s.level: while self.parserContext.get_level() > s.level:
self.parserContext.pop() self.parserContext.pop()
self.parserContext.set_top_wiki(s) self.parserContext.set_top_wiki(s)
if self.wtp.parse(s.title).templates == []: stitle = self.wtp.parse(s.title).templates
if stitle == []:
stitle = s.title stitle = s.title
else: else:
stitle = self.wtp.parse(s.title).templates[0].arguments[0].value stitle = stitle[0].arguments[0].value
if stitle == self.constants['pro']: if self.isPro(stitle):
self.parserContext.set_top_entry_info('pro', self.process_pronunciation(self.wtp.parse(s.contents))) self.parserContext.set_top_entry_info('pro', self.process_pronunciation(self.wtp.parse(s.contents)))
elif self.constants['ety'] in stitle: elif self.isEty(stitle):
self.parserContext.set_top_entry_info('ety', self.process_etymology(self.wtp.parse(s.contents))) self.parserContext.set_top_entry_info('ety', self.process_etymology(self.wtp.parse(s.contents)))
elif stitle in self.constants['POS'].keys(): # elif stitle in self.constants['POS'].keys():
if s.title in self.constants['POS'].keys(): else:
pos = self.constants['POS'][stitle] pos = self.process_POS(stitle)
if pos != None :
self.parserContext.set_top_entry_info('POS', pos, False) self.parserContext.set_top_entry_info('POS', pos, False)
self.parserContext.set_top_entry_info('senses', self.process_senses(entry, pos+str(len(self.parserContext.entries)), self.wtp.parse(s.contents))) self.parserContext.set_top_entry_info('senses', self.process_senses(entry, pos+str(len(self.parserContext.entries)), self.wtp.parse(s.contents)))
res = len(self.parserContext.entries) res = len(self.parserContext.entries)
...@@ -363,6 +366,24 @@ class Wikstraktor: ...@@ -363,6 +366,24 @@ class Wikstraktor:
self.entries.append(e) self.entries.append(e)
return res return res
def isPro(self, title):
    """Tell whether a section title designates the pronunciation section.

    ``self.constants['pro']`` may hold either a single string, in which
    case ``title`` must be equal to it, or a container of accepted titles,
    in which case ``title`` must be a member of it.

    Args:
        title: the section title to classify.

    Returns:
        True when ``title`` matches the configured pronunciation title(s),
        False otherwise.
    """
    pro = self.constants['pro']
    # isinstance (rather than an exact type comparison) keeps str subclasses
    # on the equality path instead of silently doing a substring search.
    if isinstance(pro, str):
        return title == pro
    return title in pro
def isEty(self, title):
    """Tell whether a section title designates the etymology section.

    ``self.constants['ety']`` may hold either a single string, in which
    case ``title`` must be equal to it, or a container of accepted titles,
    in which case ``title`` must be a member of it.

    Args:
        title: the section title to classify.

    Returns:
        True when ``title`` matches the configured etymology title(s),
        False otherwise.
    """
    ety = self.constants['ety']
    # isinstance (rather than an exact type comparison) keeps str subclasses
    # on the equality path instead of silently doing a substring search.
    if isinstance(ety, str):
        return title == ety
    return title in ety
def process_POS(self, parsedwikitext):
    """Hook for resolving a part of speech; meant to be overridden in a subclass.

    This base implementation does nothing and returns None.

    NOTE(review): despite the parameter name, the visible caller passes the
    section title here, and treats a None result as "not a part-of-speech
    section" — confirm when writing an override.
    """
    return None
def process_pronunciation(self, parsedwikitext): def process_pronunciation(self, parsedwikitext):
pass#in subclass pass#in subclass
...@@ -385,7 +406,7 @@ if __name__ == "__main__": ...@@ -385,7 +406,7 @@ if __name__ == "__main__":
# print(e.get_file_url("File:LL-Q1860 (eng)-Nattes à chat----parent.wav")) # print(e.get_file_url("File:LL-Q1860 (eng)-Nattes à chat----parent.wav"))
# print(e.get_file_url("File:LL-Q1860 (eng)-Nattes à chat-parent.wav")) # print(e.get_file_url("File:LL-Q1860 (eng)-Nattes à chat-parent.wav"))
#e.fetch("water") #e.fetch("water")
f.fetch("water") f.fetch("blue")
# print(e.fetch("test"), "entries added") # print(e.fetch("test"), "entries added")
#print(e) #print(e)
file_path = 'test.json' file_path = 'test.json'
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment