Skip to content
Snippets Groups Projects
Commit 90eac4c6 authored by Mathieu Loiseau's avatar Mathieu Loiseau
Browse files

process 1 and lbl templates in English

parent 3d184fb5
No related branches found
No related tags found
No related merge requests found
...@@ -8,6 +8,38 @@ string_values = { ...@@ -8,6 +8,38 @@ string_values = {
"t_acc":"a", #template for accents "t_acc":"a", #template for accents
"t_deflabel":"lb", "t_deflabel":"lb",
"t_ex":["ux", "usex"], "t_ex":["ux", "usex"],
"t_lbl":["lb","lbl", "label"], #template for labels
"regions":{
"UK":"United Kingdom",
"United Kingdom":"United Kingdom",
"British":"Great Britain",
"GB":"Great Britain",
"Great Britain":"Great Britain",
"Scot":"Scotland",
"Scottish":"Scotland",
"Scotland":"Scotland",
"Irl":"Ireland",
"Irish":"Ireland",
"Ireland":"Ireland",
"Ulst":"Northern Ireland",
"Ulster":"Northern Ireland",
"Northern Ireland":"Northern Ireland",
"Wls":"Wales",
"Welsh":"Wales",
"Wales":"Wales",
"English":"England",
"Eng":"England",
"En":"England",
"England":"England",
"US":"United States of America",
"USA":"United States of America",
"United States":"United States of America",
"United States of America":"United States of America",
"NZ":"New Zealand",
"New Zealand":"New Zealand",
"Au":"Australia",
"AU":"Australia",
"Australia":"Australia"},
"sense_pattern":[ ## structure(s) for sense patterns add_subdef is to be added to def patterns "sense_pattern":[ ## structure(s) for sense patterns add_subdef is to be added to def patterns
{"def":"\\#", "ex":"\\#[:;]", "add_subdef":"\\#"} {"def":"\\#", "ex":"\\#[:;]", "add_subdef":"\\#"}
], ],
......
#!/usr/bin/env python3 #!/usr/bin/env python3
from wikstraktor import Wikstraktor, Pronunciation, Sense, SubSense from wikstraktor import Wikstraktor, Pronunciation, Sense, SubSense, Definition
from parsers.en_constants import string_values from parsers.en_constants import string_values
...@@ -42,6 +42,52 @@ class En_en_straktor(Wikstraktor): ...@@ -42,6 +42,52 @@ class En_en_straktor(Wikstraktor):
debugEty += 1 debugEty += 1
return "Etymology" + str(debugEty) return "Etymology" + str(debugEty)
def parse_template_1(self, templates):
    """Build a fallback definition from the first usable ``{{1|...}}`` template.

    Called when a definition line has no plain text of its own.

    :param templates: iterable of parsed templates exposing ``normal_name()``
        and ``arguments`` (each argument exposing ``value``).
    :return: a ``Definition`` whose text quotes the template's first
        argument, or ``None`` when no ``1`` template with an argument is found.
    """
    for t in templates:
        if t.normal_name() != "1":
            continue
        # A bare ``{{1}}`` has nothing to quote: skip it rather than crash
        # with an IndexError on ``arguments[0]``.
        if not t.arguments:
            continue
        return Definition(self.entry_language, f"Other wording of “{t.arguments[0].value}”")
    return None
def parse_labels(self, a_def, templates):
    """Copy the content of label templates into the metadata of ``a_def``.

    For every template whose name is listed in ``self.constants['t_lbl']``:

    * the first argument is stored under ``labels_<n>_language`` where
      ``<n>`` is the first index not already used on ``a_def``;
    * ``_`` means that the next label continues the previous one (they are
      joined with a space);
    * ``and`` is ignored;
    * arguments found in ``self.constants['regions']`` are appended, in their
      normalised form, to the ``region`` metadata;
    * any other argument is appended to ``labels_<n>``.

    :param a_def: object receiving the metadata; must provide
        ``metadata_exists``, ``add_metadata``, ``add_to_metadata`` and
        ``extend_metadata``.
    :param templates: iterable of parsed templates exposing ``normal_name()``
        and ``arguments`` (each argument exposing ``value``).
    """
    key = "labels"
    desc = "language"
    label_names = self.constants['t_lbl']
    regions = self.constants['regions']
    num = 0
    for t in templates:
        if t.normal_name() not in label_names:
            continue
        # A label template without any argument carries no information and
        # would otherwise raise IndexError on ``arguments[0]``.
        if not t.arguments:
            continue
        # Find the first label slot that is still free on this definition.
        while a_def.metadata_exists(f"{key}_{num}_{desc}"):
            num += 1
        a_def.add_metadata(f"{key}_{num}_{desc}", t.arguments[0].value)
        complete_previous = False
        for a in t.arguments[1:]:
            if a.value == "_":
                complete_previous = True
            elif a.value == "and":
                pass
            elif a.value in regions:
                a_def.add_to_metadata("region", regions[a.value])
                # The region consumed the pending "_": do not glue the next
                # ordinary label onto the previous one.
                complete_previous = False
            elif complete_previous:
                a_def.extend_metadata(f"{key}_{num}", a.value, " ")
                complete_previous = False
            else:
                a_def.add_to_metadata(f"{key}_{num}", a.value)
def parse_definition(self, def_string):
    """Turn the wikitext of one definition line into a ``Definition``.

    The plain text of the line is used as the definition text. When the line
    has no plain text, ``parse_template_1`` is asked for a fallback. Label
    templates found in the line are then recorded as metadata through
    ``parse_labels``.

    :param def_string: wikitext of the definition.
    :return: the resulting definition object.
    :raises ValueError: when neither plain text nor a fallback is available.
    """
    parsed_def = self.wtp.parse(def_string)
    def_text = parsed_def.plain_text().strip()
    templates = parsed_def.templates
    if def_text != "":
        the_def = Definition(self.entry_language, def_text)
    else:
        the_def = self.parse_template_1(templates)
    # Identity test: ``!= None`` would go through the overloaded
    # ``Definition.__eq__`` instead of simply checking for absence.
    if the_def is None:
        raise ValueError(f"En_en_straktor.parse_definition with empty definition\n\t{def_string}")
    self.parse_labels(the_def, templates)
    return the_def
def process_POS(self,parsedwikitext): def process_POS(self,parsedwikitext):
pos = None pos = None
if parsedwikitext in self.constants['POS'].keys(): if parsedwikitext in self.constants['POS'].keys():
......
...@@ -122,9 +122,26 @@ class Definition(SubInfo): ...@@ -122,9 +122,26 @@ class Definition(SubInfo):
raise ValueError(f"Definition.__init__: “{text}” empty definition.") raise ValueError(f"Definition.__init__: “{text}” empty definition.")
def add_metadata(self, key, value):
    """Set metadata ``key`` to ``value``, replacing any previous value.

    A message is printed when an existing value is overwritten.

    :param key: metadata name.
    :param value: value stored under ``key``.
    """
    if self.metadata_exists(key):
        # Index with the ``key`` variable, not the literal string "key".
        print("Definition.add_metadata", f"for {self.text} replaced {key}:“{self.metadata[key]}” by {key}:“{value}”")
    self.metadata[key] = value
def add_to_metadata(self, key, value):
    """Append ``value`` to the list stored under metadata ``key``.

    The list is created when ``key`` is missing. When ``key`` currently holds
    a single non-list value (as set by ``add_metadata``), that value becomes
    the first item of the list instead of triggering an ``AttributeError``.

    :param key: metadata name.
    :param value: item appended to the list.
    """
    if not self.metadata_exists(key):
        self.metadata[key] = []
    elif not isinstance(self.metadata[key], list):
        # Promote a scalar to a one-item list so that it can be appended to.
        self.metadata[key] = [self.metadata[key]]
    self.metadata[key].append(value)
# Appends to the end of an existing metadata value; when the key is missing,
# the value is stored with add_metadata (not add_to_metadata).
def extend_metadata(self, key, value, separator=""):
    """Concatenate ``value`` to the end of metadata ``key``.

    * missing key: ``value`` is stored as is through ``add_metadata``;
    * list: ``separator + value`` is concatenated to the last item, or
      ``value`` becomes the first item when the list is empty;
    * anything else: ``separator + value`` is concatenated to the stored value.

    :param key: metadata name.
    :param value: text to concatenate.
    :param separator: text inserted between the existing content and ``value``.
    """
    if not self.metadata_exists(key):
        self.add_metadata(key, value)
        return
    current = self.metadata[key]
    if isinstance(current, list):
        if current:
            current[-1] += separator + value
        else:
            # Nothing to extend yet: avoid IndexError on ``[-1]``.
            current.append(value)
    else:
        self.metadata[key] = current + separator + value
def metadata_exists(self, key):
    """Return ``True`` when ``key`` is present in this object's metadata."""
    # Membership on the dict itself; no need to build a keys() view.
    return key in self.metadata
def __eq__(self, other): def __eq__(self, other):
return isinstance(other, self.__class__) and self.lang == other.lang and self.text == other.text return isinstance(other, self.__class__) and self.lang == other.lang and self.text == other.text
...@@ -204,7 +221,10 @@ class Sense(SubInfo): ...@@ -204,7 +221,10 @@ class Sense(SubInfo):
self.domain = d self.domain = d
def add_def(self, lang, definition): def add_def(self, lang, definition):
theDef = Definition(lang, definition) if isinstance(definition, Definition):
theDef = definition
else:
theDef = Definition(lang, definition)
if theDef != None and theDef not in self.definitions: if theDef != None and theDef not in self.definitions:
theDef.set_id(self.set_id()) theDef.set_id(self.set_id())
self.definitions.append(theDef) self.definitions.append(theDef)
......
No preview for this file type
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment