Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
wikstraktor
Manage
Activity
Members
Labels
Plan
Issues
3
Issue boards
Milestones
Wiki
External wiki
Code
Merge requests
0
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package Registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Lex gaMe
wikstraktor
Commits
90eac4c6
Commit
90eac4c6
authored
2 years ago
by
Mathieu Loiseau
Browse files
Options
Downloads
Patches
Plain Diff
process 1 and lbl templates in English
parent
3d184fb5
No related branches found
No related tags found
No related merge requests found
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
parsers/en_constants.py
+32
-0
32 additions, 0 deletions
parsers/en_constants.py
parsers/en_en.py
+47
-1
47 additions, 1 deletion
parsers/en_en.py
wikstraktor.py
+24
-4
24 additions, 4 deletions
wikstraktor.py
wikstraktor.sqlite
+0
-0
0 additions, 0 deletions
wikstraktor.sqlite
with
103 additions
and
5 deletions
parsers/en_constants.py
+
32
−
0
View file @
90eac4c6
...
@@ -8,6 +8,38 @@ string_values = {
...
@@ -8,6 +8,38 @@ string_values = {
"
t_acc
"
:
"
a
"
,
#template for accents
"
t_acc
"
:
"
a
"
,
#template for accents
"
t_deflabel
"
:
"
lb
"
,
"
t_deflabel
"
:
"
lb
"
,
"
t_ex
"
:[
"
ux
"
,
"
usex
"
],
"
t_ex
"
:[
"
ux
"
,
"
usex
"
],
"
t_lbl
"
:[
"
lb
"
,
"
lbl
"
,
"
label
"
],
#template for labels
"
regions
"
:{
"
UK
"
:
"
United Kingdom
"
,
"
United Kingdom
"
:
"
United Kingdom
"
,
"
British
"
:
"
Great Britain
"
,
"
GB
"
:
"
Great Britain
"
,
"
Great Britain
"
:
"
Great Britain
"
,
"
Scot
"
:
"
Scotland
"
,
"
Scottish
"
:
"
Scotland
"
,
"
Scotland
"
:
"
Scotland
"
,
"
Irl
"
:
"
Ireland
"
,
"
Irish
"
:
"
Ireland
"
,
"
Ireland
"
:
"
Ireland
"
,
"
Ulst
"
:
"
Northern Ireland
"
,
"
Ulster
"
:
"
Northern Ireland
"
,
"
Northern Ireland
"
:
"
Northern Ireland
"
,
"
Wls
"
:
"
Wales
"
,
"
Welsh
"
:
"
Wales
"
,
"
Wales
"
:
"
Wales
"
,
"
English
"
:
"
England
"
,
"
Eng
"
:
"
England
"
,
"
En
"
:
"
England
"
,
"
England
"
:
"
England
"
,
"
US
"
:
"
United States of America
"
,
"
USA
"
:
"
United States of America
"
,
"
United States
"
:
"
United States of America
"
,
"
United States of America
"
:
"
United States of America
"
,
"
NZ
"
:
"
New Zealand
"
,
"
New Zealand
"
:
"
New Zealand
"
,
"
Au
"
:
"
Australia
"
,
"
AU
"
:
"
Australia
"
,
"
Australia
"
:
"
Australia
"
},
"
sense_pattern
"
:[
## structure(s) for sense patterns add_subdef is to be added to def patterns
"
sense_pattern
"
:[
## structure(s) for sense patterns add_subdef is to be added to def patterns
{
"
def
"
:
"
\\
#
"
,
"
ex
"
:
"
\\
#[:;]
"
,
"
add_subdef
"
:
"
\\
#
"
}
{
"
def
"
:
"
\\
#
"
,
"
ex
"
:
"
\\
#[:;]
"
,
"
add_subdef
"
:
"
\\
#
"
}
],
],
...
...
This diff is collapsed.
Click to expand it.
parsers/en_en.py
+
47
−
1
View file @
90eac4c6
#!/usr/bin/env python3
#!/usr/bin/env python3
from
wikstraktor
import
Wikstraktor
,
Pronunciation
,
Sense
,
SubSense
from
wikstraktor
import
Wikstraktor
,
Pronunciation
,
Sense
,
SubSense
,
Definition
from
parsers.en_constants
import
string_values
from
parsers.en_constants
import
string_values
...
@@ -42,6 +42,52 @@ class En_en_straktor(Wikstraktor):
...
@@ -42,6 +42,52 @@ class En_en_straktor(Wikstraktor):
debugEty
+=
1
debugEty
+=
1
return
"
Etymology
"
+
str
(
debugEty
)
return
"
Etymology
"
+
str
(
debugEty
)
def parse_template_1(self, templates):
    """Build a Definition from the first usable ``1`` template.

    Used as a fallback when a definition line has no plain text of its own.

    Args:
        templates: iterable of parsed templates exposing ``normal_name()``
            and ``arguments`` (each argument exposing ``value``).

    Returns:
        A ``Definition`` in ``self.entry_language`` whose text quotes the
        first argument of the template, or ``None`` when no ``1`` template
        carrying at least one argument is found.
    """
    for t in templates:
        if t.normal_name() != "1":
            continue
        arguments = t.arguments
        if not arguments:
            # A bare ``1`` template has nothing to quote; indexing its
            # arguments would raise IndexError, so keep looking instead.
            continue
        return Definition(self.entry_language, f"Other wording of “{arguments[0].value}”")
    return None
def parse_labels(self, a_def, templates):
    """Copy the arguments of label templates into the metadata of ``a_def``.

    For every template whose normalised name is listed in
    ``self.constants['t_lbl']``:

    * the first argument is stored under ``labels_<n>_language``, where
      ``<n>`` is the first index for which that key is not yet used;
    * each following argument is then handled in order:
        - ``"_"`` marks the next plain label as a continuation of the
          previous one;
        - ``"and"`` is ignored;
        - a key of ``self.constants['regions']`` adds the mapped region
          name to the ``region`` metadata;
        - otherwise the value is a label: it either extends the previous
          label (separated by a space) or is added to ``labels_<n>``.

    Args:
        a_def: object receiving the metadata; must provide
            ``metadata_exists``, ``add_metadata``, ``add_to_metadata`` and
            ``extend_metadata``.
        templates: iterable of parsed templates exposing ``normal_name()``
            and ``arguments`` (each argument exposing ``value``).

    Returns:
        None. ``a_def`` is modified in place.
    """
    key = "labels"
    desc = "language"
    num = 0
    for t in templates:
        if t.normal_name() not in self.constants['t_lbl']:
            continue
        arguments = t.arguments
        if not arguments:
            # A label template without any argument carries no information;
            # reading arguments[0] would raise IndexError.
            continue
        # Find the first free slot so several label templates on the same
        # definition do not overwrite each other.
        while a_def.metadata_exists(f"{key}_{num}_{desc}"):
            num += 1
        a_def.add_metadata(f"{key}_{num}_{desc}", arguments[0].value)
        complete_previous = False
        for a in arguments[1:]:
            if a.value == "_":
                complete_previous = True
            elif a.value == "and":
                pass
            elif a.value in self.constants['regions']:
                a_def.add_to_metadata("region", self.constants['regions'][a.value])
            elif complete_previous:
                a_def.extend_metadata(f"{key}_{num}", a.value, " ")
                complete_previous = False
            else:
                a_def.add_to_metadata(f"{key}_{num}", a.value)
def parse_definition(self, def_string):
    """Turn a wikitext definition line into a Definition with its labels.

    The line is parsed with ``self.wtp``. If its stripped plain text is not
    empty, that text becomes the definition; otherwise the definition is
    built from a ``1`` template via ``parse_template_1``. Label templates
    found on the line are then recorded through ``parse_labels``.

    Args:
        def_string: the raw wikitext of the definition.

    Returns:
        The resulting definition object.

    Raises:
        ValueError: if neither the plain text nor a ``1`` template yields a
            definition.
    """
    parsed_def = self.wtp.parse(def_string)
    def_text = parsed_def.plain_text().strip()
    templates = parsed_def.templates
    if def_text != "":
        the_def = Definition(self.entry_language, def_text)
    else:
        the_def = self.parse_template_1(templates)
    # Identity test: ``!= None`` would go through Definition.__eq__.
    if the_def is None:
        raise ValueError(f"En_en_straktor.parse_definition with empty definition\n\t{def_string}")
    self.parse_labels(the_def, templates)
    return the_def
def
process_POS
(
self
,
parsedwikitext
):
def
process_POS
(
self
,
parsedwikitext
):
pos
=
None
pos
=
None
if
parsedwikitext
in
self
.
constants
[
'
POS
'
].
keys
():
if
parsedwikitext
in
self
.
constants
[
'
POS
'
].
keys
():
...
...
This diff is collapsed.
Click to expand it.
wikstraktor.py
+
24
−
4
View file @
90eac4c6
...
@@ -122,9 +122,26 @@ class Definition(SubInfo):
...
@@ -122,9 +122,26 @@ class Definition(SubInfo):
raise
ValueError
(
f
"
Definition.__init__: “
{
text
}
” empty definition.
"
)
raise
ValueError
(
f
"
Definition.__init__: “
{
text
}
” empty definition.
"
)
def
add_metadata
(
self
,
key
,
value
):
def
add_metadata
(
self
,
key
,
value
):
if
key
in
self
.
metadata
.
keys
():
if
self
.
metadata_exists
(
key
):
self
.
log
.
add_log
(
"
Definition.add_metadata
"
,
f
"
for
{
self
.
text
}
replaced
{
key
}
:“
{
self
.
metadata
[
'
key
'
]
}
” by
{
key
}
:“
{
value
}
”
"
)
print
(
"
Definition.add_metadata
"
,
f
"
for
{
self
.
text
}
replaced
{
key
}
:“
{
self
.
metadata
[
key
]
}
” by
{
key
}
:“
{
value
}
”
"
)
self
.
metadata
[
"
key
"
]
=
value
self
.
metadata
[
key
]
=
value
def add_to_metadata(self, key, value):
    """Append ``value`` to the list of values stored under ``key``.

    The list is created on first use. If ``key`` currently holds a single
    non-list value (as stored by ``add_metadata`` or by ``extend_metadata``
    on a missing key), that value is kept as the first element of the new
    list instead of failing on ``.append``.

    Args:
        key: metadata key.
        value: value to append.
    """
    if not self.metadata_exists(key):
        self.metadata[key] = [value]
        return
    current = self.metadata[key]
    if isinstance(current, list):
        current.append(value)
    else:
        # Promote the scalar to a list so nothing already stored is lost.
        self.metadata[key] = [current, value]
# Appends text to the end of an existing metadata value; when the key is
# missing, the value is stored with add_metadata (not add_to_metadata).
def extend_metadata(self, key, value, separator=""):
    """Concatenate ``value`` onto the metadata stored under ``key``.

    * key missing: ``value`` is stored as is through ``add_metadata``;
    * key holds a list: ``separator + value`` is appended to its last
      element (an empty list simply receives ``value``);
    * otherwise: ``separator + value`` is appended to the stored value.

    Args:
        key: metadata key.
        value: text to add.
        separator: text inserted between the existing content and ``value``.
    """
    if not self.metadata_exists(key):
        self.add_metadata(key, value)
        return
    current = self.metadata[key]
    if isinstance(current, list):
        if current:
            current[-1] += separator + value
        else:
            # No previous element to extend: [-1] would raise IndexError.
            current.append(value)
    else:
        self.metadata[key] = current + separator + value
def metadata_exists(self, key):
    """Return True if ``key`` is present in ``self.metadata``."""
    # Membership on the dict itself; building a keys() view is unnecessary.
    return key in self.metadata
def
__eq__
(
self
,
other
):
def
__eq__
(
self
,
other
):
return
isinstance
(
other
,
self
.
__class__
)
and
self
.
lang
==
other
.
lang
and
self
.
text
==
other
.
text
return
isinstance
(
other
,
self
.
__class__
)
and
self
.
lang
==
other
.
lang
and
self
.
text
==
other
.
text
...
@@ -204,7 +221,10 @@ class Sense(SubInfo):
...
@@ -204,7 +221,10 @@ class Sense(SubInfo):
self
.
domain
=
d
self
.
domain
=
d
def
add_def
(
self
,
lang
,
definition
):
def
add_def
(
self
,
lang
,
definition
):
theDef
=
Definition
(
lang
,
definition
)
if
isinstance
(
definition
,
Definition
):
theDef
=
definition
else
:
theDef
=
Definition
(
lang
,
definition
)
if
theDef
!=
None
and
theDef
not
in
self
.
definitions
:
if
theDef
!=
None
and
theDef
not
in
self
.
definitions
:
theDef
.
set_id
(
self
.
set_id
())
theDef
.
set_id
(
self
.
set_id
())
self
.
definitions
.
append
(
theDef
)
self
.
definitions
.
append
(
theDef
)
...
...
This diff is collapsed.
Click to expand it.
wikstraktor.sqlite
+
0
−
0
View file @
90eac4c6
No preview for this file type
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment