Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
wikstraktor
Manage
Activity
Members
Labels
Plan
Issues
3
Issue boards
Milestones
Wiki
External wiki
Code
Merge requests
0
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package Registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Lex gaMe
wikstraktor
Commits
1c005715
Commit
1c005715
authored
2 years ago
by
Mathieu Loiseau
Browse files
Options
Downloads
Patches
Plain Diff
region + labels at sense level
parent
90eac4c6
No related branches found
Branches containing commit
No related tags found
No related merge requests found
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
parsers/en_constants.py
+9
-1
9 additions, 1 deletion
parsers/en_constants.py
parsers/en_en.py
+19
-15
19 additions, 15 deletions
parsers/en_en.py
wikstraktor.py
+61
-34
61 additions, 34 deletions
wikstraktor.py
wikstraktor.sqlite
+0
-0
0 additions, 0 deletions
wikstraktor.sqlite
with
89 additions
and
50 deletions
parsers/en_constants.py
+
9
−
1
View file @
1c005715
...
...
@@ -31,6 +31,10 @@ string_values = {
"
Eng
"
:
"
England
"
,
"
En
"
:
"
England
"
,
"
England
"
:
"
England
"
,
"
Canada
"
:
"
Canada
"
,
"
Canadian
"
:
"
Canada
"
,
'
North American
'
:
'
North America
'
,
'
North America
'
:
"
North America
"
,
"
US
"
:
"
United States of America
"
,
"
USA
"
:
"
United States of America
"
,
"
United States
"
:
"
United States of America
"
,
...
...
@@ -39,7 +43,11 @@ string_values = {
"
New Zealand
"
:
"
New Zealand
"
,
"
Au
"
:
"
Australia
"
,
"
AU
"
:
"
Australia
"
,
"
Australia
"
:
"
Australia
"
},
"
Australia
"
:
"
Australia
"
,
"
India
"
:
"
India
"
,
"
Indian
"
:
"
India
"
,
"
Nigeria
"
:
"
Nigeria
"
,
"
Nigerian
"
:
"
Nigeria
"
},
"
sense_pattern
"
:[
## structure(s) for sense patterns add_subdef is to be added to def patterns
{
"
def
"
:
"
\\
#
"
,
"
ex
"
:
"
\\
#[:;]
"
,
"
add_subdef
"
:
"
\\
#
"
}
],
...
...
This diff is collapsed.
Click to expand it.
parsers/en_en.py
+
19
−
15
View file @
1c005715
...
...
@@ -50,15 +50,15 @@ class En_en_straktor(Wikstraktor):
break
return
the_def
def
parse_labels
(
self
,
a_
def
,
templates
):
def
parse_labels
(
self
,
a_
sense
,
templates
):
key
=
"
labels
"
desc
=
"
language
"
num
=
0
for
t
in
templates
:
if
t
.
normal_name
()
in
self
.
constants
[
'
t_lbl
'
]:
while
a_
def
.
metadata_exists
(
f
"
{
key
}
_
{
num
}
_
{
desc
}
"
):
while
a_
sense
.
metadata_exists
(
f
"
{
key
}
_
{
num
}
_
{
desc
}
"
):
num
+=
1
a_
def
.
add_metadata
(
f
"
{
key
}
_
{
num
}
_
{
desc
}
"
,
t
.
arguments
[
0
].
value
)
a_
sense
.
add_metadata
(
f
"
{
key
}
_
{
num
}
_
{
desc
}
"
,
t
.
arguments
[
0
].
value
)
complete_previous
=
False
for
a
in
t
.
arguments
[
1
:]:
if
a
.
value
==
"
_
"
:
...
...
@@ -66,28 +66,32 @@ class En_en_straktor(Wikstraktor):
elif
a
.
value
==
"
and
"
:
pass
elif
a
.
value
in
self
.
constants
[
'
regions
'
].
keys
():
a_
def
.
add_to_metadata
(
"
region
"
,
self
.
constants
[
'
regions
'
][
a
.
value
])
a_
sense
.
add_
region
(
self
.
constants
[
'
regions
'
][
a
.
value
])
elif
complete_previous
:
a_
def
.
extend_metadata
(
f
"
{
key
}
_
{
num
}
"
,
a
.
value
,
"
"
)
a_
sense
.
extend_metadata
(
f
"
{
key
}
_
{
num
}
"
,
a
.
value
,
"
"
)
complete_previous
=
False
else
:
a_
def
.
add_to_metadata
(
f
"
{
key
}
_
{
num
}
"
,
a
.
value
)
a_
sense
.
add_to_metadata
(
f
"
{
key
}
_
{
num
}
"
,
a
.
value
)
def
parse_definition
(
self
,
def_string
):
the_def
=
None
parsed_def
=
self
.
wtp
.
parse
(
def_string
)
def
parse_definition
(
self
,
parsed_def
):
if
not
isinstance
(
parsed_def
,
self
.
wtp
.
WikiText
):
parsed_def
=
self
.
wtp
.
parse
(
parsed_def
)
def_text
=
parsed_def
.
plain_text
().
strip
()
templates
=
parsed_def
.
templates
if
def_text
!=
""
:
the_def
=
self
.
parse_template_1
(
templates
)
if
the_def
==
None
:
the_def
=
Definition
(
self
.
entry_language
,
def_text
)
else
:
the_def
=
self
.
parse_template_1
(
templates
)
if
the_def
!=
None
:
self
.
parse_labels
(
the_def
,
templates
)
else
:
if
the_def
==
None
:
raise
ValueError
(
f
"
En_en_straktor.parse_definition with empty definition
\n\t
{
def_string
}
"
)
return
the_def
def get_sense_metadata(self, sense, parsed_def):
    """Collect the label metadata of a definition onto ``sense``.

    ``parsed_def`` is used as-is when it is already a ``self.wtp.WikiText``
    instance; anything else is first run through ``self.wtp.parse``.  The
    templates of the resulting object are then passed, together with
    ``sense``, to ``self.parse_labels``.  Nothing is returned.
    """
    if isinstance(parsed_def, self.wtp.WikiText):
        wikitext = parsed_def
    else:
        wikitext = self.wtp.parse(parsed_def)
    self.parse_labels(sense, wikitext.templates)
def
process_POS
(
self
,
parsedwikitext
):
pos
=
None
if
parsedwikitext
in
self
.
constants
[
'
POS
'
].
keys
():
...
...
This diff is collapsed.
Click to expand it.
wikstraktor.py
+
61
−
34
View file @
1c005715
...
...
@@ -117,32 +117,9 @@ class Definition(SubInfo):
if
text
!=
""
:
self
.
lang
=
lang
self
.
text
=
text
self
.
metadata
=
{}
else
:
raise
ValueError
(
f
"
Definition.__init__: “
{
text
}
” empty definition.
"
)
def
add_metadata
(
self
,
key
,
value
):
if
self
.
metadata_exists
(
key
):
print
(
"
Definition.add_metadata
"
,
f
"
for
{
self
.
text
}
replaced
{
key
}
:“
{
self
.
metadata
[
key
]
}
” by
{
key
}
:“
{
value
}
”
"
)
self
.
metadata
[
key
]
=
value
def
add_to_metadata
(
self
,
key
,
value
):
if
not
self
.
metadata_exists
(
key
):
self
.
metadata
[
key
]
=
[]
self
.
metadata
[
key
].
append
(
value
)
#to add at the end of the metadata, if empty add_metadata not add_to_metadata
def
extend_metadata
(
self
,
key
,
value
,
separator
=
""
):
if
not
self
.
metadata_exists
(
key
):
self
.
add_metadata
(
key
,
value
)
elif
type
(
self
.
metadata
[
key
])
==
list
:
self
.
metadata
[
key
][
-
1
]
+=
separator
+
value
else
:
self
.
metadata
[
key
]
+=
separator
+
value
def
metadata_exists
(
self
,
key
):
return
key
in
self
.
metadata
.
keys
()
def __eq__(self, other):
    """Equality test: ``other`` must be an instance of this object's class
    and carry the same ``lang`` and the same ``text``."""
    if not isinstance(other, self.__class__):
        return False
    same_lang = self.lang == other.lang
    return same_lang and self.text == other.text
...
...
@@ -150,8 +127,6 @@ class Definition(SubInfo):
res
=
super
().
serializable
(
prefix
)
res
[
"
lang
"
]
=
self
.
lang
res
[
self
.
__class__
.
key
]
=
self
.
text
if
len
(
self
.
metadata
.
keys
())
>
0
:
res
[
"
metadata
"
]
=
self
.
metadata
return
res
class
Translation
(
Definition
):
...
...
@@ -202,12 +177,39 @@ class Sense(SubInfo):
self
.
examples
=
[]
#liste des exemples (un texte obligatoire, source et url sont optionnels)
self
.
translations
=
[]
#liste des traductions dans d'autres langues
self
.
domain
=
None
#domaine d'usage du mot dans ce sens
self
.
metadata
=
{}
self
.
regions
=
set
()
if
definition
!=
None
:
try
:
self
.
add_def
(
wiki_lang
,
definition
)
except
ValueError
as
err
:
raise
ValueError
(
f
"
Sense.__init__() with empty definition
\n
{
err
}
"
)
def add_metadata(self, key, value):
    """Store ``value`` under ``key`` in ``self.metadata``, replacing any
    previous value.

    When the key is already present, a notice showing the old and the new
    value is printed to stdout before the replacement.

    The notice identifies the sense by its ``label``.  The earlier wording
    was copied from ``Definition`` and read ``self.text``; no visible code
    of this class sets that attribute, so an overwrite would have raised
    ``AttributeError`` instead of printing.
    """
    if self.metadata_exists(key):
        # getattr: the label may not have been assigned yet (presumably it
        # is only filled in by set_id) -- fall back to None in that case.
        sense_label = getattr(self, "label", None)
        print("Sense.add_metadata", f"for {sense_label} replaced {key}:“{self.metadata[key]}” by {key}:“{value}”")
    self.metadata[key] = value
def add_to_metadata(self, key, value):
    """Append ``value`` to the list kept under ``key`` in ``self.metadata``.

    A missing key starts a new list.  If the key currently holds a single
    non-list value (``add_metadata`` stores values as given, and
    ``extend_metadata`` goes through it for missing keys), that value is
    wrapped into a list first so that nothing is lost and ``append`` does
    not fail on a scalar.
    """
    if not self.metadata_exists(key):
        self.metadata[key] = []
    elif not isinstance(self.metadata[key], list):
        # Promote the scalar to a one-element list before appending.
        self.metadata[key] = [self.metadata[key]]
    self.metadata[key].append(value)
def add_region(self, region):
    """Register ``region`` in ``self.regions``.

    ``self.regions`` is created as a ``set`` by the constructor, so adding
    the same region twice leaves a single entry.
    """
    known_regions = self.regions
    known_regions.add(region)
# Extends what is already stored for a key; a missing key is created with
# add_metadata (plain value), not add_to_metadata (list).
def extend_metadata(self, key, value, separator=""):
    """Concatenate ``value`` onto the metadata already stored under ``key``.

    * key absent: ``value`` is stored through ``add_metadata``;
    * key holds a list: ``separator + value`` is added to its last item
      (an empty list simply receives ``value`` as its first item instead
      of failing on ``[-1]``);
    * otherwise: ``separator + value`` is added to the stored value.
    """
    if not self.metadata_exists(key):
        self.add_metadata(key, value)
        return
    stored = self.metadata[key]
    if isinstance(stored, list):
        if stored:
            stored[-1] += separator + value
        else:
            # Nothing to extend yet: start the list with the bare value.
            stored.append(value)
    else:
        self.metadata[key] += separator + value
def metadata_exists(self, key):
    """Tell whether ``key`` is currently present in ``self.metadata``."""
    known = self.metadata
    return key in known
def
set_id
(
self
,
prefix
=
None
):
if
prefix
!=
None
and
self
.
label
==
None
:
self
.
label
=
f
"
{
prefix
}
_
{
self
.
__class__
.
next_id
}
"
#l'identifiant du sens
...
...
@@ -251,7 +253,7 @@ class Sense(SubInfo):
self
.
subsenses
.
append
(
subsense
)
def
__eq__
(
self
,
other
):
res
=
isinstance
(
other
,
self
.
__class__
)
and
self
.
label
==
other
.
label
and
len
(
self
.
definitions
)
==
len
(
other
.
definitions
)
and
len
(
self
.
examples
)
==
len
(
other
.
examples
)
and
len
(
self
.
translations
)
==
len
(
other
.
translations
)
and
self
.
domain
==
other
.
domain
res
=
isinstance
(
other
,
self
.
__class__
)
and
self
.
label
==
other
.
label
and
len
(
self
.
definitions
)
==
len
(
other
.
definitions
)
and
len
(
self
.
examples
)
==
len
(
other
.
examples
)
and
len
(
self
.
translations
)
==
len
(
other
.
translations
)
and
self
.
domain
==
other
.
domain
and
len
(
other
.
metadata
)
==
len
(
self
.
metadata
)
and
other
.
regions
==
self
.
regions
i
=
0
while
res
and
i
<
len
(
self
.
examples
):
res
=
self
.
examples
[
i
]
in
other
.
examples
...
...
@@ -268,20 +270,30 @@ class Sense(SubInfo):
while
res
and
i
<
len
(
self
.
subsenses
):
res
=
self
.
subsenses
[
i
]
in
other
.
subsenses
i
+=
1
i
=
0
l
=
list
(
self
.
metadata
.
keys
())
while
res
and
i
<
len
(
l
):
res
=
l
[
i
]
in
other
.
metadata
.
keys
()
and
type
(
self
.
metadata
[
l
[
i
]])
==
type
(
other
.
metadata
[
l
[
i
]])
if
res
and
type
(
self
.
metadata
[
l
[
i
]])
==
list
and
len
(
self
.
metadata
[
l
[
i
]])
==
len
(
other
.
metadata
[
l
[
i
]]):
j
=
0
while
res
and
j
<
len
(
self
.
metadata
[
l
[
i
]]):
res
=
self
.
metadata
[
l
[
i
]][
j
]
in
other
.
metadata
[
l
[
i
]]
j
+=
1
i
+=
1
return
res
def
serializable
(
self
,
prefix
=
None
):
res
=
{}
if
self
.
domain
!=
None
:
res
[
"
Domain
"
]
=
self
.
domain
if
len
(
self
.
regions
)
>
0
:
res
[
'
Regions
'
]
=
list
(
self
.
regions
)
if
len
(
self
.
definitions
)
>
0
:
res
[
"
Definitions
"
]
=
[]
for
d
in
self
.
definitions
:
res
[
"
Definitions
"
].
append
(
d
.
serializable
(
prefix
))
if
len
(
self
.
subsenses
)
>
0
:
res
[
"
Subsenses
"
]
=
{}
for
t
in
self
.
subsenses
:
res
[
"
Subsenses
"
][
t
.
set_id
(
self
.
label
)]
=
t
.
serializable
(
prefix
)
if
len
(
self
.
metadata
.
keys
())
>
0
:
res
[
"
Metadata
"
]
=
self
.
metadata
if
len
(
self
.
examples
)
>
0
:
res
[
"
Examples
"
]
=
[]
for
e
in
self
.
examples
:
...
...
@@ -290,6 +302,10 @@ class Sense(SubInfo):
res
[
"
Translations
"
]
=
[]
for
t
in
self
.
translations
:
res
[
"
Translations
"
].
append
(
t
.
serializable
(
prefix
))
if
len
(
self
.
subsenses
)
>
0
:
res
[
"
Subsenses
"
]
=
{}
for
t
in
self
.
subsenses
:
res
[
"
Subsenses
"
][
t
.
set_id
(
self
.
label
)]
=
t
.
serializable
(
prefix
)
return
res
def
__str__
(
self
):
...
...
@@ -616,19 +632,30 @@ class Wikstraktor:
return
res
#can be overloaded
def
parse_definition
(
self
,
definition
):
return
self
.
wtp
.
parse
(
definition
).
plain_text
().
strip
()
def parse_definition(self, definition_wikitext):
    """Return the stripped plain text of a definition.

    ``definition_wikitext`` is either raw wikitext (a ``str``, parsed with
    ``self.wtp.parse``) or an already parsed ``WikiText`` object.

    Raises:
        TypeError: for any other argument type.  Previously such a call
            fell through both branches and failed with an
            ``UnboundLocalError`` on the return statement.

    NOTE(review): the ``WikiText`` class is looked up on ``self.wtp`` here,
    the way the En_en_straktor subclass does; this assumes ``self.wtp`` is
    the wikitextparser module -- confirm against the constructor.
    """
    if isinstance(definition_wikitext, str):
        definition_wikitext = self.wtp.parse(definition_wikitext)
    elif not isinstance(definition_wikitext, self.wtp.WikiText):
        raise TypeError(
            "parse_definition expects a str or a WikiText, got "
            f"{type(definition_wikitext).__name__}"
        )
    return definition_wikitext.plain_text().strip()
#can be overloaded
def get_sense_metadata(self, sense, definition_wikitext):
    """Hook for language-specific parsers: does nothing in this base
    implementation and returns ``None``.

    Subclasses may override it to read metadata from
    ``definition_wikitext`` and store it on ``sense``.
    """
    return None
#can be overloaded
def
process_definition
(
self
,
definition
,
sub_items
,
def_level
=
True
):
#does not process wk_en quotations
try
:
parsed_def
=
self
.
wtp
.
parse
(
definition
)
if
def_level
:
newSense
=
Sense
(
self
.
entry_language
,
self
.
parse_definition
(
definition
),
self
.
wiki_language
)
newSense
=
Sense
(
self
.
entry_language
,
self
.
parse_definition
(
parsed_def
),
self
.
wiki_language
)
self
.
get_sense_metadata
(
newSense
,
parsed_def
)
pattern_ex
=
self
.
constants
[
'
sense_pattern
'
][
0
][
"
ex
"
]
pattern_subdef
=
self
.
constants
[
'
sense_pattern
'
][
0
][
"
add_subdef
"
]
+
self
.
constants
[
'
sense_pattern
'
][
0
][
"
def
"
]
else
:
newSense
=
SubSense
(
self
.
entry_language
,
self
.
parse_definition
(
definition
),
self
.
wiki_language
)
newSense
=
SubSense
(
self
.
entry_language
,
self
.
parse_definition
(
parsed_def
),
self
.
wiki_language
)
self
.
get_sense_metadata
(
newSense
,
parsed_def
)
pattern_subdef
=
None
pattern_ex
=
self
.
constants
[
'
sense_pattern
'
][
0
][
"
add_subdef
"
]
+
self
.
constants
[
'
sense_pattern
'
][
0
][
"
ex
"
]
#Process examples
...
...
This diff is collapsed.
Click to expand it.
wikstraktor.sqlite
+
0
−
0
View file @
1c005715
No preview for this file type
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment