Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
wikstraktor
Manage
Activity
Members
Labels
Plan
Issues
3
Issue boards
Milestones
Wiki
External wiki
Code
Merge requests
0
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package Registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Lex gaMe
wikstraktor
Commits
8dbb5366
Commit
8dbb5366
authored
2 years ago
by
Mathieu Loiseau
Browse files
Options
Downloads
Patches
Plain Diff
Avec gestion des sous-sens (attention parser français ne prend qu'1 POS)
parent
8eafa539
No related branches found
No related tags found
No related merge requests found
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
parsers/Structure_json.json
+7
-8
7 additions, 8 deletions
parsers/Structure_json.json
parsers/en_en.py
+2
-2
2 additions, 2 deletions
parsers/en_en.py
parsers/fr_en.py
+2
-4
2 additions, 4 deletions
parsers/fr_en.py
wikstraktor.py
+49
-21
49 additions, 21 deletions
wikstraktor.py
with
60 additions
and
35 deletions
parsers/Structure_json.json
+
7
−
8
View file @
8dbb5366
...
...
@@ -15,8 +15,8 @@
"url1"
:
"https://upload.wikimedia.org/wikipedia/commons/1/19/LL-Q1860_%28eng%29-Back_ache-water.wav"
}
],
"Senses"
:
[
{
"Senses"
:
{
"v1"
:
{
"Translations"
:[
"translation1"
,
"..."
,
...
...
@@ -26,16 +26,16 @@
"Stilles Mineralwasser.jpg"
,
"..."
],
"Definition"
:
"blabla"
,
"Definition"
:
{
"lang"
:
"fr"
,
"definition"
:
"blabla"
}
,
"Examples"
:[
"blabla"
,
"blabli"
,
"blablou"
],
"
s
ubSense"
:[
"
S
ubSense
s
"
:[
{
"
subdef"
:
"blabla"
,
"
subex
"
:[
"
Definition"
:{
"lang"
:
"en"
,
"definition"
:
"whatnot"
}
,
"
Examples
"
:[
"subexa"
,
"subexb"
,
"subexz"
...
...
@@ -43,7 +43,7 @@
}
]
}
]
}
}
]
}
...
...
@@ -61,4 +61,3 @@
\"
Supplementary field for devs 5
\"
...
\"
Supplementary field for devs 10
\
*/
This diff is collapsed.
Click to expand it.
parsers/en_en.py
+
2
−
2
View file @
8dbb5366
...
...
@@ -41,7 +41,7 @@ class En_en_straktor(Wikstraktor):
global
debugEty
debugEty
+=
1
return
"
Etymology
"
+
str
(
debugEty
)
def
process_POS
(
self
,
parsedwikitext
):
pos
=
None
if
parsedwikitext
in
self
.
constants
[
'
POS
'
].
keys
():
...
...
@@ -92,7 +92,7 @@ class En_en_straktor(Wikstraktor):
if
isEx
==
0
:
newSense2
.
add_example
(
self
.
wtp
.
parse
(
j
).
plain_text
().
strip
())
if
i
==
len
(
l
)
-
1
or
l
[
i
+
1
].
pattern
==
'
\\
#
'
or
l
[
i
+
1
].
pattern
==
'
\\
##
'
:
s
ense
s
.
a
ppend
(
newSense2
)
newS
ense
.
a
dd_subsense
(
newSense2
)
i
+=
1
if
cnt
>
0
:
i
-=
1
...
...
This diff is collapsed.
Click to expand it.
parsers/fr_en.py
+
2
−
4
View file @
8dbb5366
...
...
@@ -43,7 +43,7 @@ class Fr_en_straktor(Wikstraktor):
global
debugEty
debugEty
+=
1
return
"
Etymology
"
+
str
(
debugEty
)
def
process_POS
(
self
,
parsedwikitext
):
pos
=
None
ik
=
0
...
...
@@ -100,7 +100,7 @@ class Fr_en_straktor(Wikstraktor):
if
isEx
==
0
:
newSense2
.
add_example
(
self
.
wtp
.
parse
(
j
).
plain_text
().
strip
())
if
i
==
len
(
l
)
-
1
or
l
[
i
+
1
].
pattern
==
'
\\
#
'
or
l
[
i
+
1
].
pattern
==
'
\\
##
'
:
s
ense
s
.
a
ppend
(
newSense2
)
newS
ense
.
a
dd_subsense
(
newSense2
)
i
+=
1
if
cnt
>
0
:
i
-=
1
...
...
@@ -110,5 +110,3 @@ class Fr_en_straktor(Wikstraktor):
if
__name__
==
"
__main__
"
:
ensk
=
Fr_en_straktor
()
print
(
ensk
.
fetch
(
"
test
"
),
"
entries added
"
)
This diff is collapsed.
Click to expand it.
wikstraktor.py
+
49
−
21
View file @
8dbb5366
...
...
@@ -104,7 +104,8 @@ class Example:
class
Sense
:
def
__init__
(
self
,
label
):
self
.
label
=
label
#l'identifiant du sens
self
.
definitions
=
[]
#liste des définitions (elles auront une langue et un texte)
self
.
definition
=
None
#liste des définitions (elles auront une langue et un texte)
self
.
subsenses
=
[]
#liste des sous-définitions (récursif…)
self
.
examples
=
[]
#liste des exemples (un texte obligatoire, source et url sont optionnels)
self
.
translations
=
[]
#liste des traductions dans d'autres langues
self
.
domain
=
None
#domaine d'usage du mot dans ce sens
...
...
@@ -114,8 +115,10 @@ class Sense:
def
add_def
(
self
,
lang
,
definition
):
theDef
=
Definition
(
lang
,
definition
)
if
theDef
not
in
self
.
definitions
:
self
.
definitions
.
append
(
theDef
)
if
self
.
definition
==
None
:
self
.
definition
=
theDef
elif
self
.
definition
!=
theDef
:
raise
ValueError
(
f
"
Superposition de deux définitions:
\n\t
{
self
.
definition
}
\n
remplacée par
\n\t
{
theDef
}
"
)
def
add_example
(
self
,
transcript
,
src
=
None
,
url
=
None
):
theEx
=
Example
(
transcript
,
src
,
url
)
...
...
@@ -127,8 +130,12 @@ class Sense:
if
theTranslation
not
in
self
.
translations
:
self
.
translations
.
append
(
theTranslation
)
def
add_subsense
(
self
,
subsense
):
if
subsense
not
in
self
.
subsenses
:
self
.
subsenses
.
append
(
subsense
)
def
__eq__
(
self
,
other
):
res
=
self
.
label
==
other
.
label
and
len
(
self
.
definition
s
)
==
len
(
other
.
definition
s
)
and
len
(
self
.
examples
)
==
len
(
other
.
examples
)
and
len
(
self
.
translations
)
==
len
(
other
.
translations
)
and
self
.
domain
==
other
.
domain
res
=
self
.
label
==
other
.
label
and
self
.
definition
==
other
.
definition
and
len
(
self
.
examples
)
==
len
(
other
.
examples
)
and
len
(
self
.
translations
)
==
len
(
other
.
translations
)
and
self
.
domain
==
other
.
domain
i
=
0
while
res
and
i
<
len
(
self
.
examples
):
res
=
self
.
examples
[
i
]
in
other
.
examples
...
...
@@ -138,25 +145,46 @@ class Sense:
res
=
self
.
translations
[
i
]
in
other
.
translations
i
+=
1
i
=
0
while
res
and
i
<
len
(
self
.
definition
s
):
res
=
self
.
definition
s
[
i
]
in
other
.
definition
s
while
res
and
i
<
len
(
self
.
subsense
s
):
res
=
self
.
subsense
s
[
i
]
in
other
.
subsense
s
i
+=
1
return
res
def
serializable
(
self
):
def
serializable
(
self
,
key
=
False
):
res
=
{}
res
[
self
.
label
]
=
{}
if
self
.
domain
!=
None
:
res
[
self
.
label
][
"
domain
"
]
=
self
.
domain
res
[
self
.
label
][
"
defs
"
]
=
[]
for
d
in
self
.
definitions
:
res
[
self
.
label
][
"
defs
"
].
append
(
d
.
serializable
())
res
[
self
.
label
][
"
exs
"
]
=
[]
for
e
in
self
.
examples
:
res
[
self
.
label
][
"
exs
"
].
append
(
e
.
serializable
())
res
[
self
.
label
][
"
trad
"
]
=
[]
for
t
in
self
.
translations
:
res
[
self
.
label
][
"
trad
"
].
append
(
t
.
serializable
())
if
key
:
res
[
self
.
label
]
=
{}
if
self
.
domain
!=
None
:
res
[
self
.
label
][
"
Domain
"
]
=
self
.
domain
res
[
self
.
label
][
"
Definition
"
]
=
self
.
definition
.
serializable
()
if
len
(
self
.
subsenses
)
>
0
:
res
[
self
.
label
][
"
Subsenses
"
]
=
[]
for
t
in
self
.
subsenses
:
res
[
self
.
label
][
"
Subsenses
"
].
append
(
t
.
serializable
())
if
len
(
self
.
examples
)
>
0
:
res
[
self
.
label
][
"
Examples
"
]
=
[]
for
e
in
self
.
examples
:
res
[
self
.
label
][
"
Examples
"
].
append
(
e
.
serializable
())
if
len
(
self
.
translations
)
>
0
:
res
[
self
.
label
][
"
Translations
"
]
=
[]
for
t
in
self
.
translations
:
res
[
self
.
label
][
"
Translations
"
].
append
(
t
.
serializable
())
else
:
if
self
.
domain
!=
None
:
res
[
"
Domain
"
]
=
self
.
domain
res
[
"
Definition
"
]
=
self
.
definition
.
serializable
()
if
len
(
self
.
subsenses
)
>
0
:
res
[
"
Subsenses
"
]
=
{}
for
t
in
self
.
subsenses
:
res
[
"
Subsenses
"
][
t
.
label
]
=
t
.
serializable
(
key
)
if
len
(
self
.
examples
)
>
0
:
res
[
"
Examples
"
]
=
[]
for
e
in
self
.
examples
:
res
[
"
Examples
"
].
append
(
e
.
serializable
())
if
len
(
self
.
translations
)
>
0
:
res
[
"
Translations
"
]
=
[]
for
t
in
self
.
translations
:
res
[
"
Translations
"
].
append
(
t
.
serializable
())
return
res
...
...
@@ -206,9 +234,9 @@ class Entry:
res
[
self
.
lemma
][
"
pronunciations
"
]
=
[]
for
p
in
self
.
pronunciations
:
res
[
self
.
lemma
][
"
pronunciations
"
].
append
(
p
.
serializable
())
res
[
self
.
lemma
][
"
senses
"
]
=
[]
res
[
self
.
lemma
][
"
senses
"
]
=
{}
for
s
in
self
.
senses
:
res
[
self
.
lemma
][
"
senses
"
]
.
append
(
s
.
serializable
(
)
)
res
[
self
.
lemma
][
"
senses
"
]
[
s
.
label
]
=
s
.
serializable
(
False
)
return
res
def
__str__
(
self
):
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment