Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
wikstraktor
Manage
Activity
Members
Labels
Plan
Issues
3
Issue boards
Milestones
Wiki
External wiki
Code
Merge requests
0
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package Registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Lex gaMe
wikstraktor
Commits
27514d48
Commit
27514d48
authored
2 years ago
by
Mathieu Loiseau
Browse files
Options
Downloads
Patches
Plain Diff
approximative POS processing
parent
46ab0653
No related branches found
No related tags found
No related merge requests found
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
parsers/en_constants.py
+25
-2
25 additions, 2 deletions
parsers/en_constants.py
parsers/en_en.py
+11
-0
11 additions, 0 deletions
parsers/en_en.py
wikstraktor.py
+36
-19
36 additions, 19 deletions
wikstraktor.py
with
72 additions
and
21 deletions
parsers/en_constants.py
+
25
−
2
View file @
27514d48
string_values
=
{
string_values
=
{
"
ety
"
:
"
Etymology
"
,
"
ety
"
:
"
Etymology
"
,
"
ipa
"
:
"
Pronunciation
"
,
"
pro
"
:
"
Pronunciation
"
,
"
en
"
:
"
English
"
,
"
en
"
:
"
English
"
,
"
fr
"
:
"
French
"
,
"
fr
"
:
"
French
"
,
"
t_ipa
"
:
"
IPA
"
,
#template for transcription
"
t_ipa
"
:
"
IPA
"
,
#template for transcription
"
t_snd
"
:
"
audio
"
,
#template for audio
"
t_snd
"
:
"
audio
"
,
#template for audio
"
t_acc
"
:
"
a
"
#template for accents
"
t_acc
"
:
"
a
"
,
#template for accents
"
POS
"
:
{
#https://en.wiktionary.org/wiki/Wiktionary:POS
"
Adjective
"
:
"
Adj
"
,
"
Adverb
"
:
"
Adv
"
,
"
Ambiposition
"
:
"
Ambip
"
,
"
Article
"
:
"
Art
"
,
"
Circumposition
"
:
"
Circump
"
,
"
Classifier
"
:
"
Class
"
,
"
Conjunction
"
:
"
Conj
"
,
"
Contraction
"
:
"
Cont
"
,
"
Counter
"
:
"
Count
"
,
"
Determiner
"
:
"
Det
"
,
"
Ideophone
"
:
"
Ideophone
"
,
"
Interjection
"
:
"
Interj
"
,
"
Noun
"
:
"
N
"
,
"
Numeral
"
:
"
Num
"
,
"
Participle
"
:
"
Part
"
,
"
Particle
"
:
"
Particle
"
,
"
Postposition
"
:
"
Postp
"
,
"
Preposition
"
:
"
Prep
"
,
"
Pronoun
"
:
"
Pro
"
,
"
Proper noun
"
:
"
NP
"
,
"
Verb
"
:
"
V
"
# TODO: compléter
}
}
}
This diff is collapsed.
Click to expand it.
parsers/en_en.py
+
11
−
0
View file @
27514d48
...
@@ -4,6 +4,8 @@ from pronunciation import Pronunciation
...
@@ -4,6 +4,8 @@ from pronunciation import Pronunciation
from
parsers.en_constants
import
string_values
from
parsers.en_constants
import
string_values
debugEty
=
0
class
En_en_straktor
(
Wikstraktor
):
class
En_en_straktor
(
Wikstraktor
):
def
__init__
(
self
):
def
__init__
(
self
):
super
().
__init__
()
super
().
__init__
()
...
@@ -40,6 +42,15 @@ class En_en_straktor(Wikstraktor):
...
@@ -40,6 +42,15 @@ class En_en_straktor(Wikstraktor):
print
(
pronunciations
[
0
],
pronunciations
[
1
])
print
(
pronunciations
[
0
],
pronunciations
[
1
])
return
pronunciations
return
pronunciations
def
process_etymology
(
self
,
etyContent
):
global
debugEty
debugEty
+=
1
return
"
Etymology
"
+
str
(
debugEty
)
def
process_senses
(
self
,
sensesContent
):
import
random
as
r
return
"
Cool
"
+
r
.
choice
([
'
a
'
,
'
b
'
,
'
c
'
,
'
d
'
,
'
e
'
,
'
f
'
,
'
g
'
])
if
__name__
==
"
__main__
"
:
if
__name__
==
"
__main__
"
:
ensk
=
En_en_straktor
()
ensk
=
En_en_straktor
()
print
(
ensk
.
fetch
(
"
test
"
),
"
entries added
"
)
print
(
ensk
.
fetch
(
"
test
"
),
"
entries added
"
)
This diff is collapsed.
Click to expand it.
wikstraktor.py
+
36
−
19
View file @
27514d48
...
@@ -9,14 +9,20 @@ class Entry:
...
@@ -9,14 +9,20 @@ class Entry:
def
__init__
(
self
,
lemma
):
def
__init__
(
self
,
lemma
):
self
.
lemma
=
lemma
self
.
lemma
=
lemma
def
set_pronunciation
(
self
,
pron
):
def
set_pronunciation
s
(
self
,
pron
):
if
isinstance
(
pron
,
Pronunciation
):
if
isinstance
(
pron
,
Pronunciation
):
self
.
pronunciation
=
pron
self
.
pronunciation
s
=
pron
else
:
else
:
raise
ValueError
(
f
"
Entry.set_pronunciation:
{
pron
}
is not a Pronunciation object (
{
pron
.
__class__
.
__name__
}
).
"
)
raise
ValueError
(
f
"
Entry.set_pronunciation:
{
pron
}
is not a Pronunciation object (
{
pron
.
__class__
.
__name__
}
).
"
)
def
set_POS
(
self
,
pos
):
self
.
pos
=
pos
def
__str__
(
self
):
def
__str__
(
self
):
res
=
f
"
{
self
.
lemma
}
(
{
self
.
cat
}
)
"
res
=
f
"
{
self
.
lemma
}
(
{
self
.
pos
}
)
\n
"
for
p
in
self
.
pronunciations
:
res
+=
f
"
{
str
(
p
)
}
\n
"
return
res
class
ParserContext
:
class
ParserContext
:
def
__init__
(
self
,
entry
):
def
__init__
(
self
,
entry
):
...
@@ -30,8 +36,8 @@ class ParserContext:
...
@@ -30,8 +36,8 @@ class ParserContext:
res
=
self
.
context
[
-
1
][
"
wiki
"
].
level
res
=
self
.
context
[
-
1
][
"
wiki
"
].
level
return
res
return
res
def
push
(
self
,
wiki_context
,
entry_context
=
None
):
def
push
(
self
,
wiki_context
):
self
.
context
.
append
({
"
wiki
"
:
wiki_context
,
"
entry_info
"
:
entry_context
})
self
.
context
.
append
({
"
wiki
"
:
wiki_context
})
def
pop
(
self
):
def
pop
(
self
):
return
self
.
context
.
pop
()
return
self
.
context
.
pop
()
...
@@ -42,22 +48,22 @@ class ParserContext:
...
@@ -42,22 +48,22 @@ class ParserContext:
else
:
else
:
self
.
context
[
-
1
][
'
wiki
'
]
=
wiki_context
self
.
context
[
-
1
][
'
wiki
'
]
=
wiki_context
def
set_top_entry_info
(
self
,
entry_context
):
def
set_top_entry_info
(
self
,
key
,
entry_context
):
if
len
(
self
.
context
)
==
0
:
if
len
(
self
.
context
)
==
0
:
raise
ValueError
(
f
"
Trying to set up entry info (
{
entry_context
}
), in an empty parserContext.
"
)
raise
ValueError
(
f
"
Trying to set up entry info (
{
entry_context
}
), in an empty parserContext.
"
)
else
:
else
:
self
.
context
[
-
1
][
'
entry_info
'
]
=
entry_context
self
.
context
[
-
1
][
key
]
=
entry_context
def
create_entry
(
self
):
def
create_entry
(
self
):
res
=
Entry
(
self
.
lemma
)
res
=
Entry
(
self
.
lemma
)
for
l
in
self
.
context
:
for
l
in
self
.
context
:
if
l
[
'
entry_inf
o
'
]
=
=
None
:
if
l
[
'
pr
o
'
]
!
=
None
:
pass
res
.
set_pronunciations
(
l
[
'
entry_info
'
])
el
if
l
[
'
e
ntry_info
'
].
__class__
.
__name__
==
"
Pronunciati
on
"
:
if
l
[
'
e
ty
'
]
!=
N
on
e
:
res
.
set_pronunciation
(
l
[
'
entry_info
'
])
pass
#On ignore l'étymologie pour le moment
els
e
:
if
l
[
'
POS
'
]
!=
Non
e
:
# TODO: Ajouter les autres types
res
.
set_pos
(
l
[
'
POS
'
])
pas
s
# TODO: Ajouter les autres type
s
return
res
return
res
def
debug_top
(
self
):
def
debug_top
(
self
):
...
@@ -65,7 +71,13 @@ class ParserContext:
...
@@ -65,7 +71,13 @@ class ParserContext:
if
len
(
self
.
context
)
==
0
:
if
len
(
self
.
context
)
==
0
:
res
+=
"
0
"
res
+=
"
0
"
else
:
else
:
res
+=
f
"
{
len
(
self
.
context
)
}
,
{
self
.
context
[
-
1
][
'
wiki
'
].
level
*
'
#
'
}
{
self
.
context
[
-
1
][
'
wiki
'
].
title
}
/
{
str
(
self
.
context
[
-
1
][
'
entry_info
'
])
}
"
info
=
""
for
k
,
v
in
self
.
context
[
-
1
].
items
():
if
k
!=
'
wiki
'
:
if
info
!=
""
:
info
+=
"
\n\t\t\t
"
info
+=
f
"
{
k
}
→
{
str
(
v
)
}
"
res
+=
f
"
{
len
(
self
.
context
)
*
'
=
'
}
{
self
.
context
[
-
1
][
'
wiki
'
].
level
*
'
#
'
}
{
self
.
context
[
-
1
][
'
wiki
'
].
title
}
/
{
info
}
"
return
res
return
res
...
@@ -125,8 +137,13 @@ class Wikstraktor:
...
@@ -125,8 +137,13 @@ class Wikstraktor:
while
self
.
parserContext
.
get_level
()
>
s
.
level
:
while
self
.
parserContext
.
get_level
()
>
s
.
level
:
self
.
parserContext
.
pop
()
self
.
parserContext
.
pop
()
self
.
parserContext
.
set_top_wiki
(
s
)
self
.
parserContext
.
set_top_wiki
(
s
)
if
s
.
title
==
self
.
constants
[
'
ipa
'
]:
if
s
.
title
==
self
.
constants
[
'
pro
'
]:
self
.
parserContext
.
set_top_entry_info
(
self
.
process_pronunciation
(
self
.
wtp
.
parse
(
s
.
contents
)))
self
.
parserContext
.
set_top_entry_info
(
'
pro
'
,
self
.
process_pronunciation
(
self
.
wtp
.
parse
(
s
.
contents
)))
elif
self
.
constants
[
'
ety
'
]
in
s
.
title
:
self
.
parserContext
.
set_top_entry_info
(
'
ety
'
,
self
.
process_etymology
(
self
.
wtp
.
parse
(
s
.
contents
)))
elif
s
.
title
in
self
.
constants
[
'
POS
'
].
keys
():
self
.
parserContext
.
set_top_entry_info
(
'
POS
'
,
self
.
constants
[
'
POS
'
][
s
.
title
])
self
.
parserContext
.
set_top_entry_info
(
'
senses
'
,
self
.
process_senses
(
self
.
wtp
.
parse
(
s
.
contents
)))
print
(
self
.
parserContext
.
debug_top
())
print
(
self
.
parserContext
.
debug_top
())
print
(
"
ok
"
)
print
(
"
ok
"
)
...
@@ -139,8 +156,8 @@ class Wikstraktor:
...
@@ -139,8 +156,8 @@ class Wikstraktor:
if
__name__
==
"
__main__
"
:
if
__name__
==
"
__main__
"
:
e
=
Wikstraktor
.
get_instance
(
'
en
'
,
"
en
"
)
e
=
Wikstraktor
.
get_instance
(
'
en
'
,
"
en
"
)
print
(
e
.
get_file_url
(
"
File:LL-Q1860 (eng)-Nattes à chat----parent.wav
"
))
#
print(e.get_file_url("File:LL-Q1860 (eng)-Nattes à chat----parent.wav"))
print
(
e
.
get_file_url
(
"
File:LL-Q1860 (eng)-Nattes à chat-parent.wav
"
))
#
print(e.get_file_url("File:LL-Q1860 (eng)-Nattes à chat-parent.wav"))
print
(
e
.
fetch
(
"
test
"
),
"
entries added
"
)
print
(
e
.
fetch
(
"
test
"
),
"
entries added
"
)
# site = pywikibot.Site(f'wiktionary:en')
# site = pywikibot.Site(f'wiktionary:en')
# p = pywikibot.FilePage(site, "File:LL-Q1860 (eng)-Nattes à chat----parent.wav")
# p = pywikibot.FilePage(site, "File:LL-Q1860 (eng)-Nattes à chat----parent.wav")
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment