Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
wikstraktor
Manage
Activity
Members
Labels
Plan
Issues
3
Issue boards
Milestones
Wiki
External wiki
Code
Merge requests
0
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package Registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Lex gaMe
wikstraktor
Commits
f391b7f3
Commit
f391b7f3
authored
2 years ago
by
Enzo Simonnet
Browse files
Options
Downloads
Patches
Plain Diff
Homogénéisation des pos (rapide)
parent
1c005715
No related branches found
No related tags found
No related merge requests found
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
parsers/fr_constants.py
+54
-15
54 additions, 15 deletions
parsers/fr_constants.py
test_wikstraktor.py
+1
-1
1 addition, 1 deletion
test_wikstraktor.py
wikstraktor.sqlite
+0
-0
0 additions, 0 deletions
wikstraktor.sqlite
with
55 additions
and
16 deletions
parsers/fr_constants.py
+
54
−
15
View file @
f391b7f3
...
@@ -9,11 +9,50 @@ string_values = {
...
@@ -9,11 +9,50 @@ string_values = {
"
t_ipa
"
:
"
pron
"
,
#template for transcription
"
t_ipa
"
:
"
pron
"
,
#template for transcription
"
t_snd
"
:
"
écouter
"
,
#template for audio
"
t_snd
"
:
"
écouter
"
,
#template for audio
"
t_acc
"
:[
"
US
"
,
"
UK
"
],
#template for accents
"
t_acc
"
:[
"
US
"
,
"
UK
"
],
#template for accents
"
regions
"
:{
"
UK
"
:
"
United Kingdom
"
,
"
United Kingdom
"
:
"
United Kingdom
"
,
"
British
"
:
"
Great Britain
"
,
"
GB
"
:
"
Great Britain
"
,
"
Great Britain
"
:
"
Great Britain
"
,
"
Scot
"
:
"
Scotland
"
,
"
Scottish
"
:
"
Scotland
"
,
"
Scotland
"
:
"
Scotland
"
,
"
Irl
"
:
"
Ireland
"
,
"
Irish
"
:
"
Ireland
"
,
"
Ireland
"
:
"
Ireland
"
,
"
Ulst
"
:
"
Northern Ireland
"
,
"
Ulster
"
:
"
Northern Ireland
"
,
"
Northern Ireland
"
:
"
Northern Ireland
"
,
"
Wls
"
:
"
Wales
"
,
"
Welsh
"
:
"
Wales
"
,
"
Wales
"
:
"
Wales
"
,
"
English
"
:
"
England
"
,
"
Eng
"
:
"
England
"
,
"
En
"
:
"
England
"
,
"
England
"
:
"
England
"
,
"
Canada
"
:
"
Canada
"
,
"
Canadian
"
:
"
Canada
"
,
'
North American
'
:
'
North America
'
,
'
North America
'
:
"
North America
"
,
"
US
"
:
"
United States of America
"
,
"
USA
"
:
"
United States of America
"
,
"
United States
"
:
"
United States of America
"
,
"
United States of America
"
:
"
United States of America
"
,
"
NZ
"
:
"
New Zealand
"
,
"
New Zealand
"
:
"
New Zealand
"
,
"
Au
"
:
"
Australia
"
,
"
AU
"
:
"
Australia
"
,
"
Australia
"
:
"
Australia
"
,
"
India
"
:
"
India
"
,
"
Indian
"
:
"
India
"
,
"
Nigeria
"
:
"
Nigeria
"
,
"
Nigerian
"
:
"
Nigeria
"
},
"
sense_pattern
"
:[
## structure(s) for sense patterns add_subdef is to be added to def patterns
"
sense_pattern
"
:[
## structure(s) for sense patterns add_subdef is to be added to def patterns
{
"
def
"
:
"
\\
#
"
,
"
ex
"
:
"
\\
#
\\
*
"
,
"
add_subdef
"
:
"
\\
#
"
}
{
"
def
"
:
"
\\
#
"
,
"
ex
"
:
"
\\
#
\\
*
"
,
"
add_subdef
"
:
"
\\
#
"
}
],
],
"
POS
"
:{
"
POS
"
:{
"
a
dj
ectif
"
:[
"
adjectif
"
,
"
adjectif qualificatif
"
,
"
adj
"
],
"
A
dj
"
:[
"
adjectif
"
,
"
adjectif qualificatif
"
,
"
adj
"
],
"
adjectif démonstratif
"
:[
"
adjectif démonstratif
"
,
"
adj-dém
"
,
"
adjectif dém
"
],
"
adjectif démonstratif
"
:[
"
adjectif démonstratif
"
,
"
adj-dém
"
,
"
adjectif dém
"
],
"
adjectif exclamatif
"
:[
"
adjectif exclamatif
"
,
"
adj-excl
"
,
"
adjectif exc
"
],
"
adjectif exclamatif
"
:[
"
adjectif exclamatif
"
,
"
adj-excl
"
,
"
adjectif exc
"
],
"
adjectif indéfini
"
:[
"
adjectif indéfini
"
,
"
adjectif ind
"
,
"
adj-indéf
"
],
"
adjectif indéfini
"
:[
"
adjectif indéfini
"
,
"
adjectif ind
"
,
"
adj-indéf
"
],
...
@@ -21,47 +60,47 @@ string_values = {
...
@@ -21,47 +60,47 @@ string_values = {
"
adjectif numéral
"
:[
"
adjectif numéral
"
,
"
adjectif num
"
,
"
adj-num
"
],
"
adjectif numéral
"
:[
"
adjectif numéral
"
,
"
adjectif num
"
,
"
adj-num
"
],
"
adjectif possessif
"
:[
"
adjectif possessif
"
,
"
adjectif pos
"
,
"
adj-pos
"
],
"
adjectif possessif
"
:[
"
adjectif possessif
"
,
"
adjectif pos
"
,
"
adj-pos
"
],
"
adjectif relatif
"
:[
"
adjectif relatif
"
,
"
adjectif rel
"
,
"
adj-rel
"
],
"
adjectif relatif
"
:[
"
adjectif relatif
"
,
"
adjectif rel
"
,
"
adj-rel
"
],
"
a
dv
erbe
"
:[
"
Adverbe
"
,
"
adv
"
],
"
A
dv
"
:[
"
Adverbe
"
,
"
adv
"
],
"
adverbe indéfini
"
:[
"
adverbe indéfini
"
,
"
adv-ind
"
,
"
adverbe ind
"
],
"
adverbe indéfini
"
:[
"
adverbe indéfini
"
,
"
adv-ind
"
,
"
adverbe ind
"
],
"
adverbe interrogatif
"
:[
"
adverbe interrogatif
"
,
"
dverbe int
"
,
"
adv-int
"
],
"
adverbe interrogatif
"
:[
"
adverbe interrogatif
"
,
"
dverbe int
"
,
"
adv-int
"
],
"
adverbe pronominal
"
:[
"
adverbe pronominal
"
,
"
adv-pron
"
,
"
adverbe pro
"
],
"
adverbe pronominal
"
:[
"
adverbe pronominal
"
,
"
adv-pron
"
,
"
adverbe pro
"
],
"
adverbe relatif
"
:[
"
adverbe relatif
"
,
"
adv-rel
"
,
"
adverbe rel
"
],
"
adverbe relatif
"
:[
"
adverbe relatif
"
,
"
adv-rel
"
,
"
adverbe rel
"
],
"
a
ff
ixe
"
:[
"
affixe
"
,
"
aff
"
],
"
A
ff
"
:[
"
affixe
"
,
"
aff
"
],
"
a
rt
icle
"
:[
"
article
"
,
"
art
"
],
"
A
rt
"
:[
"
article
"
,
"
art
"
],
"
article défini
"
:[
"
article défini
"
,
"
article déf
"
,
"
art-déf
"
],
"
article défini
"
:[
"
article défini
"
,
"
article déf
"
,
"
art-déf
"
],
"
article indéfini
"
:[
"
article indéfini
"
,
"
art-indéf
"
,
"
article ind
"
],
"
article indéfini
"
:[
"
article indéfini
"
,
"
art-indéf
"
,
"
article ind
"
],
"
article partitif
"
:[
"
article partitif
"
,
"
art-part
"
,
"
article par
"
],
"
article partitif
"
:[
"
article partitif
"
,
"
art-part
"
,
"
article par
"
],
"
circonfixe
"
:[
"
circonfixe
"
,
"
circon
"
,
"
circonf
"
],
"
circonfixe
"
:[
"
circonfixe
"
,
"
circon
"
,
"
circonf
"
],
"
classificateur
"
:[
"
classificateur
"
,
"
class
"
,
"
classif
"
],
"
classificateur
"
:[
"
classificateur
"
,
"
class
"
,
"
classif
"
],
"
c
onj
onction
"
:[
"
conjonction
"
,
"
conj
"
],
"
C
onj
"
:[
"
conjonction
"
,
"
conj
"
],
"
conjonction de coordination
"
:[
"
conjonction de coordination
"
,
"
conj-coord
"
,
"
conjonction coo
"
],
"
conjonction de coordination
"
:[
"
conjonction de coordination
"
,
"
conj-coord
"
,
"
conjonction coo
"
],
"
copule
"
:[
"
copule
"
],
"
copule
"
:[
"
copule
"
],
"
déterminan
t
"
:[
"
déterminant
"
,
"
dét
"
],
"
De
t
"
:[
"
déterminant
"
,
"
dét
"
],
"
enclitique
"
:[
"
cnclitique
"
,
"
encl
"
],
"
enclitique
"
:[
"
cnclitique
"
,
"
encl
"
],
"
gismu
"
:[
"
gismu
"
],
"
gismu
"
:[
"
gismu
"
],
"
infixe
"
:[
"
infixe
"
,
"
inf
"
],
"
infixe
"
:[
"
infixe
"
,
"
inf
"
],
"
interfixe
"
:[
"
interfixe
"
,
"
interf
"
],
"
interfixe
"
:[
"
interfixe
"
,
"
interf
"
],
"
i
nterj
ection
"
:[
"
interjection
"
,
"
interj
"
],
"
I
nterj
"
:[
"
interjection
"
,
"
interj
"
],
"
lettre
"
:[
"
lettre
"
],
"
lettre
"
:[
"
lettre
"
],
"
locution
"
:[
"
locution
"
,
"
loc
"
],
"
locution
"
:[
"
locution
"
,
"
loc
"
],
"
locution-phrase
"
:[
"
locution-phrase
"
,
"
loc-phr
"
,
"
phrase locution
"
,
"
phrase
"
,
"
locution-phrase
"
],
"
locution-phrase
"
:[
"
locution-phrase
"
,
"
loc-phr
"
,
"
phrase locution
"
,
"
phrase
"
,
"
locution-phrase
"
],
"
nom commun
"
:[
"
nom
"
,
"
nom commun
"
,
"
substantif
"
],
"
N
"
:[
"
nom
"
,
"
nom commun
"
,
"
substantif
"
],
"
nom de famille
"
:[
"
nom de famille
"
,
"
nom-fam
"
],
"
nom de famille
"
:[
"
nom de famille
"
,
"
nom-fam
"
],
"
nom propre
"
:[
"
nom propre
"
,
"
nom-pr
"
],
"
NP
"
:[
"
nom propre
"
,
"
nom-pr
"
],
"
nom scientifique
"
:[
"
nom scientifique
"
,
"
nom-sciences
"
,
"
nom scient
"
,
"
nom science
"
],
"
nom scientifique
"
:[
"
nom scientifique
"
,
"
nom-sciences
"
,
"
nom scient
"
,
"
nom science
"
],
"
n
um
éral
"
:[
"
numéral
"
,
"
num
"
,
"
numér
"
],
"
N
um
"
:[
"
numéral
"
,
"
num
"
,
"
numér
"
],
"
onomatopée
"
:[
"
onomatopée
"
,
"
onoma
"
,
"
onom
"
],
"
onomatopée
"
:[
"
onomatopée
"
,
"
onoma
"
,
"
onom
"
],
"
p
artic
u
le
"
:[
"
particule
"
,
"
part
"
],
"
P
article
"
:[
"
particule
"
,
"
part
"
],
"
particule numérale
"
:[
"
particule numérale
"
,
"
part-num
"
,
"
particule num
"
],
"
particule numérale
"
:[
"
particule numérale
"
,
"
part-num
"
,
"
particule num
"
],
"
patronyme
"
:[
"
patronyme
"
],
"
patronyme
"
:[
"
patronyme
"
],
"
p
ostp
osition
"
:[
"
postposition
"
,
"
postpos
"
,
"
post
"
],
"
P
ostp
"
:[
"
postposition
"
,
"
postpos
"
,
"
post
"
],
"
pré-nom
"
:[
"
pré-nom
"
],
"
pré-nom
"
:[
"
pré-nom
"
],
"
pré-verbe
"
:[
"
pré-verbe
"
],
"
pré-verbe
"
:[
"
pré-verbe
"
],
"
préfixe
"
:[
"
préfixe
"
,
"
préf
"
],
"
préfixe
"
:[
"
préfixe
"
,
"
préf
"
],
"
prénom
"
:[
"
prénom
"
],
"
prénom
"
:[
"
prénom
"
],
"
préposition
"
:[
"
préposition
"
,
"
prép
"
],
"
Prep
"
:[
"
préposition
"
,
"
prép
"
],
"
proclitique
"
:[
"
proclitique
"
,
"
procl
"
],
"
proclitique
"
:[
"
proclitique
"
,
"
procl
"
],
"
p
ro
nom
"
:[
"
pronom
"
],
"
P
ro
"
:[
"
pronom
"
],
"
pronom démonstratif
"
:[
"
pronom démonstratif
"
,
"
pronom dém
"
,
"
pronom-dém
"
],
"
pronom démonstratif
"
:[
"
pronom démonstratif
"
,
"
pronom dém
"
,
"
pronom-dém
"
],
"
pronom indéfini
"
:[
"
pronom indéfini
"
,
"
pronom ind
"
,
"
pronom-indéf
"
],
"
pronom indéfini
"
:[
"
pronom indéfini
"
,
"
pronom ind
"
,
"
pronom-indéf
"
],
"
pronom interrogatif
"
:[
"
pronom interrogatif
"
,
"
pronom int
"
,
"
pronom-int
"
],
"
pronom interrogatif
"
:[
"
pronom interrogatif
"
,
"
pronom int
"
,
"
pronom-int
"
],
...
@@ -78,6 +117,6 @@ string_values = {
...
@@ -78,6 +117,6 @@ string_values = {
"
symbole
"
:[
"
symbole
"
,
"
symb
"
],
"
symbole
"
:[
"
symbole
"
,
"
symb
"
],
"
variante par contrainte typographique
"
:[
"
variante typographique
"
,
"
variante typo
"
,
"
variante par contrainte typographique
"
,
"
var-typo
"
],
"
variante par contrainte typographique
"
:[
"
variante typographique
"
,
"
variante typo
"
,
"
variante par contrainte typographique
"
,
"
var-typo
"
],
"
verbe pronominal
"
:[
"
verbe pronominal
"
,
"
verb-pr
"
,
"
verbe pr
"
],
"
verbe pronominal
"
:[
"
verbe pronominal
"
,
"
verb-pr
"
,
"
verbe pr
"
],
"
verbe
"
:[
"
verbe
"
,
"
verb
"
]
"
V
"
:[
"
verbe
"
,
"
verb
"
]
}
}
}
}
This diff is collapsed.
Click to expand it.
test_wikstraktor.py
+
1
−
1
View file @
f391b7f3
...
@@ -5,7 +5,7 @@ if __name__ == "__main__":
...
@@ -5,7 +5,7 @@ if __name__ == "__main__":
# print(e.get_file_url("File:LL-Q1860 (eng)-Nattes à chat----parent.wav"))
# print(e.get_file_url("File:LL-Q1860 (eng)-Nattes à chat----parent.wav"))
# print(e.get_file_url("File:LL-Q1860 (eng)-Nattes à chat-parent.wav"))
# print(e.get_file_url("File:LL-Q1860 (eng)-Nattes à chat-parent.wav"))
#e.fetch("water")
#e.fetch("water")
f
.
fetch
(
"
water
"
)
f
.
fetch
(
"
blue
"
)
# print(e.fetch("test"), "entries added")
# print(e.fetch("test"), "entries added")
#print(e)
#print(e)
file_path
=
'
test.json
'
file_path
=
'
test.json
'
...
...
This diff is collapsed.
Click to expand it.
wikstraktor.sqlite
+
0
−
0
View file @
f391b7f3
No preview for this file type
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment