Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
EWOK
Manage
Activity
Members
Labels
Plan
Issues
2
Issue boards
Milestones
Wiki
External wiki
Code
Merge requests
0
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package Registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Lex gaMe
EWOK
Commits
482bb453
Commit
482bb453
authored
1 year ago
by
Empiriker
Browse files
Options
Downloads
Patches
Plain Diff
load templates from dumps on server startup
parent
e000302a
No related branches found
Branches containing commit
No related tags found
No related merge requests found
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
src/app.py
+3
-0
3 additions, 0 deletions
src/app.py
src/load_templates.py
+82
-0
82 additions, 0 deletions
src/load_templates.py
src/wiktextract_context.py
+27
-0
27 additions, 0 deletions
src/wiktextract_context.py
src/wiktextract_wrapper.py
+4
-21
4 additions, 21 deletions
src/wiktextract_wrapper.py
with
116 additions
and
21 deletions
src/app.py
+
3
−
0
View file @
482bb453
...
...
@@ -6,6 +6,9 @@ from flask_cors import CORS
import
config
from
get_wikicode
import
get_wikicode
from
wiktextract_wrapper
import
Wiktextract
from
load_templates
import
load_templates
load_templates
()
app
=
Flask
(
__name__
)
CORS
(
app
)
...
...
This diff is collapsed.
Click to expand it.
src/load_templates.py
0 → 100644
+
82
−
0
View file @
482bb453
from
wiktextract.wiktionary
import
parse_wiktionary
from
wiktextract_context
import
get_wiktextract_context
import
os
import
re
import
threading
import
time
# Directory that is listed when looking for Wiktionary dump files. The path is
# relative, so it is resolved against the process's current working directory.
DUMPS_DIR = "dumps"
def start_progress_indicator():
    """Start the background thread that prints the elapsed time.

    Clears the ``stop`` flag stored as an attribute on
    ``time_elapsed_indicator`` and runs that function in a new thread.

    Returns:
        The started ``threading.Thread``. Hand it to
        ``stop_progress_indicator`` to end the indicator.
    """
    time_elapsed_indicator.stop = False
    # Daemon thread: if the caller fails before it gets a chance to stop the
    # indicator, the never-ending print loop must not keep the interpreter
    # alive after the main thread has exited.
    indicator_thread = threading.Thread(
        target=time_elapsed_indicator,
        daemon=True,
    )
    indicator_thread.start()
    return indicator_thread
def stop_progress_indicator(indicator_thread):
    """Ask the elapsed-time indicator to finish and wait until it has.

    Args:
        indicator_thread: The thread object returned by
            ``start_progress_indicator``.
    """
    # The flag lives on the worker function itself; the worker's loop checks
    # it on every iteration.
    time_elapsed_indicator.stop = True
    # Block until the worker has left its loop and printed its last line.
    indicator_thread.join()
def time_elapsed_indicator():
    """Print a once-per-second "Time elapsed" line until told to stop.

    The loop runs while the ``stop`` attribute on this function is falsy;
    the attribute is set by ``start_progress_indicator`` and
    ``stop_progress_indicator``. Each update starts with a carriage return
    and has no trailing newline so it overwrites the previous one; a final
    line terminated by a newline is printed once the loop ends.
    """
    start_time = time.time()
    # Defined up front so the final print also works when the stop flag was
    # already set before the first loop iteration.
    elapsed_time = 0.0
    while not time_elapsed_indicator.stop:
        elapsed_time = time.time() - start_time
        # Without a newline a line-buffered stdout would hold the text back,
        # so flush explicitly to make the in-place update visible.
        print(f"\rTime elapsed: {elapsed_time:.2f} seconds", end="", flush=True)
        time.sleep(1)
    print(f"\rTime elapsed: {elapsed_time:.2f} seconds")
def get_most_recent_file(directory, lang_code):
    """Return the path of the newest Wiktionary dump for ``lang_code``.

    A file qualifies when its whole name is
    ``<lang_code>wiktionary-<digits>-pages-articles-multistream.xml.bz2``.
    The digit group is compared numerically and the largest one wins.

    Args:
        directory: Directory whose entries are inspected (not recursive).
        lang_code: Language prefix of the dump file name, taken literally.

    Returns:
        ``directory`` joined with the winning file name, or ``None`` when no
        entry qualifies.

    Raises:
        OSError: Propagated from ``os.listdir`` (e.g. missing directory).
    """
    # Raw string + re.escape: the dots and the language code are literals,
    # and "\d" must not be interpreted as a (deprecated) string escape.
    pattern = re.compile(
        re.escape(lang_code)
        + r"wiktionary-(\d+)-pages-articles-multistream\.xml\.bz2"
    )
    candidates = []
    for name in os.listdir(directory):
        # fullmatch rejects names with trailing text such as partial
        # downloads ("...xml.bz2.part").
        found = pattern.fullmatch(name)
        if found:
            candidates.append((int(found.group(1)), name))
    if not candidates:
        return None
    _, newest = max(candidates)
    return os.path.join(directory, newest)
def load_templates(wiktlang: str):
    """Reload the page database for ``wiktlang`` from the newest dump file.

    Looks up the most recent dump in ``DUMPS_DIR``, empties the ``pages``
    table of the context's database and runs ``parse_wiktionary`` over the
    dump restricted to namespace ids 10 and 828 (in MediaWiki these are the
    Template and Module namespaces). An elapsed-time indicator is printed
    while the work is in progress.

    Args:
        wiktlang: Language code of the Wiktionary edition; used both to pick
            the dump file and to build the wiktextract context.

    Raises:
        ValueError: If no dump file for ``wiktlang`` is found.
    """
    print(f"Loading templates for {wiktlang}...")

    # Find the dump before touching the database: a missing dump must not
    # leave the existing pages table wiped with nothing to refill it.
    dump_file = get_most_recent_file(DUMPS_DIR, wiktlang)
    if not dump_file:
        raise ValueError(f"Could not find dump file for {wiktlang}.")

    indicator_thread = start_progress_indicator()
    try:
        wxr = get_wiktextract_context(wiktlang)
        wxr.wtp.db_conn.execute("DELETE FROM pages")
        wxr.wtp.db_conn.commit()

        parse_wiktionary(
            wxr,
            dump_file,
            num_processes=1,
            phase1_only=True,
            namespace_ids={10, 828},
            out_f=None,  # type: ignore
        )
        wxr.wtp.db_conn.commit()
    finally:
        # Always stop the indicator, otherwise its thread would keep
        # printing (and keep running) after a failure.
        stop_progress_indicator(indicator_thread)
    print("Done loading templates.")
This diff is collapsed.
Click to expand it.
src/wiktextract_context.py
0 → 100644
+
27
−
0
View file @
482bb453
from
wiktextract
import
(
WiktextractContext
,
WiktionaryConfig
,
)
from
wikitextprocessor
import
Wtp
from
typing
import
Optional
def get_wiktextract_context(wiktlang: str, wordlang: Optional[str] = None):
    """Build a ``WiktextractContext`` for one Wiktionary edition.

    Args:
        wiktlang: Language code of the Wiktionary edition. It is passed as
            ``dump_file_lang_code`` and is part of the database file name
            ``./sqlite-<wiktlang>.db``.
        wordlang: Optional language code; when truthy it is passed as the
            single entry of ``capture_language_codes``, otherwise ``None``
            is passed.

    Returns:
        A ``WiktextractContext`` wrapping a ``Wtp`` opened on the database
        path and a ``WiktionaryConfig`` with all capture options below
        switched on.
    """
    # Every capture option is enabled; build the keyword set in one place.
    capture_flags = dict.fromkeys(
        (
            "capture_translations",
            "capture_pronunciation",
            "capture_linkages",
            "capture_compounds",
            "capture_redirects",
            "capture_examples",
            "capture_etymologies",
            "capture_descendants",
            "capture_inflections",
        ),
        True,
    )
    language_filter = [wordlang] if wordlang else None
    settings = WiktionaryConfig(
        dump_file_lang_code=wiktlang,
        capture_language_codes=language_filter,
        **capture_flags,
    )
    processor = Wtp(db_path=f"./sqlite-{wiktlang}.db")
    return WiktextractContext(processor, settings)
This diff is collapsed.
Click to expand it.
src/wiktextract_wrapper.py
+
4
−
21
View file @
482bb453
from
wiktextract
import
(
WiktextractContext
,
WiktionaryConfig
,
)
from
wiktextract.wiktionary
import
page_handler
from
wikitextprocessor
import
Wtp
,
Page
from
wikitextprocessor
import
Page
from
wiktextract_context
import
get_wiktextract_context
db_path
=
"
./sqlite.db
"
...
...
@@ -18,23 +16,8 @@ class Wiktextract:
self
.
wiktlang
=
wiktlang
self
.
wordlang
=
wordlang
config
=
WiktionaryConfig
(
dump_file_lang_code
=
wiktlang
,
capture_language_codes
=
[
wordlang
],
capture_translations
=
True
,
capture_pronunciation
=
True
,
capture_linkages
=
True
,
capture_compounds
=
True
,
capture_redirects
=
True
,
capture_examples
=
True
,
capture_etymologies
=
True
,
capture_descendants
=
True
,
capture_inflections
=
True
,
)
wxr
=
WiktextractContext
(
Wtp
(
db_path
=
db_path
),
config
)
self
.
page_handler
=
page_handler
self
.
page_handler
.
wxr
=
wxr
self
.
page_handler
.
wxr
=
get_wiktextract_context
(
wiktlang
,
wordlang
)
def
parse_page
(
self
,
title
:
str
,
wikicode
:
str
):
# add page to the database (making it accessible to LUA templates)
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment