Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
EWOK
Manage
Activity
Members
Labels
Plan
Issues
2
Issue boards
Milestones
Wiki
External wiki
Code
Merge requests
0
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package Registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Lex gaMe
EWOK
Commits
ffe7de7e
Commit
ffe7de7e
authored
1 year ago
by
Empiriker
Browse files
Options
Downloads
Patches
Plain Diff
set supported wiktlangs in config
parent
482bb453
No related branches found
No related tags found
No related merge requests found
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
dumps/place_dump_files_here.xml.bz2
+0
-0
0 additions, 0 deletions
dumps/place_dump_files_here.xml.bz2
src/app.py
+18
-29
18 additions, 29 deletions
src/app.py
src/config.py
+2
-0
2 additions, 0 deletions
src/config.py
src/load_templates.py
+6
-3
6 additions, 3 deletions
src/load_templates.py
with
26 additions
and
32 deletions
dumps/place_dump_files_here.xml.bz2
0 → 100644
+
0
−
0
View file @
ffe7de7e
This diff is collapsed.
Click to expand it.
src/app.py
+
18
−
29
View file @
ffe7de7e
...
@@ -8,7 +8,8 @@ from get_wikicode import get_wikicode
...
@@ -8,7 +8,8 @@ from get_wikicode import get_wikicode
from
wiktextract_wrapper
import
Wiktextract
from
wiktextract_wrapper
import
Wiktextract
from
load_templates
import
load_templates
from
load_templates
import
load_templates
load_templates
()
for
wiktlang
in
config
.
supported_wiktlangs
:
load_templates
(
wiktlang
)
app
=
Flask
(
__name__
)
app
=
Flask
(
__name__
)
CORS
(
app
)
CORS
(
app
)
...
@@ -23,44 +24,32 @@ def index():
...
@@ -23,44 +24,32 @@ def index():
@app.route
(
"
/search/<wiktlang>/<wordlang>/<word>
"
,
methods
=
[
"
GET
"
])
@app.route
(
"
/search/<wiktlang>/<wordlang>/<word>
"
,
methods
=
[
"
GET
"
])
def
search
(
wiktlang
,
wordlang
,
word
):
def
search
(
wiktlang
,
wordlang
,
word
):
if
wiktlang
not
in
config
.
supported_wiktlangs
:
return
jsonify
({
"
error
"
:
f
"
Language
{
wiktlang
}
not supported
"
}),
400
wikicode
=
get_wikicode
(
word
,
wiktlang
)
wikicode
=
get_wikicode
(
word
,
wiktlang
)
if
wikicode
:
if
wikicode
:
en_
wiktextract
=
Wiktextract
(
"
en
"
,
wordlang
)
wiktextract
or
=
Wiktextract
(
wiktlang
,
wordlang
)
try
:
try
:
resp
=
en_
wiktextract
.
parse_page
(
word
,
wikicode
)
resp
=
wiktextract
or
.
parse_page
(
word
,
wikicode
)
return
jsonify
(
resp
)
return
jsonify
(
resp
)
except
Exception
as
e
:
except
Exception
as
e
:
print
(
e
)
print
(
e
)
resp
=
f
"""
<!doctype html>
<html>
return
jsonify
({
"
error
"
:
"
Parsing page resulted in error:
"
+
str
(
e
)}),
500
<head>
<title>Error</title>
</head>
<body>
<h1>
{
word
}
</h1>
<p>
{
e
}
</p>
</body>
</html>
"""
status
=
404
mimetype
=
"
text/html
"
finally
:
finally
:
en_
wiktextract
.
page_handler
.
wxr
.
wtp
.
db_conn
.
close
()
wiktextract
or
.
page_handler
.
wxr
.
wtp
.
db_conn
.
close
()
else
:
else
:
resp
=
f
"""
<!doctype html>
return
(
<html>
jsonify
(
<head>
{
<title>Error</title>
"
error
"
:
f
"
{
word
}
is unknown in “
{
wordlang
}
” in
{
wiktlang
}
.wiktionary.org.
"
</head>
}
<body>
),
<h1>
{
word
}
</h1>
404
,
<p>
{
word
}
is unknown in “
{
wordlang
}
” in
{
wiktlang
}
.wiktionary.org.</p>
)
</body>
</html>
"""
status
=
404
mimetype
=
"
text/html
"
return
Response
(
resp
,
status
=
status
,
mimetype
=
mimetype
)
if
__name__
==
"
__main__
"
:
if
__name__
==
"
__main__
"
:
...
...
This diff is collapsed.
Click to expand it.
src/config.py
+
2
−
0
View file @
ffe7de7e
host
=
"
0.0.0.0
"
host
=
"
0.0.0.0
"
port
=
80
port
=
80
debugging
=
True
debugging
=
True
supported_wiktlangs
=
[
"
en
"
]
This diff is collapsed.
Click to expand it.
src/load_templates.py
+
6
−
3
View file @
ffe7de7e
...
@@ -37,7 +37,7 @@ def time_elapsed_indicator():
...
@@ -37,7 +37,7 @@ def time_elapsed_indicator():
def
get_most_recent_file
(
directory
,
lang_code
):
def
get_most_recent_file
(
directory
,
lang_code
):
pattern
=
re
.
compile
(
pattern
=
re
.
compile
(
f
"
{
lang_code
}
wiktionary-(\d+)-pages-articles-multistream.xml.bz2
"
r
""
+
lang_code
+
r
"
wiktionary-(\d+)-pages-articles-multistream.xml.bz2
"
)
)
matching_files
=
[
f
for
f
in
os
.
listdir
(
directory
)
if
pattern
.
match
(
f
)]
matching_files
=
[
f
for
f
in
os
.
listdir
(
directory
)
if
pattern
.
match
(
f
)]
...
@@ -46,7 +46,7 @@ def get_most_recent_file(directory, lang_code):
...
@@ -46,7 +46,7 @@ def get_most_recent_file(directory, lang_code):
return
None
return
None
most_recent_file
=
sorted
(
most_recent_file
=
sorted
(
matching_files
,
key
=
lambda
x
:
pattern
.
match
(
x
).
group
(
1
),
reverse
=
True
matching_files
,
key
=
lambda
x
:
pattern
.
match
(
x
).
group
(
1
),
reverse
=
True
# type: ignore
)[
0
]
)[
0
]
return
os
.
path
.
join
(
directory
,
most_recent_file
)
return
os
.
path
.
join
(
directory
,
most_recent_file
)
...
@@ -72,7 +72,10 @@ def load_templates(wiktlang: str):
...
@@ -72,7 +72,10 @@ def load_templates(wiktlang: str):
dump_file
,
dump_file
,
num_processes
=
1
,
num_processes
=
1
,
phase1_only
=
True
,
phase1_only
=
True
,
namespace_ids
=
{
10
,
828
},
namespace_ids
=
{
10
,
828
,
},
# Template and Module namespaces; ToDo: Get the namespace IDs from the dump file
out_f
=
None
,
# type: ignore
out_f
=
None
,
# type: ignore
)
)
wxr
.
wtp
.
db_conn
.
commit
()
wxr
.
wtp
.
db_conn
.
commit
()
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment