Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
P
Place Embedding
Manage
Activity
Members
Labels
Plan
Issues
0
Issue boards
Milestones
Wiki
Code
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Deploy
Releases
Model registry
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Jacques Fize
Place Embedding
Commits
c92b911a
Commit
c92b911a
authored
5 years ago
by
Jacques Fize
Browse files
Options
Downloads
Patches
Plain Diff
Forgot to add helpers.py
parent
34f50041
No related branches found
No related tags found
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
helpers.py
+149
-0
149 additions, 0 deletions
helpers.py
with
149 additions
and
0 deletions
helpers.py
0 → 100755
+
149
−
0
View file @
c92b911a
import
os
import
time
import
pandas
as
pd
import
matplotlib.pyplot
as
plt
def read_geonames(file):
    """
    Return a dataframe that contains Geonames data.

    The file is read as a tab-separated table without a header row and
    without quote handling. Only the empty string is treated as a missing
    value, so literal values such as the country code "NA" are kept as
    text. Malformed lines are dropped (pandas emits a warning for them).

    Parameters
    ----------
    file : str
        path of the Geonames CSV file

    Returns
    -------
    pd.DataFrame
        geonames data, with the 19 positional columns renamed to
        ``geonameid``, ``name``, ..., ``modification_date``
    """
    # (column name, dtype) of every positional column, in file order.
    column_specs = [
        ("geonameid", int),
        ("name", str),
        ("asciiname", str),
        ("alternatenames", str),
        ("latitude", float),
        ("longitude", float),
        ("feature_class", str),
        ("feature_code", str),
        ("country_code", str),
        ("cc2", str),
        ("admin1_code", str),
        ("admin2_code", str),
        ("admin3_code", str),
        ("admin4_code", str),
        ("population", int),
        ("elevation", str),
        ("dem", int),  # digital elevation model
        ("timezone", str),
        ("modification_date", str),  # yyyy-MM-dd
    ]
    dtypes_dict = {idx: dtype for idx, (_, dtype) in enumerate(column_specs)}
    rename_cols = {idx: name for idx, (name, _) in enumerate(column_specs)}

    read_options = dict(
        sep="\t",
        header=None,
        quoting=3,  # same value as csv.QUOTE_NONE: quotes are plain characters
        dtype=dtypes_dict,
        na_values='',
        keep_default_na=False,
    )
    try:
        # `error_bad_lines` was deprecated in pandas 1.3 and removed in 2.0;
        # on_bad_lines="warn" keeps the former "drop the line and warn"
        # behaviour.
        data = pd.read_csv(file, on_bad_lines="warn", **read_options)
    except TypeError as err:
        if "on_bad_lines" not in str(err):
            raise
        # pandas < 1.3 does not know `on_bad_lines` yet.
        data = pd.read_csv(file, error_bad_lines=False, **read_options)
    return data.rename(columns=rename_cols)
def plot_accuracy_from_history(model_name, history_data, output_layer_name, outpu_filename, parameter_string, output_dirname="outputs", validation=True, show=False):
    """
    Plot the accuracy curves of one output layer and save them as a PNG.

    The curves are drawn on the current matplotlib figure and written to
    ``<output_dirname>/<model_name>_<parameter_string>_<output_layer_name>.png``.

    Parameters
    ----------
    model_name : str
        name of the model, used in the output filename
    history_data : mapping of str to an object exposing ``.values``
        training history; must contain the key ``<output_layer_name>_accuracy``
        and, when `validation` is True, ``val_<output_layer_name>_accuracy``
        (presumably a pandas DataFrame of the training log -- TODO confirm)
    output_layer_name : str
        name of the output layer whose accuracy is plotted
    outpu_filename : str
        currently unused; kept (including its spelling) so existing callers
        keep working
    parameter_string : str
        description of the parameters, used in the output filename
    output_dirname : str, optional
        directory the figure is saved in; created if missing.
        Default is "outputs"
    validation : bool, optional
        if True, also plot the validation accuracy. Default is True
    show : bool, optional
        if True, display the figure after saving it. Default is False
    """
    # NOTE(review): the curves are added to the *current* figure, so
    # successive calls without clearing it stack their curves -- confirm
    # whether that is intended.
    # Plot training & validation accuracy values
    plt.plot(history_data['{0}_accuracy'.format(output_layer_name)].values, label="Train Data")
    if validation:
        plt.plot(history_data['val_{0}_accuracy'.format(output_layer_name)].values, label="Test Data")
    plt.title('Layer {0} accuracy'.format(output_layer_name))
    plt.ylabel('Accuracy')
    plt.xlabel('Epoch')
    plt.ylim((0, 1.1))  # upper bound above 1 so a curve at accuracy 1 stays visible
    plt.legend()

    # Honour `output_dirname` (it used to be ignored in favour of a
    # hard-coded "outputs/") and make sure the directory exists.
    if output_dirname:
        os.makedirs(output_dirname, exist_ok=True)
    figure_name = "{0}_{1}_{2}.png".format(model_name, parameter_string, output_layer_name)
    plt.savefig(os.path.join(output_dirname, figure_name))
    if show:
        plt.show()
def save_embedding(model, tokenizer, layer_idx, fn):
    """
    Write the embedding vector of every known word to a text file.

    Each output line holds a word followed by the components of its
    vector, separated by single spaces.

    Parameters
    ----------
    model : object
        object whose ``get_weights()`` returns a sequence; its first item is
        used as the embedding matrix (indexable by word index)
    tokenizer : object
        object whose ``word_index`` maps each word to its row in the
        embedding matrix
    layer_idx : int
        currently unused: the first weight array is always exported
        (NOTE(review): confirm whether it was meant to select the layer)
    fn : str
        path of the output file
    """
    embedding_matrix = model.get_weights()[0]
    # Words are written as-is (they may be non-ASCII), so do not depend on
    # the platform's default encoding.
    with open(fn, 'w', encoding="utf-8") as f:
        for word, word_idx in tokenizer.word_index.items():
            # str() rather than repr(): with NumPy >= 2 the repr of a scalar
            # is e.g. "np.float32(0.5)", which would corrupt the file.
            components = [str(value) for value in embedding_matrix[word_idx]]
            f.write(' '.join([word] + components) + '\n')
class Chronometer:
    """
    Measure the duration of several named tasks.

    A task is started with `start` and finished with `stop`, which returns
    the time elapsed in between. Several tasks may run at the same time,
    as long as their names differ.
    """

    def __init__(self):
        # task name -> start timestamp (seconds) of every running task
        self.__task_begin_timestamp = {}

    def start(self, task_name):
        """
        Start a new task chronometer

        Parameters
        ----------
        task_name : str
            task id

        Raises
        ------
        ValueError
            if a running task already exists with that name
        """
        if task_name in self.__task_begin_timestamp:
            raise ValueError("A running task exists with the name {0}!".format(task_name))
        # perf_counter() is monotonic, so the measured duration is not
        # affected by adjustments of the system clock.
        self.__task_begin_timestamp[task_name] = time.perf_counter()

    def stop(self, task_name):
        """
        Stop and return the duration of the task

        Parameters
        ----------
        task_name : str
            task id

        Returns
        -------
        float
            duration of the task in seconds

        Raises
        ------
        ValueError
            if no task exists with the id `task_name`
        """
        if task_name not in self.__task_begin_timestamp:
            raise ValueError("The {0} task does not exist!".format(task_name))
        duration = time.perf_counter() - self.__task_begin_timestamp[task_name]
        # Forget the task so that its name can be reused.
        del self.__task_begin_timestamp[task_name]
        return duration
if __name__ == "__main__":
    # Manual smoke test: run two overlapping tasks and print how long each
    # one lasted (in seconds).
    demo_tasks = ("test", "test2")
    chrono = Chronometer()
    for task in demo_tasks:
        chrono.start(task)
    # Every task is stopped 3 seconds after the previous event, so the
    # second task runs about twice as long as the first one.
    for task in demo_tasks:
        time.sleep(3)
        print(chrono.stop(task))
\ No newline at end of file
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment