Toponym Geocoding — commit 8b047924
Authored 5 years ago by Jacques Fize

ADD region prediction network code

Parent commit: 565887da

Showing 3 changed files with 252 additions and 44 deletions:

  lib/data_generator.py    +52 −44
  region_embedding.py      +199 −0   (new file)
  scripts/gethealpix.py    +1 −0

lib/data_generator.py  (+52 −44)

@@ -11,6 +11,9 @@ from .geo import zero_one_encoding
 from helpers import parse_title_wiki, read_geonames
 from gensim.models.keyedvectors import KeyedVectors
+from sklearn.preprocessing import LabelEncoder
+
+
 
 def wc_l(filename, gzip=True):
     lc = 0
     if not gzip:

@@ -40,7 +43,9 @@ class DataSource(object):
         self.name = name
         assert os.path.exists(input_filename)
         self.input_filename = input_filename
         self.len = 0
+        self.is_there_healpix = False
+
 
     def __next__(self):
         raise NotImplementedError()

@@ -112,29 +117,6 @@ class Adjacency(DataSource):
         return (self.geonames_data_dict[self.topo],
                 self.geonames_data_dict[self.context_topo_context[self.i-1]],
                 self.lat, self.lon)
 
-    def __nextv2__(self):
-        if self.i >= len(self.context_topo_context):
-            line = self.data_src.readline()
-            if not line:
-                self.is_over = True
-                raise StopIteration
-            line = line.decode("utf-8").rstrip("\n")
-            geonameid, adjacent_geoname_id, latitude, longitude = tuple(line.split(","))
-
-            self.topo = int(geonameid)
-            self.context_topo_context = [int(x) for x in adjacent_geoname_id.split("|")]
-            if self.sampling:
-                self.curr_probs = [self.probs_storage(x) for x in self.context_topo_context]
-                self.context_topo_context = np.random.choice(self.context_topo_context, self.sampling, self.curr_probs)
-            self.lat, self.lon = float(latitude), float(longitude)
-
-            self.i = 0
-        self.i += 1
-        return (self.topo, self.context_topo_context[self.i-1], self.lat, self.lon)
-
     def __reset__(self):
         if not self.gzip:

@@ -193,40 +175,48 @@ class Inclusion(DataSource):
         return (self.i == self.len)
 
-from sklearn.preprocessing import LabelEncoder
 class CoOccurrences(DataSource):
-    def __init__(self, filename, label_encoder, sampling=3):
+    def __init__(self, filename, label_encoder, sampling=3, resolution=1):
         super().__init__("Co-Occurrence data", filename)
+        self.is_there_healpix = True
 
+        # LOAD DATA
         try:
             self.data_src = pd.read_csv(filename)
         except:
             self.data_src = pd.read_csv(filename, sep="\t")
 
+        # CHECK IF THE HEALPIX RESOLUTION DATA APPEARS IN THE DATA
+        if not "healpix_{0}".format(resolution) in self.data_src.columns:
+            raise KeyError("healpix_{0} column does not exists !".format(resolution))
+
+        # PARSE TOPONYMS
         self.data_src["title"] = self.data_src.title.apply(parse_title_wiki)
         try:
             self.data_src["interlinks"] = self.data_src.interlinks.apply(parse_title_wiki)
         except:
             pass
 
+        # LOOP parameter
+        self.sampling = sampling
+        if self.sampling:
+            self.probs_storage = SamplingProbabilities()
+
+        # LOOP INDICES
         self.i = 0
         self.j = 0
         self.is_over = False
-        self.sampling = sampling
         self.len = len(self.data_src)*self.sampling
-        if self.sampling:
-            self.probs_storage = SamplingProbabilities()
 
+        # BUFFER VARIABLE
         self.topo = None
         self.context_topo_context = []
        self.curr_probs = None
         self.lat, self.lon = None, None
-        self.resolution = 64 #fixed for now
+        self.resolution = resolution
 
         self.classes = self.data_src["healpix_{0}".format(self.resolution)].unique().tolist()
         self.class_encoder = label_encoder
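
The constructor above now takes the HEALPix resolution as a parameter and fails fast when the matching healpix_<resolution> column is absent. As a rough illustration, here is a minimal, hypothetical input that would pass the new check; the column set (title, interlinks, latitude, longitude, healpix_<res>) is inferred from this diff and the file name is made up:

    import pandas as pd

    # Miniature co-occurrence file, for illustration only.
    rows = [{"title": "Paris", "interlinks": "Seine|Montmartre",
             "latitude": 48.85, "longitude": 2.35, "healpix_1": 5}]
    pd.DataFrame(rows).to_csv("cooc_sample_train.csv", sep="\t", index=False)

    # CoOccurrences("cooc_sample_train.csv", LabelEncoder(), sampling=4, resolution=1)
    # passes the "healpix_1" column check; resolution=2 would raise the new KeyError.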

@@ -248,7 +238,9 @@ class CoOccurrences(DataSource):
             self.curr_probs = [self.probs_storage(x) for x in self.context_topo_context]
             self.context_topo_context = np.random.choice(self.context_topo_context, self.sampling, self.curr_probs)
         self.lat, self.lon = line.latitude, line.longitude
         self.healpix = line["healpix_{0}".format(self.resolution)]
+
+
         self.i += 1
         self.j = 0

@@ -264,9 +256,6 @@ class CoOccurrences(DataSource):
     def isOver(self):
         return self.is_over
 
-
-
-
 class DataGenerator(keras.utils.Sequence):
     'Generates data for Keras'
     def __init__(self, data_sources, ngram_index, class_encoder, **kwargs):

@@ -275,49 +264,68 @@ class DataGenerator(keras.utils.Sequence):
         self.ngram_index = ngram_index
         self.batch_size = kwargs.get("batch_size", 1000)
+        self.only_healpix = kwargs.get("only_healpix", False)
+
         self.len = sum([len(d) for d in self.data_src])
         self.datasrc_index = 0
         self.num_classes = class_encoder.get_num_classes()
         #self.on_epoch_end()
+        self.is_there_healpix = self.data_src[self.datasrc_index].is_there_healpix
 
     def __len__(self):
         'Denotes the number of batches per epoch'
         return int(np.floor(self.len / self.batch_size))
 
+    def return_(self, X, y, y2=None):
+        if self.is_there_healpix and self.only_healpix:
+            return [X[:,0], X[:,1]], y2
+        if self.is_there_healpix:
+            return [X[:,0], X[:,1]], [y, y2]
+        else:
+            return [X[:,0], X[:,1]], y
+
     def __getitem__(self, index):
         'Generate one batch of data'
         X = np.empty((self.batch_size, 2, self.ngram_index.max_len), dtype=np.int32) # toponym
         y = np.empty((self.batch_size, 2), dtype=float) #lat lon coord
-        y2 = np.empty((self.batch_size, self.num_classes), dtype=float) # healpix class
+
+        y2 = None # For healpix
+        if self.is_there_healpix:
+            y2 = np.empty((self.batch_size, self.num_classes), dtype=float) # healpix class
+
         if self.data_src[self.datasrc_index].isOver():
             self.datasrc_index += 1
+            self.is_there_healpix = self.data_src[self.datasrc_index].is_there_healpix
 
         if self.datasrc_index >= len(self.data_src):
-            return X, [y, y2]
+            self.return_(X, y, y2)
 
         for i in range(self.batch_size):
             if self.data_src[self.datasrc_index].isOver():
-                return X, y
+                return self.return_(X, y, y2)
             try:
                 topo, topo_context, latitude, longitude, healpix_class = self.data_src[self.datasrc_index].__next__()
             except StopIteration as e:
-                return X, [y, y2]
+                return self.return_(X, y, y2)
 
             X[i] = [self.ngram_index.encode(topo), self.ngram_index.encode(topo_context)]
             y[i] = [*zero_one_encoding(longitude, latitude)]
-            y2[i] = to_categorical(healpix_class, num_classes=self.num_classes, dtype='int32'
+            if self.is_there_healpix:
+                y2[i] = to_categorical(healpix_class, num_classes=self.num_classes, dtype='int32'
             )
             #y[i] = [longitude,latitude]
 
-        return [X[:,0], X[:,1]], [y, y2] #[y[:,0],y[:,1]]
+        return self.return_(X, y, y2)
 
     def on_epoch_end(self):
         'Updates indexes after each epoch'
         [d.__reset__() for d in self.data_src]
         self.datasrc_index = 0
 
 
 def load_embedding(model_fn, dim_vector=100):
     model = KeyedVectors.load(model_fn)
...
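
The new return_ helper centralizes the three batch signatures the generator can emit; note that the `self.datasrc_index >= len(self.data_src)` branch calls self.return_(X, y, y2) without a return statement, so that path still falls through to the fill loop. A rough shape sketch (not project code), assuming a batch size b, n-gram length L and C HEALPix classes:

    import numpy as np

    b, L, C = 4, 10, 12                        # hypothetical sizes
    X = np.zeros((b, 2, L), dtype=np.int32)    # toponym + context n-gram ids
    y = np.zeros((b, 2))                       # zero-one encoded (lon, lat)
    y2 = np.zeros((b, C))                      # one-hot HEALPix cell

    # only_healpix=True (what the region_embedding.py script below uses):
    # a single classification target for a softmax head.
    inputs, targets = [X[:, 0], X[:, 1]], y2
    assert inputs[0].shape == (b, L) and targets.shape == (b, C)

    # healpix available but only_healpix=False: coordinate head plus region head.
    inputs, targets = [X[:, 0], X[:, 1]], [y, y2]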

region_embedding.py  (new file, +199 −0)

# Base module
import os

# Structure
import pandas as pd

# DEEPL module
from keras.layers import Dense, Input, Embedding, concatenate, Bidirectional, LSTM, Dropout
from keras.models import Model
from keras.callbacks import ModelCheckpoint
from tensorflow.keras.layers import Lambda
import keras.backend as K
import tensorflow as tf

from lib.custom_layer import *

# Custom module
from lib.ngram_index import NgramIndex
from lib.utils import ConfigurationReader, MetaDataSerializer, LabelEncoder
from lib.metrics import lat_accuracy, lon_accuracy
from lib.data_generator import DataGenerator, CoOccurrences, load_embedding, Inclusion, Adjacency
from lib.geo import haversine_tf, accuracy_k, haversine_tf_1circle

# Logging
import logging

logging.getLogger('gensim').setLevel(logging.WARNING)

from helpers import EpochTimer

# LOGGING CONF
logging.basicConfig(
    format='[%(asctime)s][%(levelname)s] %(message)s',
    datefmt='%m/%d/%Y %I:%M:%S %p',
    level=logging.INFO
    )

args = ConfigurationReader("./parser_config/toponym_combination_embedding_v2.json")\
    .parse_args()#("-i --inclusion-fn ../data/geonamesData/hierarchy.txt ../data/geonamesData/allCountries.txt ../data/embeddings/word2vec4gram/4gramWiki+geonames_index.json ../data/embeddings/word2vec4gram/embedding4gramWiki+Geonames.bin".split())
#.parse_args("-w --wikipedia-cooc-fn subsetCoocALLv2.csv ../data/geonamesData/allCountries.txt ../data/embeddings/word2vec4gram/4gramWiki+geonames_index.json ../data/embeddings/word2vec4gram/embedding4gramWiki+Geonames.bin".split())
#
#################################################
############# MODEL TRAINING PARAMETER ##########
#################################################
NGRAM_SIZE = args.ngram_size
ACCURACY_TOLERANCE = args.k_value
EPOCHS = args.epochs
ADJACENCY_SAMPLING = args.adjacency_sample
COOC_SAMPLING = args.cooc_sample
WORDVEC_ITER = 50
EMBEDDING_DIM = args.dimension
BATCH_SIZE = args.batch_size
#################################################
########## FILENAME VARIABLE ####################
#################################################
# check for output dir
if not os.path.exists("outputs/"):
    os.makedirs("outputs/")

GEONAME_FN = args.geoname_input
DATASET_NAME = args.geoname_input.split("/")[-1]
GEONAMES_HIERARCHY_FN = args.inclusion_fn
ADJACENCY_REL_FILENAME = args.adjacency_fn
COOC_FN = args.wikipedia_cooc_fn

PREFIX_OUTPUT_FN = "REGION_{0}_{1}_{2}_{3}".format(
    GEONAME_FN.split("/")[-1],
    EPOCHS,
    NGRAM_SIZE,
    ACCURACY_TOLERANCE)

REL_CODE = ""
if args.adjacency:
    PREFIX_OUTPUT_FN += "_A"
    REL_CODE += "A"
if args.inclusion:
    PREFIX_OUTPUT_FN += "_I"
    REL_CODE += "I"
if args.wikipedia_cooc:
    PREFIX_OUTPUT_FN += "_C"
    REL_CODE += "C"

MODEL_OUTPUT_FN = "outputs/{0}.h5".format(PREFIX_OUTPUT_FN)
INDEX_FN = "outputs/{0}_index".format(PREFIX_OUTPUT_FN)
HISTORY_FN = "outputs/{0}.csv".format(PREFIX_OUTPUT_FN)

meta_data = MetaDataSerializer(
    DATASET_NAME,
    REL_CODE,
    COOC_SAMPLING,
    ADJACENCY_SAMPLING,
    NGRAM_SIZE,
    ACCURACY_TOLERANCE,
    EPOCHS,
    EMBEDDING_DIM,
    WORDVEC_ITER,
    INDEX_FN,
    MODEL_OUTPUT_FN,
    HISTORY_FN
)
meta_data.save("outputs/{0}.json".format(PREFIX_OUTPUT_FN))

### PUT DATASRC + GENERATOR

index = NgramIndex.load(args.ngram_index_fn)

train_src = []
test_src = []

class_encoder = LabelEncoder()

if args.wikipedia_cooc:
    train_src.append(CoOccurrences(COOC_FN + "_train.csv", class_encoder, sampling=4))
    test_src.append(CoOccurrences(COOC_FN + "_test.csv", class_encoder, sampling=4))

if args.adjacency:
    a_train = Adjacency(ADJACENCY_REL_FILENAME + "_train.csv", GEONAME_FN, sampling=ADJACENCY_SAMPLING, gzip=False)
    a_test = Adjacency(ADJACENCY_REL_FILENAME + "_test.csv", GEONAME_FN, sampling=ADJACENCY_SAMPLING, gzip=False)
    train_src.append(a_train)
    test_src.append(a_test)

if args.inclusion:
    i_train = Inclusion(GEONAME_FN, GEONAMES_HIERARCHY_FN + "_train.csv")
    i_test = Inclusion(GEONAME_FN, GEONAMES_HIERARCHY_FN + "_test.csv")
    train_src.append(i_train)
    test_src.append(i_test)
#Adjacency

d_train = DataGenerator(train_src, index, class_encoder, batch_size=BATCH_SIZE, only_healpix=True)
d_test = DataGenerator(test_src, index, class_encoder, batch_size=BATCH_SIZE, only_healpix=True)

num_words = len(index.index_ngram)

#############################################################################################
################################# NGRAM EMBEDDINGS ##########################################
#############################################################################################

embedding_weights = load_embedding(args.embedding_fn)

#############################################################################################
################################# MODEL DEFINITION ##########################################
#############################################################################################

from keras import regularizers

input_1 = Input(shape=(index.max_len,))
input_2 = Input(shape=(index.max_len,))

embedding_layer = Embedding(num_words, EMBEDDING_DIM, input_length=index.max_len, trainable=False)#, trainable=True)

x1 = embedding_layer(input_1)
x2 = embedding_layer(input_2)

# Each LSTM learn on a permutation of the input toponyms
biLSTM = Bidirectional(LSTM(32, activation="pentanh", recurrent_activation="pentanh"))
x1 = biLSTM(x1)
x2 = biLSTM(x2)

x = concatenate([x1, x2])#,x3])

#x = Dense(class_encoder.get_num_classes()*2,activation="relu")(x)

aux_layer = Dense(class_encoder.get_num_classes(), activation="softmax", name="aux_layer")(x)

model = Model(inputs=[input_1, input_2], outputs=aux_layer)#input_3

model.compile(loss={"aux_layer": "categorical_crossentropy"}, optimizer='adam', metrics={"aux_layer": "accuracy"})

#############################################################################################
################################# TRAINING LAUNCH ###########################################
#############################################################################################

checkpoint = ModelCheckpoint(MODEL_OUTPUT_FN + ".part", monitor='loss', verbose=1,
                             save_best_only=True, mode='auto', period=1)

epoch_timer = EpochTimer("outputs/" + PREFIX_OUTPUT_FN + "_epoch_timer_output.csv")

history = model.fit_generator(generator=d_train,
                              validation_data=d_test,
                              verbose=True,
                              epochs=EPOCHS,
                              callbacks=[checkpoint, epoch_timer])

hist_df = pd.DataFrame(history.history)
hist_df.to_csv(HISTORY_FN)

model.save(MODEL_OUTPUT_FN)

# Erase Model Checkpoint file
if os.path.exists(MODEL_OUTPUT_FN + ".part"):
    os.remove(MODEL_OUTPUT_FN + ".part")
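
The Bidirectional LSTM above uses a custom "pentanh" activation pulled in by `from lib.custom_layer import *`. Assuming this is the penalized tanh activation (tanh(x) for x > 0, 0.25·tanh(x) otherwise), a sketch of how such an activation could be defined and registered under that string name — this is a guess at what lib.custom_layer provides, not its actual contents:

    import keras.backend as K
    from keras.utils.generic_utils import get_custom_objects

    def pentanh(x):
        # Penalized tanh: damp the negative half of tanh by a factor of 0.25.
        t = K.tanh(x)
        return K.switch(K.greater(x, 0), t, 0.25 * t)

    # Registering under the name lets Keras resolve string activations in
    # LSTM(..., activation="pentanh", recurrent_activation="pentanh").
    get_custom_objects().update({"pentanh": pentanh})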

scripts/gethealpix.py  (+1 −0)

@@ -27,5 +27,6 @@ df = pd.read_csv(args.input_file,sep="\t")
 df["healpix_256"] = df.progress_apply(lambda row: latlon2healpix(lat=row.latitude, lon=row.longitude, res=256), axis=1)
 df["healpix_64"] = df.progress_apply(lambda row: latlon2healpix(lat=row.latitude, lon=row.longitude, res=64), axis=1)
 df["healpix_32"] = df.progress_apply(lambda row: latlon2healpix(lat=row.latitude, lon=row.longitude, res=32), axis=1)
+df["healpix_1"] = df.progress_apply(lambda row: latlon2healpix(lat=row.latitude, lon=row.longitude, res=1), axis=1)
 
 df.to_csv(args.output_file, sep="\t", index=False)
\ No newline at end of file
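
latlon2healpix (defined elsewhere in the repository) maps a coordinate to its HEALPix cell index at a given resolution; the new healpix_1 column uses the coarsest grid, which has only 12 cells, since an nside=n HEALPix grid has 12·n² pixels. A plausible reconstruction with healpy, assuming res is the nside parameter:

    import healpy as hp

    def latlon2healpix(lat, lon, res):
        # With lonlat=True healpy takes (lon, lat) in degrees and returns
        # the index of the pixel containing the point on an nside=res grid.
        return int(hp.ang2pix(res, lon, lat, lonlat=True))

    print(latlon2healpix(lat=48.85, lon=2.35, res=1))   # one of cells 0..11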