Commit 5ea67e5e authored by Jacques Fize

Add new version of the model. NEW: loss is based on haversine distance.

parent 810b1dcf
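For context, the change replaces the previous per-axis mean-squared-error objective with a loss computed as the great-circle (haversine) distance between the predicted and true coordinates, applied to the concatenated (longitude, latitude) output. The sketch below illustrates one way such a loss can be written; it assumes the coordinates were rescaled to [0, 1] (zero_one_encoding in lib.geo presumably maps longitude from [-180, 180] and latitude from [-90, 90] onto the unit interval) and that distances are measured in kilometres. The actual haversine_tf_1circle implementation in lib.geo may differ.

import numpy as np
import tensorflow as tf

EARTH_RADIUS_KM = 6371.0

def haversine_distances(y_true, y_pred):
    # Per-sample great-circle distance (km) between (lon, lat) pairs that
    # were rescaled to [0, 1]; first undo the rescaling, then go to radians.
    lon_t = (y_true[:, 0] * 360.0 - 180.0) * np.pi / 180.0
    lat_t = (y_true[:, 1] * 180.0 - 90.0) * np.pi / 180.0
    lon_p = (y_pred[:, 0] * 360.0 - 180.0) * np.pi / 180.0
    lat_p = (y_pred[:, 1] * 180.0 - 90.0) * np.pi / 180.0
    a = tf.sin((lat_p - lat_t) / 2.0) ** 2 \
        + tf.cos(lat_t) * tf.cos(lat_p) * tf.sin((lon_p - lon_t) / 2.0) ** 2
    return 2.0 * EARTH_RADIUS_KM * tf.asin(tf.sqrt(a))

def haversine_loss(y_true, y_pred):
    # Mean distance over the batch, usable as a Keras loss on the
    # two-component "output_coord" layer.
    return tf.reduce_mean(haversine_distances(y_true, y_pred))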
@@ -25,6 +25,8 @@ from lib.geo import Grid,zero_one_encoding, get_adjacency_rels, get_geonames_inc
from lib.ngram_index import NgramIndex
from lib.utils import ConfigurationReader
from lib.metrics import lat_accuracy,lon_accuracy
+from lib.geo import haversine_tf,accuracy_k,haversine_tf_1circle
# Logging
from tqdm import tqdm
@@ -300,11 +302,13 @@ X_1_train = np.array(X_1_train)
X_2_train = np.array(X_2_train)
y_lat_train = np.array(y_lat_train)
y_lon_train = np.array(y_lon_train)
+y_train = np.array(y_train)
X_1_test = np.array(X_1_test)
X_2_test = np.array(X_2_test)
y_lat_test = np.array(y_lat_test)
y_lon_test = np.array(y_lon_test)
+y_test = np.array(y_test)
logging.info("Data prepared !")
@@ -354,6 +358,8 @@ x2 = Dense(500,activation="relu")(x2)
output_lon = Dense(1,activation="sigmoid",name="Output_LON")(x1)
output_lat = Dense(1,activation="sigmoid",name="Output_LAT")(x2)
model = Model(inputs = [input_1,input_2], outputs = [output_lon,output_lat])#input_3
model.compile(loss=['mean_squared_error','mean_squared_error'], optimizer='adam',metrics={"Output_LON":lon_accuracy(),"Output_LAT":lat_accuracy()})
...
# Base module
+import re
import os
+import json
# Structure
import pandas as pd
import numpy as np
+import geopandas as gpd
# DEEPL module
-from keras.layers import Dense, Input, Embedding,concatenate,Bidirectional,LSTM,Dropout,GRU
+from keras.layers import Dense, Input, Embedding,concatenate,Bidirectional,LSTM, Dropout
from keras.models import Model
+from keras import backend as K
from keras.callbacks import ModelCheckpoint
+from tensorflow.keras.layers import Lambda
-import keras.backend as K
-import tensorflow as tf
+import tensorflow as tf
-from lib.custom_layer import *
+# Geometry
+from shapely.geometry import Point
# Custom module
+from helpers import read_geonames
+from lib.geo import Grid,zero_one_encoding, get_adjacency_rels, get_geonames_inclusion_rel,get_bounds
from lib.ngram_index import NgramIndex
-from lib.utils import ConfigurationReader, MetaDataSerializer,LabelEncoder
+from lib.utils import ConfigurationReader
from lib.metrics import lat_accuracy,lon_accuracy
-from lib.data_generator import DataGenerator,CoOccurrences,load_embedding,Inclusion,Adjacency
from lib.geo import haversine_tf,accuracy_k,haversine_tf_1circle
# Logging
+from tqdm import tqdm
import logging
+from helpers import parse_title_wiki,EpochTimer
logging.getLogger('gensim').setLevel(logging.WARNING)
-from helpers import EpochTimer
+def get_new_ids(cooc_data,id_first_value):
+    """
+    Return new ids from cooccurrence data
+    Parameters
+    ----------
+    cooc_data : pd.DataFrame
+        cooccurrence data
+    id_first_value : int
+        id beginning value
+    Returns
+    -------
+    dict
+        mapping from each new id to its toponym
+    """
+    topo_id = {}
+    id_ = id_first_value
+    for title in cooc_data.title.values:
+        if not title in topo_id:
+            id_+=1
+            topo_id[id_]=title
+    for interlinks in cooc_data.interlinks.values:
+        for interlink in interlinks.split("|"):
+            if not interlink in topo_id:
+                id_+=1
+                topo_id[id_]=interlink
+    return topo_id
# LOGGING CONF
logging.basicConfig(
@@ -35,41 +71,40 @@ logging.basicConfig(
    level=logging.INFO
    )
-args = ConfigurationReader("./parser_config/toponym_combination_embedding_v2.json")\
+args = ConfigurationReader("./parser_config/toponym_combination_embedding.json")\
-    .parse_args()#("-i --inclusion-fn ../data/geonamesData/hierarchy.txt ../data/geonamesData/allCountries.txt ../data/embeddings/word2vec4gram/4gramWiki+geonames_index.json ../data/embeddings/word2vec4gram/embedding4gramWiki+Geonames.bin".split())
+    .parse_args()#("-w --wikipedia-cooc-fn subsetCoocALL.csv ../data/geonamesData/allCountries.txt ../data/geonamesData/hierarchy.txt".split())
-#.parse_args("-w --wikipedia-cooc-fn subsetCoocALLv2.csv ../data/geonamesData/allCountries.txt ../data/embeddings/word2vec4gram/4gramWiki+geonames_index.json ../data/embeddings/word2vec4gram/embedding4gramWiki+Geonames.bin".split())
#
#################################################
############# MODEL TRAINING PARAMETER ##########
#################################################
-MODEL_NAME = "Bi-LSTM_NGRAM"
NGRAM_SIZE = args.ngram_size
-ACCURACY_TOLERANCE = args.k_value
+ACCURACY_TOLERANCE = args.tolerance_value
EPOCHS = args.epochs
-ADJACENCY_SAMPLING = args.adjacency_sample
+ITER_ADJACENCY = args.adjacency_iteration
-COOC_SAMPLING = args.cooc_sample
+COOC_SAMPLING_NUMBER = args.cooc_sample_size
-WORDVEC_ITER = 50
+WORDVEC_ITER = args.ngram_word2vec_iter
-EMBEDDING_DIM = args.dimension
+EMBEDDING_DIM = 256
-BATCH_SIZE = args.batch_size
#################################################
########## FILENAME VARIABLE ####################
#################################################
-# check for output dir
-if not os.path.exists("outputs/"):
-    os.makedirs("outputs/")
GEONAME_FN = args.geoname_input
DATASET_NAME = args.geoname_input.split("/")[-1]
-GEONAMES_HIERARCHY_FN = args.inclusion_fn
+GEONAMES_HIERARCHY_FN = args.geoname_hierachy_input
-ADJACENCY_REL_FILENAME = args.adjacency_fn
+REGION_SUFFIX_FN = "" if args.admin_code_1 == "None" else "_" + args.admin_code_1
-COOC_FN = args.wikipedia_cooc_fn
+ADJACENCY_REL_FILENAME = "{0}_{1}{2}adjacency.json".format(
+    GEONAME_FN,
+    ITER_ADJACENCY,
+    REGION_SUFFIX_FN)
-PREFIX_OUTPUT_FN = "{0}_{1}_{2}_{3}".format(
+COOC_FN = args.wikipedia_cooc_fn
+PREFIX_OUTPUT_FN = "{0}_{1}_{2}_{3}_{4}".format(
    GEONAME_FN.split("/")[-1],
    EPOCHS,
    NGRAM_SIZE,
-    ACCURACY_TOLERANCE)
+    ACCURACY_TOLERANCE,
+    REGION_SUFFIX_FN)
REL_CODE=""
if args.adjacency:
@@ -86,12 +121,14 @@ MODEL_OUTPUT_FN = "outputs/{0}.h5".format(PREFIX_OUTPUT_FN)
INDEX_FN = "outputs/{0}_index".format(PREFIX_OUTPUT_FN)
HISTORY_FN = "outputs/{0}.csv".format(PREFIX_OUTPUT_FN)
+from lib.utils import MetaDataSerializer
meta_data = MetaDataSerializer(
-    MODEL_NAME,
    DATASET_NAME,
    REL_CODE,
-    COOC_SAMPLING,
+    COOC_SAMPLING_NUMBER,
-    ADJACENCY_SAMPLING,
+    ITER_ADJACENCY,
    NGRAM_SIZE,
    ACCURACY_TOLERANCE,
    EPOCHS,
@@ -103,92 +140,238 @@ meta_data = MetaDataSerializer(
)
meta_data.save("outputs/{0}.json".format(PREFIX_OUTPUT_FN))
+#############################################################################################
+################################# LOAD DATA #################################################
+#############################################################################################
-### PUT DATASRC + GENERATOR
+# LOAD Geonames DATA
+logging.info("Load Geonames data...")
+geoname_data = read_geonames(GEONAME_FN).fillna("")
-index = NgramIndex.load(args.ngram_index_fn)
+train_indices = set(pd.read_csv(GEONAME_FN+"_train.csv").geonameid.values)
+test_indices = set(pd.read_csv(GEONAME_FN+"_test.csv").geonameid.values)
-train_src = []
-test_src = []
+logging.info("Geonames data loaded!")
-class_encoder = LabelEncoder()
+# SELECT ENTRY with class == to A and P (Areas and Populated Places)
+filtered = geoname_data[geoname_data.feature_class.isin("A P".split())].copy() # Only take area and populated places
+#CLEAR RAM
+del geoname_data
-if args.wikipedia_cooc:
-    train_src.append(CoOccurrences(COOC_FN + "_train.csv",class_encoder,sampling=4,use_healpix=False))
-    test_src.append(CoOccurrences(COOC_FN + "_test.csv",class_encoder,sampling=4,use_healpix=False))
+# IF REGION
+if args.admin_code_1 != "None":
+    filtered = filtered[filtered.admin1_code == args.admin_code_1].copy()
+# GET BOUNDS AND REDUCE DATA AVAILABLE FIELDS
+filtered = filtered["geonameid name longitude latitude".split()] # KEEP ONLY ID LABEL AND COORD
+#############################################################################################
+################################# RETRIEVE RELATIONSHIPS ####################################
+#############################################################################################
+# INITIALIZE RELATION STORE
+rel_store = []
+# Retrieve adjacency relationships
if args.adjacency:
-    a_train = Adjacency(ADJACENCY_REL_FILENAME + "_train.csv",GEONAME_FN,sampling=ADJACENCY_SAMPLING,gzip=False)
-    a_test = Adjacency(ADJACENCY_REL_FILENAME + "_test.csv",GEONAME_FN,sampling=ADJACENCY_SAMPLING,gzip=False)
-    train_src.append(a_train)
-    test_src.append(a_test)
+    logging.info("Retrieve adjacency relationships ! ")
+    if not os.path.exists(ADJACENCY_REL_FILENAME):
+        bounds = get_bounds(filtered) # Required to get adjacency relationships
+        rel_store.extend(get_adjacency_rels(filtered,bounds,[360,180],ITER_ADJACENCY))
+        json.dump(rel_store,open(ADJACENCY_REL_FILENAME,'w'))
+    else:
+        logging.info("Open and load data from previous computation!")
+        rel_store=json.load(open(ADJACENCY_REL_FILENAME))
+    logging.info("{0} adjacency relationships retrieved ! ".format(len(rel_store)))
+# Retrieve inclusion relationships
if args.inclusion:
-    i_train = Inclusion(GEONAME_FN,GEONAMES_HIERARCHY_FN+"_train.csv")
-    i_test = Inclusion(GEONAME_FN,GEONAMES_HIERARCHY_FN+"_test.csv")
-    train_src.append(i_train)
-    test_src.append(i_test)
+    logging.info("Retrieve inclusion relationships ! ")
+    cpt_rel = len(rel_store)
+    rel_store.extend(get_geonames_inclusion_rel(filtered,GEONAMES_HIERARCHY_FN))
+    logging.info("{0} inclusion relationships retrieved ! ".format(len(rel_store)-cpt_rel))
-#Adjacency
-print("Number of classes:",class_encoder.get_num_classes())
-d_train = DataGenerator(train_src,index,class_encoder,batch_size=BATCH_SIZE)
-d_test = DataGenerator(test_src,index,class_encoder,batch_size=BATCH_SIZE)
-num_words = len(index.index_ngram)
+if args.wikipedia_cooc:
+    logging.info("Load Wikipedia Cooccurrence data and merge with geonames")
+    cooc_data = pd.read_csv(COOC_FN,sep="\t")
+    cooc_data["title"] = cooc_data.title.apply(parse_title_wiki)
+    cooc_data["interlinks"] = cooc_data.interlinks.apply(parse_title_wiki)
+    id_wikipediatitle = get_new_ids(cooc_data,filtered.geonameid.max())
+    wikipediatitle_id = {v:k for k,v in id_wikipediatitle.items()}
+    title_coord = {row.title: (row.longitude,row.latitude) for _,row in tqdm(cooc_data.iterrows(),total=len(cooc_data))}
+    cooc_data["geonameid"] = cooc_data.title.apply(lambda x: wikipediatitle_id[x])
+    filtered = pd.concat((filtered,cooc_data["geonameid title longitude latitude".split()].rename(columns={"title":"name"}).copy()))
+    train_cooc_indices,test_cooc_indices = pd.read_csv(COOC_FN+"_train.csv",sep="\t"), pd.read_csv(COOC_FN+"_test.csv",sep="\t")
+    if not "title" in train_cooc_indices:
+        train_cooc_indices,test_cooc_indices = pd.read_csv(COOC_FN+"_train.csv"), pd.read_csv(COOC_FN+"_test.csv")
+    train_indices = train_indices.union(set(train_cooc_indices.title.apply(lambda x: wikipediatitle_id[parse_title_wiki(x)]).values))
+    test_indices = test_indices.union(set(test_cooc_indices.title.apply(lambda x: wikipediatitle_id[parse_title_wiki(x)]).values))
+    logging.info("Merged with Geonames data !")
+    # EXTRACT rel
+    logging.info("Extracting cooccurrence relationships")
+    cpt=0
+    for ix, row in tqdm(cooc_data.iterrows(),total=len(cooc_data),desc="Extracting Wikipedia Cooccurrence"):
+        for inter in np.random.choice(row.interlinks.split("|"),COOC_SAMPLING_NUMBER):
+            cpt+=1
+            rel_store.extend([[row.geonameid,wikipediatitle_id[inter]]])
+    logging.info("Extract {0} cooccurrence relationships !".format(cpt))
+# STORE ID to name
+geoname2name = dict(filtered["geonameid name".split()].values)
+# ENCODING NAME USING N-GRAM SPLITTING
+logging.info("Encoding toponyms to ngram...")
+index = NgramIndex(NGRAM_SIZE)
+# Identify all ngram available
+filtered.name.apply(lambda x : index.split_and_add(x))
+if args.wikipedia_cooc:[index.split_and_add(k) for k in wikipediatitle_id]
+geoname2encodedname = {row.geonameid : index.encode(row.name) for row in filtered.itertuples()} #init a dict with the 'geonameid' --> 'encoded toponym' association
+if args.wikipedia_cooc:
+    geoname2encodedname.update({v:index.encode(k) for k,v in wikipediatitle_id.items()})
+# SAVE THE INDEX TO REUSE THE MODEL
+index.save(INDEX_FN)
+logging.info("Done !")
+#############################################################################################
+################################# ENCODE COORDINATES ########################################
+#############################################################################################
+# Encode each geonames entry coordinates
+geoname_vec = {row.geonameid : zero_one_encoding(row.longitude,row.latitude) for row in filtered.itertuples()}
+# CLEAR RAM
+del filtered
+EMBEDDING_DIM = 256
+num_words = len(index.index_ngram) # necessary for the embedding matrix
+logging.info("Preparing Input and Output data...")
+#############################################################################################
+################################# BUILD TRAIN/TEST DATASETS #################################
+#############################################################################################
+X_1_train,X_2_train,y_lat_train,y_lon_train=[],[],[],[]
+X_1_test,X_2_test,y_lat_test,y_lon_test=[],[],[],[]
+y_train,y_test = [],[]
+for couple in rel_store:
+    geonameId_1,geonameId_2 = couple[0],couple[1]
+    if not geonameId_1 in geoname2encodedname:
+        continue
+    top1,top2 = geoname2encodedname[geonameId_1],geoname2encodedname[geonameId_2]
+    if geonameId_1 in train_indices: #and geonameId_2 in train_indices:
+        X_1_train.append(top1)
+        X_2_train.append(top2)
+        y_train.append([geoname_vec[geonameId_1][0],geoname_vec[geonameId_1][1]])
+        #y_lon_train.append(geoname_vec[geonameId_1][0])
+        #y_lat_train.append(geoname_vec[geonameId_1][1])
+    else:
+        X_1_test.append(top1)
+        X_2_test.append(top2)
+        y_test.append([geoname_vec[geonameId_1][0],geoname_vec[geonameId_1][1]])
+        #y_lon_test.append(geoname_vec[geonameId_1][0])
+        #y_lat_test.append(geoname_vec[geonameId_1][1])
+# NUMPYZE inputs and output lists
+X_1_train = np.array(X_1_train)
+X_2_train = np.array(X_2_train)
+y_lat_train = np.array(y_lat_train)
+y_lon_train = np.array(y_lon_train)
+y_train = np.array(y_train)
+X_1_test = np.array(X_1_test)
+X_2_test = np.array(X_2_test)
+y_lat_test = np.array(y_lat_test)
+y_lon_test = np.array(y_lon_test)
+y_test = np.array(y_test)
+logging.info("Data prepared !")
+# check for output dir
+if not os.path.exists("outputs/"):
+    os.makedirs("outputs/")
#############################################################################################
################################# NGRAM EMBEDDINGS ##########################################
#############################################################################################
-embedding_weights = load_embedding(args.embedding_fn)
-EMBEDDING_DIM = len(embedding_weights[0])
+logging.info("Generating N-GRAM Embedding...")
+embedding_weights = index.get_embedding_layer(geoname2encodedname.values(),dim= EMBEDDING_DIM,iter=WORDVEC_ITER)
+logging.info("Embedding generated !")
#############################################################################################
################################# MODEL DEFINITION ##########################################
#############################################################################################
-from keras import regularizers
-####
input_1 = Input(shape=(index.max_len,))
input_2 = Input(shape=(index.max_len,))
-embedding_layer = Embedding(num_words, EMBEDDING_DIM,input_length=index.max_len,trainable=False)#, trainable=True)
+embedding_layer = Embedding(num_words, EMBEDDING_DIM,input_length=index.max_len,weights=[embedding_weights],trainable=False)#, trainable=True)
x1 = embedding_layer(input_1)
x2 = embedding_layer(input_2)
# Each LSTM learn on a permutation of the input toponyms
-biLSTM = Bidirectional(GRU(128,activation="pentanh", recurrent_activation="pentanh"))
-x1 = biLSTM(x1)
-x2 = biLSTM(x2)
+x1 = Bidirectional(LSTM(98))(x1)
+x2 = Bidirectional(LSTM(98))(x2)
x = concatenate([x1,x2])#,x3])
x1 = Dense(500,activation="relu")(x)
-x1 = Dropout(0.3)(x1)
+# x1 = Dropout(0.3)(x1)
x1 = Dense(500,activation="relu")(x1)
-x1 = Dropout(0.3)(x1)
+# x1 = Dropout(0.3)(x1)
x2 = Dense(500,activation="relu")(x)
-x2 = Dropout(0.3)(x2)
+# x2 = Dropout(0.3)(x2)
x2 = Dense(500,activation="relu")(x2)
-x2 = Dropout(0.3)(x2)
+# x2 = Dropout(0.3)(x2)
-#aux_layer = Dense(class_encoder.get_num_classes(),activation="softmax",name="aux_layer")(D)
-output_lon = Dense(1,activation="sigmoid")(x1)
+output_lon = Dense(1,activation="sigmoid",name="Output_LON")(x1)
-output_lat = Dense(1,activation="sigmoid")(x2)
+output_lat = Dense(1,activation="sigmoid",name="Output_LAT")(x2)
output_coord = concatenate([output_lon,output_lat],name="output_coord")
-#####
model = Model(inputs = [input_1,input_2], outputs = output_coord)#input_3
model.compile(loss={"output_coord":haversine_tf_1circle}, optimizer='adam',metrics={"output_coord":accuracy_k(ACCURACY_TOLERANCE)})
-model.summary()
+# model = Model(inputs = [input_1,input_2], outputs = [output_lon,output_lat])#input_3
+# model.compile(loss=['mean_squared_error','mean_squared_error'], optimizer='adam',metrics={"Output_LON":lon_accuracy(),"Output_LAT":lat_accuracy()})
#############################################################################################
################################# TRAINING LAUNCH ###########################################
#############################################################################################
@@ -199,10 +382,11 @@ checkpoint = ModelCheckpoint(MODEL_OUTPUT_FN + ".part", monitor='loss', verbose=
epoch_timer = EpochTimer("outputs/"+PREFIX_OUTPUT_FN+"_epoch_timer_output.csv")
-history = model.fit_generator(generator=d_train,
-    validation_data=d_test,
-    verbose=True,
+history = model.fit(x=[X_1_train,X_2_train],
+    y=y_train,#[y_lon_train,y_lat_train],
+    verbose=True, batch_size=100,
    epochs=EPOCHS,
+    validation_data=([X_1_test,X_2_test],y_test),#[y_lon_test,y_lat_test]),
    callbacks=[checkpoint,epoch_timer])
@@ -213,4 +397,5 @@ model.save(MODEL_OUTPUT_FN)
# Erase Model Checkpoint file
if os.path.exists(MODEL_OUTPUT_FN + ".part"):
-    os.remove(MODEL_OUTPUT_FN + ".part")
+    import shutil
+    shutil.rmtree(MODEL_OUTPUT_FN + ".part")
\ No newline at end of file
@@ -12,7 +12,7 @@
{"long": "--adjacency-iteration", "type":"int","default":1},
{ "short": "-n", "long": "--ngram-size", "type": "int", "default": 2 },
{ "long": "--ngram-word2vec-iter", "type": "int", "default": 50 },
-{ "short": "-t", "long": "--tolerance-value", "type": "float", "default": 0.002 },
+{ "short": "-t", "long": "--tolerance-value", "type": "float", "default": 100 },
{ "short": "-e", "long": "--epochs", "type": "int", "default": 100 },
{ "short": "-d", "long": "--dimension", "type": "int", "default": 256 },
{ "long": "--admin_code_1", "default": "None" }
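One consequence of the distance-based loss is visible in the parser configuration above: the default --tolerance-value moves from 0.002 (a tolerance expressed in the normalised coordinate space used by the earlier mean-squared-error setup) to 100, which reads naturally as a tolerance in kilometres. Assuming accuracy_k in lib.geo reports the share of predictions that fall within k kilometres of the target (the real implementation may differ), a minimal sketch of such a metric, reusing haversine_distances from the sketch near the top of this page:

import tensorflow as tf

def accuracy_k(k_km):
    # Builds a Keras metric: fraction of samples whose predicted point lies
    # within k_km kilometres of the true point.
    def accuracy_at_k(y_true, y_pred):
        distances = haversine_distances(y_true, y_pred)  # per-sample km, defined in the earlier sketch
        return tf.reduce_mean(tf.cast(distances <= k_km, tf.float32))
    return accuracy_at_k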