Skip to content
Snippets Groups Projects
Commit 70d5b190 authored by Jacques Fize's avatar Jacques Fize
Browse files

Final Debug (problem split)

parent 8e0560b4
No related branches found
No related tags found
No related merge requests found
......@@ -166,5 +166,4 @@ To train the network with default parameter use the following command :
| -t,--tolerance-value | K-value in the computation of the accuracy@k |
| -e,--epochs | number of epochs |
| -d,--dimension | size of the ngram embeddings |
| -m,--model | Neural Network architecture used |
| --admin_code_1 | (Optional) If you wish to train the network on a specific region |
......@@ -97,7 +97,8 @@ logging.basicConfig(
)
chrono = Chronometer()
args = ConfigurationReader("./parser_config/toponym_combination_embedding.json").parse_args("-n 4 -t 0.002 -e 100 -m LSTM -a data/geonamesData/FR.txt data/geonamesData/hierarchy.txt".split())
args = ConfigurationReader("./parser_config/toponym_combination_embedding.json")\
.parse_args("-n 4 -t 0.002 -e 20 -a -w -i data/geonamesData/FR.txt data/geonamesData/hierarchy.txt".split())
# Initialise CONSTANTS
GEONAME_FN = args.geoname_input
......@@ -106,6 +107,7 @@ NGRAM_SIZE = args.ngram_size
ACCURACY_TOLERANCE = args.tolerance_value
EPOCHS = args.epochs
ITER_ADJACENCY = args.adjacency_iteration
COOC_SAMPLING_NUMBER = 3
# check for output dir
if not os.path.exists("outputs/"):
......@@ -180,8 +182,8 @@ if args.wikipedia_cooc:
filtered = pd.concat((filtered,cooc_data["geonameid title longitude latitude".split()].rename(columns={"title":"name"}).copy()))
train_cooc_indices,test_cooc_indices = pd.read_csv(COOC_FN+"_train.csv"), pd.read_csv(COOC_FN+"_test.csv")
train_indices.union(train_cooc_indices.title.apply(lambda x: wikipediatitle_id[parse_title_wiki(x)]))
test_indices.union(test_cooc_indices.title.apply(lambda x: wikipediatitle_id[parse_title_wiki(x)]))
train_indices = train_indices.union(set(train_cooc_indices.title.apply(lambda x: wikipediatitle_id[parse_title_wiki(x)]).values))
test_indices = test_indices.union(set(test_cooc_indices.title.apply(lambda x: wikipediatitle_id[parse_title_wiki(x)]).values))
logging.info("Merged with Geonames data !")
......@@ -189,7 +191,7 @@ if args.wikipedia_cooc:
logging.info("Extracting cooccurrence relationships")
cpt=0
for ix, row in tqdm(cooc_data.iterrows(),total=len(cooc_data),desc="Extracting Wikipedia Cooccurrence"):
for inter in row.interlinks.split("|"):
for inter in np.random.choice(row.interlinks.split("|"),COOC_SAMPLING_NUMBER):
cpt+=1
rel_store.extend([[row.geonameid,wikipediatitle_id[inter]]])
logging.info("Extract {0} cooccurrence relationships !".format(cpt))
......@@ -283,13 +285,15 @@ logging.info("Data prepared !")
# OUTPUT FN BASE
name = "{0}_{1}_{2}_{3}{4}".format(GEONAME_FN.split("/")[-1],EPOCHS,NGRAM_SIZE,ACCURACY_TOLERANCE,region_fn)
if args.adjacency:
name+="_A"
name += "_A"
if args.inclusion:
name+="_I"
name += "_I"
if args.wikipedia_cooc:
name += "_C"
# NGRAM EMBEDDING
logging.info("Generating N-GRAM Embedding...")
embedding_weights = index.get_embedding_layer(geoname2encodedname.values(),dim= embedding_dim,iter=5)
embedding_weights = index.get_embedding_layer(geoname2encodedname.values(),dim= embedding_dim,iter=50)
logging.info("Embedding generated !")
# DEEP MODEL
......
......@@ -7,12 +7,11 @@
{ "short": "-i", "long": "--inclusion", "action": "store_true" },
{ "short": "-a", "long": "--adjacency", "action": "store_true" },
{ "short": "-w", "long": "--wikipedia-cooc", "action": "store_true" },
{"long": "--adjacency-iteration", "type":"int","default":5},
{"long": "--adjacency-iteration", "type":"int","default":1},
{ "short": "-n", "long": "--ngram-size", "type": "int", "default": 2 },
{ "short": "-t", "long": "--tolerance-value", "type": "float", "default": 0.002 },
{ "short": "-e", "long": "--epochs", "type": "int", "default": 100 },
{ "short": "-d", "long": "--dimension", "type": "int", "default": 256 },
{ "short": "-m", "long": "--model", "choices": ["CNN", "LSTM"], "default": "CNN" },
{ "long": "--admin_code_1", "default": "None" }
]
}
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment