DEBUG

b594cc5e · Jacques Fize · 1ec9430b · b594cc5e
Commit b594cc5e authored 5 years ago by Jacques Fize
--- a/geocoding_result.ipynb
+++ b/geocoding_result.ipynb
@@ -23,29 +23,6 @@
    "sns.set_context('paper')"
   ]
  },
-  {
-   "cell_type": "code",
-   "execution_count": 10,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "\n",
-    "nice_fonts = {\n",
-    "        # Use LaTeX to write all text\n",
-    "        \"text.usetex\": True,\n",
-    "        \"font.family\": \"serif\",\n",
-    "        # Use 10pt font in plots, to match 10pt font in document\n",
-    "        \"axes.labelsize\": 10,\n",
-    "        \"font.size\": 10,\n",
-    "        # Make the legend/label fonts a little smaller\n",
-    "        \"legend.fontsize\": 8,\n",
-    "        \"xtick.labelsize\": 8,\n",
-    "        \"ytick.labelsize\": 8,\n",
-    "}\n",
-    "\n",
-    "mpl.rcParams.update(nice_fonts)"
-   ]
-  },
  {
   "cell_type": "code",
   "execution_count": 11,

 %% Cell type:code id: tags:
 ``` python
 from glob import glob
 import pandas as pd
 import numpy as np
 import matplotlib.pyplot as plt
 from ipywidgets import interact, interactive, fixed, interact_manual
 import ipywidgets as widgets
 from glob import glob
 import json
 import seaborn as sns
 import matplotlib.pyplot as plt
 import matplotlib as mpl
 # Configure the seaborn theme in one call: white-grid style, "paper" context.
 sns.set(context="paper", style="whitegrid")
 ```
 %% Cell type:code id: tags:
 ``` python
-nice_fonts = {
-        # Use LaTeX to write all text
-        "text.usetex": True,
-        "font.family": "serif",
-        # Use 10pt font in plots, to match 10pt font in document
-        "axes.labelsize": 10,
-        "font.size": 10,
-        # Make the legend/label fonts a little smaller
-        "legend.fontsize": 8,
-        "xtick.labelsize": 8,
-        "ytick.labelsize": 8,
-}
-mpl.rcParams.update(nice_fonts)
-```
-%% Cell type:code id: tags:
-``` python
 # Load every result CSV and tag its rows with the dataset it came from.
 RESULT_SUFFIX = "_RESULT.csv"
 def dataset_name(fn):
     """Return the file name of ``fn`` without its ``_RESULT.csv`` suffix."""
     base = fn.split("/")[-1]
     # str.rstrip() removes a *set of characters*, not a suffix, so it would
     # also eat trailing letters of the name itself ("x_stats" -> "x_stat").
     if base.endswith(RESULT_SUFFIX):
         return base[:-len(RESULT_SUFFIX)]
     return base
 fns = glob("./geocoding_data/RESULTS/*.csv")
 if not fns:
     # Fail with an explicit message instead of an IndexError on fns[0].
     raise FileNotFoundError("no result CSV found in ./geocoding_data/RESULTS/")
 # Read all frames first and concatenate once, rather than re-copying the
 # accumulated frame on every loop iteration.
 df = pd.concat(
     [pd.read_csv(fn, index_col=0).assign(dataset=dataset_name(fn)) for fn in fns]
 )
 ```
 %% Cell type:code id: tags:
 ``` python
 df.head(2)
 ```
 %% Output
      dataset_name rel_code  cooc_sample_size  adj_iteration  ngram_size  \
    0       FR.txt        A                 3              1           4
    1       FR.txt       AC                 3              1           4
       tolerance_value  epochs  embedding_dim  word2vec_iter_nb  \
    0            0.002     100            256                50
    1            0.002     100            256                50
                                    index_fn                      keras_model_fn  \
    0    outputs/FR.txt_100_4_0.002__A_index    outputs/FR.txt_100_4_0.002__A.h5
    1  outputs/FR.txt_100_4_0.002__A_C_index  outputs/FR.txt_100_4_0.002__A_C.h5
                     train_test_history_fn  acccuracy@100km  acccuracy@50km  \
    0    outputs/FR.txt_100_4_0.002__A.csv         0.369215        0.165106
    1  outputs/FR.txt_100_4_0.002__A_C.csv         0.790145        0.471967
       acccuracy@25km       dataset
    0        0.053217  fr_cooc_test
    1        0.187430  fr_cooc_test
 %% Cell type:code id: tags:
 ``` python
 # Rename the two grouping columns to the labels used when plotting.
 column_labels = {"rel_code": "Relation(s) used", "dataset": "Dataset Used"}
 df = df.rename(columns=column_labels)
 ```
 %% Cell type:code id: tags:
 ``` python
 # Print the raw dataset identifiers, then replace them with display labels.
 print(df["Dataset Used"].unique())
 new_labels = {
     "fr_cooc_test": "Cooccurrence Dataset (FR)",
     "fr_dataset_ambiguity_sample10percent": "Ambiguity Dataset (FR)",
     "us_fr_cooc_test": "Cooccurrence Dataset (IDF+TEXAS)",
     "us_fr_dataset_ambiguity": "Ambiguity Dataset (IDF+TEXAS)",
 }
 # Plain indexing (not .get) so an identifier missing from the table raises KeyError.
 df["Dataset Used"] = [new_labels[raw_name] for raw_name in df["Dataset Used"]]
 ```
 %% Output
    ['fr_cooc_test' 'fr_dataset_ambiguity_sample10percent' 'us_fr_cooc_test'
     'us_fr_dataset_ambiguity']
 %% Cell type:code id: tags:
 ``` python
 # One facet per dataset; the three accuracy thresholds are drawn as bars on
 # top of each other, in the order 100km, 50km, 25km.
 g = sns.FacetGrid(df, col="Dataset Used", col_wrap=2, height=4, ylim=(0, 1))
 # Pin one category order for every facet and every layer. Without `order`
 # seaborn derives it per facet and warns that the plot may be incorrect.
 relation_order = list(df["Relation(s) used"].dropna().unique())
 accuracy_layers = (
     ("acccuracy@100km", "#26a65b", "Accuracy@100km"),
     ("acccuracy@50km", "#e74c3c", "Accuracy@50km"),
     ("acccuracy@25km", "#2980b9", "Accuracy@25km"),
 )
 for column, colour, legend_label in accuracy_layers:
     g.map(sns.barplot, "Relation(s) used", column, order=relation_order,
           color=colour, ci=None, label=legend_label)
 g.add_legend(fontsize="large",title="Legend",title_fontsize="40",frameon=True)
 # Label every left-hand axis, not only the first one, so no facet row keeps
 # the column name of the last mapped layer as its y-label.
 g.set_ylabels('Accuracy')
 # Runs after add_legend, so this is the legend-title size that ends up applied.
 plt.setp(g._legend.get_title(), fontsize=15)
 #plt.savefig("../resulat_viz/geocoding_result.pdf",bbox_inches="tight")
 ```
 %% Output
    /home/jacques/.local/lib/python3.6/site-packages/seaborn/axisgrid.py:728: UserWarning: Using the barplot function without specifying `order` is likely to produce an incorrect plot.
      warnings.warn(warning)
 %% Cell type:code id: tags:
 ``` python
 ```