Skip to content
Snippets Groups Projects
Commit 2240661f authored by Fize Jacques's avatar Fize Jacques
Browse files

Update

parent 1bf80ff7
No related branches found
No related tags found
No related merge requests found
...@@ -3,27 +3,48 @@ from evalne.evaluation.split import EvalSplit as LPEvalSplit ...@@ -3,27 +3,48 @@ from evalne.evaluation.split import EvalSplit as LPEvalSplit
from evalne.evaluation.score import Scoresheet from evalne.evaluation.score import Scoresheet
from evalne.utils import preprocess as pp from evalne.utils import preprocess as pp
import networkx as nx import networkx as nx
from tqdm import tqdm
from lib.utils import load_edgelist from lib.utils import load_edgelist
import argparse import argparse
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
parser.add_argument("edgelist_graph_filename") parser.add_argument("graph_filename")
parser.add_argument("--ne","--network-embedding",action="store_true",help="If you want to use neural network embedding for link prediction") parser.add_argument("-n","--network-embedding",action="store_true",help="If you want to use neural network embedding for link prediction")
parser.add_argument("-v","--verbose",action="store_true") parser.add_argument("-v","--verbose",action="store_true")
parser.add_argument("-f","--format",default="gexf",choices=["gexf","gml","txt"])
args = parser.parse_args()#("data/fb_country_country_sample_6_size1000.txt".split()) args = parser.parse_args()
def log(x):
    """Print ``x`` only when the script was launched with ``-v/--verbose``.

    Reads the module-level ``args`` namespace returned by
    ``parser.parse_args()``; nothing is printed when ``args.verbose`` is false.
    """
    if args.verbose:
        print(x)
# Load and preprocess the network # Load and preprocess the network
log("Load Input Graph...")
# Pick the reader that matches the -f/--format choice; "gexf" is the default
# and therefore handled by the final branch.
if args.format == "txt":
    G = load_edgelist(path=args.graph_filename, weighted=True)
elif args.format == "gml":
    G = nx.read_gml(args.graph_filename)
else:
    G = nx.read_gexf(args.graph_filename)
# NOTE(review): preprocessing is applied to every input format, as the earlier
# gexf-only version did — confirm this is the intended placement.
G, _ = pp.prep_graph(G, maincc=True)
log("Graph Loaded !")
log("Size " + str(len(G)))
log("Nb of Edges " + str(G.number_of_edges()))
# Use the real graph density. The former nodes/edges ratio was not a density
# and, being evaluated even without -v, raised ZeroDivisionError on a graph
# with no edges.
log("Density " + str(nx.density(G)))
log("Building link prediction dataset...")
# Create an evaluator and generate train/test edge split # Create an evaluator and generate train/test edge split
traintest_split = LPEvalSplit() traintest_split = LPEvalSplit()
traintest_split.compute_splits(G,split_alg="spanning_tree",train_frac=0.8,fe_ratio=1) traintest_split.compute_splits(G,split_alg="spanning_tree",train_frac=0.8,fe_ratio=1)
nee = LPEvaluator(traintest_split) nee = LPEvaluator(traintest_split)
log("Dataset Built !")
# Create a Scoresheet to store the results # Create a Scoresheet to store the results
scoresheet = Scoresheet() scoresheet = Scoresheet()
...@@ -40,7 +61,9 @@ methods = ['random_prediction', ...@@ -40,7 +61,9 @@ methods = ['random_prediction',
] ]
# Evaluate baselines # Evaluate baselines
for method in methods: pbar = tqdm(methods,disable= (not args.verbose))
for method in pbar:
pbar.set_description("Evaluate "+method)
result = nee.evaluate_baseline(method=method, ) result = nee.evaluate_baseline(method=method, )
scoresheet.log_results(result) scoresheet.log_results(result)
...@@ -63,7 +86,9 @@ if args.network_embedding: ...@@ -63,7 +86,9 @@ if args.network_embedding:
edge_emb = ['average', 'hadamard'] edge_emb = ['average', 'hadamard']
# Evaluate embedding methods # Evaluate embedding methods
for i in range(len(methods)): pbar = tqdm(enumerate(methods), disable=(not args.verbose))
for i,method in pbar:
pbar.set_description("Evaluate "+method)
command = commands[i] + " --input {} --output {} --representation-size {}" command = commands[i] + " --input {} --output {} --representation-size {}"
results = nee.evaluate_cmd(method_name=methods[i], method_type='ne', command=command, results = nee.evaluate_cmd(method_name=methods[i], method_type='ne', command=command,
edge_embedding_methods=edge_emb, input_delim=' ', output_delim=' ', verbose=args.verbose) edge_embedding_methods=edge_emb, input_delim=' ', output_delim=' ', verbose=args.verbose)
...@@ -76,4 +101,6 @@ if args.network_embedding: ...@@ -76,4 +101,6 @@ if args.network_embedding:
# Get output # Get output
if args.verbose: if args.verbose:
scoresheet.print_tabular() scoresheet.print_tabular()
scoresheet.write_all(args.edgelist_graph_filename+"_results_lp") log("Saving Output in " + args.graph_filename + "_results_lp")
\ No newline at end of file scoresheet.write_all(args.graph_filename+"_results_lp")
log("Output Saved !")
\ No newline at end of file
...@@ -19,13 +19,13 @@ def generate_sbm_prob_matrix(nb_of_blocks,prob_btw_block=0.1): ...@@ -19,13 +19,13 @@ def generate_sbm_prob_matrix(nb_of_blocks,prob_btw_block=0.1):
M[j,i] = prob_btw_block M[j,i] = prob_btw_block
return M return M
GRAPH_SIZE = [50,100,200,500] GRAPH_SIZE = [50,75,100]
OUTPUT_DIR = "test_dataset/" OUTPUT_DIR = "data/theoric_graph_1/"
parameters = { parameters = {
"planted_partition_graph": { "planted_partition_graph": {
"l": [3,5,8], "l": [3,5,8],
"k": [10,20,30], "k": [10,20],
"p_in": [0.2,0.5,0.7], "p_in": [0.2,0.5,0.7],
"p_out": [0.1,0.2,0.3] "p_out": [0.1,0.2,0.3]
}, },
...@@ -52,7 +52,7 @@ parameters = { ...@@ -52,7 +52,7 @@ parameters = {
}, },
"geographical_threshold_graph": { "geographical_threshold_graph": {
"n": GRAPH_SIZE, "n": GRAPH_SIZE,
"theta": [0.1,0.2,0.4,0.6] "theta": [0.1,0.2,0.3]
}, },
} }
# Generating transition matrices for stochastic block model # Generating transition matrices for stochastic block model
......
...@@ -14,22 +14,32 @@ import argparse ...@@ -14,22 +14,32 @@ import argparse
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
parser.add_argument("dataset_dir") parser.add_argument("dataset_dir")
parser.add_argument("output_filename") parser.add_argument("output_filename")
parser.add_argument("-f", "--format", default="gexf", choices=["gexf", "gml", "txt"])
args = parser.parse_args() args = parser.parse_args()
# Collect every graph file of the requested format found in the dataset directory.
fns = glob.glob(args.dataset_dir + "/*." + args.format)
all_res = []
pbar = tqdm(fns)
for fn in pbar:
    pbar.set_description("run eval on " + fn)
    # Build the argument list directly: formatting a string and calling
    # .split() breaks as soon as a path contains whitespace.
    command = ["python", "evalNE_script.py", fn, "-f", args.format, "-n"]
    output = subprocess.run(command)
    if output.returncode != 0:
        print("Error! for the command :", " ".join(command))
        continue
    # The evaluation script is expected to have written "<fn>_results_lp";
    # the context manager closes the handle once the text has been parsed.
    with open(fn + "_results_lp") as results_file:
        df_results = parse_evalne_output(results_file.read())
    name = os.path.basename(fn)
    # "txt" is the value --format actually offers for edge lists; the previous
    # comparison against "edgelist" never matched, so txt files were handed to
    # the GEXF reader.
    if args.format == "txt":
        G = load_edgelist(path=fn)
    elif args.format == "gml":
        G = nx.read_gml(fn)
    else:
        G = nx.read_gexf(fn)
    # Labels of the ten nodes with the highest degree.
    top10node = (
        pd.DataFrame(list(G.degree()), columns="node degree".split())
        .sort_values("degree", ascending=False)
        .head(10)
        .node.values
    )
    df_results["nb_edge"] = G.number_of_edges()
    df_results["transitivity"] = nx.transitivity(G)
    df_results["density"] = nx.density(G)
...@@ -38,4 +48,4 @@ for fn in tqdm(fns): ...@@ -38,4 +48,4 @@ for fn in tqdm(fns):
df_results["filename"] = name df_results["filename"] = name
all_res.append(df_results) all_res.append(df_results)
# pd.concat raises an opaque ValueError on an empty list, which happens when no
# graph file matched or every evaluation run failed; exit with a clear message
# (and a non-zero status) instead.
if not all_res:
    raise SystemExit("No evaluation results to save: no graph file was processed successfully.")
# One tab-separated row per result, without the DataFrame index column.
pd.concat(all_res).to_csv(args.output_filename, sep="\t", index=False)
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment