Skip to content
Snippets Groups Projects
Commit 2240661f authored by Fize Jacques
Browse files

Update

parent 1bf80ff7
No related branches found
No related tags found
No related merge requests found
......@@ -3,27 +3,48 @@ from evalne.evaluation.split import EvalSplit as LPEvalSplit
from evalne.evaluation.score import Scoresheet
from evalne.utils import preprocess as pp
import networkx as nx
from tqdm import tqdm
from lib.utils import load_edgelist
import argparse
parser = argparse.ArgumentParser()
parser.add_argument("edgelist_graph_filename")
parser.add_argument("--ne","--network-embedding",action="store_true",help="If you want to use neural network embedding for link prediction")
parser.add_argument("graph_filename")
parser.add_argument("-n","--network-embedding",action="store_true",help="If you want to use neural network embedding for link prediction")
parser.add_argument("-v","--verbose",action="store_true")
parser.add_argument("-f","--format",default="gexf",choices=["gexf","gml","txt"])
args = parser.parse_args()#("data/fb_country_country_sample_6_size1000.txt".split())
args = parser.parse_args()
def log(x):
    """Print ``x`` to stdout, but only when the ``--verbose`` flag is set."""
    if not args.verbose:
        return
    print(x)
# Load and preprocess the network
G = nx.read_gexf(args.edgelist_graph_filename)#load_edgelist(args.edgelist_graph_filename,is_directed=True,weighted=True)
G, _ = pp.prep_graph(G,maincc=True)
log("Load Input Graph...")
G = None
if args.format == "txt":
G = load_edgelist(path=args.graph_filename,weighted=True)
elif args.format == "gml":
G = nx.read_gml(args.graph_filename)
else:
G = nx.read_gexf(args.graph_filename)
G, _ = pp.prep_graph(G,maincc=True)
# Summarise the preprocessed graph (only printed in verbose mode).
log("Graph Loaded !")
log("Size "+str(len(G)))
log("Nb of Edges "+str(G.number_of_edges()))
# Graph density is the ratio of existing edges to possible node pairs, not
# nodes/edges; nx.density also yields 0 for an edgeless graph instead of
# raising ZeroDivisionError.
log("Density "+ str(nx.density(G)))
log("Building link prediction dataset...")
# Create an evaluator and generate train/test edge split
traintest_split = LPEvalSplit()
traintest_split.compute_splits(G,split_alg="spanning_tree",train_frac=0.8,fe_ratio=1)
nee = LPEvaluator(traintest_split)
log("Dataset Built !")
# Create a Scoresheet to store the results
scoresheet = Scoresheet()
......@@ -40,7 +61,9 @@ methods = ['random_prediction',
]
# Evaluate baselines
for method in methods:
pbar = tqdm(methods,disable= (not args.verbose))
for method in pbar:
pbar.set_description("Evaluate "+method)
result = nee.evaluate_baseline(method=method, )
scoresheet.log_results(result)
......@@ -63,7 +86,9 @@ if args.network_embedding:
edge_emb = ['average', 'hadamard']
# Evaluate embedding methods
for i in range(len(methods)):
pbar = tqdm(enumerate(methods), disable=(not args.verbose))
for i,method in pbar:
pbar.set_description("Evaluate "+method)
command = commands[i] + " --input {} --output {} --representation-size {}"
results = nee.evaluate_cmd(method_name=methods[i], method_type='ne', command=command,
edge_embedding_methods=edge_emb, input_delim=' ', output_delim=' ', verbose=args.verbose)
......@@ -76,4 +101,6 @@ if args.network_embedding:
# Get output
if args.verbose:
scoresheet.print_tabular()
scoresheet.write_all(args.edgelist_graph_filename+"_results_lp")
\ No newline at end of file
log("Saving Output in " + args.graph_filename + "_results_lp")
scoresheet.write_all(args.graph_filename+"_results_lp")
log("Output Saved !")
\ No newline at end of file
......@@ -19,13 +19,13 @@ def generate_sbm_prob_matrix(nb_of_blocks,prob_btw_block=0.1):
M[j,i] = prob_btw_block
return M
GRAPH_SIZE = [50,100,200,500]
OUTPUT_DIR = "test_dataset/"
GRAPH_SIZE = [50,75,100]
OUTPUT_DIR = "data/theoric_graph_1/"
parameters = {
"planted_partition_graph": {
"l": [3,5,8],
"k": [10,20,30],
"k": [10,20],
"p_in": [0.2,0.5,0.7],
"p_out": [0.1,0.2,0.3]
},
......@@ -52,7 +52,7 @@ parameters = {
},
"geographical_threshold_graph": {
"n": GRAPH_SIZE,
"theta": [0.1,0.2,0.4,0.6]
"theta": [0.1,0.2,0.3]
},
}
# Generating transition matrices for stochastic block model
......
......@@ -14,22 +14,32 @@ import argparse
parser = argparse.ArgumentParser()
parser.add_argument("dataset_dir")
parser.add_argument("output_filename")
parser.add_argument("-f", "--format", default="gexf", choices=["gexf", "gml", "txt"])
args = parser.parse_args()
fns = glob.glob(args.dataset_dir + "/*.gexf")
fns = glob.glob(args.dataset_dir + "/*." + args.format)
all_res = []
for fn in tqdm(fns):
print("run eval on ", fn)
command = "python evalNE_script.py {0} -v".format(fn).split()
pbar = tqdm(fns)
for fn in pbar:
pbar.set_description("run eval on "+ fn)
# Build argv as a list so a path containing whitespace stays one argument
# (formatting a string and calling .split() would break it apart).
command = ["python", "evalNE_script.py", fn, "-f", args.format, "-n"]
output = subprocess.run(command)
if not output.returncode == 0:
print("Error! for the command :", " ".join(command))
continue
# Read the results file through a context manager so the handle is closed
# on every loop iteration instead of being left to the garbage collector.
with open(fn + "_results_lp") as results_file:
    df_results = parse_evalne_output(results_file.read())
name = os.path.basename(fn)
G = nx.read_gexf(fn)
top10node = pd.DataFrame(list(G.degree()), columns="node degree".split()).sort_values("degree",ascending=False).head(10).node.values
# Load the graph with the reader matching --format. The edge-list format is
# exposed on the command line as "txt" (the parser offers no "edgelist"
# choice), so that is the value to test for here.
if args.format == "txt":
    G = load_edgelist(path=fn)
elif args.format == "gml":
    G = nx.read_gml(fn)
else:
    G = nx.read_gexf(fn)
top10node = pd.DataFrame(list(G.degree()), columns="node degree".split()).sort_values("degree",
ascending=False).head(10).node.values
df_results["nb_edge"] = len(list(G.edges()))
df_results["transitivity"] = nx.transitivity(G)
df_results["density"] = nx.density(G)
......@@ -38,4 +48,4 @@ for fn in tqdm(fns):
df_results["filename"] = name
all_res.append(df_results)
pd.concat(all_res).to_csv(args.output_filename,sep="\t",index=None)
\ No newline at end of file
pd.concat(all_res).to_csv(args.output_filename, sep="\t", index=False)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment