'''
Reference implementation of node2vec.

Author: Aditya Grover

For more details, refer to the paper:
node2vec: Scalable Feature Learning for Networks
Aditya Grover and Jure Leskovec
Knowledge Discovery and Data Mining (KDD), 2016
'''

import argparse
import os
import time

import networkx as nx
import numpy as np
from gensim.models import Word2Vec

import node2vec

# Number of rows in every embedding matrix returned by learn_embeddings().
GRAPH_SIZE = 100

# read_all() saves graph number i to OUTPUT_PREFIX + str(i); np.save appends
# the ".npy" extension. The value is kept identical to the path the script
# has always written to.
OUTPUT_PREFIX = ("C:\\Users\\LENOVO\\PycharmProjects\\walid\\data_val"
                 + "\\transformed_")


def parse_args():
    '''
    Parses the node2vec arguments.

    Returns the argparse namespace with the attributes input, output,
    dimensions, walk_length, num_walks, window_size, iter, workers, p, q,
    weighted and directed.
    '''
    print("Parses the node2vec arguments.")
    parser = argparse.ArgumentParser(description="Run node2vec.")

    # Alternative input used during development: graph/dataset/data_train.npy
    parser.add_argument('--input', nargs='?', default='graph/size_val.npy',
                        help='Input graph path')

    parser.add_argument('--output', nargs='?', default='emb/karate.emb',
                        help='Embeddings path')

    parser.add_argument('--dimensions', type=int, default=6,
                        help='Number of dimensions. Default is 6.')

    parser.add_argument('--walk-length', type=int, default=5,
                        help='Length of walk per source. Default is 5.')

    parser.add_argument('--num-walks', type=int, default=10,
                        help='Number of walks per source. Default is 10.')

    parser.add_argument('--window-size', type=int, default=2,
                        help='Context size for optimization. Default is 2.')

    parser.add_argument('--iter', default=1, type=int,
                        help='Number of epochs in SGD')

    parser.add_argument('--workers', type=int, default=16,
                        help='Number of parallel workers. Default is 16.')

    parser.add_argument('--p', type=float, default=1,
                        help='Return hyperparameter. Default is 1.')

    parser.add_argument('--q', type=float, default=1,
                        help='Inout hyperparameter. Default is 1.')

    # Both switches of a pair must write to the SAME destination; otherwise
    # the negative switch only sets an attribute that nothing reads.
    parser.add_argument('--weighted', dest='weighted', action='store_true',
                        help='Boolean specifying (un)weighted. '
                             'Default is unweighted.')
    parser.add_argument('--unweighted', dest='weighted', action='store_false')
    parser.set_defaults(weighted=False)

    parser.add_argument('--directed', dest='directed', action='store_true',
                        help='Graph is (un)directed. Default is undirected.')
    parser.add_argument('--undirected', dest='directed', action='store_false')
    parser.set_defaults(directed=False)

    return parser.parse_args()


def read(arr):
    '''
    Builds an undirected networkx graph from an iterable of (a, b) pairs.

    Every pair becomes an edge with weight 1. The endpoints are created by
    add_edge itself, so no explicit add_node call is needed.
    '''
    G = nx.Graph()
    for a, b in arr:
        G.add_edge(a, b, weight=1)
    return G


def read_all():
    '''
    Embeds every graph stored in the array file at args.input.

    Reads the module-level ``args`` namespace. For each element of the loaded
    array it builds a graph with read(), simulates node2vec walks, learns the
    embeddings and saves them to OUTPUT_PREFIX + <position of the graph>.
    '''
    print("input", args.input)
    # NOTE(review): allow_pickle=True lets np.load unpickle arbitrary objects;
    # only point --input at files from a trusted source.
    data = np.load(args.input, allow_pickle=True)
    print(data)

    for index, edges in enumerate(data):
        nx_G = read(edges)
        G = node2vec.Graph(nx_G, args.directed, args.p, args.q)
        G.preprocess_transition_probs()
        walks = G.simulate_walks(args.num_walks, args.walk_length)
        result = learn_embeddings(walks)
        np.save(OUTPUT_PREFIX + str(index), result)
        print(index, "DONE")


def read_graph():
    '''
    Reads the input network in networkx.

    Reads the module-level ``args`` namespace: args.input is parsed as an
    edge list with integer node ids. Without args.weighted every edge gets
    weight 1; without args.directed the graph is converted to undirected.
    '''
    print("Reads the input network in networkx.")
    if args.weighted:
        G = nx.read_edgelist(args.input, nodetype=int,
                             data=(('weight', float),),
                             create_using=nx.DiGraph())
    else:
        G = nx.read_edgelist(args.input, nodetype=int,
                             create_using=nx.DiGraph())
        for edge in G.edges():
            G[edge[0]][edge[1]]['weight'] = 1

    if not args.directed:
        G = G.to_undirected()

    return G


def learn_embeddings(walks):
    '''
    Learn embeddings by optimizing the Skipgram objective using SGD.

    Reads the module-level ``args`` namespace for the Word2Vec settings.

    Returns a (GRAPH_SIZE, args.dimensions) array. For every node label in
    "0" .. str(vocabulary size) that occurs in the walks, the row at that
    label's vocabulary position holds its vector; all other rows stay zero.
    Raises IndexError if the vocabulary has more than GRAPH_SIZE entries.
    '''
    print("Learn embeddings by optimizing the Skipgram objective using SGD.")
    # Word2Vec works on string tokens, so node ids are stringified first.
    walks = [list(map(str, walk)) for walk in walks]
    # NOTE(review): size=, iter= and index2entity look like the gensim 3.x
    # names; confirm the installed gensim version before upgrading.
    model = Word2Vec(walks, size=args.dimensions, window=args.window_size,
                     min_count=0, sg=1, workers=args.workers, iter=args.iter)

    result = np.zeros((GRAPH_SIZE, int(args.dimensions)))
    vectors = model.wv.vectors
    # One label -> row lookup instead of repeated linear list.index() scans.
    row_of = {label: row for row, label in enumerate(model.wv.index2entity)}

    # Labels are strings, so node 0 has to be looked up as "0". The previous
    # test for the integer 0 could never match and left that row at zero.
    for node_id in range(len(vectors) + 1):
        row = row_of.get(str(node_id))
        if row is not None:
            # NOTE(review): rows follow the vocabulary order, not the node id;
            # kept as-is because saved outputs depend on it.
            result[row] = vectors[row]

    return result


def main(args):
    '''
    Pipeline for representational learning for all nodes in a graph.

    Single-graph variant: reads an edge list with read_graph(), simulates the
    walks and learns the embeddings. The embeddings are not saved or returned.
    '''
    print("Pipeline for representational learning for all nodes in a graph.")
    nx_G = read_graph()
    G = node2vec.Graph(nx_G, args.directed, args.p, args.q)
    G.preprocess_transition_probs()
    walks = G.simulate_walks(args.num_walks, args.walk_length)
    learn_embeddings(walks)


def main2(args):
    '''
    Pipeline for representational learning for all nodes in a graph.

    Multi-graph variant: runs read_all() and prints the elapsed wall time.
    '''
    print("Pipeline for representational learning for all nodes in a graph.")
    start = time.time()
    read_all()
    end = time.time()
    print("Time", end - start)


if __name__ == "__main__":
    # ``args`` is deliberately module-level: read_all(), read_graph() and
    # learn_embeddings() read it as a global.
    args = parse_args()
    print(os.getcwd())
    main2(args)