From 06b30a66c029b9f923c5720f3f22564ff8f3bc20 Mon Sep 17 00:00:00 2001 From: Ikenna Oluigbo <ikenna-victor.oluigbo@etu.univ-lyon1.fr> Date: Fri, 17 Jun 2022 08:56:42 +0000 Subject: [PATCH] Upload identitywalker file --- identity2vec.py | 148 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 148 insertions(+) create mode 100644 identity2vec.py diff --git a/identity2vec.py b/identity2vec.py new file mode 100644 index 0000000..0eac056 --- /dev/null +++ b/identity2vec.py @@ -0,0 +1,148 @@ +import numpy as np +import networkx as nx +from tqdm import tqdm +import time +from decimal import Decimal + + +class Graph(): + def __init__(self, nx_Graph, e): + self.G = nx_Graph + self.e = e + + + def degree_node(self): + G = self.G + deg_dict = {} + for node, deg in G.degree: + deg_dict[node] = deg + return deg_dict + + + def eigenvector_centrality(self): + G = self.G + ev = nx.eigenvector_centrality(G, max_iter=1000) + return ev + + + def node_neighbors(self): + G = self.G + node_neigh = {} + for node in G.nodes: + node_neigh[node] = list(G.neighbors(node)) + return node_neigh + + + def test_source(self, s): + mnn = self.node_neighbors()[s] + arr_new_nn = np.array(mnn) + np.random.shuffle(arr_new_nn) + next_node = np.random.choice(arr_new_nn) + return next_node + + + def skip_visited(self, snn, visited): + if len(snn) != 1: + if len(visited) > 1: + last_visit = visited[-2] + if last_visit in snn: + snn.remove(last_visit) + self.skip_visited(snn, visited) + return snn + + + def identity_walker(self, node, walk_length): + walk = [node] + visited = [node] + while len(walk) < walk_length: + current_node = walk[-1] + nn = self.node_neighbors()[current_node] + if len(nn) == 0: + break + if visited[-1] == node: + next_node = self.test_source(visited[-1]) + walk.append(next_node) + visited.append(next_node) + else: + nn = self.skip_visited(nn, visited) + bounded_curr = walk[-2] + pdn = self.poisson_dist(nn, bounded_curr) + next_node = min(pdn, key=pdn.get) + walk.append(next_node) + visited.append(next_node) + + return walk + + + def s_path(self, source, destination): + G = self.G + if nx.has_path(G, source, destination): + path = nx.shortest_path(G, source, destination) + path_length = nx.shortest_path_length(G, source, destination) + else: + path = [] + path_length = len(path) + return path, path_length + + + def get_prob(self, n, curr): + G = self.G + neigh = list(G.neighbors(n)) + p_val = self.degree_node() + ev = self.eigenvector_centrality() + p = [] + q = [] + for node in neigh: + _, path_length = self.s_path(curr, node) + path_length += 0.01 #Prevent PathLength 0 + p.append(p_val[node] * ev[node]) + q.append(path_length) + return p, q + + + def poisson_dist(self, mnn, bounded_curr): + #k = Number of adjacent neighbors + #λ = Divergence rate + #pdn = (λ**k * e**-λ) / k! + #KLDivergence = #sum(p(x) * log(p(x)/q(x))) + e = self.e + pdn = {} + k = len(mnn) + for node in mnn: + rt = 0 + p, q = self.get_prob(node, bounded_curr) + for i in range(len(p)): + t = p[i] * np.log(p[i] / q[i]) + rt += t + drt = (1 / (self.degree_node()[bounded_curr]+self.eigenvector_centrality()[bounded_curr])) * rt + #poiss = Decimal((np.power(drt, k) * np.power(e, -drt))) / Decimal(np.math.factorial(k)) + poiss = (np.power(drt, k) * np.power(e, -drt)) / (np.math.factorial(k)) + pdn[node] = poiss + return pdn + + + def identity2vec_walk(self, num_walk, walk_length): + + G = self.G + print("STARTING RANDOM WALK... ") + print("Number of Nodes:", len(G.nodes)) + time.sleep(3) + + nodes = list(G.nodes) + walk_corpus = [] + + for cw in range(1, num_walk+1): + print("\n") + print("Current Walk: " + str(cw) + " of " + str(num_walk)) + for node in tqdm(nodes): + node_walk = self.identity_walker(node, walk_length) + walk_corpus.append(node_walk) + + return walk_corpus + + + + + + + \ No newline at end of file -- GitLab