Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found
Select Git revision
  • main
1 result

Target

Select target project
No results found
Select Git revision
  • main
1 result
Show changes
148 files
+ 51859
0
Compare changes
  • Side-by-side
  • Inline

Files

.idea/.gitignore

0 → 100644
+3 −0
Original line number Original line Diff line number Diff line
# Default ignored files
/shelf/
/workspace.xml

.idea/.name

0 → 100644
+1 −0
Original line number Original line Diff line number Diff line
ANN_CLIQUES.py
 No newline at end of file

.idea/encodings.xml

0 → 100644
+7 −0
Original line number Original line Diff line number Diff line
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="Encoding">
    <file url="file://$PROJECT_DIR$/node2vec/src/graph/test.npy" charset="windows-1252" />
    <file url="file://$PROJECT_DIR$/node2vec/src/graph/test_Cliques.npy" charset="UTF-16" />
  </component>
</project>
 No newline at end of file
Original line number Original line Diff line number Diff line
<component name="InspectionProjectProfileManager">
  <settings>
    <option name="USE_PROJECT_PROFILE" value="false" />
    <version value="1.0" />
  </settings>
</component>
 No newline at end of file

.idea/misc.xml

0 → 100644
+4 −0
Original line number Original line Diff line number Diff line
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.7 (pythonProject2)" project-jdk-type="Python SDK" />
</project>
 No newline at end of file

.idea/modules.xml

0 → 100644
+8 −0
Original line number Original line Diff line number Diff line
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="ProjectModuleManager">
    <modules>
      <module fileurl="file://$PROJECT_DIR$/.idea/walid.iml" filepath="$PROJECT_DIR$/.idea/walid.iml" />
    </modules>
  </component>
</project>
 No newline at end of file

.idea/vcs.xml

0 → 100644
+6 −0
Original line number Original line Diff line number Diff line
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="VcsDirectoryMappings">
    <mapping directory="$PROJECT_DIR$" vcs="Git" />
  </component>
</project>
 No newline at end of file

.idea/walid.iml

0 → 100644
+8 −0
Original line number Original line Diff line number Diff line
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
  <component name="NewModuleRootManager">
    <content url="file://$MODULE_DIR$" />
    <orderEntry type="jdk" jdkName="Python 3.7 (pythonProject2)" jdkType="Python SDK" />
    <orderEntry type="sourceFolder" forTests="false" />
  </component>
</module>
 No newline at end of file

ANN.py

0 → 100644
+112 −0
Original line number Original line Diff line number Diff line
import random
import numpy as np,os
import numba as nb
import time
import networkx as nx

def delta(A,B,U,m):
    """Apply one in-place update step to the potentials ``U``.

    For every index ``i`` whose label ``str(i + 1)`` is a node of the
    module-level graph ``O``, the values ``V[j]`` of all ``j`` that are NOT
    adjacent to ``i`` in ``O`` are summed; that sum is subtracted from
    ``U[i]`` and ``B * h(i, sum)`` is added.

    Parameters:
        A: unused, kept for call compatibility.
        B: gain multiplied with the result of ``h``.
        U: mutable sequence of potentials, updated in place.
        m: unused, kept for call compatibility.

    Reads the module-level ``V`` (current outputs) and ``O`` (graph) and
    calls the module-level ``h``.
    """
    # V is not modified in this function, so this test is loop-invariant.
    any_active = np.count_nonzero(V == 1) > 0
    nodes = O.nodes()
    # The function is not jit-compiled, so numba's prange would behave as a
    # plain range anyway.
    for i in range(len(U)):
        inhibition = 0.0
        # Node labels are compared as strings (the removed alias ``np.str``
        # was just the builtin ``str``).
        node = str(i + 1)
        if any_active and node in nodes:
            for j in range(len(U)):
                other = str(j + 1)
                if V[j] != 0 and other in nodes and O.number_of_edges(node, other) != 0:
                    # Active neighbour of i: contributes nothing.
                    continue
                inhibition = inhibition + V[j]
        U[i] = U[i] + (-inhibition + B * h(i, inhibition))
def h(i,r):
    """Return 1 when ``r + V[i]`` is zero, otherwise 0.

    ``V`` is the module-level output vector.
    """
    return 1 if r + V[i] == 0 else 0
def output(X):
    """Threshold ``X`` into the module-level ``V``.

    ``V[i]`` becomes 1 where ``X[i] > 0`` and 0 everywhere else.
    """
    for idx, value in enumerate(X):
        V[idx] = 1 if value > 0 else 0

def CHANGE(A):
    """Strip the edges of the module-level graph ``O`` once a potential fired.

    Prints "true" for every strictly positive entry of the module-level ``U``.
    If at least one entry is positive, every edge between known nodes of
    ``O`` is removed and ``O.edges`` is returned; otherwise ``A`` is returned
    unchanged. The size of the returned collection is printed either way.
    """
    # 1-based indices of the positive potentials; only their count is used.
    # NOTE(review): the edge removal below does not depend on WHICH nodes
    # fired - confirm that removing all edges is intended.
    fired = [idx + 1 for idx in range(len(U)) if U[idx] > 0]
    for _ in fired:
        print("true")

    if fired:
        known_nodes = list(O.nodes())
        for k in known_nodes:
            for v in known_nodes:
                if O.number_of_edges(k, v) > 0:
                    O.remove_edge(k, v)
        A = O.edges

    print("new len A",len(A))
    return A
def Remplire(i):
    """Copy non-negative potentials of labelled nodes into ``Ufin``.

    ``i`` selects the label row ``lab[i]``. For each position where the
    module-level ``U`` is >= 0 and the label is > 0, the value of ``U`` is
    stored at the same position of the module-level ``Ufin``.
    """
    labels = lab[i]
    for pos, value in enumerate(U):
        if value >= 0 and labels[pos] > 0:
            Ufin[pos] = value

def Arrange(x,i):
    """Overwrite every entry of ``x`` according to the label row ``lab[i]``.

    Positions labelled 1 receive the module-level ``B``; all other positions
    receive a random value drawn from ``uniform(-400.5, -0.5)``.
    """
    labels = lab[i]
    for pos in range(len(x)):
        x[pos] = B if labels[pos] == 1 else random.uniform(-400.5,-0.5)

# Driver script: for every sample graph, run the update loop until the graph
# has no edges left, then write one output vector per graph.
# NOTE(review): allow_pickle=True unpickles the file content; only load
# trusted files.
lab = np.load("data/clique_2/labels.npy",allow_pickle=True)
dat = np.load("data/clique_2/sam.npy",allow_pickle=True)
start = time.time()
outputs = []

for i in range(len(lab)):

    print(dat[i])
    O = nx.Graph()
    # NOTE(review): add_edges_from stores ``nodetype`` as an edge attribute;
    # it does not convert node labels.
    O.add_edges_from(dat[i], nodetype=int)
    m = np.array(O.nodes)
    size = O.number_of_nodes()
    print("====== Increasing embedding step =======")
    adj = np.count_nonzero(lab[i] == 1)
    size = len(lab[i])

    Ufin = np.random.uniform(-19,-1,size)*0
    x = 1
    U = np.random.uniform(-19,-1,size)
    V = np.random.randint(1,size=size)
    B = (adj / (size * (len(dat[i]) * 2 / (size * (size - 1))))) * 20
    # Iterate on the CURRENT graph. The original tested dat[0], which skipped
    # this loop for every graph after the first one.
    while len(dat[i]) > 0:
        x = x+1
        U = np.random.uniform(-19,-1,size)
        delta(dat,B,U,m)
        output(U)
        dat[i] = CHANGE(dat[i])
        Remplire(i)

        O = nx.Graph()
        O.add_edges_from(dat[i])
        m = np.array(O.nodes)
    # ``out`` aliases ``Ufin`` (asarray does not copy), so it sees the values
    # written by Arrange below. Arrange assigns every position of Ufin.
    out = np.asarray(Ufin)
    Arrange(Ufin,i)
    output(Ufin)
    outputs.append(out)
    print("les resultats")
    print(np.count_nonzero(Ufin > 0))
    print(np.count_nonzero(V == 1))
    print(np.count_nonzero(lab[i] == 1))
end = time.time()
print("====== End of increasing ======")
print("Time", end-start)

out = np.asarray(outputs)
print(out.shape)
np.save(os.path.join("INoutput_data.npy"), out)  # write the generated outputs

ANN_CLIQUES.py

0 → 100644
+132 −0
Original line number Original line Diff line number Diff line
import random
import numpy as np,os
import numba as nb
import time
import networkx as nx

def delta(A,B,U,m,V,O):
    """Apply one in-place update step to the potentials ``U``.

    For every index ``i`` whose label ``str(i + 1)`` is a node of ``O``, the
    values ``V[j]`` of all ``j`` that are NOT adjacent to ``i`` in ``O`` are
    summed; that sum is subtracted from ``U[i]`` and ``B * h(i, sum, V)`` is
    added.

    Parameters:
        A: unused, kept for call compatibility.
        B: gain multiplied with the result of ``h``.
        U: mutable sequence of potentials, updated in place.
        m: unused, kept for call compatibility.
        V: current output vector (read only).
        O: graph object providing ``nodes()`` and ``number_of_edges(u, v)``.
    """
    # V is not modified in this function, so this test is loop-invariant.
    any_active = np.count_nonzero(V == 1) > 0
    nodes = O.nodes()
    # The function is not jit-compiled, so numba's prange would behave as a
    # plain range anyway.
    for i in range(len(U)):
        inhibition = 0.0
        # Node labels are compared as strings (the removed alias ``np.str``
        # was just the builtin ``str``).
        node = str(i + 1)
        if any_active and node in nodes:
            for j in range(len(U)):
                other = str(j + 1)
                if V[j] != 0 and other in nodes and O.number_of_edges(node, other) != 0:
                    # Active neighbour of i: contributes nothing.
                    continue
                inhibition = inhibition + V[j]
        U[i] = U[i] + (-inhibition + B * h(i, inhibition, V))
def h(i,r,V):
    """Return 1 when ``r + V[i]`` is zero, otherwise 0."""
    return 1 if r + V[i] == 0 else 0
def output(X,V):
    """Threshold ``X`` into ``V`` in place.

    ``V[i]`` becomes 1 where ``X[i] > 0`` and 0 everywhere else.
    """
    for idx, value in enumerate(X):
        V[idx] = 1 if value > 0 else 0

def CHANGE(A,O,U):
    """Strip the edges of ``O`` once at least one potential in ``U`` fired.

    If any entry of ``U`` is strictly positive, every edge between known
    nodes of ``O`` is removed and ``O.edges`` is returned; otherwise ``A`` is
    returned unchanged.
    """
    # NOTE(review): the edge removal does not depend on WHICH entries of U
    # are positive - confirm that removing all edges is intended.
    fired = [idx + 1 for idx in range(len(U)) if U[idx] > 0]
    if not fired:
        return A

    known_nodes = list(O.nodes())
    for k in known_nodes:
        for v in known_nodes:
            if O.number_of_edges(k, v) > 0:
                O.remove_edge(k, v)
    return O.edges
def Remplire(U,Ufin,lab):
    """Copy non-negative potentials of labelled positions from ``U`` to ``Ufin``.

    A position is copied when ``U`` is >= 0 and ``lab`` is > 0 there; all
    other positions of ``Ufin`` are left untouched.
    """
    for pos, value in enumerate(U):
        if value >= 0 and lab[pos] > 0:
            Ufin[pos] = value
"""
        else:
            if lab[i] == 0:
                Ufin[i] = random.uniform(-400.5, -0.5)
            else:
                Ufin[i] = random.uniform(0.5, 400.5)
"""
def Arrange(lab, x, B, V):
    """Fill ``x`` and ``V`` in place from the labels.

    Where ``lab`` is 1, ``x`` gets ``B`` and ``V`` gets 1; everywhere else
    ``x`` gets ``-B`` and ``V`` gets 0.
    """
    for pos in range(len(x)):
        labelled = lab[pos] == 1
        x[pos] = B if labelled else -B
        V[pos] = 1 if labelled else 0


def PatternFinding(dat,lab):
    """Build a label-derived vector for one graph and save it to disk.

    A graph is built from ``dat``, the gain ``B`` is computed from the number
    of labelled entries and the edge count, ``Arrange`` fills ``Ufin`` (and
    ``V``) from ``lab``, and ``Ufin`` is written to ``INoutput_data_val.npy``
    in the current working directory. Nothing is returned.

    NOTE(review): ``B`` divides by an expression that is zero when the graph
    has no edges - confirm callers never pass an edgeless graph.
    """

    O = nx.Graph(dat)
    m = np.array(O.nodes)
    size = O.number_of_nodes()
    print("====== Increasing embedding step =======")
    adj = np.count_nonzero(lab == 1)
    # ``size`` is overridden here: the label length is what is used below.
    size = len(lab)
    for i in range(1):
        Ufin = np.random.uniform(-1, 0, size) * 0
        #print("ufin",Ufin)
        #print(len(dat) * 2 / ((size-1) * (size - 1)))
        x = 1
        U = np.random.uniform(-1, 0, size)
        V = np.random.randint(1, size=size)
        B = (adj / (size * (len(list(O.edges)) * 2 / (size * (size - 1)))))
        #print("B",B)
        # Arrange assigns every position of Ufin and V from the labels.
        Arrange(lab,Ufin,B,V)
        #print(np.count_nonzero(V == 1))
        #print(np.count_nonzero(lab == 1))
        """
        while len(dat) > 0:
            x = x + 1
            U = np.random.uniform(-19, -1, size)
            delta(dat, B, U, m, V, O)
            output(U, V)
            # print(np.count_nonzero(U >= 0))
            # print(np.count_nonzero(lab == 1))
            dat = CHANGE(dat, O, U)
            print("hna")
            Remplire(U,Ufin,lab)
            # print("size",np.count_nonzero(Ufin >= 0),np.count_nonzero(U >= 0))
            # print(len(dat))
            O = nx.Graph(dat)
            #O.add_edges_from(dat)
            m = np.array(O.nodes)
        out = np.asarray(Ufin)
        Arrange(lab, Ufin, B)
        output(Ufin, V)
        outputs.append(out)
        print(np.count_nonzero(Ufin > 0))
        print(np.count_nonzero(V == 1))
        print(np.count_nonzero(lab == 1))1
        """
    #end = time.time()
    #print("====== End of increasing ======")
    #print("Time", end - start)
    out = np.asarray(Ufin)

    #out = np.asarray(outputs)
    # print(outputs)
    # print(lab)
    np.save(os.path.join("INoutput_data_val.npy"), out)  # write the generated outputs

#lab = np.load("node2vec/src/graph/labfin.npy",allow_pickle=True)
#dat = np.load("node2vec/src/graph/sam.npy",allow_pickle=True)
#print(lab)
#print(type(dat))
#PatternFinding(dat,lab)
 No newline at end of file

Bip_selection.py

0 → 100644
+35 −0
Original line number Original line Diff line number Diff line

import random,numpy as np,os

def Voisin(x,k):
    """Return False when ``x`` is contained in one of the groups of ``k``.

    ``x`` and every element of ``k`` are compared as sets. An empty ``k``
    yields True.
    """
    if len(k) == 0:
        return True
    candidate = set(x)
    return not any(candidate.issubset(set(group)) for group in k)

# Driver script: group consecutive rows of the loaded table that share their
# first element and their tail (x[2:]) and write one "bc ..." line per group.
# NOTE(review): allow_pickle=True unpickles the file content; only load
# trusted files.
V = np.load("node2vec/src/graph/test_Bip.npy", allow_pickle=True)
V = list(V)
k = []
T = []
stri = "bc "
compteur = 0

# The context manager guarantees the model file is flushed and closed.
with open("example1.model", "w") as fo:
    for i in range(len(V)):
        # ``compteur`` counts rows already consumed by the inner scan, so
        # ``i + compteur`` runs past the end of V before ``i`` does; stop
        # there instead of raising IndexError.
        if i + compteur >= len(V):
            break
        print(V[i+compteur])
        x = V[i+compteur]
        k = x[:2]
        # NOTE(review): T is never filled, so Voisin always returns True here.
        if Voisin(x,T):
            for j in range(V.index(x)+1 ,len(V)):
                y = V[j]
                compteur = compteur + 1
                if x[2:] == y[2:] and x[0] == y[0]:
                    k.append(y[1])
                else:
                    break
            if len(k) > 1:
                strt = ' '.join(map(str, k))
                stry = ' '.join(map(str, x[2:]))
                fo.write(stri + strt + ',' + stry + "\n")

Bipartie_etiquetage.py

0 → 100644
+26 −0
Original line number Original line Diff line number Diff line
import networkx as nx

def ensemble(v,K):
    """Return True when ``v`` has no single edge to any member of ``K``.

    Uses the module-level graph ``B``; returns False as soon as
    ``B.number_of_edges(v, member)`` equals 1. Prints the types involved for
    every member inspected.
    """
    for member in K:
        print(type(v),type(member),member)
        if B.number_of_edges(v, member) == 1:
            return False
    return True
# Driver script: read an edge list and distribute its nodes into groups whose
# members share no edge (as decided by ``ensemble``).
# The context manager closes the edge-list file once it has been parsed.
with open("C:/Users/LENOVO/Desktop/karate.edgelist", "rb") as fh:
    B = nx.read_edgelist(fh,nodetype=int)
nx.draw(B,with_labels=True)
K = list(B.nodes)
V = []
V.append([K[0]])
print(K)
print(V,V[0])
# NOTE(review): K[0] is already in V[0] and is visited again below, and a node
# is appended to EVERY compatible group (there is no break) - confirm this is
# the intended grouping.
for i in range(len(K)):
    print(K[i],V)
    add = False
    for j in range(len(V)):
       if ensemble(K[i],V[j]):
           V[j].append(K[i])
           add = True
    if not add:
        V.append([K[i]])

Combine.py

0 → 100644
+35 −0
Original line number Original line Diff line number Diff line
import numpy as np,os
import time

# Driver script: append one value from the second array to each row of the
# embedding array and save the result as "data_tr.npy".
nodes = []
# NOTE(review): allow_pickle=True unpickles the file content; only load
# trusted files.
Embedd2 = np.load("INoutput_data_val.npy",allow_pickle=True)
Embedd = np.load("data_val/transformed_0.npy",allow_pickle=True)
print(Embedd.shape)
print(Embedd2.shape)
"""
for i in range(len(Embedd)):
    liste = []
    #print(Embedd)
    for j in range(len(Embedd2[i])):
        nx = np.append(Embedd[i][j],Embedd2[i][j])
        liste.append(nx)
    nodes.append(liste)
    print(i)
node = np.asarray(nodes)
print(node.shape)
np.save(("data_val.npy"),node) #generation des outputs
"""
liste = []
start = time.time()

for i in range(len(Embedd)):
    # Row i of Embedd is extended with element i of the FIRST row of Embedd2.
    # NOTE(review): assumes Embedd2 is at least 2-D - TODO confirm.
    nx = np.append(Embedd[i],Embedd2[0][i])
    liste.append(nx)
    #print(nx)
    #print(i)
node = np.asarray(liste)
end = time.time()
print(node.shape)
print("Time",end - start)
#print(node)
np.save(("data_tr.npy"),node) # write the generated outputs
 No newline at end of file

Generate.py

0 → 100644
+118 −0
Original line number Original line Diff line number Diff line
from networkx.generators import community
from networkx.generators import random_graphs
from networkx.algorithms import clique
import networkx as nx
import random,numpy as np,os
import copy
import matplotlib.pyplot as plt

def generate_clique(nb,size,total_size):
    """Generate a random graph with planted cliques and its node labels.

    A caveman graph of ``nb`` cliques of ``size`` nodes is composed with a
    G(n, p) random graph (``n = total_size``, ``p = 0.1``), and the node ids
    are shuffled. Every node that belongs to a maximal clique of at least 4
    nodes is labelled 1.

    Returns:
        (G_new, output, x, nodes_cliques, size, nb) where ``output`` is the
        0/1 label vector of length ``total_size``, ``nodes_cliques`` the
        sorted unique clique node ids and ``x`` their count.
    """
    sub = community.caveman_graph(nb, size)  # planted cliques
    # Random background graph with edge probability 0.1 (i.e. 10 %).
    G = random_graphs.fast_gnp_random_graph(total_size, 0.1)
    G = nx.compose(G, sub)  # merge both graphs
    # Random permutation of the node ids.
    node_mapping = dict(zip(G.nodes(), sorted(G.nodes(), key=lambda k: random.random())))
    G_new = nx.relabel_nodes(G, node_mapping)
    members = [
        node
        for found in clique.find_cliques(G_new)
        if len(found) >= 4
        for node in found
    ]
    # An explicit integer dtype keeps the array usable as an index even when
    # no clique is found (an empty list would otherwise become float64).
    nodes_cliques = np.unique(np.asarray(members, dtype=int))
    x = len(nodes_cliques)
    output = np.zeros(total_size)
    output[nodes_cliques] = 1
    return G_new,output,x,nodes_cliques,size,nb

def generate_without_clique(total_size):
    """Generate a random graph that has no maximal clique of 6 or more nodes.

    G(n, p) graphs (``n = total_size``, ``p = 0.04``) are drawn until one
    qualifies. Returns the graph and an all-zero label vector.
    """
    G = random_graphs.fast_gnp_random_graph(total_size,0.04)
    # Redraw for as long as a large clique is present.
    while any(len(found) >= 6 for found in clique.find_cliques(G)):
        G = random_graphs.fast_gnp_random_graph(total_size,0.04)
    return G, np.zeros(total_size)


def to_input_shape(G):
    """Return the edges of ``G`` as a list of ``[source, target]`` pairs."""
    return [[a, b] for a, b in G.edges()]

# Driver script: generate Gr_size graphs with planted cliques, keep only the
# edges that touch at least one clique node, and save edges and labels.
BASE_PATH = "data"
DIR ="clique_1"

if(not os.path.exists(BASE_PATH)):
    os.mkdir(BASE_PATH)
PATH = os.path.join(BASE_PATH,DIR)

if(not os.path.exists(PATH)):
    os.mkdir(PATH)

total_size = 100
max_size_clique = 10
max_clique_count = 10
outputs = []
Gr_size = 1000
graph = []
data = []
lab = []
nodes = []
input = []
sz = []
B = [None]*total_size
x = 0
for id in range(Gr_size):
    # y: number of clique nodes, z: clique node ids.
    G,labels,y,z,s,ng = generate_clique(random.randint(4,max_clique_count),random.randint(4,max_size_clique),total_size)

    tab = to_input_shape(G)
    graph.append(tab)
    A = nx.adjacency_matrix(G, nodelist=range(total_size), weight='weight')
    A.setdiag(A.diagonal() * 2)
    A = A.todense()
    B = copy.deepcopy(A)
    # Zero the adjacency rows of nodes that are not in a clique.
    for i in range(len(B)):
        if i not in z:
            B[i] = 0
    outputs.append(y)
    lab.append(labels)
    data.append(B)
    T = nx.edges(G)
    T = np.asarray(T)
    E = T
    for i in range(len(E)):
        x = E[i,0]
        c = E[i,1]
        if (x not in z) and (c not in z):
            # Neither endpoint is a clique node: locate this edge's row in T
            # and drop it.
            w = -1
            # Compare against the edge endpoints (x, c). The original used
            # (x, y), i.e. the clique-node COUNT, so the row was not found and
            # np.delete(T, -1) removed the last row instead.
            t = np.argwhere(T == (x, c))
            d = np.argwhere(T == (c, x))
            t = np.concatenate((t, d))

            # A row index that appears twice matched on both columns.
            for r in range(len(t)):
                for k in range(len(t)):
                    if (t[r, 0] == t[k, 0]) and r != k and w != t[r, 0]:
                        w = t[r, 0]
                        #print(w)
            P = np.delete(T,w,axis=0)
            T=P
    print("id",id)
    sz.append(T)


# The per-graph edge arrays generally differ in length; an explicit object
# dtype is required to store such a ragged list on NumPy >= 1.24.
np.save(os.path.join(PATH, "size.npy"), np.asarray(sz, dtype=object)) ###########################
#np.save(os.path.join(PATH, "data.npy"), np.asarray(graph)) ############################
#np.save(os.path.join(PATH, "data2.npy"), np.asarray(data)) ##########################
#print("out",sz[0])
#print("out",graph[0])
#print("out",data[0])

output = np.asarray(outputs)
#np.save(os.path.join(PATH,"output.npy"),output) # write the outputs #######################
#print("out",output[0])
labs = np.asarray(lab)
np.save(os.path.join(PATH,"labels2.npy"),labs) # write the labels ##########################
#print("labs",labs[0])
#print(s)
print(len(sz[0]))

#nx.draw(G,with_labels=True)
#plt.show()
 No newline at end of file

Generate_biparti.py

0 → 100644
+102 −0
Original line number Original line Diff line number Diff line

import networkx as nx
import matplotlib.pyplot as plt
from networkx.generators import random_graphs
import random,numpy as np,os
import copy

def generate_clique(nb,size,total_size):
    """Generate a random graph with a planted complete bipartite subgraph.

    A complete bipartite graph with parts of ``nb`` and ``size`` nodes is
    composed with a G(n, p) random graph (``n = total_size``, ``p = 0.01``)
    and the node ids are shuffled. Every row of the adjacency matrix with
    more than 4 entries equal to 1 is appended to the module-level list
    ``Bipartie`` and labelled 1.

    Returns:
        (G_new, output, len(Bipartie), Bipartie, size + nb, A) where ``A`` is
        the dense adjacency matrix and ``output`` the 0/1 label vector.
    """
    planted = nx.complete_bipartite_graph(nb, size)
    # Random background graph with edge probability 0.01 (i.e. 1 %).
    G = random_graphs.fast_gnp_random_graph(total_size,0.01)
    merged = nx.compose(G, planted)
    # Random permutation of the node ids.
    shuffled = sorted(merged.nodes(), key=lambda k: random.random())
    G_new = nx.relabel_nodes(merged, dict(zip(merged.nodes(), shuffled)))
    A = nx.adjacency_matrix(G_new,nodelist=sorted(G.nodes()),weight='weight')
    A.setdiag(A.diagonal() * 2)
    A = A.todense()
    Bipartie.extend(
        row for row in range(len(A)) if np.count_nonzero(A[row] == 1) > 4
    )
    output = np.zeros(total_size)
    output[Bipartie] = 1
    return G_new,output,len(Bipartie),Bipartie,(size+nb),A

def to_input_shape(G):
    """Return the edges of ``G`` as a list of ``[source, target]`` pairs."""
    pairs = []
    for edge in G.edges():
        source, target = edge
        pairs.append([source, target])
    return pairs

# Driver script: generate Gr_size graphs with a planted bipartite subgraph,
# drop the edges that touch no labelled node, draw the last graph and save
# all arrays under data/Bipartie.
BASE_PATH = "data"
DIR ="Bipartie"

if(not os.path.exists(BASE_PATH)):
    os.mkdir(BASE_PATH)
PATH = os.path.join(BASE_PATH,DIR)

if(not os.path.exists(PATH)):
    os.mkdir(PATH)

total_size = 100
max_size_clique = 30
max_clique_count = 30
outputs = []
Gr_size = 1
graph = []
data = []
lab = []
nodes = []
input = []
sz = []
B = [None]*total_size
x = 0
for id in range(Gr_size):
    # generate_clique appends to this module-level list, so it is reset for
    # every graph.
    Bipartie = []
    # y: number of labelled nodes, z: labelled node ids, A: dense adjacency.
    G,labels,y,z,s,A = generate_clique(random.randint(5,max_clique_count),random.randint(5,max_size_clique),total_size)
    tab = to_input_shape(G)
    graph.append(tab)
    B = copy.deepcopy(A)
    input.append(A)
    # Zero the adjacency rows of nodes that are not labelled.
    for i in range(len(B)):
        if i not in z:
            B[i] = 0
    outputs.append(y)
    lab.append(labels)
    data.append(B)
    T = nx.edges(G)
    T = np.asarray(T)
    E = T

    for i in range(len(E)):
        x = E[i, 0]
        c = E[i, 1]
        if (x not in z) and (c not in z):
            # Neither endpoint is labelled: locate this edge's row in T and
            # drop it.
            # NOTE(review): if no row is found w stays -1 and np.delete
            # removes the LAST row - confirm this cannot happen.
            w = -1
            t = np.argwhere(T == (x, c))
            d = np.argwhere(T == (c, x))
            t = np.concatenate((t, d))
            # A row index that appears twice matched on both columns.
            for r in range(len(t)):
                for k in range(len(t)):
                    if (t[r, 0] == t[k, 0]) and r != k and w != t[r, 0]:
                        w = t[r, 0]
                        print("w", w)
            P = np.delete(T, w, axis=0)
            print(len(P), E[i])
            T = P

    sz.append(T)


output = np.asarray(outputs)
labs = np.asarray(lab)
node = np.asarray(input)


nx.draw(G, with_labels=True)
plt.show()
# Only the filtered edge list of the first graph is saved to size.npy.
np.save(os.path.join(PATH, "size.npy"), np.asarray(sz[0])) ###########################
np.save(os.path.join(PATH, "data.npy"), np.asarray(graph)) ############################
np.save(os.path.join(PATH, "data2.npy"), np.asarray(data)) ##########################
np.save(os.path.join(PATH,"output.npy"),output) # write the outputs #######################
np.save(os.path.join(PATH,"labels2.npy"),labs) # write the labels ##########################
np.save(os.path.join(PATH,"nodes.npy"),node) # write the adjacency matrices

Generate_chaines.py

0 → 100644
+128 −0
Original line number Original line Diff line number Diff line
import networkx as nx
import matplotlib.pyplot as plt
from networkx.generators import random_graphs
import random,numpy as np,os
import copy

def find_all_paths(graph, start, end, path=None):
    """Return every simple path from ``start`` to ``end`` in ``graph``.

    Parameters:
        graph: mapping-like object where ``graph[node]`` iterates over the
            neighbours of ``node``.
        start: node the paths begin at.
        end: node the paths finish at.
        path: nodes already visited (used by the recursion); defaults to an
            empty path.

    Returns:
        A list of paths, each a list of nodes in visiting order. The list is
        empty when ``end`` cannot be reached.
    """
    # ``None`` replaces the shared mutable default list; a new list is built
    # on every call so the caller's ``path`` is never modified.
    path = ([] if path is None else path) + [start]
    if start == end:
        return [path]
    paths = []
    for node in graph[start]:
        if node not in path:
            paths.extend(find_all_paths(graph, node, end, path))
    return paths
def Chaines():
    """Draw a random 100-node graph and collect its maximal chains.

    For every ordered node pair connected by more than one simple path, the
    paths whose node set is contained in a strictly longer path of the same
    pair are discarded; the remaining paths are collected.

    Returns:
        (G, R) with the generated graph and the list of kept paths.
    """
    G = random_graphs.fast_gnp_random_graph(100, 0.01)
    nx.draw(G,with_labels=True)
    plt.show()

    chaine = nx.chain_decomposition(G,1)  # result is not used below

    # All simple paths, grouped per ordered (source, target) pair.
    per_pair = []
    for source in range(100):
        print("hnaya")
        for target in range(100):
            if source == target:
                continue
            found = find_all_paths( G, source, target)
            if found:
                per_pair.append(found)

    R = []
    for group in per_pair:
        # Pairs with a single path are skipped entirely.
        if len(group) <= 1:
            continue
        # Indices of paths covered by a strictly longer path of the group.
        covered = {
            idx
            for idx, short in enumerate(group)
            for other in group
            if len(other) > len(short) and set(short).issubset(set(other))
        }
        R.extend(p for idx, p in enumerate(group) if idx not in covered)
    print(R)
    return G,R
def generate_clique(nb,size,total_size):
    """Generate a random graph and label the nodes that lie on a chain.

    The graph and its chains come from ``Chaines``. Every node that appears
    in at least one chain is labelled 1.

    Returns:
        (G, output, len(Chaine), Chaine, size + nb, A) where ``Chaine`` lists
        the chain nodes in order of first appearance, ``output`` is the 0/1
        label vector of length ``total_size`` and ``A`` the dense adjacency
        matrix.
    """
    G, chains = Chaines()
    A = nx.adjacency_matrix(G,nodelist=sorted(G.nodes()),weight='weight')
    A.setdiag(A.diagonal() * 2)
    A = A.todense()
    # Unique nodes, keeping the order in which they are first met.
    Chaine = list(dict.fromkeys(node for path in chains for node in path))
    print("hay la chaine",Chaine)
    output = np.zeros(total_size)
    output[Chaine] = 1
    return G,output,len(Chaine),Chaine,(size+nb),A

def to_input_shape(G):
    """Return the edges of ``G`` as a list of ``[source, target]`` pairs."""
    return [[source, target] for source, target in G.edges()]

# Driver script: generate one chain-labelled graph and print its arrays.
BASE_PATH = "data"
# NOTE(review): the output directory is "Bipartie" although this script
# generates chains - confirm this is intended.
DIR ="Bipartie"

if(not os.path.exists(BASE_PATH)):
    os.mkdir(BASE_PATH)
PATH = os.path.join(BASE_PATH,DIR)

if(not os.path.exists(PATH)):
    os.mkdir(PATH)

total_size = 100
max_size_clique = 30
max_clique_count = 30
outputs = []
Gr_size = 1000
graph = []
data = []
lab = []
nodes = []
input = []
sz = []
B = [None]*total_size
x = 0
# Only a single graph is generated; Gr_size is not used by this loop.
for id in range(1):
    Bipartie = []
    # y: number of chain nodes, z: chain node ids, A: dense adjacency.
    G,labels,y,z,s,A = generate_clique(random.randint(5,max_clique_count),random.randint(5,max_size_clique),total_size)
    tab = to_input_shape(G)
    graph.append(tab)
    B = copy.deepcopy(A)
    input.append(A)
    # Zero the adjacency rows of nodes that are not on a chain.
    for i in range(len(B)):
        if i not in z:
            B[i] = 0
    outputs.append(y)
    lab.append(labels)
    data.append(B)
    sz.append(s)
    print(id)

output = np.asarray(outputs)
labs = np.asarray(lab)
node = np.asarray(input)
print("sz",sz[0])
print("graphe",graph[0])
print("matrix",data[0])
print("out",output[0])
print("labs",labs[0])
print("nodes",node[0])

"""
np.save(os.path.join(PATH, "size.npy"), np.asarray(sz)) ###########################
np.save(os.path.join(PATH, "data.npy"), np.asarray(graph)) ############################
np.save(os.path.join(PATH, "data2.npy"), np.asarray(data)) ##########################
np.save(os.path.join(PATH,"output.npy"),output) #generation des outputs #######################
np.save(os.path.join(PATH,"labels2.npy"),labs) #generation des outputs ##########################
np.save(os.path.join(PATH,"nodes.npy"),node) #generation des outputs
"""

Generate_stars.py

0 → 100644
+124 −0
Original line number Original line Diff line number Diff line
from networkx.generators import community
from networkx.generators import random_graphs
from networkx.algorithms import clique
import networkx as nx
import random,numpy as np,os
import copy
import matplotlib.pyplot as plt

def generate_clique(nb,size,total_size):
    """Generate a random graph with ``nb`` planted 6-node stars.

    The stars are composed with a G(n, p) random graph (``n = total_size``,
    ``p = 1e-9``) and the node ids are shuffled. Every node whose adjacency
    row holds at least 5 entries equal to 1 is treated as a star centre; the
    centre and its neighbours are labelled 1. ``size`` is not used.

    Returns:
        (G_new, sortie, 4, B, 5, A, k) where ``sortie`` is the 0/1 label
        vector, ``A`` the dense adjacency matrix, ``B`` a copy of ``A`` with
        the rows of non-centre nodes zeroed and ``k`` the list of labelled
        node ids (may contain duplicates).
    """
    stars = nx.Graph()
    # Each star uses 6 consecutive ids; the first one is the centre.
    for star in range(nb):
        first = 6 * star
        nx.add_star(stars, [first + offset for offset in range(6)])
    # Random background graph with edge probability 1e-9.
    background = random_graphs.fast_gnp_random_graph(total_size,0.000000001)
    G = nx.compose(background, stars)
    # Random permutation of the node ids.
    shuffled = sorted(G.nodes(), key=lambda k: random.random())
    G_new = nx.relabel_nodes(G, dict(zip(G.nodes(), shuffled)))
    A = nx.adjacency_matrix(G_new, nodelist=range(total_size), weight='weight')
    A.setdiag(A.diagonal() * 2)
    A = A.todense()
    B = copy.deepcopy(A)
    sortie = np.zeros(total_size)
    k = []
    for row in range(len(B)):
        if np.count_nonzero(A[row] == 1) >= 5:
            sortie[row] = 1
            k.append(row)
            for col in range(len(B)):
                if B[row, col] == 1:
                    sortie[col] = 1
                    k.append(col)
        else:
            B[row] = 0
    print("k",len(k),k)
    return G_new,sortie,4,B,5,A,k

def to_input_shape(G):
    """Return the edges of ``G`` as a list of two-element ``[node, neighbour]`` lists."""
    return [[source, target] for source, target in G.edges()]

# Dataset generation script: builds Gr_size random graphs with planted stars
# and saves the resulting arrays as .npy files under data/star.
BASE_PATH = "data"
DIR ="star"

if(not os.path.exists(BASE_PATH)):
    os.mkdir(BASE_PATH)
PATH = os.path.join(BASE_PATH,DIR)

if(not os.path.exists(PATH)):
    os.mkdir(PATH)

total_size = 100  # node count passed to generate_clique for every graph
max_star_clique = 20  # upper bound of the random `size` argument (generate_clique does not use it)
max_star_count = 12  # upper bound on the number of stars planted per graph
outputs = []  # third return value of generate_clique for each graph (the constant 4)
Gr_size = 100  # number of graphs to generate
graph = []  # edge list of each graph, as [a, b] pairs
data = []  # row-filtered adjacency matrix B of each graph
lab = []  # per-node 0/1 label vector of each graph
nodes = []  # NOTE(review): never used in this script
input = []  # adjacency matrix A of each graph; NOTE(review): shadows the builtin `input`
sz = []  # pruned edge array T of each graph
x = 0  # overwritten inside the loop below
for id in range(Gr_size):  # NOTE(review): `id` shadows the builtin of the same name
    # o is the list of flagged node ids returned by generate_clique; s (the constant 5) is unused below.
    G,labels,y,B,s,A,o = generate_clique(random.randint(4,max_star_count),random.randint(4,max_star_clique),total_size)
    #G,labels,y,z,s = generate_clique(,4,total_size)
    tab = to_input_shape(G)
    graph.append(tab)
    outputs.append(y)
    lab.append(labels)
    input.append(A)
    data.append(B)
    T = nx.edges(G)
    T = np.asarray(T)  # presumably an (n_edges, 2) array of node ids — the indexing below relies on that; TODO confirm
    print("hay len ya t7a7na",len(T),T)
    E = T  # E keeps referring to the full edge array while T is rebound to pruned copies below
    print("hay len ya t7a7na",len(T))

    # Drop from T the edges whose two endpoints are both absent from o.
    for i in range(len(E)):
        x = E[i,0]
        c = E[i,1]
        if (x not in o) and (c not in o):
            w = -1
            # [row, col] positions where T equals the broadcast pair (x, c), then (c, x).
            t = np.argwhere(T == (x, c))
            d = np.argwhere(T == (c, x))
            t = np.concatenate((t, d))
            print("madkhelch")
            # A row index that occurs twice in t is taken as the row of the edge to delete.
            for r in range(len(t)):
                for k in range(len(t)):
                    if (t[r, 0] == t[k, 0]) and r != k and w != t[r, 0]:
                        w = t[r, 0]
                        print("w",w)
            # NOTE(review): if no row qualified, w is still -1 and np.delete removes the LAST row of T — confirm this is intended.
            P = np.delete(T,w,axis=0)
            print(len(P),E[i])
            T=P
    print("hay len ya t7a7na",len(T))

    sz.append(T)
    print(T)
    print(y)
    print(id)


print("graphe",len(sz[0]),len(sz))
print("matrix",np.count_nonzero(data[0]==1))
# Only the pruned edge array of the first graph (sz[0]) is written to size.npy.
np.save(os.path.join(PATH, "size.npy"), np.asarray(sz[0]))
# NOTE(review): the per-graph edge lists may differ in length; check that np.asarray accepts this ragged input with the installed NumPy.
np.save(os.path.join(PATH, "data.npy"), np.asarray(graph))
np.save(os.path.join(PATH, "data2.npy"), np.asarray(data))
output = np.asarray(outputs)
np.save(os.path.join(PATH,"output.npy"),output) # save the per-graph outputs
print("out",output[0])
labs = np.asarray(lab)
np.save(os.path.join(PATH,"labels2.npy"),labs) # save the per-node label vectors
print("labs",np.count_nonzero(labs[0]==1))
node = np.asarray(input)
np.save(os.path.join(PATH,"nodes.npy"),node) # save the adjacency matrices
print("nodes",np.count_nonzero(node[0]==1))
 No newline at end of file
+2 −0
Original line number Original line Diff line number Diff line
# Auto detect text files and perform LF normalization
* text=auto
+114 −0
Original line number Original line Diff line number Diff line
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
#  Usually these files are written by a python script from a template
#  before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/
+21 −0
Original line number Original line Diff line number Diff line
MIT License

Copyright (c) 2019 Weichen Shen

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
 No newline at end of file
+107 −0
Original line number Original line Diff line number Diff line
# GraphEmbedding

# Method


|   Model   | Paper                                                                                                                      | Note                                                                                        |
| :-------: | :------------------------------------------------------------------------------------------------------------------------- | :------------------------------------------------------------------------------------------ |
| DeepWalk  | [KDD 2014][DeepWalk: Online Learning of Social Representations](http://www.perozzi.net/publications/14_kdd_deepwalk.pdf)   | [【Graph Embedding】DeepWalk:算法原理,实现和应用](https://zhuanlan.zhihu.com/p/56380812)  |
|   LINE    | [WWW 2015][LINE: Large-scale Information Network Embedding](https://arxiv.org/pdf/1503.03578.pdf)                          | [【Graph Embedding】LINE:算法原理,实现和应用](https://zhuanlan.zhihu.com/p/56478167)      |
| Node2Vec  | [KDD 2016][node2vec: Scalable Feature Learning for Networks](https://www.kdd.org/kdd2016/papers/files/rfp0218-groverA.pdf) | [【Graph Embedding】Node2Vec:算法原理,实现和应用](https://zhuanlan.zhihu.com/p/56542707)  |
|   SDNE    | [KDD 2016][Structural Deep Network Embedding](https://www.kdd.org/kdd2016/papers/files/rfp0191-wangAemb.pdf)               | [【Graph Embedding】SDNE:算法原理,实现和应用](https://zhuanlan.zhihu.com/p/56637181)      |
| Struc2Vec | [KDD 2017][struc2vec: Learning Node Representations from Structural Identity](https://arxiv.org/pdf/1704.03165.pdf)        | [【Graph Embedding】Struc2Vec:算法原理,实现和应用](https://zhuanlan.zhihu.com/p/56733145) |


# How to run examples
1. clone the repo and make sure you have installed `tensorflow` or `tensorflow-gpu` on your local machine. 
2. run the following commands
```bash
python setup.py install
cd examples
python deepwalk_wiki.py
```

## Discussion Group & Related Projects

<html>
    <table style="margin-left: 20px; margin-right: auto;">
        <tr>
            <td>
                公众号:<b>浅梦的学习笔记</b><br><br>
                <a href="https://github.com/shenweichen/GraphEmbedding">
  <img align="center" src="./pics/code.png" />
</a>
            </td>
            <td>
                微信:<b>deepctrbot</b><br><br>
 <a href="https://github.com/shenweichen/GraphEmbedding">
  <img align="center" src="./pics/deepctrbot.png" />
</a>
            </td>
            <td>
<ul>
<li><a href="https://github.com/shenweichen/AlgoNotes">AlgoNotes</a></li>
<li><a href="https://github.com/shenweichen/DeepCTR">DeepCTR</a></li>
<li><a href="https://github.com/shenweichen/DeepMatch">DeepMatch</a></li>
<li><a href="https://github.com/shenweichen/DeepCTR-Torch">DeepCTR-Torch</a></li>
</ul>
            </td>
        </tr>
    </table>
</html>

# Usage
The design and implementation follow a simple principle (**graph in, embedding out**) as much as possible.
## Input format
We use `networkx` to create graphs. The input format of a networkx graph is as follows:
`node1 node2 <edge_weight>`

![](./pics/edge_list.png)
## DeepWalk

```python
G = nx.read_edgelist('../data/wiki/Wiki_edgelist.txt',create_using=nx.DiGraph(),nodetype=None,data=[('weight',int)])# Read graph

model = DeepWalk(G,walk_length=10,num_walks=80,workers=1)#init model
model.train(window_size=5,iter=3)# train model
embeddings = model.get_embeddings()# get embedding vectors
```

## LINE

```python
G = nx.read_edgelist('../data/wiki/Wiki_edgelist.txt',create_using=nx.DiGraph(),nodetype=None,data=[('weight',int)])#read graph

model = LINE(G,embedding_size=128,order='second') #init model,order can be ['first','second','all']
model.train(batch_size=1024,epochs=50,verbose=2)# train model
embeddings = model.get_embeddings()# get embedding vectors
```
## Node2Vec
```python
G=nx.read_edgelist('../data/wiki/Wiki_edgelist.txt',
                        create_using = nx.DiGraph(), nodetype = None, data = [('weight', int)])#read graph

model = Node2Vec(G, walk_length = 10, num_walks = 80,p = 0.25, q = 4, workers = 1)#init model
model.train(window_size = 5, iter = 3)# train model
embeddings = model.get_embeddings()# get embedding vectors
```
## SDNE

```python
G = nx.read_edgelist('../data/wiki/Wiki_edgelist.txt',create_using=nx.DiGraph(),nodetype=None,data=[('weight',int)])#read graph

model = SDNE(G,hidden_size=[256,128]) #init model
model.train(batch_size=3000,epochs=40,verbose=2)# train model
embeddings = model.get_embeddings()# get embedding vectors
```

## Struc2Vec


```python
G = nx.read_edgelist('../data/flight/brazil-airports.edgelist',create_using=nx.DiGraph(),nodetype=None,data=[('weight',int)])#read graph

model = model = Struc2Vec(G, 10, 80, workers=4, verbose=40, ) #init model
model.train(window_size = 5, iter = 3)# train model
embeddings = model.get_embeddings()# get embedding vectors
```
Original line number Original line Diff line number Diff line
7 77
29 50
3 35
9 84
25 82
6 28
64 51
108 74
19 4
2 84
7 25
56 25
24 66
23 26
30 51
10 61
50 84
65 84
53 53
9 0
30 15
45 35
10 7
87 52
106 19
70 53
58 130
6 98
103 80
7 19
36 40
25 15
15 4
4 50
2 27
7 98
9 123
1 54
58 65
4 5
96 77
9 127
50 27
1 120
55 2
79 1
3 40
36 67
75 15
67 7
0 67
61 69
9 39
6 41
36 41
49 71
31 51
46 2
49 67
9 9
57 7
6 67
36 5
34 3
58 4
30 69
80 25
61 25
10 66
15 71
36 29
97 25
2 18
69 58
71 71
9 124
51 5
54 54
43 91
38 51
29 4
40 51
30 27
4 42
34 70
7 15
22 45
61 94
3 55
18 5
0 84
25 70
4 48
117 71
15 70
9 50
66 37
3 68
54 64
58 15
106 5
45 47
10 19
25 25
49 52
63 66
0 5
24 2
98 25
83 87
27 31
21 77
54 21
38 40
2 7
74 30
29 29
3 22
77 3
10 71
8 8
6 1
30 30
87 87
9 121
7 4
65 5
5 20
3 60
24 87
6 27
54 33
112 24
129 9
61 49
5 71
74 75
75 75
6 61
36 37
2 117
108 108
125 125
87 49
9 29
80 80
6 87
36 3
1 103
27 52
127 42
61 5
41 71
74 63
69 69
25 4
36 8
78 25
2 30
0 50
74 25
3 31
1 51
58 68
30 5
40 5
2 56
50 30
9 90
66 29
87 74
42 43
15 84
7 27
50 50
29 42
18 17
32 50
70 70
6 52
36 44
6 21
8 7
50 82
122 126
61 36
56 40
36 10
10 5
65 65
63 50
61 30
0 19
94 117
97 6
2 25
94 94
74 0
98 5
74 74
3 4
18 84
95 4
25 55
67 120
54 25
28 4
77 15
51 36
40 31
4 45
1 46
3 48
9 37
24 75
36 43
2 71
9 128
22 2
37 42
61 61
27 117
42 39
25 101
3 65
69 45
2 97
15 97
54 27
80 27
45 0
63 15
36 25
45 52
36 50
15 15
63 1
7 49
0 0
50 41
3 13
51 7
15 53
28 3
122 124
31 61
5 24
101 27
2 36
36 38
65 2
79 54
3 57
27 84
24 82
25 68
47 25
36 82
43 9
41 41
0 2
79 92
33 26
58 120
50 102
25 110
33 84
3 70
36 56
60 45
2 120
67 58
7 61
68 61
24 30
5 68
6 82
45 45
25 31
37 91
0 7
74 50
9 82
51 9
25 1
15 50
21 75
2 5
7 22
83 83
3 24
77 1
10 69
6 7
123 123
88 7
8 51
2 63
31 5
79 63
3 62
77 27
92 21
25 77
6 25
1 84
84 99
63 67
6 83
94 7
127 127
23 25
0 71
3 79
36 39
1 67
8 2
120 50
44 39
89 89
62 4
45 2
72 5
54 97
53 87
1 31
49 5
97 3
93 93
7 97
74 7
66 91
79 30
3 1
49 49
102 2
85 74
31 42
5 27
79 4
3 39
101 23
96 51
120 84
36 0
58 53
0 64
53 48
29 40
51 68
6 42
45 5
1 68
2 74
8 9
50 80
64 21
9 4
24 108
66 42
60 25
108 3
30 70
61 28
76 84
7 64
54 63
25 19
21 93
53 45
6 120
77 51
25 53
15 46
21 71
50 5
67 51
113 51
9 65
0 30
46 71
10 42
8 42
7 18
3 50
29 39
24 77
25 65
81 108
4 53
1 93
2 69
15 69
7 40
22 0
29 91
33 24
25 107
6 71
3 67
54 77
10 10
24 25
3 105
45 50
58 77
68 15
58 108
80 51
7 87
24 63
53 5
3 15
10 38
75 4
58 84
28 29
76 7
5 13
79 10
3 21
9 74
6 2
30 31
41 5
29 58
3 59
7 130
24 84
6 36
60 49
50 66
94 2
9 41
74 76
122 66
3 72
36 58
93 55
7 63
75 63
2 61
23 15
10 31
64 64
26 26
0 25
36 71
1 4
66 74
103 21
2 3
0 63
29 25
25 41
6 5
21 51
65 130
65 27
84 87
40 9
56 50
5 16
113 7
25 83
6 31
64 50
36 91
19 5
67 31
7 30
63 77
113 25
94 5
18 18
23 27
6 49
21 7
2 113
58 18
0 97
79 77
122 122
77 77
121 122
71 27
45 36
130 84
23 23
7 69
42 91
62 30
99 76
69 65
15 5
97 1
7 99
24 51
3 3
51 61
25 50
58 64
46 46
95 69
22 51
3 41
67 2
25 84
6 22
45 105
58 51
76 33
22 21
64 1
68 7
66 51
6 40
30 53
45 3
32 24
80 2
9 10
25 96
81 3
119 66
10 1
58 7
21 113
22 97
10 94
7 66
32 58
38 4
96 27
2 21
39 65
24 58
9 125
51 2
2 15
9 126
33 75
46 55
79 15
9 71
10 95
25 93
6 9
4 41
7 106
84 67
58 58
41 2
46 25
3 52
18 4
24 79
25 71
34 35
8 50
7 42
46 3
94 25
18 30
74 67
66 36
6 69
3 69
84 23
67 57
106 4
24 27
66 82
3 107
45 48
25 26
0 4
24 1
123 125
1 1
64 97
41 39
28 31
120 120
0 58
5 11
77 4
1 71
54 84
6 0
7 5
65 6
5 21
79 50
3 61
77 30
24 86
15 1
23 6
10 41
67 24
22 25
7 80
94 0
18 25
42 42
47 3
70 2
36 36
10 51
58 9
45 21
18 51
10 120
10 29
41 28
8 25
0 27
57 51
98 19
25 5
98 2
2 1
74 24
6 130
58 71
30 4
4 31
84 111
50 97
63 97
59 48
9 91
57 49
6 29
21 27
36 69
0 77
0 75
29 43
127 123
65 51
4 67
10 58
32 3
79 79
9 7
5 73
38 41
36 4
36 9
10 4
71 84
52 58
30 67
69 1
9 63
7 71
24 15
5 83
71 51
120 3
51 25
79 71
125 89
7 93
24 53
98 4
3 5
9 122
51 63
25 48
75 51
21 58
90 129
2 50
0 46
61 106
1 47
57 58
43 123
41 31
50 51
69 67
3 49
59 60
83 27
6 46
36 42
38 106
58 27
7 47
37 43
98 98
25 102
6 64
3 94
29 53
10 15
2 96
58 5
70 45
42 64
48 51
43 125
119 82
9 1
87 5
0 15
66 0
74 3
3 10
75 3
21 67
88 25
28 2
45 15
51 42
75 25
6 15
64 2
17 5
31 29
24 81
25 69
40 43
123 89
74 71
50 71
79 93
94 31
24 4
3 71
79 23
5 69
45 46
51 84
15 21
39 56
22 80
7 83
74 53
6 119
15 51
75 77
28 25
71 4
6 51
79 22
3 25
77 2
51 51
6 6
23 3
84 84
76 25
7 7
71 30
3 63
6 24
45 115
108 87
10 82
27 130
42 40
6 50
45 13
119 10
0 23
37 39
89 90
6 84
3 82
1 102
15 18
30 94
61 4
24 10
5 104
77 33
8 65
27 33
19 61
2 31
0 51
96 96
3 30
1 50
21 63
54 7
2 57
50 31
45 116
79 5
9 89
25 87
6 19
21 25
88 51
75 67
0 79
5 38
57 25
6 53
45 6
66 127
58 30
7 48
9 5
25 99
6 79
10 2
15 27
4 4
45 1
81 32
75 54
24 9
127 125
50 60
74 1
41 69
126 66
3 7
66 123
99 27
30 13
28 5
2 48
85 84
31 8
5 5
79 2
9 66
51 39
6 10
19 41
36 119
2 42
4 69
3 51
25 66
10 98
19 19
2 68
58 25
7 41
56 9
7 84
33 25
25 100
90 90
3 64
45 25
31 71
24 24
62 5
45 51
45 27
0 1
51 120
53 26
27 27
25 63
58 87
6 18
66 4
5 14
70 84
3 18
97 84
82 82
45 78
66 66
31 31
55 50
3 56
27 87
24 83
66 6
6 39
21 21
4 51
60 48
84 53
46 7
59 45
25 109
6 57
3 73
60 46
67 71
28 71
9 25
5 67
7 102
36 7
10 30
25 30
114 25
68 28
70 60
0 24
74 51
70 48
7 120
1 5
80 79
2 2
7 107
48 24
3 27
51 53
6 4
30 1
2 60
76 27
7 1
53 55
92 22
66 1
6 30
19 2
7 31
97 92
94 4
83 84
51 69
23 24
40 58
10 63
7 53
37 37
59 24
69 27
6 74
3 84
10 25
2 106
0 31
74 58
96 3
53 28
49 4
61 45
7 96
24 50
79 25
3 0
10 77
127 89
42 5
4 27
79 7
3 38
125 121
36 65
58 50
79 45
21 1
76 76
60 60
58 28
7 50
97 79
80 1
79 75
57 1
45 84
89 124
10 0
58 6
39 40
63 63
124 123
23 7
48 48
54 76
1 23
122 89
74 15
3 9
77 50
15 41
4 2
28 7
84 32
22 53
91 91
58 31
51 41
6 8
53 27
4 40
2 40
55 55
3 53
9 42
25 64
40 40
21 10
1 92
2 66
22 1
55 45
61 58
6 68
3 66
69 71
58 1
64 84
42 58
90 66
45 49
25 27
22 93
50 53
36 31
75 7
4 84
28 30
2 9
5 12
79 21
53 2
27 51
25 39
6 3
4 23
65 7
66 58
79 51
3 58
77 31
24 85
25 73
38 69
45 118
10 40
2 93
67 25
50 67
46 5
74 77
3 29
119 9
52 5
97 51
78 79
27 70
9 31
0 21
53 71
10 28
66 81
67 67
41 42
26 27
41 65
24 7
6 38
1 27
29 37
29 5
78 27
45 7
74 27
111 3
25 40
49 27
21 50
54 0
28 51
2 58
Original line number Original line Diff line number Diff line
node label
0 0
1 0
2 0
3 0
4 0
5 0
6 0
7 0
8 1
9 0
10 0
11 3
12 3
13 2
14 3
15 0
16 3
17 3
18 1
19 1
20 3
21 0
22 1
23 1
24 0
25 0
26 1
27 0
28 0
29 1
30 0
31 0
32 2
33 1
34 3
35 3
36 0
37 2
38 1
39 1
40 1
41 0
42 1
43 2
44 3
45 1
46 2
47 2
48 1
49 1
50 0
51 0
52 1
53 0
54 1
55 2
56 2
57 2
58 0
59 2
60 1
61 0
62 2
63 1
64 0
65 1
66 0
67 3
68 3
69 0
70 1
71 0
72 3
73 3
74 0
75 1
76 2
77 0
78 2
79 0
80 1
81 2
82 1
83 2
84 1
85 2
86 2
87 1
88 3
89 2
90 2
91 2
92 2
93 1
94 1
95 3
96 1
97 1
98 1
99 2
100 2
101 2
102 2
103 2
104 3
105 3
106 2
107 3
108 3
109 3
110 3
111 3
112 3
113 2
114 3
115 3
116 3
117 3
118 3
119 2
120 3
121 3
122 3
123 2
124 3
125 2
126 3
127 1
128 3
129 3
130 2
Original line number Original line Diff line number Diff line
node label
0 1
1 1
2 2
3 1
4 0
5 0
6 0
7 0
8 0
9 0
10 0
11 0
12 0
13 1
14 0
15 0
16 0
17 1
18 0
19 0
20 0
21 0
22 0
23 0
24 1
25 1
26 0
27 0
28 3
29 0
30 1
31 0
32 1
33 0
34 0
35 0
36 0
37 0
38 0
39 0
40 0
41 0
42 3
43 0
44 0
45 0
46 1
47 0
48 3
49 1
50 0
51 0
52 1
53 0
54 0
55 0
56 0
57 0
58 0
59 1
60 0
61 0
62 0
63 0
64 0
65 0
66 0
67 0
68 1
69 1
70 0
71 0
72 0
73 0
74 0
75 0
76 1
77 0
78 0
79 0
80 0
81 2
82 0
83 0
84 1
85 2
86 0
87 0
88 0
89 0
90 0
91 1
92 1
93 0
94 0
95 1
96 0
97 0
98 1
99 0
100 0
101 0
102 1
103 0
104 0
105 0
106 0
107 2
108 3
109 3
110 1
111 2
112 3
113 2
114 1
115 3
116 2
117 1
118 1
119 0
120 1
121 1
122 2
123 3
124 3
125 1
126 3
127 3
128 3
129 1
130 1
131 1
132 1
133 1
134 1
135 2
136 1
137 2
138 1
139 0
140 1
141 0
142 0
143 0
144 2
145 3
146 0
147 1
148 2
149 2
150 3
151 1
152 2
153 1
154 1
155 0
156 3
157 3
158 1
159 1
160 2
161 2
162 1
163 0
164 3
165 1
166 1
167 0
168 1
169 1
170 0
171 0
172 0
173 1
174 1
175 0
176 2
177 1
178 0
179 1
180 3
181 2
182 1
183 0
184 1
185 1
186 2
187 1
188 2
189 2
190 2
191 1
192 2
193 2
194 1
195 1
196 3
197 2
198 2
199 2
200 2
201 2
202 1
203 2
204 2
205 3
206 3
207 2
208 1
209 3
210 3
211 3
212 3
213 2
214 1
215 1
216 2
217 2
218 2
219 3
220 3
221 0
222 3
223 2
224 2
225 3
226 3
227 3
228 1
229 2
230 2
231 1
232 3
233 2
234 3
235 3
236 3
237 1
238 3
239 2
240 0
241 1
242 1
243 1
244 0
245 2
246 1
247 2
248 1
249 1
250 3
251 2
252 1
253 3
254 2
255 3
256 3
257 2
258 3
259 3
260 2
261 3
262 3
263 3
264 2
265 2
266 2
267 2
268 3
269 3
270 3
271 2
272 1
273 2
274 3
275 2
276 3
277 2
278 1
279 3
280 1
281 2
282 2
283 3
284 2
285 1
286 3
287 3
288 3
289 3
290 1
291 1
292 0
293 3
294 2
295 2
296 3
297 2
298 3
299 1
300 3
301 3
302 2
303 2
304 3
305 2
306 3
307 2
308 1
309 2
310 1
311 3
312 2
313 2
314 1
315 3
316 3
317 3
318 1
319 3
320 2
321 3
322 3
323 3
324 3
325 2
326 2
327 2
328 3
329 3
330 3
331 2
332 3
333 2
334 2
335 3
336 3
337 3
338 3
339 3
340 3
341 3
342 3
343 2
344 2
345 2
346 2
347 2
348 3
349 1
350 2
351 2
352 3
353 2
354 0
355 1
356 2
357 3
358 3
359 3
360 1
361 1
362 2
363 1
364 2
365 1
366 1
367 3
368 2
369 3
370 2
371 2
372 3
373 2
374 1
375 1
376 1
377 2
378 1
379 2
380 2
381 3
382 3
383 3
384 3
385 3
386 3
387 3
388 3
389 2
390 1
391 2
392 2
393 3
394 1
395 1
396 3
397 2
398 2
Original line number Original line Diff line number Diff line
node label
10241 1
10243 2
10245 0
16390 1
10247 1
12297 2
16727 3
10257 0
12307 2
16729 3
12314 3
12315 2
10268 1
10272 1
12321 1
12323 0
12324 3
12294 2
10278 2
10279 0
11612 0
16428 1
16429 2
12335 1
12339 0
12638 2
12343 1
16441 3
10298 1
10299 0
12348 2
14397 2
10304 1
15030 3
16454 3
12641 2
10313 2
11834 2
12363 1
14412 3
12365 1
16463 3
10322 2
14691 3
10324 2
10325 2
12375 1
14424 3
10329 2
10333 1
14430 3
16479 2
15376 0
14435 3
12388 3
12389 0
12390 2
12391 0
12392 3
12394 3
16743 3
10348 3
12397 0
14447 3
16744 3
12402 0
14457 0
12412 1
16746 3
16515 3
10372 0
14470 1
15041 0
16520 3
14474 1
14475 2
10380 3
14477 2
16527 3
10385 3
12436 3
14485 1
14487 0
14488 1
12441 0
14492 0
10397 0
16543 3
12448 0
12451 0
10405 1
12455 1
10408 0
10409 1
10414 1
15389 0
14512 0
14736 2
10419 3
15154 2
10423 0
14520 1
12207 1
14524 0
14259 1
12478 0
10431 0
12320 2
10434 0
15051 1
12484 3
14533 2
14534 1
16584 2
11980 0
14539 1
12492 1
12494 1
14543 1
14254 0
10275 0
14551 2
14553 2
16603 2
10460 3
14557 3
12511 1
10466 0
12078 3
10469 0
12519 1
14716 0
14570 0
12523 0
14572 2
14574 0
15741 0
14576 0
14577 2
12206 0
15011 2
14582 1
16634 3
14588 0
15061 2
12544 0
16643 3
12549 1
11697 0
16647 1
12553 0
16651 3
11637 0
16654 3
10511 3
14262 0
14617 3
14618 2
12571 2
16668 3
11057 0
12574 2
12576 3
10529 0
14627 1
14628 2
14630 2
10535 2
16680 3
14633 0
16682 3
14635 0
10540 1
12591 2
13704 2
15411 0
14046 2
12598 3
10551 0
14648 1
15412 0
16698 3
16737 3
10557 1
10558 1
10559 1
14656 2
10561 0
10562 2
16709 3
16710 3
12615 1
10569 2
16715 3
12621 2
12622 3
14672 2
10577 0
14674 0
16723 3
16724 3
10581 0
14679 0
16728 3
12633 2
16730 3
14683 0
14685 0
10590 0
10221 1
16736 3
14689 0
16738 3
16739 3
16741 3
16742 3
10599 0
14696 0
12649 1
14698 0
12652 1
12653 2
14704 0
14706 3
14709 0
14711 0
14712 2
10617 3
10620 0
14718 1
12671 2
14400 3
10627 0
12676 2
10630 2
10631 1
15497 1
14730 0
10640 1
14738 1
10643 1
10990 0
10647 1
14744 3
14745 1
14747 0
15325 2
10654 1
14751 3
12704 1
12705 2
12708 1
12709 1
14761 0
10666 0
10994 0
10670 2
12719 1
12720 2
12721 1
14771 0
10676 0
14750 3
11337 0
12728 1
10996 2
14778 3
10683 1
10685 0
14783 0
12737 2
10693 0
12743 2
14792 3
12745 2
14794 0
12748 2
14797 1
11341 3
14802 1
12755 2
14804 1
14805 1
12758 0
10713 0
10715 3
14812 3
14814 0
10661 0
14816 2
10721 0
14819 1
12772 2
12773 2
12774 3
10728 1
10731 1
14828 0
12370 2
12782 2
14831 0
12784 2
12785 2
10739 1
10744 1
14842 0
10747 0
10754 0
14853 2
16725 2
12807 1
14856 1
10327 2
12815 1
10770 2
12819 0
14869 0
12822 1
10775 1
10778 3
10779 1
10781 0
10783 1
14880 2
10785 0
14769 3
10792 0
12841 1
12844 1
14893 0
14895 1
10800 0
14897 2
14898 2
12851 1
12853 1
10589 1
12855 1
14905 0
12635 2
14908 0
10815 2
10817 3
12866 2
10819 1
12868 2
10821 0
12870 3
14919 1
14922 3
12878 1
13795 0
12882 2
12883 2
12884 0
10838 3
12888 1
12889 0
14938 3
12891 0
12892 0
15023 0
14942 1
14943 1
14944 3
10849 0
12898 0
12899 1
12902 1
14952 0
10857 3
14955 0
15841 1
14960 0
12915 0
10868 0
12917 0
13076 0
10874 0
10687 3
14972 3
10347 3
12932 1
12054 3
12934 3
14785 3
14986 0
14987 2
10349 1
12944 2
12945 0
11615 1
12947 3
14996 2
12951 0
12953 0
12954 0
12955 3
12956 3
10910 2
12847 1
15008 1
11376 2
15010 3
14107 0
15012 2
12965 2
10918 1
15016 0
11719 1
15020 3
10925 1
10926 1
10927 3
15024 0
13768 1
10930 1
15027 0
10184 1
10933 2
12982 0
12983 2
16731 3
12992 0
10945 3
10946 3
15043 2
15045 2
10950 1
14113 1
15048 1
15049 1
13002 1
13003 1
11042 0
15054 3
11617 0
13008 3
10961 1
10965 2
10967 1
13016 2
15069 3
15070 0
13024 2
15074 1
10980 0
13029 0
10982 3
13032 1
13034 0
10140 0
15085 1
15086 2
15090 3
15091 2
13044 3
15093 3
13046 3
10999 3
15096 0
11002 3
11003 0
13052 3
14295 2
13055 2
15108 3
11013 1
11278 0
11619 2
13072 2
15370 0
11027 1
15124 1
13077 2
14468 1
11036 2
13087 2
11041 2
15138 1
10139 1
10938 2
11049 0
11050 3
11053 2
15153 1
11058 3
13109 1
13111 2
15160 2
13871 0
11066 0
13115 3
13117 3
11007 2
13121 1
15171 3
11076 1
13125 3
12769 1
15177 2
16345 2
12087 2
11085 1
11086 3
13139 0
11092 1
11097 0
11098 2
13151 3
11106 3
11067 0
11109 0
13158 0
11111 0
15215 0
15855 1
11122 0
11123 1
13873 0
11126 1
13176 1
15167 1
13182 1
11135 3
13184 0
15235 3
15236 2
13192 3
10732 0
11146 0
13195 3
13196 1
15245 1
11150 0
15248 2
15249 0
15250 2
13203 2
13204 0
14489 0
15257 1
13211 1
15511 3
11165 3
13219 3
13221 2
11174 3
11176 3
10396 1
13226 3
13127 0
15278 2
14493 2
13232 0
15282 1
11188 3
13241 1
11423 0
13244 0
11197 1
13246 3
15295 0
11200 2
10400 3
11203 0
13255 2
15304 0
13259 3
11214 1
13264 0
14150 1
15323 0
13277 0
11230 3
11233 1
13282 1
14843 0
10065 2
13891 0
11241 2
13290 0
15339 2
13292 3
11245 2
14559 2
13295 2
15344 3
13297 3
11252 0
13303 0
11259 0
15356 0
11267 0
10141 1
13484 1
11274 0
12119 1
11764 1
15374 1
11280 2
11282 3
11283 1
15380 0
15381 1
11288 1
15385 2
13487 0
11292 0
13341 1
13342 0
13344 1
10416 1
11298 0
13347 0
13348 1
11833 1
15401 1
14855 2
11308 0
15406 1
13360 0
13361 3
11315 1
13061 0
11317 2
13367 0
13369 1
15422 3
11445 1
13377 0
15427 2
13381 2
12129 1
11336 1
15433 3
13495 0
13388 1
13389 3
11789 2
15440 3
10424 3
16540 2
15203 3
13397 2
15446 2
15447 2
15448 2
14863 2
11450 3
15454 1
10085 2
15458 3
14182 2
14222 1
13415 2
13418 2
13422 0
11375 2
13424 2
13502 0
15478 1
15481 2
13434 1
11388 3
11391 2
11392 1
13504 1
13200 1
11778 0
11399 1
11401 1
13450 3
14871 2
14530 2
15502 1
15504 2
11411 1
13461 1
13463 3
13464 2
13467 1
11421 2
16581 2
14875 1
13476 0
15525 3
11430 3
11431 2
11433 0
15532 2
13485 0
13486 0
14877 1
11441 1
13490 1
15539 3
13493 3
11447 1
15546 3
12831 1
15548 3
11453 2
15550 3
14197 2
11456 2
13505 3
15554 1
13507 2
11463 2
16588 2
11468 1
11470 0
11471 1
13520 1
15569 1
15570 1
14542 2
11479 2
11481 0
15579 1
11484 2
15581 1
15582 1
11487 2
13536 2
13537 3
11492 1
13541 0
13543 1
13546 3
11503 0
15601 2
11506 3
13555 3
10135 0
11510 2
15607 0
11512 2
15231 2
13564 2
11517 2
13525 3
15232 3
15618 2
11525 1
13574 2
10613 1
15624 0
13577 0
15626 0
13579 1
15628 1
11535 1
12250 1
11537 0
15634 3
13587 3
11540 0
11140 0
11823 0
11550 1
13599 2
15650 3
11555 0
11559 2
15656 2
11563 0
10478 2
13619 3
11577 0
10463 3
13194 1
13630 3
10615 2
11588 1
12854 2
15798 1
11592 3
16553 3
12173 0
11603 0
15700 2
13198 0
13655 2
13540 3
15706 3
15707 2
15708 2
15709 1
14565 2
15713 3
11618 0
13667 3
12857 2
11624 0
15721 1
13674 2
11627 3
13676 2
15725 2
11630 0
11633 3
13682 3
15731 2
13685 3
11638 0
11641 0
11643 2
13693 2
11646 2
11648 0
11299 3
11650 3
12663 2
13700 3
11656 3
10476 2
15754 2
11659 3
11842 2
13710 1
11663 3
12184 0
13714 2
13715 2
11669 3
14314 0
11673 2
11675 2
10317 3
15774 3
12869 3
13398 1
11684 1
13736 3
15785 1
11695 0
15793 2
15794 1
11699 0
11028 2
13756 2
11711 2
11714 3
14467 3
13767 1
11720 2
11721 0
13770 1
11511 2
13772 3
11725 1
11726 1
11728 2
15825 2
15939 3
11732 3
13781 3
13785 3
13788 3
15838 3
13689 2
14371 3
15842 3
15843 2
13796 0
14303 0
15847 3
11493 2
11756 3
15853 2
13806 2
15599 2
15856 2
13809 2
11762 1
11344 2
15861 1
15862 1
15273 3
11415 3
15163 3
10154 0
11775 0
16498 2
13826 2
15275 3
13829 1
13830 0
15879 3
13832 1
15881 3
15882 3
15885 2
11790 3
15887 1
13827 3
13841 3
13230 0
14100 0
15897 1
13851 0
11867 1
11813 3
13862 1
13863 2
13865 3
13791 3
15919 0
11824 3
11825 3
11827 2
11828 3
14601 3
13881 3
15930 3
10267 3
13884 2
15934 2
11840 3
12896 0
13890 3
11531 1
11844 1
11845 1
13894 1
14945 1
12124 1
13898 2
13899 3
13731 2
15534 3
11856 3
14102 1
11859 2
11331 3
11193 0
11865 1
15057 2
12559 1
11869 0
11870 3
11871 3
15971 3
15973 3
11879 1
10011 1
14268 2
13930 0
13931 0
11413 0
13933 0
13934 2
15293 1
13936 3
11198 3
13942 1
15991 1
11898 0
11899 2
13949 2
11905 1
13459 0
11883 3
11908 1
13958 2
11495 2
11884 0
13964 1
13763 3
11885 3
11921 0
13970 0
11924 2
15982 3
11545 2
11931 1
13983 1
13984 2
11938 3
13987 2
11941 1
11947 3
12332 3
11952 1
13256 0
14004 2
12264 0
14006 1
11538 3
12916 2
14013 2
14646 3
14965 3
12235 2
11973 0
11975 3
14024 3
14025 0
14027 0
14028 2
11982 0
11986 0
15310 3
11992 2
12771 2
11994 3
16091 1
11996 0
11997 1
13105 1
14288 1
14050 2
12003 0
16101 0
14054 1
12007 0
14057 0
12583 2
12012 1
14062 2
12016 0
12018 3
16681 3
12244 1
12028 3
16665 3
12245 1
14081 1
14082 0
12587 3
10923 2
16133 0
12039 3
14088 1
15472 2
14091 3
14092 0
15063 3
14098 0
11977 0
10005 2
10006 3
12055 2
10543 3
14108 0
14109 1
10015 3
10016 2
10017 3
11227 1
12068 3
10886 3
11995 0
14119 2
14120 1
10204 1
14122 0
14125 1
10030 3
12079 3
10033 3
14130 1
14131 1
10039 3
10040 3
10041 3
10042 2
12255 1
12094 0
10050 3
10052 3
10053 3
11367 2
12103 3
10056 3
12599 2
12108 2
10064 3
16696 3
14307 0
14695 3
14167 2
16218 0
10076 2
12222 2
12127 3
14992 1
12131 2
12132 3
13535 2
12134 3
14993 2
12263 3
12141 2
13629 3
14193 0
12147 3
12149 2
10361 1
12155 3
12156 0
16567 1
13771 2
14112 0
16704 3
14044 3
14218 2
12171 1
12610 1
12174 1
12175 1
12177 1
11587 1
14229 2
14231 1
10136 0
12185 2
14234 1
12187 2
14236 2
14237 1
12190 1
12191 0
13296 0
10146 1
12195 1
12197 0
14321 0
12779 1
11522 3
10155 0
14252 0
10157 0
10158 0
14255 2
14256 1
13947 3
12211 2
10165 1
12214 1
16311 2
12217 0
10170 0
10171 2
10172 3
14270 3
12223 0
14273 2
14275 1
12228 2
12232 3
10185 0
14282 2
14283 3
11953 0
16336 3
13501 2
10194 1
16339 2
16340 2
14670 3
10198 2
13433 0
11961 2
16346 2
12252 2
12253 1
10917 1
10208 0
16720 3
14306 1
13307 2
14320 2
16721 3
10216 2
12265 0
12266 0
16363 3
12268 1
16722 3
11702 3
10224 3
10225 1
12278 0
12280 0
14332 3
10237 2
16353 2
Original line number Original line Diff line number Diff line
from .models import *
from .deepwalk import DeepWalk
from .node2vec import Node2Vec
from .line import LINE
from .sdne import SDNE
from .struc2vec import Struc2Vec


__all__ = ["DeepWalk", "Node2Vec", "LINE", "SDNE", "Struc2Vec"]
 No newline at end of file
+54 −0
Original line number Original line Diff line number Diff line
import numpy as np


def create_alias_table(area_ratio):
    """
    Build the accept/alias tables used by the alias sampling method.

    :param area_ratio: sum(area_ratio)=1
    :return: accept,alias
    """
    n = len(area_ratio)
    accept = [0] * n
    alias = [0] * n
    # Scale so that the average slot has area 1.
    scaled = np.array(area_ratio) * n

    # Split the slots into under-full (< 1) and the rest.
    small = [idx for idx, prob in enumerate(scaled) if prob < 1.0]
    large = [idx for idx, prob in enumerate(scaled) if not prob < 1.0]

    # Top up each under-full slot from an over-full one.
    while small and large:
        under = small.pop()
        over = large.pop()
        accept[under] = scaled[under]
        alias[under] = over
        scaled[over] -= 1 - scaled[under]
        bucket = small if scaled[over] < 1.0 else large
        bucket.append(over)

    # Whatever is left over is always accepted.
    for leftover in large + small:
        accept[leftover] = 1

    return accept, alias


def alias_sample(accept, alias):
    """
    Draw one index using the accept/alias tables.

    :param accept: acceptance threshold of each slot
    :param alias: fallback index of each slot
    :return: data_train index
    """
    size = len(accept)
    # First draw picks the slot, second draw decides between slot and alias.
    slot = int(np.random.random() * size)
    draw = np.random.random()
    return slot if draw < accept[slot] else alias[slot]
Original line number Original line Diff line number Diff line
from __future__ import print_function


import numpy
from sklearn.metrics import f1_score, accuracy_score
from sklearn.multiclass import OneVsRestClassifier
from sklearn.preprocessing import MultiLabelBinarizer


class TopKRanker(OneVsRestClassifier):
    """One-vs-rest classifier that predicts the ``k`` most probable labels per sample."""

    def predict(self, X, top_k_list):
        """Return a 0/1 indicator matrix with the top-k labels of each sample set to 1.

        :param X: samples, passed unchanged to ``predict_proba``
        :param top_k_list: number of labels to keep for each sample, in the
            same order as the rows of ``X``
        :return: array with one row per entry of ``top_k_list``; the entries
            indexed by the selected class labels are 1, the others 0
        """
        probs = numpy.asarray(super(TopKRanker, self).predict_proba(X))
        all_labels = []
        for i, k in enumerate(top_k_list):
            probs_ = probs[i, :]
            order = probs_.argsort()
            # ``order[-k:]`` degenerates to ``order[0:]`` (every label) when
            # k == 0, so a sample without labels must select nothing instead.
            top = order[-k:] if k else order[:0]
            labels = self.classes_[top].tolist()
            probs_[:] = 0
            # NOTE(review): class labels are used as column indices here,
            # which assumes classes_ are the integers 0..n_classes-1.
            probs_[labels] = 1
            all_labels.append(probs_)
        return numpy.asarray(all_labels)


class Classifier(object):
    """Multi-label node classifier trained on pre-computed node embeddings."""

    def __init__(self, embeddings, clf):
        """
        :param embeddings: container indexed by node id that yields the node's
            embedding vector (accessed as ``embeddings[x]``)
        :param clf: base estimator wrapped in a ``TopKRanker``
        """
        self.embeddings = embeddings
        self.clf = TopKRanker(clf)
        self.binarizer = MultiLabelBinarizer(sparse_output=True)

    def train(self, X, Y, Y_all):
        """Fit the label binarizer on ``Y_all`` and the classifier on ``(X, Y)``.

        :param X: node ids of the training samples
        :param Y: label lists of the training samples
        :param Y_all: label lists of all samples, used to fit the binarizer
        """
        self.binarizer.fit(Y_all)
        X_train = [self.embeddings[x] for x in X]
        Y = self.binarizer.transform(Y)
        self.clf.fit(X_train, Y)

    def evaluate(self, X, Y):
        """Predict labels for ``X`` and score them against ``Y``.

        For every sample, as many labels are predicted as the sample truly has.

        :param X: node ids of the test samples
        :param Y: true label lists of the test samples
        :return: dict with the F1 score for each of the ``micro``, ``macro``,
            ``samples`` and ``weighted`` averages plus ``acc`` (accuracy)
        """
        top_k_list = [len(l) for l in Y]
        Y_ = self.predict(X, top_k_list)
        Y = self.binarizer.transform(Y)
        averages = ["micro", "macro", "samples", "weighted"]
        results = {}
        for average in averages:
            results[average] = f1_score(Y, Y_, average=average)
        results['acc'] = accuracy_score(Y,Y_)
        print('-------------------')
        print(results)
        # The closing separator used to sit after ``return`` and never ran.
        print('-------------------')
        return results

    def predict(self, X, top_k_list):
        """Return the top-k label indicator matrix for the nodes in ``X``.

        :param X: node ids to classify
        :param top_k_list: number of labels to predict for each node
        """
        X_ = numpy.asarray([self.embeddings[x] for x in X])
        Y = self.clf.predict(X_, top_k_list=top_k_list)
        return Y

    def split_train_evaluate(self, X, Y, train_precent, seed=0):
        """Shuffle the samples, train on the first part and evaluate on the rest.

        The global NumPy random state is saved on entry and restored once
        training has finished, so the fixed ``seed`` does not leak to callers.

        :param X: node ids
        :param Y: label lists, aligned with ``X``
        :param train_precent: fraction of the samples used for training
        :param seed: seed of the shuffle
        :return: the result of ``evaluate`` on the held-out samples
        """
        state = numpy.random.get_state()
        try:
            training_size = int(train_precent * len(X))
            numpy.random.seed(seed)
            shuffle_indices = numpy.random.permutation(numpy.arange(len(X)))
            X_train = [X[shuffle_indices[i]] for i in range(training_size)]
            Y_train = [Y[shuffle_indices[i]] for i in range(training_size)]
            X_test = [X[shuffle_indices[i]] for i in range(training_size, len(X))]
            Y_test = [Y[shuffle_indices[i]] for i in range(training_size, len(X))]

            self.train(X_train, Y_train, Y)
        finally:
            # Restore the caller's RNG state even when training raises.
            numpy.random.set_state(state)
        return self.evaluate(X_test, Y_test)


def read_node_label(filename, skip_head=False):
    """Read a space-separated ``node label [label ...]`` file.

    :param filename: path of the label file.
    :param skip_head: when true, the first line is treated as a header and
        discarded.
    :return: ``(X, Y)`` where ``X`` is the list of first tokens and ``Y`` the
        list of remaining tokens of each line.
    """
    X = []
    Y = []
    with open(filename, 'r') as fin:
        # The header is skipped once, before the loop; checking ``skip_head``
        # inside the loop used to drop every other data line.
        if skip_head:
            fin.readline()
        for l in fin:
            vec = l.strip().split(' ')
            X.append(vec[0])
            Y.append(vec[1:])
    return X, Y
Original line number Original line Diff line number Diff line
# -*- coding:utf-8 -*-

"""



Author:

    Weichen Shen,wcshen1994@163.com



Reference:

    [1] Perozzi B, Al-Rfou R, Skiena S. Deepwalk: Online learning of social representations[C]//Proceedings of the 20th ACM SIGKDD international conference on Knowledge discovery and data mining. ACM, 2014: 701-710.(http://www.perozzi.net/publications/14_kdd_deepwalk.pdf)



"""
import walker
from gensim.models import Word2Vec
import pandas as pd


class DeepWalk:
    """DeepWalk: random walks over a graph fed to a Word2Vec model."""

    def __init__(self, graph, walk_length, num_walks, workers=16):
        """Create the walker and generate the walk corpus immediately.

        :param graph: graph handed to ``walker.RandomWalker``.
        :param walk_length: forwarded to ``simulate_walks``.
        :param num_walks: forwarded to ``simulate_walks``.
        :param workers: forwarded to ``simulate_walks``.
        """
        self.graph = graph
        self.w2v_model = None
        self._embeddings = {}
        self.walker = walker.RandomWalker(graph, p=1, q=1)
        self.sentences = self.walker.simulate_walks(
            num_walks=num_walks,
            walk_length=walk_length,
            workers=workers,
            verbose=1,
        )

    def train(self, embed_size=6, window_size=5, workers=16, iter=4, **kwargs):
        """Fit Word2Vec on the walks and return the fitted model.

        Extra keyword arguments are forwarded to ``Word2Vec``; the settings
        below override any value passed for the same key, except
        ``min_count`` which only defaults to 0.
        """
        kwargs.setdefault("min_count", 0)
        kwargs.update(
            sentences=self.sentences,
            size=embed_size,
            sg=1,  # skip gram
            hs=1,  # deepwalk use Hierarchical Softmax
            workers=workers,
            window=window_size,
            iter=iter,
        )

        print("Learning embedding vectors...")
        fitted = Word2Vec(**kwargs)
        print("Learning embedding vectors done!")

        self.w2v_model = fitted
        return fitted

    def get_embeddings(self,):
        """Return ``{node: vector}`` for every graph node.

        Prints a notice and returns an empty dict when :meth:`train` has not
        been called yet.
        """
        if self.w2v_model is None:
            print("model not train")
            return {}

        self._embeddings = table = {}
        table.update(
            (node, self.w2v_model.wv[node]) for node in self.graph.nodes()
        )
        return self._embeddings
Original line number Original line Diff line number Diff line

import numpy as np
import time
from sklearn.linear_model import LogisticRegression
import classify
import deepwalk
import matplotlib.pyplot as plt
import networkx as nx
from sklearn.manifold import TSNE
from gensim.models import Word2Vec
from multiprocessing import Pool
from itertools import repeat

def evaluate_embeddings(embeddings):
    """Train and score a logistic-regression classifier on the wiki labels.

    80% of the labelled nodes are used for training; the scores are printed
    by the classifier and nothing is returned.
    """
    nodes, labels = classify.read_node_label('../data/wiki/wiki_labels.txt')
    train_fraction = 0.8
    banner = "Training classifier using {:.2f}% nodes...".format(
        train_fraction * 100)
    print(banner)
    estimator = LogisticRegression()
    classifier = classify.Classifier(embeddings=embeddings, clf=estimator)
    classifier.split_train_evaluate(nodes, labels, train_fraction)

def read(arr):
    """Build an undirected graph from a sequence of edges.

    Every element of ``arr`` supplies the two endpoints at index 0 and 1;
    each edge gets ``weight=1``. The first endpoint of every edge is printed.
    """
    graph = nx.Graph()
    for edge in arr:
        source, target = edge[0], edge[1]
        print(source)
        # add_node leaves an already-present node untouched, so no
        # has_node() test is needed before it.
        graph.add_node(source)
        graph.add_node(target)
        graph.add_edge(source, target, weight=1)
    return graph

def plot_embeddings(embeddings,):
    """Project the labelled wiki nodes to 2-D with t-SNE and scatter-plot them.

    Points are grouped by the first label of each node; the shape of the
    embedding matrix is printed before projecting.
    """
    X, Y = classify.read_node_label('../data/wiki/wiki_labels.txt')

    vectors = np.array([embeddings[node] for node in X])
    print(vectors.shape)

    node_pos = TSNE(n_components=2).fit_transform(vectors)

    # first label -> row positions of the nodes carrying it
    members_by_label = {}
    for position in range(len(X)):
        members_by_label.setdefault(Y[position][0], []).append(position)

    for label, members in members_by_label.items():
        plt.scatter(node_pos[members, 0], node_pos[members, 1], label=label)
    plt.legend()
    plt.show()
def Affect(z,i):
    """Return the vector stored in ``z`` for entity ``i``.

    ``z`` must expose ``index2entity`` (a list of entities) and ``vectors``
    (indexable by position). The entity and its position are printed.
    """
    print(i)
    position = z.index2entity.index(i)
    print(position)
    return z.vectors[position]
def read_all():
    """Embed every graph stored in ``graph/test_Bip.npy`` with DeepWalk.

    Each element of the loaded array is turned into a graph by ``read``, a
    DeepWalk model is trained on it, and its vectors are scattered into a
    dense ``(116835, 6)`` matrix whose row index is the node id itself
    (node ids are therefore presumably integers below 116835 - confirm).

    After every graph the accumulated list of matrices is written to
    ``graph/data_train/transformed_<n>.npy``; the full list is finally saved
    to ``graph/test_train.npy``.

    :return: the DeepWalk model trained on the last graph.
    """
    import os  # local import: used only to build portable output paths

    num_rows = 116835
    embed_size = 6  # matches the default ``embed_size`` of DeepWalk.train

    data = np.load('graph/test_Bip.npy', allow_pickle=True)
    results = []  # original note: "a retirer" (to be removed)
    print(len(data), len(data[0]), data[0])

    # 'graph/data_train.npy' without its extension is the output directory.
    out_dir = os.path.splitext('graph/data_train.npy')[0]
    os.makedirs(out_dir, exist_ok=True)

    for graph_id, edges in enumerate(data):
        G = read(edges)
        model = deepwalk.DeepWalk(G, walk_length=5, num_walks=4, workers=1)
        z = model.train(window_size=5, iter=3)
        print(len(z.wv.vectors))
        result = np.zeros((num_rows, embed_size))

        # Position in index2entity == row in vectors; enumerate avoids the
        # O(n) list.index() lookup per node.
        for row, node in enumerate(z.wv.index2entity):
            result[node] = z.wv.vectors[row]
        results.append(result)

        # NOTE(review): this saves the whole accumulated list, not only the
        # matrix of the current graph - confirm that is intended.
        np.save(os.path.join(out_dir, "transformed_" + str(graph_id)), results)
        print(graph_id, "DONE")
    np.save('graph/test_train.npy', results)
    return model
# Script entry point: embed every stored graph with DeepWalk.
if __name__ == "__main__":
    # The string literal below is a disabled single-graph experiment on the
    # wiki edge list; it is evaluated as a no-op expression.
    '''G = nx.read_edgelist('../data/wiki/Wiki_edgelist.txt',
                         create_using=nx.DiGraph(), nodetype=None, data=[('weight', int)])

    model = deepwalk.DeepWalk(G, walk_length=15, num_walks=80, workers=1)
    z = model.train(window_size=5, iter=3)
    print(len(z.wv.vectors))
    result = np.zeros((2405, int(64)))
    for i in range(len(z.wv.vectors)):
        key = str(i + 1)
        if key in z.wv.index2entity:
            result[i] = z.wv.vectors[z.wv.index2entity.index(key)]
    result = np.asarray(result)
    print("result shape",result.shape)'''
    # read_all() returns the model trained on the last graph.
    model = read_all()
    # Disabled: classification / t-SNE evaluation of the learned embeddings.
    #embeddings = model.get_embeddings()
    #print(embeddings)
    #evaluate_embeddings(embeddings)
    #plot_embeddings(embeddings)
+213 −0
Original line number Original line Diff line number Diff line
# -*- coding:utf-8 -*-

"""



Author:

    Weichen Shen,wcshen1994@163.com



Reference:

    [1] Tang J, Qu M, Wang M, et al. Line: Large-scale information network embedding[C]//Proceedings of the 24th International Conference on World Wide Web. International World Wide Web Conferences Steering Committee, 2015: 1067-1077.(https://arxiv.org/pdf/1503.03578.pdf)



"""
import math
import random

import numpy as np
import tensorflow as tf
from tensorflow.python.keras import backend as K
from tensorflow.python.keras.layers import Embedding, Input, Lambda
from tensorflow.python.keras.models import Model
import alias
import utils
import time


def line_loss(y_true, y_pred):
    """Return the mean of ``-log(sigmoid(y_true * y_pred))``."""
    signed_score = y_true*y_pred
    log_likelihood = K.log(K.sigmoid(signed_score))
    return -K.mean(log_likelihood)


def create_model(numNodes, embedding_size, order='second'):
    """Build the Keras model used by LINE.

    Two scalar inputs (one node index each) are looked up in embedding
    tables and reduced to an inner product per pair.

    :param numNodes: number of rows of every embedding table.
    :param embedding_size: width of every embedding table.
    :param order: ``'first'`` outputs the first-order product, ``'second'``
        the second-order product, anything else both (in that order).
    :return: ``(model, {'first': first_emb, 'second': second_emb})``.
    """
    v_i = Input(shape=(1,))
    v_j = Input(shape=(1,))

    first_emb = Embedding(numNodes, embedding_size, name='first_emb')
    second_emb = Embedding(numNodes, embedding_size, name='second_emb')
    context_emb = Embedding(numNodes, embedding_size, name='context_emb')

    # first order: both endpoints share one table
    first_pair = [first_emb(v_i), first_emb(v_j)]
    # second order: vertex table for v_i, context table for v_j
    second_pair = [second_emb(v_i), context_emb(v_j)]

    first = Lambda(
        lambda pair: tf.reduce_sum(pair[0]*pair[1], axis=-1, keepdims=False),
        name='first_order')(first_pair)
    second = Lambda(
        lambda pair: tf.reduce_sum(pair[0]*pair[1], axis=-1, keepdims=False),
        name='second_order')(second_pair)

    single_output = {'first': [first], 'second': [second]}
    output_list = single_output.get(order, [first, second])

    model = Model(inputs=[v_i, v_j], outputs=output_list)

    return model, {'first': first_emb, 'second': second_emb}


class LINE:
    """LINE embedding trainer built on a Keras model and alias sampling."""

    def __init__(self, graph, embedding_size=8, negative_ratio=5, order='second',):
        """
        :param graph: graph exposing ``edges()``, ``number_of_nodes()``,
            ``number_of_edges()`` and ``graph[u][v]`` edge-attribute lookup.
        :param embedding_size: width of each embedding table.
        :param negative_ratio: negative batches generated per positive batch.
        :param order: 'first','second','all'
        :raises ValueError: when ``order`` is not one of the values above.
        """
        if order not in ['first', 'second', 'all']:
            raise ValueError('mode must be fisrt,second,or all')

        self.graph = graph
        self.idx2node, self.node2idx = utils.preprocess_nxgraph(graph)
        self.use_alias = True

        self.rep_size = embedding_size
        self.order = order

        self._embeddings = {}
        self.negative_ratio = negative_ratio

        self.node_size = graph.number_of_nodes()
        self.edge_size = graph.number_of_edges()
        self.samples_per_epoch = self.edge_size*(1+negative_ratio)

        self._gen_sampling_table()
        self.reset_model()

    def reset_training_config(self, batch_size, times):
        """Store the batch size and derive the number of steps per epoch."""
        self.batch_size = batch_size
        self.steps_per_epoch = (
            (self.samples_per_epoch - 1) // self.batch_size + 1)*times

    def reset_model(self, opt='adam'):
        """Rebuild and compile the Keras model and restart the batch generator."""
        self.model, self.embedding_dict = create_model(
            self.node_size, self.rep_size, self.order)
        self.model.compile(opt, line_loss)
        # The generator body only runs on the first ``next``; by then
        # ``train`` has set ``self.batch_size``.
        self.batch_it = self.batch_iter(self.node2idx)

    def _gen_sampling_table(self):
        """Build the alias tables used to sample negative nodes and edges."""
        # create sampling table for vertex
        power = 0.75
        numNodes = self.node_size
        node_degree = np.zeros(numNodes)  # out degree
        node2idx = self.node2idx

        for edge in self.graph.edges():
            node_degree[node2idx[edge[0]]
                        ] += self.graph[edge[0]][edge[1]].get('weight', 1.0)

        powered = [math.pow(node_degree[i], power) for i in range(numNodes)]
        total_sum = sum(powered)
        norm_prob = [float(value) / total_sum for value in powered]

        self.node_accept, self.node_alias = alias.create_alias_table(norm_prob)

        # create sampling table for edge
        numEdges = self.graph.number_of_edges()
        edge_weights = [self.graph[edge[0]][edge[1]].get('weight', 1.0)
                        for edge in self.graph.edges()]
        total_sum = sum(edge_weights)
        norm_prob = [weight * numEdges / total_sum for weight in edge_weights]

        self.edge_accept, self.edge_alias = alias.create_alias_table(norm_prob)

    def batch_iter(self, node2idx):
        """Yield training batches forever.

        For every slice of shuffled edges one positive batch (sign ``+1``) is
        followed by ``negative_ratio`` batches that keep the same heads but
        draw the tails from the node alias table (sign ``-1``).

        :param node2idx: mapping from node id to embedding row.
        """
        edges = [(node2idx[x[0]], node2idx[x[1]]) for x in self.graph.edges()]

        data_size = self.graph.number_of_edges()
        shuffle_indices = np.random.permutation(np.arange(data_size))
        # positive or negative mod
        mod = 0
        mod_size = 1 + self.negative_ratio
        h = []
        t = []
        sign = 0
        start_index = 0
        end_index = min(start_index + self.batch_size, data_size)
        while True:
            if mod == 0:
                # positive batch: edges drawn through the edge alias table
                h = []
                t = []
                for i in range(start_index, end_index):
                    if random.random() >= self.edge_accept[shuffle_indices[i]]:
                        shuffle_indices[i] = self.edge_alias[shuffle_indices[i]]
                    cur_h = edges[shuffle_indices[i]][0]
                    cur_t = edges[shuffle_indices[i]][1]
                    h.append(cur_h)
                    t.append(cur_t)
                sign = np.ones(len(h))
            else:
                # negative batch: same heads, tails sampled from the node table
                sign = np.ones(len(h))*-1
                t = []
                for i in range(len(h)):

                    t.append(alias.alias_sample(
                        self.node_accept, self.node_alias))

            if self.order == 'all':
                yield ([np.array(h), np.array(t)], [sign, sign])
            else:
                yield ([np.array(h), np.array(t)], [sign])
            mod += 1
            mod %= mod_size
            if mod == 0:
                start_index = end_index
                end_index = min(start_index + self.batch_size, data_size)

            if start_index >= data_size:
                # all edges consumed: reshuffle and start over
                mod = 0
                h = []
                shuffle_indices = np.random.permutation(np.arange(data_size))
                start_index = 0
                end_index = min(start_index + self.batch_size, data_size)

    def get_embeddings(self,):
        """Return the learned embeddings as a dict and as a dense matrix.

        The dense matrix is indexed by ``int(node id)``. It has at least 100
        rows (the historical fixed size) and grows when a node id needs more;
        its width follows the embedding actually learned instead of the old
        hard-coded 64, which failed for any other ``embedding_size``.

        :return: ``(embeddings_by_node, dense_matrix)``.
        :raises ValueError: when a node id cannot be converted with ``int``.
        """
        self._embeddings = {}
        if self.order == 'first':
            embeddings = self.embedding_dict['first'].get_weights()[0]
        elif self.order == 'second':
            embeddings = self.embedding_dict['second'].get_weights()[0]
        else:
            embeddings = np.hstack((self.embedding_dict['first'].get_weights()[
                                   0], self.embedding_dict['second'].get_weights()[0]))
        idx2node = self.idx2node
        row_ids = [int(idx2node[i]) for i in range(len(embeddings))]
        num_rows = max([100] + [row + 1 for row in row_ids])
        result = np.zeros((num_rows, embeddings.shape[1]))
        for i, embedding in enumerate(embeddings):
            self._embeddings[idx2node[i]] = embedding
            result[row_ids[i]] = embedding
        return self._embeddings, result

    def train(self, batch_size=1024, epochs=1, initial_epoch=0, verbose=1, times=1):
        """Fit the model on the batch generator and return the Keras history."""
        self.reset_training_config(batch_size, times)
        hist = self.model.fit_generator(self.batch_it, epochs=epochs, initial_epoch=initial_epoch, steps_per_epoch=self.steps_per_epoch,
                                        verbose=verbose)
        return hist
Original line number Original line Diff line number Diff line

import numpy as np
import classify
from sklearn.linear_model import LogisticRegression
import line
import matplotlib.pyplot as plt
import networkx as nx
from sklearn.manifold import TSNE


def evaluate_embeddings(embeddings):
    """Score ``embeddings`` with a logistic-regression node classifier.

    The wiki label file is split 80/20; the classifier prints its scores and
    this function returns nothing.
    """
    node_ids, node_labels = classify.read_node_label('../data/wiki/wiki_labels.txt')
    tr_frac = 0.8
    percent = tr_frac * 100
    print("Training classifier using {:.2f}% nodes...".format(percent))
    runner = classify.Classifier(embeddings=embeddings, clf=LogisticRegression())
    runner.split_train_evaluate(node_ids, node_labels, tr_frac)


def plot_embeddings(embeddings,):
    """Show a t-SNE scatter plot of the labelled wiki nodes.

    One scatter series is drawn per distinct first label.
    """
    X, Y = classify.read_node_label('../data/wiki/wiki_labels.txt')

    matrix = np.array([embeddings[key] for key in X])

    reducer = TSNE(n_components=2)
    node_pos = reducer.fit_transform(matrix)

    # first label -> positions (in X) of the nodes that carry it
    groups = {}
    for position in range(len(X)):
        first_label = Y[position][0]
        groups.setdefault(first_label, []).append(position)

    for first_label, positions in groups.items():
        plt.scatter(node_pos[positions, 0], node_pos[positions, 1],
                    label=first_label)
    plt.legend()
    plt.show()

def read(arr):
    """Build an undirected graph from ``(a, b)`` pairs, each edge weighted 1."""
    graph = nx.Graph()
    for pair in arr:
        source, target = pair
        # add_node is a no-op for a node that is already present
        graph.add_node(source)
        graph.add_node(target)
        graph.add_edge(source, target, weight=1)
    return graph

def read_all():
    """Embed every graph stored in ``graph/data_val.npy`` with LINE.

    Each element of the loaded array is converted by ``read``, a
    second-order LINE model (embedding size 64) is trained for 100 epochs,
    and the dense embedding matrix is saved to
    ``graph/data_val/transformed_<n>.npy``.

    :return: the model trained on the last graph, or ``None`` when the input
        contains no graph (this used to raise because ``model`` was unbound).
    """
    import os  # local import: used only to build portable output paths

    data = np.load('graph/data_val.npy', allow_pickle=True)

    # 'graph/data_val.npy' without its extension is the output directory.
    out_dir = os.path.splitext('graph/data_val.npy')[0]
    os.makedirs(out_dir, exist_ok=True)

    model = None
    for graph_id, edges in enumerate(data):
        G = read(edges)
        model = line.LINE(G, embedding_size=64, order='second')
        model.train(batch_size=120, epochs=100, verbose=2)
        _, result = model.get_embeddings()
        result = np.asarray(result)
        np.save(os.path.join(out_dir, "transformed_" + str(graph_id)), result)
        print(graph_id, "DONE")
    return model
# Script entry point: embed every stored graph with LINE.
if __name__ == "__main__":
    # The string literal below is a disabled single-graph experiment on the
    # wiki edge list; it is evaluated as a no-op expression.
    '''G = nx.read_edgelist('../data/wiki/Wiki_edgelist.txt',
                         create_using=nx.DiGraph(), nodetype=None, data=[('weight', int)])

    model = line.LINE(G, embedding_size=64, order='second')
    model.train(batch_size=1024, epochs=50, verbose=2)
    print("hnaya",model)
    embeddings, result = model.get_embeddings()
    #print(embeddings)
    result = np.asarray(embeddings)
    #print(result)
    #evaluate_embeddings(embeddings)
    #plot_embeddings(embeddings)'''
    # read_all() returns the model trained on the last graph.
    model = read_all()
Original line number Original line Diff line number Diff line
# -*- coding:utf-8 -*-

"""



Author:

    Weichen Shen,wcshen1994@163.com



Reference:

    [1] Grover A, Leskovec J. node2vec: Scalable feature learning for networks[C]//Proceedings of the 22nd ACM SIGKDD international conference on Knowledge discovery and data mining. ACM, 2016: 855-864.(https://www.kdd.org/kdd2016/papers/files/rfp0218-groverA.pdf)



"""

from gensim.models import Word2Vec
import pandas as pd

from ..walker import RandomWalker


class Node2Vec:
    """node2vec: biased random walks over a graph fed to a Word2Vec model."""

    # Class-level default so the "not trained" guard in get_embeddings works
    # on any instance; train() replaces it with the fitted model.
    w2v_model = None

    def __init__(self, graph, walk_length, num_walks, p=1.0, q=1.0, workers=1, use_rejection_sampling=0):
        """Create the walker, preprocess it and generate the walk corpus.

        :param graph: graph handed to ``RandomWalker``.
        :param walk_length: forwarded to ``simulate_walks``.
        :param num_walks: forwarded to ``simulate_walks``.
        :param p: forwarded to ``RandomWalker``.
        :param q: forwarded to ``RandomWalker``.
        :param workers: forwarded to ``simulate_walks``.
        :param use_rejection_sampling: forwarded to ``RandomWalker``.
        """
        self.graph = graph
        self.w2v_model = None
        self._embeddings = {}
        self.walker = RandomWalker(
            graph, p=p, q=q, use_rejection_sampling=use_rejection_sampling)

        print("Preprocess transition probs...")
        self.walker.preprocess_transition_probs()

        self.sentences = self.walker.simulate_walks(
            num_walks=num_walks, walk_length=walk_length, workers=workers, verbose=1)

    def train(self, embed_size=128, window_size=5, workers=3, iter=5, **kwargs):
        """Fit Word2Vec on the walks and return the fitted model.

        Extra keyword arguments are forwarded to ``Word2Vec``; the settings
        below override any value passed for the same key, except
        ``min_count`` which only defaults to 0.
        """
        kwargs["sentences"] = self.sentences
        kwargs["min_count"] = kwargs.get("min_count", 0)
        kwargs["size"] = embed_size
        kwargs["sg"] = 1
        kwargs["hs"] = 0  # node2vec not use Hierarchical Softmax
        kwargs["workers"] = workers
        kwargs["window"] = window_size
        kwargs["iter"] = iter

        print("Learning embedding vectors...")
        model = Word2Vec(**kwargs)
        print("Learning embedding vectors done!")

        self.w2v_model = model

        return model

    def get_embeddings(self,):
        """Return ``{node: vector}`` for every graph node.

        Prints a notice and returns an empty dict when :meth:`train` has not
        been called yet.
        """
        if self.w2v_model is None:
            print("model not train")
            return {}

        self._embeddings = {}
        for word in self.graph.nodes():
            self._embeddings[word] = self.w2v_model.wv[word]

        return self._embeddings
Original line number Original line Diff line number Diff line
import numpy as np



from ge.classify import read_node_label,Classifier

from ge import Node2Vec

from sklearn.linear_model import LogisticRegression



import matplotlib.pyplot as plt

import networkx as nx

from sklearn.manifold import TSNE



def evaluate_embeddings(embeddings):
    """Score ``embeddings`` on the Brazil-airports labels.

    The label file is read with its header skipped, split 80/20, and a
    logistic-regression classifier is trained and evaluated; the classifier
    prints the scores and nothing is returned.
    """
    label_path = '../data/flight/labels-brazil-airports.txt'
    airports, classes = read_node_label(label_path,skip_head=True)

    tr_frac = 0.8
    print("Training classifier using {:.2f}% nodes...".format(tr_frac * 100))

    evaluator = Classifier(embeddings=embeddings, clf=LogisticRegression())
    evaluator.split_train_evaluate(airports, classes, tr_frac)





def plot_embeddings(embeddings,):
    """Show a t-SNE scatter plot of the labelled Brazil-airports nodes.

    One scatter series is drawn per distinct first label.
    """
    X, Y = read_node_label('../data/flight/labels-brazil-airports.txt',skip_head=True)

    stacked = np.array([embeddings[name] for name in X])

    node_pos = TSNE(n_components=2).fit_transform(stacked)

    # first label -> positions (in X) of the nodes that carry it
    by_label = {}
    for row in range(len(X)):
        by_label.setdefault(Y[row][0], []).append(row)

    for label, rows in by_label.items():
        plt.scatter(node_pos[rows, 0], node_pos[rows, 1], label=label)
    plt.legend()
    plt.show()

# Script entry point: node2vec on the Brazil-airports graph, then
# classification scores and a t-SNE plot of the embeddings.
if __name__ == "__main__":
    G = nx.read_edgelist(
        '../data/flight/brazil-airports.edgelist',
        create_using=nx.DiGraph(),
        nodetype=None,
        data=[('weight', int)],
    )

    model = Node2Vec(
        G,
        walk_length=10,
        num_walks=80,
        p=0.25,
        q=2,
        workers=1,
        use_rejection_sampling=0,
    )
    model.train()
    embeddings = model.get_embeddings()

    evaluate_embeddings(embeddings)
    plot_embeddings(embeddings)
Original line number Original line Diff line number Diff line

import numpy as np

from ge.classify import read_node_label, Classifier
from ge import Node2Vec
from sklearn.linear_model import LogisticRegression

import matplotlib.pyplot as plt
import networkx as nx
from sklearn.manifold import TSNE


def evaluate_embeddings(embeddings):
    """Score ``embeddings`` on the wiki labels with logistic regression.

    Uses an 80/20 split; the classifier prints the scores and nothing is
    returned.
    """
    samples, targets = read_node_label('../data/wiki/wiki_labels.txt')
    tr_frac = 0.8
    notice = "Training classifier using {:.2f}% nodes...".format(tr_frac * 100)
    print(notice)
    harness = Classifier(embeddings=embeddings, clf=LogisticRegression())
    harness.split_train_evaluate(samples, targets, tr_frac)


def plot_embeddings(embeddings,):
    """Show a t-SNE scatter plot of the labelled wiki nodes.

    One scatter series is drawn per distinct first label.
    """
    X, Y = read_node_label('../data/wiki/wiki_labels.txt')

    features = np.array([embeddings[node_id] for node_id in X])

    projector = TSNE(n_components=2)
    node_pos = projector.fit_transform(features)

    # first label -> positions (in X) of the nodes that carry it
    label_rows = {}
    for row in range(len(X)):
        label_rows.setdefault(Y[row][0], []).append(row)

    for label, rows in label_rows.items():
        plt.scatter(node_pos[rows, 0], node_pos[rows, 1], label=label)
    plt.legend()
    plt.show()


# Script entry point: node2vec on the wiki graph, then classification
# scores and a t-SNE plot of the embeddings.
if __name__ == "__main__":
    G = nx.read_edgelist(
        '../data/wiki/Wiki_edgelist.txt',
        create_using=nx.DiGraph(),
        nodetype=None,
        data=[('weight', int)],
    )
    model = Node2Vec(
        G,
        walk_length=10,
        num_walks=80,
        p=0.25,
        q=4,
        workers=1,
        use_rejection_sampling=0,
    )
    model.train(window_size=5, iter=3)
    embeddings = model.get_embeddings()

    evaluate_embeddings(embeddings)
    plot_embeddings(embeddings)
+174 −0
Original line number Original line Diff line number Diff line
# -*- coding:utf-8 -*-

"""



Author:

    Weichen Shen,wcshen1994@163.com



Reference:

    [1] Wang D, Cui P, Zhu W. Structural deep network embedding[C]//Proceedings of the 22nd ACM SIGKDD international conference on Knowledge discovery and data mining. ACM, 2016: 1225-1234.(https://www.kdd.org/kdd2016/papers/files/rfp0191-wangAemb.pdf)



"""
import time

import numpy as np
import scipy.sparse as sp
import tensorflow as tf
from tensorflow.python.keras import backend as K
from tensorflow.python.keras.callbacks import History
from tensorflow.python.keras.layers import Dense, Input
from tensorflow.python.keras.models import Model
from tensorflow.python.keras.regularizers import l1_l2

from ..utils import preprocess_nxgraph


def l_2nd(beta):
    """Return the second-order (reconstruction) loss weighted by ``beta``.

    Elements whose target is non-zero are weighted by ``beta`` and all other
    elements by 1 before the squared error is summed over the last axis and
    averaged.

    The weights are built with backend ops: ``y_true`` is a symbolic tensor,
    so the previous NumPy mask (``np.ones_like`` plus boolean indexing) could
    not inspect its values element by element.
    """
    def loss_2nd(y_true, y_pred):
        nonzero = K.cast(K.not_equal(y_true, 0), K.dtype(y_pred))
        # 1 where the target is zero, beta where it is non-zero
        b_ = 1.0 + (beta - 1.0) * nonzero
        x = K.square((y_true - y_pred) * b_)
        t = K.sum(x, axis=-1, )
        return K.mean(t)

    return loss_2nd


def l_1st(alpha):
    """Return the first-order loss scaled by ``alpha``.

    With ``L = y_true`` and ``Y = y_pred`` the loss is
    ``alpha * 2 * trace(Y^T L Y) / batch_size``.
    """
    def loss_1st(y_true, y_pred):
        L = y_true
        Y = y_pred
        # tf.cast(..., tf.float32) is what the deprecated tf.to_float did.
        batch_size = tf.cast(K.shape(L)[0], tf.float32)
        return alpha * 2 * tf.linalg.trace(tf.matmul(tf.matmul(Y, L, transpose_a=True), Y)) / batch_size

    return loss_1st


def create_model(node_size, hidden_size=[256, 128], l1=1e-5, l2=1e-4):
    """Build the SDNE autoencoder.

    The encoder stacks one ReLU ``Dense`` layer per entry of ``hidden_size``
    (the last one is named ``'1st'`` and is the embedding); the decoder
    mirrors all but the last entry and ends with a ``node_size``-wide layer
    named ``'2nd'``.

    :param node_size: width of the adjacency-row input and of the output.
    :param hidden_size: encoder layer widths.
    :param l1: L1 factor of the kernel regularizer.
    :param l2: L2 factor of the kernel regularizer.
    :return: ``(model, emb)``; ``model`` maps ``[A, L]`` to
        ``[reconstruction, embedding]`` and ``emb`` maps ``A`` to the
        embedding.
    """
    A = Input(shape=(node_size,))
    L = Input(shape=(None,))

    last = len(hidden_size) - 1
    hidden = A
    for depth, units in enumerate(hidden_size):
        layer_options = {
            'activation': 'relu',
            'kernel_regularizer': l1_l2(l1, l2),
        }
        if depth == last:
            layer_options['name'] = '1st'
        hidden = Dense(units, **layer_options)(hidden)
    Y = hidden

    # decoder: mirror the encoder, skipping the embedding layer itself
    for depth in range(last - 1, -1, -1):
        hidden = Dense(hidden_size[depth], activation='relu',
                       kernel_regularizer=l1_l2(l1, l2))(hidden)

    A_ = Dense(node_size, 'relu', name='2nd')(hidden)
    model = Model(inputs=[A, L], outputs=[A_, Y])
    emb = Model(inputs=A, outputs=Y)
    return model, emb


class SDNE(object):
    """Structural Deep Network Embedding trainer."""

    def __init__(self, graph, hidden_size=[32, 16], alpha=1e-6, beta=5., nu1=1e-5, nu2=1e-4, ):
        """
        :param graph: graph exposing ``edges()``, ``number_of_nodes()`` and
            ``graph[u][v]`` edge-attribute lookup.
        :param hidden_size: encoder layer widths passed to ``create_model``.
        :param alpha: weight of the first-order loss.
        :param beta: weight passed to the second-order loss.
        :param nu1: L1 regularisation factor.
        :param nu2: L2 regularisation factor.
        """
        self.graph = graph
        # self.g.remove_edges_from(self.g.selfloop_edges())
        self.idx2node, self.node2idx = preprocess_nxgraph(self.graph)

        self.node_size = self.graph.number_of_nodes()
        self.hidden_size = hidden_size
        self.alpha = alpha
        self.beta = beta
        self.nu1 = nu1
        self.nu2 = nu2

        # Adj Matrix,L Matrix
        matrices = self._create_A_L(self.graph, self.node2idx)
        self.A, self.L = matrices
        self.reset_model()
        self.inputs = [self.A, self.L]
        self._embeddings = {}

    def reset_model(self, opt='adam'):
        """Rebuild and compile the model, then refresh the embeddings."""
        built = create_model(
            self.node_size,
            hidden_size=self.hidden_size,
            l1=self.nu1,
            l2=self.nu2,
        )
        self.model, self.emb_model = built
        losses = [l_2nd(self.beta), l_1st(self.alpha)]
        self.model.compile(opt, losses)
        self.get_embeddings()

    def train(self, batch_size=1024, epochs=1, initial_epoch=0, verbose=1):
        """Train the autoencoder and return the Keras history.

        When ``batch_size`` covers all nodes a single ``fit`` call is used;
        otherwise the rows are fed batch by batch with ``train_on_batch`` and
        the mean losses per epoch are recorded in a ``History`` object.
        """
        if batch_size < self.node_size:
            steps_per_epoch = (self.node_size - 1) // batch_size + 1
            hist = History()
            hist.on_train_begin()
            logs = {}
            for epoch in range(initial_epoch, epochs):
                started = time.time()
                # running sums of [total, 2nd-order, 1st-order] loss
                losses = np.zeros(3)
                for step in range(steps_per_epoch):
                    low = step * batch_size
                    high = min((step + 1) * batch_size, self.node_size)
                    index = np.arange(low, high)
                    A_train = self.A[index, :].todense()
                    L_mat_train = self.L[index][:, index].todense()
                    inp = [A_train, L_mat_train]
                    losses += self.model.train_on_batch(inp, inp)
                losses = losses / steps_per_epoch

                logs['loss'] = losses[0]
                logs['2nd_loss'] = losses[1]
                logs['1st_loss'] = losses[2]
                epoch_time = int(time.time() - started)
                hist.on_epoch_end(epoch, logs)
                if verbose > 0:
                    print('Epoch {0}/{1}'.format(epoch + 1, epochs))
                    print('{0}s - loss: {1: .4f} - 2nd_loss: {2: .4f} - 1st_loss: {3: .4f}'.format(
                        epoch_time, losses[0], losses[1], losses[2]))
            return hist

        if batch_size > self.node_size:
            print('batch_size({0}) > node_size({1}),set batch_size = {1}'.format(
                batch_size, self.node_size))
            batch_size = self.node_size
        dense_A = self.A.todense()
        dense_L = self.L.todense()
        return self.model.fit(
            [dense_A, dense_L],
            [dense_A, dense_L],
            batch_size=batch_size,
            epochs=epochs,
            initial_epoch=initial_epoch,
            verbose=verbose,
            shuffle=False,
        )

    def evaluate(self, ):
        """Evaluate the model on its own inputs in a single batch."""
        return self.model.evaluate(
            x=self.inputs, y=self.inputs, batch_size=self.node_size)

    def get_embeddings(self):
        """Run the encoder on the full adjacency matrix.

        :return: ``{node: embedding}`` for every node.
        """
        self._embeddings = {}
        dense_A = self.A.todense()
        vectors = self.emb_model.predict(dense_A, batch_size=self.node_size)
        look_back = self.idx2node
        for row, vector in enumerate(vectors):
            self._embeddings[look_back[row]] = vector

        return self._embeddings

    def _create_A_L(self, graph, node2idx):
        """Build the adjacency matrix and the Laplacian of its symmetrisation.

        :param graph: source graph.
        :param node2idx: mapping from node id to matrix row/column.
        :return: ``(A, L)`` as sparse matrices. ``A`` holds each edge once;
            ``L = D - A_`` where ``A_`` holds every edge in both directions
            and ``D`` is the diagonal of the row sums of ``A_``.
        """
        node_size = graph.number_of_nodes()
        weights = []
        rows = []
        cols = []

        for v1, v2 in graph.edges():
            weights.append(graph[v1][v2].get('weight', 1))
            rows.append(node2idx[v1])
            cols.append(node2idx[v2])

        shape = (node_size, node_size)
        A = sp.csr_matrix((weights, (rows, cols)), shape=shape)
        A_ = sp.csr_matrix(
            (weights + weights, (rows + cols, cols + rows)), shape=shape)

        row_sums = A_.sum(axis=1).flatten().tolist()[0]
        D = sp.diags(row_sums)
        L = D - A_
        return A, L
Original line number Original line Diff line number Diff line

import numpy as np

from ge.classify import read_node_label, Classifier
from ge import SDNE
from sklearn.linear_model import LogisticRegression

import matplotlib.pyplot as plt
import networkx as nx
from sklearn.manifold import TSNE


def evaluate_embeddings(embeddings):
    """Score ``embeddings`` on the wiki labels with logistic regression.

    Uses an 80/20 split; the classifier prints the scores and nothing is
    returned.
    """
    wiki_nodes, wiki_labels = read_node_label('../data/wiki/wiki_labels.txt')
    tr_frac = 0.8
    print("Training classifier using {:.2f}% nodes...".format(tr_frac * 100))
    logistic = LogisticRegression()
    scorer = Classifier(embeddings=embeddings, clf=logistic)
    scorer.split_train_evaluate(wiki_nodes, wiki_labels, tr_frac)


def plot_embeddings(embeddings,):
    """Show a t-SNE scatter plot of the labelled wiki nodes.

    One scatter series is drawn per distinct first label.
    """
    X, Y = read_node_label('../data/wiki/wiki_labels.txt')

    points = np.array([embeddings[node] for node in X])

    tsne = TSNE(n_components=2)
    node_pos = tsne.fit_transform(points)

    # first label -> positions (in X) of the nodes that carry it
    buckets = {}
    for position in range(len(X)):
        buckets.setdefault(Y[position][0], []).append(position)

    for label, positions in buckets.items():
        xs = node_pos[positions, 0]
        ys = node_pos[positions, 1]
        plt.scatter(xs, ys, label=label)
    plt.legend()
    plt.show()


# Script entry point: SDNE on the wiki graph, then classification scores
# and a t-SNE plot of the embeddings.
if __name__ == "__main__":
    G = nx.read_edgelist(
        '../data/wiki/Wiki_edgelist.txt',
        create_using=nx.DiGraph(),
        nodetype=None,
        data=[('weight', int)],
    )

    model = SDNE(G, hidden_size=[256, 128])
    model.train(batch_size=3000, epochs=40, verbose=2)
    embeddings = model.get_embeddings()

    evaluate_embeddings(embeddings)
    plot_embeddings(embeddings)
Original line number Original line Diff line number Diff line
from .deepwalk import DeepWalk
from .node2vec import Node2Vec
from .line import LINE
from .sdne import SDNE
from .struc2vec import Struc2Vec


__all__ = ["DeepWalk", "Node2Vec", "LINE", "SDNE", "Struc2Vec"]
+14 −0
Original line number Original line Diff line number Diff line
*.pyc
.DS_Store
target
bin
build
.gradle
*.iml
*.ipr
*.iws
*.log
.classpath
.project
.settings
.idea
 No newline at end of file
+98 −0

File added.

Preview size limit exceeded, changes collapsed.

Skit-Cluster.py

0 → 100644
+175 −0

File added.

Preview size limit exceeded, changes collapsed.

Train2.py

0 → 100644
+97 −0

File added.

Preview size limit exceeded, changes collapsed.

Voisins.py

0 → 100644
+157 −0

File added.

Preview size limit exceeded, changes collapsed.

example1.model

0 → 100644
+3 −0

File added.

Preview size limit exceeded, changes collapsed.

generation.py

0 → 100644
+67 −0

File added.

Preview size limit exceeded, changes collapsed.

graph_node_add.py

0 → 100644
+16 −0

File added.

Preview size limit exceeded, changes collapsed.

merge.py

0 → 100644
+27 −0

File added.

Preview size limit exceeded, changes collapsed.

merge_all.py

0 → 100644
+26 −0

File added.

Preview size limit exceeded, changes collapsed.

merge_embaded.py

0 → 100644
+18 −0

File added.

Preview size limit exceeded, changes collapsed.

model.h5

0 → 100644
+551 KiB

File added.

Preview size limit exceeded, changes collapsed.

model2.h5

0 → 100644
+5.05 MiB

File added.

Preview size limit exceeded, changes collapsed.

split_data.py

0 → 100644
+83 −0

File added.

Preview size limit exceeded, changes collapsed.

train.py

0 → 100644
+118 −0

File added.

Preview size limit exceeded, changes collapsed.

transformation_clus.py

0 → 100644
+119 −0

File added.

Preview size limit exceeded, changes collapsed.