Skip to content
Snippets Groups Projects
Commit bee165d8 authored by Ikenna Oluigbo's avatar Ikenna Oluigbo
Browse files

LP

parent 996d673a
No related branches found
No related tags found
No related merge requests found
embeds.py 0 → 100644
import numpy as np
import networkx as nx
import os
from sklearn.model_selection import train_test_split
from sklearn.metrics import recall_score
from sklearn.metrics import precision_score
from sklearn.metrics import accuracy_score
from sklearn.manifold import TSNE
from sklearn.linear_model import LogisticRegression
from build_graph import node_labels
from CNI import CNI
# Mapping returned by build_graph.node_labels(); read_EmbsCNI indexes it by
# integer node id to obtain each node's label.
labels_dict = node_labels()
# The label values alone, in the mapping's iteration order.
labels = list(labels_dict.values())
# CNI() yields a two-element result; only the second element is kept. It is
# indexed by integer node id in embs_with_CNI() and unpickle().
# NOTE(review): presumably a log-scaled CNI score per node - confirm in CNI.py.
_ , CNI_log = CNI()
# Module-level accumulator that embs_with_CNI() appends its parsed rows to.
raw_list = []
# Add CNI to original embeddings
def embs_with_CNI():
    """Append each node's CNI value to its embedding row and save the result.

    Reads ``filename.emb``, discards its first row (presumably the header
    line of the embedding format - confirm against the tool that wrote it),
    and parses every remaining row as ``<node id> <float> <float> ...``.
    The node's entry in the module-level ``CNI_log`` is appended to the row
    as a string, and the rows are written to ``filename.txt`` with one
    space-separated row per line.

    Side effects:
        The rows parsed by this call are also appended to the module-level
        ``raw_list``. Only the rows from the current call are written to the
        output file, so calling the function twice no longer duplicates rows
        in ``filename.txt``.

    Raises:
        OSError: if ``filename.emb`` cannot be opened.
        ValueError: if a field of a row is not numeric.
    """
    rows = []
    # The context manager closes the handle even when parsing fails.
    with open('filename.emb', 'r') as emb_file:  # Enter emb file
        next(emb_file, None)  # Removes first row in learned embeddings
        for line in emb_file:
            fields = line.split()
            if not fields:
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            # Node ids may be written as floats ("3.0"); normalise to int.
            node = int(float(fields[0]))
            vector = [float(value) for value in fields[1:]]
            rows.append([node, *vector, str(CNI_log[node])])

    # Keep the module-level accumulator in sync for code that reads it.
    raw_list.extend(rows)

    # Mixed int/float/str rows become a string array; fmt='%s' writes them
    # back verbatim.
    embeddings = np.array(rows)
    np.savetxt('filename.txt', embeddings, fmt='%s')  # Output file / filepath
# Unpickle .pkl embedding files from LINE and add CNI
def unpickle():
    """Load pickled embeddings, append each node's CNI value, and save as text.

    ``filename.pkl`` must unpickle to a mapping of node id to an iterable of
    embedding values. For every entry a row
    ``[int(node id), *embedding values, CNI_log[int(node id)]]`` is built,
    and all rows are written to ``filename.txt`` with one space-separated
    row per line.

    The file is read with the standard ``pickle`` module instead of
    ``networkx.read_gpickle``, which is no longer available in
    NetworkX 3.0 and later.

    Raises:
        OSError: if ``filename.pkl`` cannot be opened.
    """
    import pickle  # function-scope import keeps this block self-contained

    # SECURITY: unpickling can execute arbitrary code. Only load files that
    # were produced by a trusted run of the embedding tool.
    with open('filename.pkl', 'rb') as pkl_file:  # Enter pkl file
        H = pickle.load(pkl_file)

    raw_l = []
    for key, vector in H.items():
        node = int(key)
        raw_l.append([node, *vector, CNI_log[node]])

    embeddings = np.array(raw_l)
    np.savetxt('filename.txt', embeddings, fmt='%s')  # Output file / filepath
#unpickle()
def read_EmbsCNI(file):
    """Load node ids, labels and embedding vectors from the first usable file.

    The paths are examined in order and the first one whose extension is
    recognised is loaded and returned:

    * ``.emb`` - whitespace-separated text; the first row is skipped.
    * ``.txt`` - whitespace-separated text with no row skipped; the rows are
      shuffled with ``np.random.shuffle`` before being split.
    * ``.pkl`` - a pickled mapping of node id to embedding vector.

    For the text formats, the first column of every row is the node id and
    the remaining columns form the embedding vector.

    Args:
        file: Iterable of file paths.

    Returns:
        A tuple ``(nodes, labels, nodes_emb, name)`` where ``nodes`` is a
        list of int node ids, ``labels`` holds ``labels_dict[node]`` for each
        node, ``nodes_emb`` holds the per-node vectors and ``name`` is a
        descriptive title built from the upper-cased path without its
        extension. Returns ``None`` when no path has a recognised extension.
    """
    for path in file:
        stem, ext = os.path.splitext(path)
        extension = ext.lower()
        filebody = stem.upper()

        if extension in ('.emb', '.txt'):
            # Load the path being examined (the original always loaded
            # file[-1]) and let NumPy open and close the file itself.
            # ndmin=2 keeps a single-row file two-dimensional so the row
            # indexing below works.
            if extension == '.emb':
                name = "GRAPH WITH INHERENT LABELS // " + filebody
                table = np.loadtxt(path, skiprows=1, ndmin=2)
            else:
                name = "GRAPH WITH CNI ENCODING // " + filebody
                table = np.loadtxt(path, ndmin=2)
                np.random.shuffle(table)
            # row[0] is a scalar; int() on a 1-element slice is deprecated.
            nodes = [int(row[0]) for row in table]
            features = [labels_dict[n] for n in nodes]
            nodes_emb = [row[1:] for row in table]
            return (nodes, features, nodes_emb, name)

        if extension == '.pkl':
            import pickle  # function-scope import; replaces nx.read_gpickle

            name = "GRAPH WITH INHERENT LABELS // " + filebody
            # SECURITY: unpickling can execute arbitrary code. Only load
            # trusted files.
            with open(path, 'rb') as pkl_file:
                H = pickle.load(pkl_file)
            nodes = [int(key) for key in H]
            features = [labels_dict[n] for n in nodes]
            nodes_emb = list(H.values())
            return (nodes, features, nodes_emb, name)

        # Reached for an unrecognised extension; the message text is kept
        # unchanged for compatibility with existing output.
        print("file does not exist")

    return None
def LogReg(features, node_emb, name):
    """Fit a class-balanced logistic regression on node embeddings and print scores.

    The data is split 70/30 into train and test sets with a fixed
    ``random_state`` of 25. A ``LogisticRegression(class_weight='balanced')``
    model is fitted on the training part, and accuracy, weighted precision
    and weighted recall on the test part are printed.

    Args:
        features: Target label for each node, aligned with ``node_emb``.
        node_emb: Embedding vector for each node.
        name: Title printed ahead of the scores.

    Returns:
        None. The scores are only printed.
    """
    xtrain, xtest, ytrain, ytest = train_test_split(
        node_emb, features, test_size=0.3, random_state=25)

    model = LogisticRegression(class_weight='balanced')
    model.fit(xtrain, ytrain)
    # NOTE(review): the original constructed a second, discarded
    # LogisticRegression(max_iter=1500, solver='liblinear', multi_class='ovr',
    # ...) at this point. It never affected `model` and can raise on
    # scikit-learn releases that no longer accept `multi_class`, so it was
    # removed. If those hyperparameters were intended, pass them to the
    # constructor above instead.

    predicted = model.predict(xtest)
    accu = accuracy_score(ytest, predicted)
    prs = precision_score(ytest, predicted, average='weighted')
    rcs = recall_score(ytest, predicted, average='weighted')

    print('>>>', name, 'LogReg')
    print('Link Prediction Accuracy Score: ', accu)
    print('Precision Score: ', prs)
    print('Recall Score: ', rcs)
    print()
# Runs at import time: loads labels and embeddings from the listed file and
# discards the node ids. read_EmbsCNI picks the loader from the extension
# (.emb, .txt or .pkl).
_, features, node_emb, name = read_EmbsCNI(['filename.emb']) #Read emb or txt file
# Uncomment the call below to train and score the classifier on the loaded data.
#LogReg(features, node_emb, name)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment