__pycache__/
from __future__ import division
from __future__ import print_function
import logging
import os
import time
import numpy as np
import torch
from Ghypeddings.H2HGCN.models.base_models import NCModel
from Ghypeddings.H2HGCN.utils.data_utils import process_data
from Ghypeddings.H2HGCN.utils.train_utils import format_metrics, create_args
from Ghypeddings.H2HGCN.utils.pre_utils import *
import warnings
warnings.filterwarnings('ignore')
class H2HGCN:
def __init__(self,
adj,
features,
labels,
dim,
c=None,
num_layers=2,
bias=True,
act='leaky_relu',
select_manifold='lorentz',
num_centroid=10,
lr_stie=0.009,
stie_vars=[],
stiefel_optimizer='rsgd',
eucl_vars=[],
grad_clip=None,
optimizer='Adam',
weight_decay=0.01,
lr=0.01,
lr_scheduler='step',
lr_gamma=0.5,
step_lr_gamma=0.1,
step_lr_reduce_freq=500,
proj_init='xavier',
tie_weight=True,
cuda=0,
epochs=50,
min_epochs=50,
patience=None,
seed=42,
log_freq=1,
eval_freq=1,
val_prop=0.15,
test_prop=0.15,
double_precision=0,
dropout=0.1,
normalize_adj=False,
normalize_feats=True
):
self.args = create_args(dim,c,num_layers,bias,act,select_manifold,num_centroid,lr_stie,stie_vars,stiefel_optimizer,eucl_vars,grad_clip,optimizer,weight_decay,lr,lr_scheduler,lr_gamma,step_lr_gamma,step_lr_reduce_freq,proj_init,tie_weight,cuda,epochs,min_epochs,patience,seed,log_freq,eval_freq,val_prop,test_prop,double_precision,dropout,normalize_adj,normalize_feats)
self.args.n_nodes = adj.shape[0]
self.args.feat_dim = features.shape[1]
self.args.n_classes = len(np.unique(labels))
self.data = process_data(self.args,adj,features,labels)
if int(self.args.double_precision):
torch.set_default_dtype(torch.float64)
self.args.device = 'cuda:' + str(self.args.cuda) if int(self.args.cuda) >= 0 else 'cpu'
self.args.patience = self.args.epochs if not self.args.patience else int(self.args.patience)
self.model = NCModel(self.args)
self.optimizer, self.lr_scheduler, self.stiefel_optimizer, self.stiefel_lr_scheduler = set_up_optimizer_scheduler(True, self.args, self.model, self.args.lr, self.args.lr_stie)
if self.args.cuda is not None and int(self.args.cuda) >= 0 :
os.environ['CUDA_VISIBLE_DEVICES'] = str(self.args.cuda)
self.model = self.model.to(self.args.device)
for x, val in self.data.items():
if torch.is_tensor(self.data[x]):
self.data[x] = self.data[x].to(self.args.device)
self.best_emb = None
def fit(self):
logging.getLogger().setLevel(logging.INFO)
logging.info(f'Using: {self.args.device}')
tot_params = sum([np.prod(p.size()) for p in self.model.parameters()])
logging.info(f"Total number of parameters: {tot_params}")
t_total = time.time()
counter = 0
best_val_metrics = self.model.init_metric_dict()
best_losses = []
real_losses = []
train_losses = []
for epoch in range(self.args.epochs):
t = time.time()
self.model.train()
self.optimizer.zero_grad()
self.stiefel_optimizer.zero_grad()
embeddings = self.model.encode(self.data['features'], self.data['hgnn_adj'], self.data['hgnn_weight'])
train_metrics = self.model.compute_metrics(embeddings, self.data, 'train')
train_metrics['loss'].backward()
if self.args.grad_clip is not None:
max_norm = float(self.args.grad_clip)
all_params = list(self.model.parameters())
for param in all_params:
torch.nn.utils.clip_grad_norm_(param, max_norm)
self.optimizer.step()
self.stiefel_optimizer.step()
self.lr_scheduler.step()
self.stiefel_lr_scheduler.step()
train_losses.append(train_metrics['loss'].item())
if(len(best_losses) == 0):
best_losses.append(train_losses[0])
elif (best_losses[-1] > train_losses[-1]):
best_losses.append(train_losses[-1])
else:
best_losses.append(best_losses[-1])
if (epoch + 1) % self.args.log_freq == 0:
logging.info(" ".join(['Epoch: {:04d}'.format(epoch + 1),
'lr: {:04f}, stie_lr: {:04f}'.format(self.lr_scheduler.get_lr()[0], self.stiefel_lr_scheduler.get_lr()[0]),
format_metrics(train_metrics, 'train'),
'time: {:.4f}s'.format(time.time() - t)
]))
if (epoch + 1) % self.args.eval_freq == 0:
self.model.eval()
embeddings = self.model.encode(self.data['features'], self.data['hgnn_adj'], self.data['hgnn_weight'])
val_metrics = self.model.compute_metrics(embeddings, self.data, 'val')
real_losses.append(val_metrics['loss'].item())
if (epoch + 1) % self.args.log_freq == 0:
logging.info(" ".join(['Epoch: {:04d}'.format(epoch + 1), format_metrics(val_metrics, 'val')]))
if self.model.has_improved(best_val_metrics, val_metrics):
self.best_emb = embeddings
best_val_metrics = val_metrics
counter = 0
else:
counter += 1
if counter == self.args.patience and epoch > self.args.min_epochs:
logging.info("Early stopping")
break
logging.info("Training Finished!")
logging.info("Total time elapsed: {:.4f}s".format(time.time() - t_total))
return {'val':real_losses,'best':best_losses,'train':train_losses},best_val_metrics['acc'],best_val_metrics['f1'],best_val_metrics['recall'],best_val_metrics['precision'],best_val_metrics['roc_auc'],time.time() - t_total
def predict(self):
self.model.eval()
embeddings = self.model.encode(self.data['features'], self.data['hgnn_adj'], self.data['hgnn_weight'])
val_metrics = self.model.compute_metrics(embeddings, self.data, 'test')
logging.info(" ".join([format_metrics(val_metrics, 'test')]))
return val_metrics['loss'].item(),val_metrics['acc'],val_metrics['f1'],val_metrics['recall'],val_metrics['precision'],val_metrics['roc_auc']
def save_embeddings(self):
#tb_embeddings_euc = self.model.manifold.log_map_zero(self.best_emb)
for_classification_hyp = np.hstack((self.best_emb.cpu().detach().numpy(),self.data['labels'].cpu().reshape(-1,1)))
#for_classification_euc = np.hstack((tb_embeddings_euc.cpu().detach().numpy(),self.data['labels'].cpu().reshape(-1,1)))
hyp_file_path = os.path.join(os.getcwd(),'h2hgcn_embeddings_hyp.csv')
#euc_file_path = os.path.join(os.getcwd(),'h2hgcn_embeddings_euc.csv')
np.savetxt(hyp_file_path, for_classification_hyp, delimiter=',')
#np.savetxt(euc_file_path, for_classification_euc, delimiter=',')
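# --- Usage sketch (not part of the original commit) ---
# A minimal, hedged example of driving the H2HGCN wrapper above end to end on a tiny
# synthetic graph. The random adjacency / feature / label arrays are placeholders, not
# repository data, and the run assumes the packaged utilities (create_args and
# set_up_optimizer_scheduler from pre_utils, not shown in this commit) behave as
# imported above and that a CPU-only run (cuda=-1) is acceptable.
if __name__ == "__main__":
    rng = np.random.default_rng(0)
    n = 100
    adj = (rng.random((n, n)) < 0.05).astype(np.float64)
    adj = np.maximum(adj, adj.T)              # symmetrize: undirected graph
    np.fill_diagonal(adj, 0)
    features = rng.random((n, 16)).astype(np.float32)
    labels = rng.integers(0, 2, size=n)       # binary node labels

    clf = H2HGCN(adj, features, labels, dim=16, epochs=5, cuda=-1)
    losses, acc, f1, recall, precision, roc_auc, seconds = clf.fit()
    print(clf.predict())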
import torch as th
import torch.nn as nn
import torch.nn.functional as F
from Ghypeddings.H2HGCN.utils import *
class CentroidDistance(nn.Module):
"""
Computes the pairwise distances between node representations and a set of
learnable centroid embeddings.
"""
def __init__(self, args, logger, manifold):
super(CentroidDistance, self).__init__()
self.args = args
self.logger = logger
self.manifold = manifold
self.debug = False
# centroid embedding
self.centroid_embedding = nn.Embedding(
args.num_centroid, args.dim,
sparse=False,
scale_grad_by_freq=False,
)
nn_init(self.centroid_embedding, self.args.proj_init)
args.eucl_vars.append(self.centroid_embedding)
def forward(self, node_repr, mask):
"""
Args:
node_repr: [node_num, dim]
mask: [node_num, 1]; 1 denotes a real node, 0 a padded node
return:
graph_centroid_dist: [1, num_centroid]
node_centroid_dist: [1, node_num, num_centroid]
"""
node_num = node_repr.size(0)
# broadcast and reshape node_repr to [node_num * num_centroid, dim]
node_repr = node_repr.unsqueeze(1).expand(
-1,
self.args.num_centroid,
-1).contiguous().view(-1, self.args.dim)
# broadcast and reshape centroid embeddings to [node_num * num_centroid, dim]
centroid_repr = self.manifold.exp_map_zero(self.centroid_embedding(th.arange(self.args.num_centroid).to(self.args.device)))
centroid_repr = centroid_repr.unsqueeze(0).expand(
node_num,
-1,
-1).contiguous().view(-1, self.args.dim)
# get distance
node_centroid_dist = self.manifold.distance(node_repr, centroid_repr)
node_centroid_dist = node_centroid_dist.view(1, node_num, self.args.num_centroid)
# average pooling over nodes
graph_centroid_dist = th.sum(node_centroid_dist, dim=1) / th.sum(mask)
return graph_centroid_dist, node_centroid_dist
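# --- Shape-check sketch (not part of the original commit) ---
# A stand-alone illustration of the broadcast-and-reshape pattern used in
# CentroidDistance.forward above, with a plain Euclidean distance instead of the
# manifold distance so it runs without the package utilities. Shapes mirror the
# method; the sizes are arbitrary.
if __name__ == "__main__":
    node_num, num_centroid, dim = 5, 3, 4
    node_repr = th.randn(node_num, dim)
    centroids = th.randn(num_centroid, dim)

    nodes_flat = node_repr.unsqueeze(1).expand(-1, num_centroid, -1).contiguous().view(-1, dim)
    cents_flat = centroids.unsqueeze(0).expand(node_num, -1, -1).contiguous().view(-1, dim)

    node_centroid_dist = (nodes_flat - cents_flat).norm(dim=-1).view(1, node_num, num_centroid)
    print(node_centroid_dist.shape)  # torch.Size([1, 5, 3])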
import math
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.modules.module import Module
from torch.nn.parameter import Parameter
class Linear(Module):
"""
Simple Linear layer with dropout.
"""
def __init__(self, args, in_features, out_features, dropout, act, use_bias):
super(Linear, self).__init__()
self.dropout = dropout
self.linear = nn.Linear(in_features, out_features, use_bias)
self.act = act
args.eucl_vars.append(self.linear)
def forward(self, x):
hidden = self.linear.forward(x)
hidden = F.dropout(hidden, self.dropout, training=self.training)
out = self.act(hidden)
return out
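# --- Usage sketch (not part of the original commit) ---
# A hedged example of the Linear layer above. The args object only needs the
# eucl_vars list that the constructor appends to, so a SimpleNamespace stands in
# for the real argument parser output.
if __name__ == "__main__":
    from types import SimpleNamespace
    args = SimpleNamespace(eucl_vars=[])
    layer = Linear(args, in_features=8, out_features=3, dropout=0.1, act=F.relu, use_bias=True)
    print(layer(torch.randn(4, 8)).shape)  # torch.Size([4, 3])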
"""Lorentz manifold."""
import torch
import torch as th
import torch.nn as nn
import numpy as np
from torch.autograd import Function, Variable
import torch
from Ghypeddings.H2HGCN.utils import *
from Ghypeddings.H2HGCN.utils.pre_utils import *
from Ghypeddings.H2HGCN.manifolds import *
from Ghypeddings.H2HGCN.utils.math_utils import arcosh, cosh, sinh
_eps = 1e-10
class LorentzManifold:
def __init__(self, args, eps=1e-3, norm_clip=1, max_norm=1e3):
self.args = args
self.eps = eps
self.norm_clip = norm_clip
self.max_norm = max_norm
def minkowski_dot(self, x, y, keepdim=True):
res = torch.sum(x * y, dim=-1) - 2 * x[..., 0] * y[..., 0]
if keepdim:
res = res.view(res.shape + (1,))
return res
def sqdist(self, x, y, c):
K = 1. / c
prod = self.minkowski_dot(x, y)
eps = {torch.float32: 1e-7, torch.float64: 1e-15}
theta = torch.clamp(-prod / K, min=1.0 + eps[x.dtype])
sqdist = K * arcosh(theta) ** 2
return torch.clamp(sqdist, max=50.0)
@staticmethod
def ldot(u, v, keepdim=False):
"""
Lorentzian Scalar Product
Args:
u: [batch_size, d + 1]
v: [batch_size, d + 1]
Return:
keepdim: False [batch_size]
keepdim: True [batch_size, 1]
"""
d = u.size(1) - 1
uv = u * v
uv = th.cat((-uv.narrow(1, 0, 1), uv.narrow(1, 1, d)), dim=1)
return th.sum(uv, dim=1, keepdim=keepdim)
def from_lorentz_to_poincare(self, x):
"""
Args:
x: [batch_size, d + 1]
"""
d = x.size(-1) - 1
return x.narrow(-1, 1, d) / (x.narrow(-1, 0, 1) + 1)
def from_poincare_to_lorentz(self, x):
"""
Args:
x: [batch_size, d]
"""
x_norm_square = th_dot(x, x)
return th.cat((1 + x_norm_square, 2 * x), dim=1) / (1 - x_norm_square + self.eps)
def distance(self, u, v):
d = -LorentzDot.apply(u, v)
dis = Acosh.apply(d, self.eps)
return dis
def normalize(self, w):
"""
Normalize a vector so that it lies on the Lorentz manifold (hyperboloid)
Args:
w: [batch_size, d + 1]
"""
d = w.size(-1) - 1
narrowed = w.narrow(-1, 1, d)
if self.max_norm:
narrowed = th.renorm(narrowed.view(-1, d), 2, 0, self.max_norm)
first = 1 + th.sum(th.pow(narrowed, 2), dim=-1, keepdim=True)
first = th.sqrt(first)
tmp = th.cat((first, narrowed), dim=1)
return tmp
def init_embed(self, embed, irange=1e-2):
embed.weight.data.uniform_(-irange, irange)
embed.weight.data.copy_(self.normalize(embed.weight.data))
def rgrad(self, p, d_p):
"""Riemannian gradient for Lorentz"""
u = d_p
x = p
u.narrow(-1, 0, 1).mul_(-1)
u.addcmul_(self.ldot(x, u, keepdim=True).expand_as(x), x)
return d_p
def exp_map_zero(self, v):
zeros = th.zeros_like(v)
zeros[:, 0] = 1
return self.exp_map_x(zeros, v)
def exp_map_x(self, p, d_p, d_p_normalize=True, p_normalize=True):
if d_p_normalize:
d_p = self.normalize_tan(p, d_p)
ldv = self.ldot(d_p, d_p, keepdim=True)
nd_p = th.sqrt(th.clamp(ldv + self.eps, _eps))
t = th.clamp(nd_p, max=self.norm_clip)
newp = (th.cosh(t) * p) + (th.sinh(t) * d_p / nd_p)
if p_normalize:
newp = self.normalize(newp)
return newp
def normalize_tan(self, x_all, v_all):
d = v_all.size(1) - 1
x = x_all.narrow(1, 1, d)
xv = th.sum(x * v_all.narrow(1, 1, d), dim=1, keepdim=True)
tmp = 1 + th.sum(th.pow(x_all.narrow(1, 1, d), 2), dim=1, keepdim=True)
tmp = th.sqrt(tmp)
return th.cat((xv / tmp, v_all.narrow(1, 1, d)), dim=1)
def log_map_zero(self, y, i=-1):
zeros = th.zeros_like(y)
zeros[:, 0] = 1
return self.log_map_x(zeros, y)
def log_map_x(self, x, y, normalize=False):
"""Logarithmic map on the Lorentz Manifold"""
xy = self.ldot(x, y).unsqueeze(-1)
tmp = th.sqrt(th.clamp(xy * xy - 1 + self.eps, _eps))
v = Acosh.apply(-xy, self.eps) / tmp * th.addcmul(y, xy, x)
if normalize:
result = self.normalize_tan(x, v)
else:
result = v
return result
def parallel_transport(self, x, y, v):
"""Parallel transport for Lorentz"""
v_ = v
x_ = x
y_ = y
xy = self.ldot(x_, y_, keepdim=True).expand_as(x_)
vy = self.ldot(v_, y_, keepdim=True).expand_as(x_)
vnew = v_ + vy / (1 - xy) * (x_ + y_)
return vnew
def metric_tensor(self, x, u, v):
return self.ldot(u, v, keepdim=True)
class LorentzDot(Function):
@staticmethod
def forward(ctx, u, v):
ctx.save_for_backward(u, v)
return LorentzManifold.ldot(u, v)
@staticmethod
def backward(ctx, g):
u, v = ctx.saved_tensors
g = g.unsqueeze(-1).expand_as(u).clone()
g.narrow(-1, 0, 1).mul_(-1)
return g * v, g * u
class Acosh(Function):
@staticmethod
def forward(ctx, x, eps):
z = th.sqrt(th.clamp(x * x - 1 + eps, _eps))
ctx.save_for_backward(z)
ctx.eps = eps
xz = x + z
tmp = th.log(xz)
return tmp
@staticmethod
def backward(ctx, g):
z, = ctx.saved_tensors
z = th.clamp(z, min=ctx.eps)
z = g / z
return z, None
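# --- Sanity-check sketch (not part of the original commit) ---
# A hedged check that exp_map_zero maps tangent vectors onto the hyperboloid
# <x, x>_L = -1 and that the distance of a point to itself is close to 0. The
# manifold only stores `args`, so passing None is assumed to be safe for these calls.
if __name__ == "__main__":
    manifold = LorentzManifold(args=None)
    v = th.randn(4, 6) * 0.1              # small tangent vectors at the origin
    x = manifold.exp_map_zero(v)          # points on the hyperboloid
    print(LorentzManifold.ldot(x, x))     # expected to be close to -1 for every row
    print(manifold.distance(x, x))        # expected to be close to 0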
import torch as th
import torch.nn as nn
import numpy as np
from torch.autograd import Function, Variable
from Ghypeddings.clusterers.utils import *
_eps = 1e-10
class StiefelManifold:
def __init__(self, args, logger, eps=1e-3, norm_clip=1, max_norm=1e3):
self.args = args
self.logger = logger
self.eps = eps
self.norm_clip = norm_clip
self.max_norm = max_norm
def normalize(self, w):
return w
def init_embed(self, embed, irange=1e-2):
embed.weight.data.uniform_(-irange, irange)
embed.weight.data.copy_(self.normalize(embed.weight.data))
def symmetric(self, A):
return 0.5 * (A + A.t())
def rgrad(self, A, B):
out = B - A.mm(self.symmetric(A.transpose(0,1).mm(B)))
return out
def exp_map_x(self, A, ref):
data = A + ref
Q, R = data.qr()
# Fix the sign ambiguity of the QR decomposition: flip the columns of Q whose corresponding diagonal entry of R is negative, so the retraction is unique
sign = (R.diag().sign() + 0.5).sign().diag()
out = Q.mm(sign)
return out
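# --- Sanity-check sketch (not part of the original commit) ---
# A hedged check that the QR-based retraction exp_map_x above returns a matrix with
# orthonormal columns. StiefelManifold only stores args and logger, so None is
# assumed to be acceptable for this stand-alone call.
if __name__ == "__main__":
    manifold = StiefelManifold(args=None, logger=None)
    A = th.linalg.qr(th.randn(5, 3))[0]   # an orthonormal starting point
    step = 0.1 * th.randn(5, 3)           # an arbitrary update direction
    B = manifold.exp_map_x(A, step)
    print(B.t().mm(B))                    # expected to be close to the identity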
from Ghypeddings.H2HGCN.manifolds.LorentzManifold import LorentzManifold
import numpy as np
from sklearn.metrics import roc_auc_score, average_precision_score
import torch
import torch.nn as nn
import torch.nn.functional as F
import Ghypeddings.H2HGCN.models.encoders as encoders
from Ghypeddings.H2HGCN.models.encoders import H2HGCN
from Ghypeddings.H2HGCN.models.decoders import model2decoder
from Ghypeddings.H2HGCN.utils.eval_utils import acc_f1
from Ghypeddings.H2HGCN.manifolds import LorentzManifold
class BaseModel(nn.Module):
"""
Base model for graph embedding tasks.
"""
def __init__(self, args):
super(BaseModel, self).__init__()
self.c = torch.Tensor([1.]).to(args.device)
args.manifold = self.manifold = LorentzManifold(args)
args.feat_dim = args.feat_dim + 1
# add 1 for Lorentz as the degree of freedom is d - 1 with d dimensions
args.dim = args.dim + 1
self.nnodes = args.n_nodes
self.encoder = H2HGCN(args, 1)
def encode(self, x, hgnn_adj, hgnn_weight):
h = self.encoder.encode(x, hgnn_adj, hgnn_weight)
return h
def compute_metrics(self, embeddings, data, split):
raise NotImplementedError
def init_metric_dict(self):
raise NotImplementedError
def has_improved(self, m1, m2):
raise NotImplementedError
class NCModel(BaseModel):
"""
Base model for node classification task.
"""
def __init__(self, args):
super(NCModel, self).__init__(args)
self.decoder = model2decoder(self.c, args)
if args.n_classes > 2:
self.f1_average = 'micro'
else:
self.f1_average = 'binary'
self.weights = torch.Tensor([1.] * args.n_classes)
if not args.cuda == -1:
self.weights = self.weights.to(args.device)
def decode(self, h, adj, idx):
output = self.decoder.decode(h, adj)
return F.log_softmax(output[idx], dim=1)
def compute_metrics(self, embeddings, data, split):
idx = data[f'idx_{split}']
output = self.decode(embeddings, data['adj_train_norm'], idx)
loss = F.nll_loss(output, data['labels'][idx], self.weights)
acc, f1 , recall,precision,roc_auc = acc_f1(output, data['labels'][idx], average=self.f1_average)
metrics = {'loss': loss, 'acc': acc, 'f1': f1 , 'recall':recall,'precision':precision,'roc_auc':roc_auc}
return metrics
def init_metric_dict(self):
return {'acc': -1, 'f1': -1}
def has_improved(self, m1, m2):
return m1["f1"] < m2["f1"]
"""Graph decoders."""
import torch.nn as nn
import torch.nn.functional as F
from Ghypeddings.H2HGCN.layers.layers import Linear
class Decoder(nn.Module):
"""
Decoder abstract class for node classification tasks.
"""
def __init__(self, c):
super(Decoder, self).__init__()
self.c = c
def decode(self, x, adj):
if self.decode_adj:
input = (x, adj)
probs, _ = self.cls.forward(input)
else:
probs = self.cls.forward(x)
return probs
class MyDecoder(Decoder):
"""
Centroid-distance decoder for node classification: a single linear layer applied to the node-to-centroid distances.
"""
def __init__(self, c, args):
super(MyDecoder, self).__init__(c)
self.input_dim = args.num_centroid
self.output_dim = args.n_classes
act = lambda x: x
self.cls = Linear(args, self.input_dim, self.output_dim, args.dropout, act, args.bias)
self.decode_adj = False
def decode(self, x, adj):
h = x
return super(MyDecoder, self).decode(h, adj)
model2decoder = MyDecoder
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import Ghypeddings.H2HGCN.utils.math_utils as pmath
import torch as th
from Ghypeddings.H2HGCN.utils import *
from Ghypeddings.H2HGCN.utils import pre_utils
from Ghypeddings.H2HGCN.utils.pre_utils import *
from Ghypeddings.H2HGCN.manifolds import *
from Ghypeddings.H2HGCN.layers.CentroidDistance import CentroidDistance
class H2HGCN(nn.Module):
def __init__(self, args, logger):
super(H2HGCN, self).__init__()
self.debug = False
self.args = args
self.logger = logger
self.set_up_params()
self.activation = nn.SELU()
fd = args.feat_dim - 1
self.linear = nn.Linear(
int(fd), int(args.dim),
)
nn_init(self.linear, self.args.proj_init)
self.args.eucl_vars.append(self.linear)
self.distance = CentroidDistance(args, logger, args.manifold)
def create_params(self):
"""
create the GNN params for a specific msg type
"""
msg_weight = []
layer = self.args.num_layers if not self.args.tie_weight else 1
for iii in range(layer):
M = th.zeros([self.args.dim-1, self.args.dim-1], requires_grad=True)
init_weight(M, 'orthogonal')
M = nn.Parameter(M)
self.args.stie_vars.append(M)
msg_weight.append(M)
return nn.ParameterList(msg_weight)
def set_up_params(self):
"""
set up the params for all message types
"""
self.type_of_msg = 1
for i in range(0, self.type_of_msg):
setattr(self, "msg_%d_weight" % i, self.create_params())
def apply_activation(self, node_repr):
"""
apply non-linearity for different manifolds
"""
if self.args.select_manifold == "poincare":
return self.activation(node_repr)
elif self.args.select_manifold == "lorentz":
return self.args.manifold.from_poincare_to_lorentz(
self.activation(self.args.manifold.from_lorentz_to_poincare(node_repr))
)
def split_graph_by_negative_edge(self, adj_mat, weight):
"""
Split the graph according to positive and negative edges.
"""
mask = weight > 0
neg_mask = weight < 0
pos_adj_mat = adj_mat * mask.long()
neg_adj_mat = adj_mat * neg_mask.long()
pos_weight = weight * mask.float()
neg_weight = -weight * neg_mask.float()
return pos_adj_mat, pos_weight, neg_adj_mat, neg_weight
def split_graph_by_type(self, adj_mat, weight):
"""
split the graph according to edge type for multi-relational datasets
"""
multi_relation_adj_mat = []
multi_relation_weight = []
for relation in range(1, self.args.edge_type):
mask = (weight.int() == relation)
multi_relation_adj_mat.append(adj_mat * mask.long())
multi_relation_weight.append(mask.float())
return multi_relation_adj_mat, multi_relation_weight
def split_input(self, adj_mat, weight):
return [adj_mat], [weight]
def lorenz_factor(self, x, *, c=1.0, dim=-1, keepdim=False):
"""
Calculate Lorentz (gamma) factors for points in the Klein model
"""
x_norm = x.pow(2).sum(dim=dim, keepdim=keepdim)
x_norm = torch.clamp(x_norm, 0, 0.9)
tmp = 1 / torch.sqrt(1 - c * x_norm)
return tmp
def from_lorentz_to_poincare(self, x):
"""
Args:
x: [batch_size, d + 1]
"""
d = x.size(-1) - 1
return x.narrow(-1, 1, d) / (x.narrow(-1, 0, 1) + 1)
def h2p(self, x):
return self.from_lorentz_to_poincare(x)
def from_poincare_to_lorentz(self, x, eps=1e-3):
"""
Args:
x: [batch_size, d]
"""
x_norm_square = x.pow(2).sum(-1, keepdim=True)
tmp = th.cat((1 + x_norm_square, 2 * x), dim=1)
tmp = tmp / (1 - x_norm_square + eps)
return tmp
def p2h(self, x):
return self.from_poincare_to_lorentz(x)
def p2k(self, x, c=1.0):
denom = 1 + c * x.pow(2).sum(-1, keepdim=True)
return 2 * x / denom
def k2p(self, x, c=1.0):
denom = 1 + torch.sqrt(1 - c * x.pow(2).sum(-1, keepdim=True))
return x / denom
def h2k(self, x):
tmp = x.narrow(-1, 1, x.size(-1)-1) / x.narrow(-1, 0, 1)
return tmp
def k2h(self, x):
x_norm_square = x.pow(2).sum(-1, keepdim=True)
x_norm_square = torch.clamp(x_norm_square, max=0.9)
tmp = torch.ones((x.size(0), 1)).to(self.args.device)
tmp1 = th.cat((tmp, x), dim=1)
tmp2 = 1.0 / torch.sqrt(1.0 - x_norm_square)
tmp3 = (tmp1 * tmp2)
return tmp3
def hyperbolic_mean(self, y, node_num, max_neighbor, real_node_num, weight, dim=0, c=1.0, ):
'''
y [node_num * max_neighbor, dim]
'''
x = y[0:real_node_num*max_neighbor, :]
weight_tmp = weight.view(-1,1)[0:real_node_num*max_neighbor, :]
x = self.h2k(x)
lamb = self.lorenz_factor(x, c=c, keepdim=True)
lamb = lamb * weight_tmp
lamb = lamb.view(real_node_num, max_neighbor, -1)
x = x.view(real_node_num, max_neighbor, -1)
k_mean = (torch.sum(lamb * x, dim=1, keepdim=True) / (torch.sum(lamb, dim=1, keepdim=True))).squeeze()
h_mean = self.k2h(k_mean)
virtual_mean = torch.cat((torch.tensor([[1.0]]), torch.zeros(1, y.size(-1)-1)), 1).to(self.args.device)
tmp = virtual_mean.repeat(node_num-real_node_num, 1)
mean = torch.cat((h_mean, tmp), 0)
return mean
def test_lor(self, A):
tmp1 = (A[:,0] * A[:,0]).view(-1)
tmp2 = A[:,1:]
tmp2 = th.diag(tmp2.mm(tmp2.transpose(0,1)))
return (tmp1 - tmp2)
def retrieve_params(self, weight, step):
"""
Args:
weight: a list of weights
step: a certain layer
"""
layer_weight = th.cat((th.zeros((self.args.dim-1, 1)).to(self.args.device), weight[step]), dim=1)
tmp = th.zeros((1, self.args.dim)).to(self.args.device)
tmp[0,0] = 1
layer_weight = th.cat((tmp, layer_weight), dim=0)
return layer_weight
def aggregate_msg(self, node_repr, adj_mat, weight, layer_weight, mask):
"""
message passing for a specific message type.
"""
node_num, max_neighbor = adj_mat.shape[0], adj_mat.shape[1]
combined_msg = node_repr.clone()
tmp = self.test_lor(node_repr)
msg = th.mm(node_repr, layer_weight) * mask
real_node_num = (mask>0).sum()
# select out the neighbors of each node
neighbors = th.index_select(msg, 0, adj_mat.view(-1))
combined_msg = self.hyperbolic_mean(neighbors, node_num, max_neighbor, real_node_num, weight)
return combined_msg
def get_combined_msg(self, step, node_repr, adj_mat, weight, mask):
"""
perform message passing in the tangent space of x'
"""
gnn_layer = 0 if self.args.tie_weight else step
combined_msg = None
for relation in range(0, self.type_of_msg):
layer_weight = self.retrieve_params(getattr(self, "msg_%d_weight" % relation), gnn_layer)
aggregated_msg = self.aggregate_msg(node_repr,
adj_mat[relation],
weight[relation],
layer_weight, mask)
combined_msg = aggregated_msg if combined_msg is None else (combined_msg + aggregated_msg)
return combined_msg
def encode(self, node_repr, adj_list, weight):
node_repr = self.activation(self.linear(node_repr))
adj_list, weight = self.split_input(adj_list, weight)
mask = torch.ones((node_repr.size(0), 1)).to(self.args.device)
node_repr = self.args.manifold.exp_map_zero(node_repr)
for step in range(self.args.num_layers):
node_repr = node_repr * mask
tmp = node_repr
combined_msg = self.get_combined_msg(step, node_repr, adj_list, weight, mask)
combined_msg = (combined_msg) * mask
node_repr = combined_msg * mask
node_repr = self.apply_activation(node_repr) * mask
real_node_num = (mask>0).sum()
node_repr = self.args.manifold.normalize(node_repr)
_, node_centroid_sim = self.distance(node_repr, mask)
return node_centroid_sim.squeeze()
class Encoder(nn.Module):
"""
Encoder abstract class.
"""
def __init__(self, c):
super(Encoder, self).__init__()
self.c = c
def encode(self, x, adj):
if self.encode_graph:
input = (x, adj)
output, _ = self.layers.forward(input)
else:
output = self.layers.forward(x)
return output
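# --- Round-trip sketch (not part of the original commit) ---
# A hedged, stand-alone restatement of the h2k / k2h coordinate changes used by
# H2HGCN.hyperbolic_mean above (hyperboloid -> Klein model and back), checking that
# the round trip is close to the identity. The helper functions below are local
# re-implementations of those formulas, not the class methods themselves.
if __name__ == "__main__":
    from Ghypeddings.H2HGCN.manifolds import LorentzManifold

    def h2k_demo(x):
        # Klein coordinates: divide the space-like part by the time-like coordinate
        return x.narrow(-1, 1, x.size(-1) - 1) / x.narrow(-1, 0, 1)

    def k2h_demo(x):
        # lift Klein coordinates back onto the hyperboloid
        x_norm_square = torch.clamp(x.pow(2).sum(-1, keepdim=True), max=0.9)
        ones = torch.ones((x.size(0), 1))
        return torch.cat((ones, x), dim=1) / torch.sqrt(1.0 - x_norm_square)

    manifold = LorentzManifold(args=None)
    x = manifold.exp_map_zero(torch.randn(4, 6) * 0.1)
    print((k2h_demo(h2k_demo(x)) - x).abs().max())  # expected to be small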
from torch.optim import Adam
import torch as th
from torch.optim.optimizer import Optimizer, required
from Ghypeddings.H2HGCN.utils import *
import os
import math
class RiemannianSGD(Optimizer):
"""Riemannian stochastic gradient descent.
"""
def __init__(self, args, params, lr):
defaults = dict(lr=lr)
self.args = args
super(RiemannianSGD, self).__init__(params, defaults)
def step(self, lr=None):
"""
Performs a single optimization step.
"""
loss = None
for group in self.param_groups:
for p in group['params']:
if p.grad is None:
continue
d_p = p.grad.data
d_p = self.args.manifold.rgrad(p, d_p)
if lr is None:
lr = group['lr']
p.data = self.args.manifold.exp_map_x(p, -lr * d_p)
return loss
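# --- Usage sketch (not part of the original commit) ---
# A hedged example of driving RiemannianSGD with the Stiefel manifold from this
# repository. The optimizer reads the manifold off `args`, so a SimpleNamespace is
# used as a stand-in for the real argument parser output; the module path
# Ghypeddings.H2HGCN.manifolds.StiefelManifold is assumed from the file layout.
if __name__ == "__main__":
    import torch.nn as nn
    from types import SimpleNamespace
    from Ghypeddings.H2HGCN.manifolds.StiefelManifold import StiefelManifold

    args = SimpleNamespace()
    args.manifold = StiefelManifold(args, logger=None)

    W = nn.Parameter(th.linalg.qr(th.randn(5, 3))[0])  # orthonormal initial point
    opt = RiemannianSGD(args, [W], lr=0.1)

    loss = W.sum() ** 2
    loss.backward()
    opt.step()
    print(W.t().mm(W))  # columns should stay (approximately) orthonormal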
from Ghypeddings.H2HGCN.utils.pre_utils import *
"""Data utils functions for pre-processing and data loading."""
import os
import pickle as pkl
import sys
import networkx as nx
import numpy as np
import scipy.sparse as sp
import torch
from Ghypeddings.H2HGCN.utils.pre_utils import *
def convert_hgnn_adj(adj):
hgnn_adj = [[i] for i in range(adj.shape[0])]
hgnn_weight = [[1] for i in range(adj.shape[0])]
for i in range(adj.shape[0]):
for j in range(adj.shape[1]):
if adj[i,j] == 1:
hgnn_adj[i].append(j)
hgnn_weight[i].append(1)
max_len = max([len(i) for i in hgnn_adj])
normalize_weight(hgnn_adj, hgnn_weight)
hgnn_adj = pad_sequence(hgnn_adj, max_len)
hgnn_weight = pad_sequence(hgnn_weight, max_len)
hgnn_adj = np.array(hgnn_adj)
hgnn_weight = np.array(hgnn_weight)
return torch.from_numpy(hgnn_adj), torch.from_numpy(hgnn_weight).float()
def process_data(args,adj,features,labels):
data = process_data_nc(args,adj,features,labels)
data['adj_train_norm'], data['features'] = process(
data['adj_train'], data['features'], args.normalize_adj, args.normalize_feats
)
return data
def process(adj, features, normalize_adj, normalize_feats):
if sp.isspmatrix(features):
features = np.array(features.todense())
if normalize_feats:
features = normalize(features)
features = torch.Tensor(features)
if normalize_adj:
adj = normalize(adj)
adj = sparse_mx_to_torch_sparse_tensor(adj)
return adj, features
def normalize(mx):
"""Row-normalize sparse matrix."""
rowsum = np.array(mx.sum(1))
r_inv = np.power(rowsum, -1).flatten()
r_inv[np.isinf(r_inv)] = 0.
r_mat_inv = sp.diags(r_inv)
mx = r_mat_inv.dot(mx)
return mx
def sparse_mx_to_torch_sparse_tensor(sparse_mx):
"""Convert a scipy sparse matrix to a torch sparse tensor."""
sparse_mx = sparse_mx.tocoo()
indices = torch.from_numpy(
np.vstack((sparse_mx.row, sparse_mx.col)).astype(np.int64)
)
values = torch.Tensor(sparse_mx.data)
shape = torch.Size(sparse_mx.shape)
return torch.sparse.FloatTensor(indices, values, shape)
def augment(adj, features, normalize_feats=True):
deg = np.squeeze(np.sum(adj, axis=0).astype(int))
deg[deg > 5] = 5
deg_onehot = torch.tensor(np.eye(6)[deg], dtype=torch.float).squeeze()
const_f = torch.ones(features.size(0), 1)
features = torch.cat((features, deg_onehot, const_f), dim=1)
return features
def split_data(labels, val_prop, test_prop, seed):
np.random.seed(seed)
nb_nodes = labels.shape[0]
all_idx = np.arange(nb_nodes)
pos_idx = labels.nonzero()[0]
neg_idx = (1. - labels).nonzero()[0]
np.random.shuffle(pos_idx)
np.random.shuffle(neg_idx)
pos_idx = pos_idx.tolist()
neg_idx = neg_idx.tolist()
nb_pos_neg = min(len(pos_idx), len(neg_idx))
nb_val = round(val_prop * nb_pos_neg)
nb_test = round(test_prop * nb_pos_neg)
idx_val_pos, idx_test_pos, idx_train_pos = pos_idx[:nb_val], pos_idx[nb_val:nb_val + nb_test], pos_idx[nb_val + nb_test:]
idx_val_neg, idx_test_neg, idx_train_neg = neg_idx[:nb_val], neg_idx[nb_val:nb_val + nb_test], neg_idx[nb_val + nb_test:]
return idx_val_pos + idx_val_neg, idx_test_pos + idx_test_neg, idx_train_pos + idx_train_neg
def process_data_nc(args,adj,features,labels):
adj = sp.csr_matrix(adj)
hgnn_adj, hgnn_weight = convert_hgnn_adj(adj.todense())
idx_val, idx_test, idx_train = split_data(labels, args.val_prop, args.test_prop, seed=args.seed)
labels = torch.LongTensor(labels)
data = {'adj_train': adj, 'features': features, 'labels': labels, 'idx_train': idx_train, 'idx_val': idx_val, 'idx_test': idx_test, 'hgnn_adj': hgnn_adj, 'hgnn_weight': hgnn_weight}
return data
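# --- Usage sketch (not part of the original commit) ---
# A hedged illustration of split_data above: it balances the split by the size of the
# smaller class and returns validation, test and train index lists in that order.
# The label array here is arbitrary, not repository data.
if __name__ == "__main__":
    labels_demo = np.array([0, 1] * 20)
    idx_val, idx_test, idx_train = split_data(labels_demo, val_prop=0.15, test_prop=0.15, seed=42)
    print(len(idx_val), len(idx_test), len(idx_train))  # 6 6 28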
from sklearn.metrics import accuracy_score, f1_score, recall_score,precision_score,roc_auc_score
def acc_f1(output, labels, average='binary'):
preds = output.max(1)[1].type_as(labels)
if preds.is_cuda:
preds = preds.cpu()
labels = labels.cpu()
accuracy = accuracy_score(labels,preds)
f1 = f1_score(labels,preds , average=average)
recall = recall_score(labels,preds)
precision = precision_score(labels,preds )
roc_auc = roc_auc_score(labels,preds)
return accuracy, f1 , recall,precision, roc_auc
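# --- Usage sketch (not part of the original commit) ---
# A hedged example of acc_f1 above on toy class scores. Note that roc_auc is computed
# from the hard argmax predictions, not from class probabilities.
if __name__ == "__main__":
    import torch
    output = torch.tensor([[0.9, 0.1], [0.2, 0.8], [0.6, 0.4], [0.3, 0.7]])
    labels = torch.tensor([0, 1, 1, 0])
    print(acc_f1(output, labels, average='binary'))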
"""Math utils functions."""
import torch
def cosh(x, clamp=15):
return x.clamp(-clamp, clamp).cosh()
def sinh(x, clamp=15):
return x.clamp(-clamp, clamp).sinh()
def tanh(x, clamp=15):
return x.clamp(-clamp, clamp).tanh()
def arcosh(x):
return Arcosh.apply(x)
def arsinh(x):
return Arsinh.apply(x)
def artanh(x):
return Artanh.apply(x)
class Artanh(torch.autograd.Function):
@staticmethod
def forward(ctx, x):
x = x.clamp(-1 + 1e-15, 1 - 1e-15)
ctx.save_for_backward(x)
z = x.double()
return (torch.log_(1 + z).sub_(torch.log_(1 - z))).mul_(0.5).to(x.dtype)
@staticmethod
def backward(ctx, grad_output):
input, = ctx.saved_tensors
return grad_output / (1 - input ** 2)
class Arsinh(torch.autograd.Function):
@staticmethod
def forward(ctx, x):
ctx.save_for_backward(x)
z = x.double()
return (z + torch.sqrt_(1 + z.pow(2))).clamp_min_(1e-15).log_().to(x.dtype)
@staticmethod
def backward(ctx, grad_output):
input, = ctx.saved_tensors
return grad_output / (1 + input ** 2) ** 0.5
class Arcosh(torch.autograd.Function):
@staticmethod
def forward(ctx, x):
x = x.clamp(min=1.0 + 1e-15)
ctx.save_for_backward(x)
z = x.double()
return (z + torch.sqrt_(z.pow(2) - 1)).clamp_min_(1e-15).log_().to(x.dtype)
@staticmethod
def backward(ctx, grad_output):
input, = ctx.saved_tensors
return grad_output / (input ** 2 - 1) ** 0.5
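# --- Sanity-check sketch (not part of the original commit) ---
# A hedged comparison of the custom Arcosh / Artanh autograd functions above with the
# built-in torch ops, for inputs safely inside their domains.
if __name__ == "__main__":
    x = torch.tensor([1.5, 2.0, 10.0], requires_grad=True)
    print(arcosh(x), torch.acosh(x))   # values should match closely
    arcosh(x).sum().backward()
    print(x.grad)                       # should equal 1 / sqrt(x^2 - 1)

    y = torch.tensor([-0.5, 0.0, 0.5])
    print(artanh(y), torch.atanh(y))    # values should match closely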