diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..c18dd8d83ceed1806b50b0aaa46beb7e335fff13 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +__pycache__/ diff --git a/H2HGCN/.gitignore b/H2HGCN/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..ba0430d26c996e7f078385407f959c96c271087c --- /dev/null +++ b/H2HGCN/.gitignore @@ -0,0 +1 @@ +__pycache__/ \ No newline at end of file diff --git a/H2HGCN/__init__.py b/H2HGCN/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/H2HGCN/h2hgcn.py b/H2HGCN/h2hgcn.py new file mode 100644 index 0000000000000000000000000000000000000000..3db3c025b7b5cf85c6fe37fb21b6518135bcc6c8 --- /dev/null +++ b/H2HGCN/h2hgcn.py @@ -0,0 +1,160 @@ +from __future__ import division +from __future__ import print_function +import logging +import os +import time +import numpy as np +import torch +from Ghypeddings.H2HGCN.models.base_models import NCModel +from Ghypeddings.H2HGCN.utils.data_utils import process_data +from Ghypeddings.H2HGCN.utils.train_utils import format_metrics, create_args +from Ghypeddings.H2HGCN.utils.pre_utils import * +import warnings +warnings.filterwarnings('ignore') + +class H2HGCN: + def __init__(self, + adj, + features, + labels, + dim, + c=None, + num_layers=2, + bias=True, + act='leaky_relu', + select_manifold='lorentz', + num_centroid=100, + lr_stie=1, + stie_vars=[], + stiefel_optimizer='rsgd', + eucl_vars=[], + grad_clip=None, + optimizer='Adam', + weight_decay=0.1, + lr=1, + lr_scheduler='step', + lr_gamma=.5, + step_lr_gamma=0.99, + step_lr_reduce_freq=20, + proj_init='xavier', + tie_weight=True, + cuda=0, + epochs=50, + min_epochs=50, + patience=None, + seed=42, + log_freq=1, + eval_freq=1, + val_prop=.3, + test_prop=0.3, + double_precision=0, + dropout=0.1, + normalize_adj=False, + normalize_feats=True + ): + + self.args = create_args(dim,c,num_layers,bias,act,select_manifold,num_centroid,lr_stie,stie_vars,stiefel_optimizer,eucl_vars,grad_clip,optimizer,weight_decay,lr,lr_scheduler,lr_gamma,step_lr_gamma,step_lr_reduce_freq,proj_init,tie_weight,cuda,epochs,min_epochs,patience,seed,log_freq,eval_freq,val_prop,test_prop,double_precision,dropout,normalize_adj,normalize_feats) + + self.args.n_nodes = adj.shape[0] + self.args.feat_dim = features.shape[1] + self.args.n_classes = len(np.unique(labels)) + self.data = process_data(self.args,adj,features,labels) + + if int(self.args.double_precision): + torch.set_default_dtype(torch.float64) + + self.args.device = 'cuda:' + str(self.args.cuda) if int(self.args.cuda) >= 0 else 'cpu' + self.args.patience = self.args.epochs if not self.args.patience else int(self.args.patience) + self.model = NCModel(self.args) + self.optimizer, self.lr_scheduler, self.stiefel_optimizer, self.stiefel_lr_scheduler = set_up_optimizer_scheduler(True, self.args, self.model, self.args.lr, self.args.lr_stie) + + if self.args.cuda is not None and int(self.args.cuda) >= 0 : + os.environ['CUDA_VISIBLE_DEVICES'] = str(self.args.cuda) + self.model = self.model.to(self.args.device) + for x, val in self.data.items(): + if torch.is_tensor(self.data[x]): + self.data[x] = self.data[x].to(self.args.device) + self.best_emb = None + + + def fit(self): + logging.getLogger().setLevel(logging.INFO) + logging.info(f'Using: {self.args.device}') + tot_params = sum([np.prod(p.size()) for p in self.model.parameters()]) + logging.info(f"Total number of parameters: {tot_params}") + + 
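# --- Illustrative sketch (not part of this diff): driving the H2HGCN wrapper defined
# --- above on a toy graph. The data below is hypothetical, and the import path is
# --- assumed from the Ghypeddings package layout these files import from. Several
# --- layers call .cuda() directly, so this sketch also assumes a CUDA device.
import numpy as np
from Ghypeddings.H2HGCN.h2hgcn import H2HGCN   # assumed module path

n = 6
adj = np.zeros((n, n), dtype=int)
for i, j in [(0, 1), (1, 2), (2, 3), (3, 4), (4, 5)]:   # a small path graph
    adj[i, j] = adj[j, i] = 1
features = np.random.rand(n, 4).astype(np.float32)
labels = np.array([0, 1, 0, 1, 0, 1])

model = H2HGCN(adj, features, labels, dim=8, epochs=10, cuda=0)
# fit() returns the loss curves plus the best validation metrics and the elapsed time
losses, acc, f1, recall, precision, roc_auc, train_time = model.fit()
# predict() re-encodes and evaluates on the held-out test split
test_loss, acc, f1, recall, precision, roc_auc = model.predict()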
t_total = time.time() + counter = 0 + best_val_metrics = self.model.init_metric_dict() + + best_losses = [] + real_losses = [] + + for epoch in range(self.args.epochs): + t = time.time() + self.model.train() + self.optimizer.zero_grad() + self.stiefel_optimizer.zero_grad() + embeddings = self.model.encode(self.data['features'], self.data['hgnn_adj'], self.data['hgnn_weight']) + train_metrics = self.model.compute_metrics(embeddings, self.data, 'train') + train_metrics['loss'].backward() + if self.args.grad_clip is not None: + max_norm = float(self.args.grad_clip) + all_params = list(self.model.parameters()) + for param in all_params: + torch.nn.utils.clip_grad_norm_(param, max_norm) + self.optimizer.step() + self.stiefel_optimizer.step() + self.lr_scheduler.step() + self.stiefel_lr_scheduler.step() + + real_losses.append(train_metrics['loss'].item()) + if(len(best_losses) == 0): + best_losses.append(real_losses[0]) + elif (best_losses[-1] > real_losses[-1]): + best_losses.append(real_losses[-1]) + else: + best_losses.append(best_losses[-1]) + + if (epoch + 1) % self.args.log_freq == 0: + logging.info(" ".join(['Epoch: {:04d}'.format(epoch + 1), + 'lr: {:04f}, stie_lr: {:04f}'.format(self.lr_scheduler.get_lr()[0], self.stiefel_lr_scheduler.get_lr()[0]), + format_metrics(train_metrics, 'train'), + 'time: {:.4f}s'.format(time.time() - t) + ])) + + if (epoch + 1) % self.args.eval_freq == 0: + self.model.eval() + embeddings = self.model.encode(self.data['features'], self.data['hgnn_adj'], self.data['hgnn_weight']) + val_metrics = self.model.compute_metrics(embeddings, self.data, 'val') + if (epoch + 1) % self.args.log_freq == 0: + logging.info(" ".join(['Epoch: {:04d}'.format(epoch + 1), format_metrics(val_metrics, 'val')])) + if self.model.has_improved(best_val_metrics, val_metrics): + self.best_emb = embeddings + best_val_metrics = val_metrics + counter = 0 + else: + counter += 1 + if counter == self.args.patience and epoch > self.args.min_epochs: + logging.info("Early stopping") + break + + logging.info("Training Finished!") + logging.info("Total time elapsed: {:.4f}s".format(time.time() - t_total)) + return {'real':real_losses,'best':best_losses},best_val_metrics['acc'],best_val_metrics['f1'],best_val_metrics['recall'],best_val_metrics['precision'],best_val_metrics['roc_auc'],time.time() - t_total + + def predict(self): + self.model.eval() + embeddings = self.model.encode(self.data['features'], self.data['hgnn_adj'], self.data['hgnn_weight']) + val_metrics = self.model.compute_metrics(embeddings, self.data, 'test') + logging.info(" ".join([format_metrics(val_metrics, 'test')])) + return val_metrics['loss'].item(),val_metrics['acc'],val_metrics['f1'],val_metrics['recall'],val_metrics['precision'],val_metrics['roc_auc'] + + def save_embeddings(self): + #tb_embeddings_euc = self.model.manifold.log_map_zero(self.best_emb) + for_classification_hyp = np.hstack((self.best_emb.cpu().detach().numpy(),self.data['labels'].cpu().reshape(-1,1))) + #for_classification_euc = np.hstack((tb_embeddings_euc.cpu().detach().numpy(),self.data['labels'].cpu().reshape(-1,1))) + hyp_file_path = os.path.join(os.getcwd(),'h2hgcn_embeddings_hyp.csv') + #euc_file_path = os.path.join(os.getcwd(),'h2hgcn_embeddings_euc.csv') + np.savetxt(hyp_file_path, for_classification_hyp, delimiter=',') + #np.savetxt(euc_file_path, for_classification_euc, delimiter=',') diff --git a/H2HGCN/layers/CentroidDistance.py b/H2HGCN/layers/CentroidDistance.py new file mode 100644 index 
0000000000000000000000000000000000000000..546447492330997a479f47d34b3ad22094d45288 --- /dev/null +++ b/H2HGCN/layers/CentroidDistance.py @@ -0,0 +1,56 @@ +import torch as th +import torch.nn as nn +import torch.nn.functional as F +from Ghypeddings.H2HGCN.utils import * + +class CentroidDistance(nn.Module): + """ + Implement a model that calculates the pairwise distances between node representations + and centroids + """ + def __init__(self, args, logger, manifold): + super(CentroidDistance, self).__init__() + self.args = args + self.logger = logger + self.manifold = manifold + self.debug = False + + # centroid embedding + self.centroid_embedding = nn.Embedding( + args.num_centroid, args.dim, + sparse=False, + scale_grad_by_freq=False, + ) + nn_init(self.centroid_embedding, self.args.proj_init) + args.eucl_vars.append(self.centroid_embedding) + + def forward(self, node_repr, mask): + """ + Args: + node_repr: [node_num, dim] + mask: [node_num, 1] 1 denote real node, 0 padded node + return: + graph_centroid_dist: [1, num_centroid] + node_centroid_dist: [1, node_num, num_centroid] + """ + node_num = node_repr.size(0) + + # broadcast and reshape node_repr to [node_num * num_centroid, dim] + node_repr = node_repr.unsqueeze(1).expand( + -1, + self.args.num_centroid, + -1).contiguous().view(-1, self.args.dim) + + # broadcast and reshape centroid embeddings to [node_num * num_centroid, dim] + centroid_repr = self.manifold.exp_map_zero(self.centroid_embedding(th.arange(self.args.num_centroid).cuda().to(self.args.device))) + centroid_repr = centroid_repr.unsqueeze(0).expand( + node_num, + -1, + -1).contiguous().view(-1, self.args.dim) + # get distance + node_centroid_dist = self.manifold.distance(node_repr, centroid_repr) + node_centroid_dist = node_centroid_dist.view(1, node_num, self.args.num_centroid) + # average pooling over nodes + graph_centroid_dist = th.sum(node_centroid_dist, dim=1) / th.sum(mask) + return graph_centroid_dist, node_centroid_dist + diff --git a/H2HGCN/layers/__init__.py b/H2HGCN/layers/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..8b137891791fe96927ad78e64b0aad7bded08bdc --- /dev/null +++ b/H2HGCN/layers/__init__.py @@ -0,0 +1 @@ + diff --git a/H2HGCN/layers/layers.py b/H2HGCN/layers/layers.py new file mode 100644 index 0000000000000000000000000000000000000000..48d5c1f2799dbc0cb8dc5fbbc4b8236de4dc9abf --- /dev/null +++ b/H2HGCN/layers/layers.py @@ -0,0 +1,24 @@ +import math +import torch +import torch.nn as nn +import torch.nn.functional as F +from torch.nn.modules.module import Module +from torch.nn.parameter import Parameter + +class Linear(Module): + """ + Simple Linear layer with dropout. 
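# --- Sketch (standalone, not part of this diff): the broadcasting pattern CentroidDistance
# --- uses above, shown with a plain Euclidean norm standing in for manifold.distance.
# --- Shapes follow its docstring: node_repr [node_num, dim] -> node_centroid_dist [1, node_num, K].
import torch as th

def centroid_distance_pattern(node_repr: th.Tensor, centroids: th.Tensor) -> th.Tensor:
    node_num, dim = node_repr.shape
    K = centroids.shape[0]
    x = node_repr.unsqueeze(1).expand(-1, K, -1).reshape(-1, dim)        # [node_num*K, dim]
    c = centroids.unsqueeze(0).expand(node_num, -1, -1).reshape(-1, dim) # [node_num*K, dim]
    dist = (x - c).norm(dim=-1)            # Euclidean stand-in for the Lorentz distance
    return dist.view(1, node_num, K)

# e.g. centroid_distance_pattern(th.randn(5, 3), th.randn(4, 3)).shape == (1, 5, 4);
# the encoder later squeezes this to an [N, K] feature matrix fed to the decoder.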
+ """ + + def __init__(self, args, in_features, out_features, dropout, act, use_bias): + super(Linear, self).__init__() + self.dropout = dropout + self.linear = nn.Linear(in_features, out_features, use_bias) + self.act = act + args.eucl_vars.append(self.linear) + + def forward(self, x): + hidden = self.linear.forward(x) + hidden = F.dropout(hidden, self.dropout, training=self.training) + out = self.act(hidden) + return out \ No newline at end of file diff --git a/H2HGCN/manifolds/LorentzManifold.py b/H2HGCN/manifolds/LorentzManifold.py new file mode 100644 index 0000000000000000000000000000000000000000..1ae351ce44b283e97f1bceb23eaf2bbcb9fa791b --- /dev/null +++ b/H2HGCN/manifolds/LorentzManifold.py @@ -0,0 +1,194 @@ +"""Lorentz manifold.""" +import torch +import torch as th +import torch.nn as nn +import numpy as np +from torch.autograd import Function, Variable +import torch +from Ghypeddings.H2HGCN.utils import * +from Ghypeddings.H2HGCN.utils.pre_utils import * +from Ghypeddings.H2HGCN.manifolds import * +from Ghypeddings.H2HGCN.utils.math_utils import arcosh, cosh, sinh + +_eps = 1e-10 + +class LorentzManifold: + + def __init__(self, args, eps=1e-3, norm_clip=1, max_norm=1e3): + self.args = args + self.eps = eps + self.norm_clip = norm_clip + self.max_norm = max_norm + + def minkowski_dot(self, x, y, keepdim=True): + res = torch.sum(x * y, dim=-1) - 2 * x[..., 0] * y[..., 0] + if keepdim: + res = res.view(res.shape + (1,)) + return res + + + def sqdist(self, x, y, c): + K = 1. / c + prod = self.minkowski_dot(x, y) + eps = {torch.float32: 1e-7, torch.float64: 1e-15} + theta = torch.clamp(-prod / K, min=1.0 + eps[x.dtype]) + sqdist = K * arcosh(theta) ** 2 + return torch.clamp(sqdist, max=50.0) + + + @staticmethod + def ldot(u, v, keepdim=False): + """ + Lorentzian Scalar Product + Args: + u: [batch_size, d + 1] + v: [batch_size, d + 1] + Return: + keepdim: False [batch_size] + keepdim: True [batch_size, 1] + """ + d = u.size(1) - 1 + uv = u * v + uv = th.cat((-uv.narrow(1, 0, 1), uv.narrow(1, 1, d)), dim=1) + return th.sum(uv, dim=1, keepdim=keepdim) + + def from_lorentz_to_poincare(self, x): + """ + Args: + u: [batch_size, d + 1] + """ + d = x.size(-1) - 1 + return x.narrow(-1, 1, d) / (x.narrow(-1, 0, 1) + 1) + + def from_poincare_to_lorentz(self, x): + """ + Args: + u: [batch_size, d] + """ + x_norm_square = th_dot(x, x) + return th.cat((1 + x_norm_square, 2 * x), dim=1) / (1 - x_norm_square + self.eps) + + def distance(self, u, v): + d = -LorentzDot.apply(u, v) + dis = Acosh.apply(d, self.eps) + return dis + + def normalize(self, w): + """ + Normalize vector such that it is located on the Lorentz + Args: + w: [batch_size, d + 1] + """ + d = w.size(-1) - 1 + narrowed = w.narrow(-1, 1, d) + if self.max_norm: + narrowed = th.renorm(narrowed.view(-1, d), 2, 0, self.max_norm) + first = 1 + th.sum(th.pow(narrowed, 2), dim=-1, keepdim=True) + first = th.sqrt(first) + tmp = th.cat((first, narrowed), dim=1) + return tmp + + def init_embed(self, embed, irange=1e-2): + embed.weight.data.uniform_(-irange, irange) + embed.weight.data.copy_(self.normalize(embed.weight.data)) + + def rgrad(self, p, d_p): + """Riemannian gradient for Lorentz""" + u = d_p + x = p + u.narrow(-1, 0, 1).mul_(-1) + u.addcmul_(self.ldot(x, u, keepdim=True).expand_as(x), x) + return d_p + + def exp_map_zero(self, v): + zeros = th.zeros_like(v) + zeros[:, 0] = 1 + return self.exp_map_x(zeros, v) + + def exp_map_x(self, p, d_p, d_p_normalize=True, p_normalize=True): + if d_p_normalize: + d_p = self.normalize_tan(p, d_p) + + 
ldv = self.ldot(d_p, d_p, keepdim=True) + nd_p = th.sqrt(th.clamp(ldv + self.eps, _eps)) + + t = th.clamp(nd_p, max=self.norm_clip) + newp = (th.cosh(t) * p) + (th.sinh(t) * d_p / nd_p) + + if p_normalize: + newp = self.normalize(newp) + return newp + + def normalize_tan(self, x_all, v_all): + d = v_all.size(1) - 1 + x = x_all.narrow(1, 1, d) + xv = th.sum(x * v_all.narrow(1, 1, d), dim=1, keepdim=True) + tmp = 1 + th.sum(th.pow(x_all.narrow(1, 1, d), 2), dim=1, keepdim=True) + tmp = th.sqrt(tmp) + return th.cat((xv / tmp, v_all.narrow(1, 1, d)), dim=1) + + def log_map_zero(self, y, i=-1): + zeros = th.zeros_like(y) + zeros[:, 0] = 1 + return self.log_map_x(zeros, y) + + def log_map_x(self, x, y, normalize=False): + """Logarithmic map on the Lorentz Manifold""" + xy = self.ldot(x, y).unsqueeze(-1) + tmp = th.sqrt(th.clamp(xy * xy - 1 + self.eps, _eps)) + v = Acosh.apply(-xy, self.eps) / ( + tmp + ) * th.addcmul(y, xy, x) + if normalize: + result = self.normalize_tan(x, v) + else: + result = v + return result + + def parallel_transport(self, x, y, v): + """Parallel transport for Lorentz""" + v_ = v + x_ = x + y_ = y + + xy = self.ldot(x_, y_, keepdim=True).expand_as(x_) + vy = self.ldot(v_, y_, keepdim=True).expand_as(x_) + vnew = v_ + vy / (1 - xy) * (x_ + y_) + return vnew + + def metric_tensor(self, x, u, v): + return self.ldot(u, v, keepdim=True) + + + +class LorentzDot(Function): + @staticmethod + def forward(ctx, u, v): + ctx.save_for_backward(u, v) + return LorentzManifold.ldot(u, v) + + @staticmethod + def backward(ctx, g): + u, v = ctx.saved_tensors + g = g.unsqueeze(-1).expand_as(u).clone() + g.narrow(-1, 0, 1).mul_(-1) + return g * v, g * u + +class Acosh(Function): + @staticmethod + def forward(ctx, x, eps): + z = th.sqrt(th.clamp(x * x - 1 + eps, _eps)) + ctx.save_for_backward(z) + ctx.eps = eps + xz = x + z + tmp = th.log(xz) + return tmp + + @staticmethod + def backward(ctx, g): + z, = ctx.saved_tensors + z = th.clamp(z, min=ctx.eps) + z = g / z + return z, None + + diff --git a/H2HGCN/manifolds/StiefelManifold.py b/H2HGCN/manifolds/StiefelManifold.py new file mode 100644 index 0000000000000000000000000000000000000000..42f141a9b76c4d8539b8fd6a6a0a14606f119184 --- /dev/null +++ b/H2HGCN/manifolds/StiefelManifold.py @@ -0,0 +1,41 @@ +import torch as th +import torch.nn as nn +import numpy as np +from torch.autograd import Function, Variable +from utils import * + +_eps = 1e-10 + +class StiefelManifold: + + def __init__(self, args, logger, eps=1e-3, norm_clip=1, max_norm=1e3): + self.args = args + self.logger = logger + self.eps = eps + self.norm_clip = norm_clip + self.max_norm = max_norm + + def normalize(self, w): + return w + + def init_embed(self, embed, irange=1e-2): + embed.weight.data.uniform_(-irange, irange) + embed.weight.data.copy_(self.normalize(embed.weight.data)) + + def symmetric(self, A): + return 0.5 * (A + A.t()) + + def rgrad(self, A, B): + out = B - A.mm(self.symmetric(A.transpose(0,1).mm(B))) + return out + + def exp_map_x(self, A, ref): + data = A + ref + Q, R = data.qr() + # To avoid (any possible) negative values in the output matrix, we multiply the negative values by -1 + sign = (R.diag().sign() + 0.5).sign().diag() + out = Q.mm(sign) + return out + + + diff --git a/H2HGCN/manifolds/__init__.py b/H2HGCN/manifolds/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..99a7356c05028872305abad28f5ac410681319a4 --- /dev/null +++ b/H2HGCN/manifolds/__init__.py @@ -0,0 +1 @@ +from Ghypeddings.H2HGCN.manifolds.LorentzManifold import 
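# --- Sketch (standalone, not part of this diff): the QR-based retraction used by
# --- StiefelManifold.exp_map_x above keeps a weight matrix on the Stiefel manifold,
# --- i.e. the result has orthonormal columns (W^T W = I). torch.linalg.qr is used here
# --- in place of the deprecated Tensor.qr; the sign step resolves QR's sign ambiguity.
import torch as th

def qr_retraction(A, step):
    Q, R = th.linalg.qr(A + step)
    sign = (th.diag(R).sign() + 0.5).sign().diag()   # same sign fix as exp_map_x
    return Q.mm(sign)

W = qr_retraction(th.eye(4), 0.01 * th.randn(4, 4))
assert th.allclose(W.t().mm(W), th.eye(4), atol=1e-5)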
LorentzManifold \ No newline at end of file diff --git a/H2HGCN/models/__init__.py b/H2HGCN/models/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/H2HGCN/models/base_models.py b/H2HGCN/models/base_models.py new file mode 100644 index 0000000000000000000000000000000000000000..a0a235cfac0a099d484449fca494e894411ee20a --- /dev/null +++ b/H2HGCN/models/base_models.py @@ -0,0 +1,76 @@ +import numpy as np +from sklearn.metrics import roc_auc_score, average_precision_score +import torch +import torch.nn as nn +import torch.nn.functional as F +import Ghypeddings.H2HGCN.models.encoders as encoders +from Ghypeddings.H2HGCN.models.encoders import H2HGCN +from Ghypeddings.H2HGCN.models.decoders import model2decoder +from Ghypeddings.H2HGCN.utils.eval_utils import acc_f1 +from Ghypeddings.H2HGCN.manifolds import LorentzManifold + + +class BaseModel(nn.Module): + """ + Base model for graph embedding tasks. + """ + + def __init__(self, args): + super(BaseModel, self).__init__() + self.c = torch.Tensor([1.]).cuda().to(args.device) + args.manifold = self.manifold = LorentzManifold(args) + args.feat_dim = args.feat_dim + 1 + # add 1 for Lorentz as the degree of freedom is d - 1 with d dimensions + args.dim = args.dim + 1 + self.nnodes = args.n_nodes + self.encoder = H2HGCN(args, 1) + + def encode(self, x, hgnn_adj, hgnn_weight): + h = self.encoder.encode(x, hgnn_adj, hgnn_weight) + return h + + def compute_metrics(self, embeddings, data, split): + raise NotImplementedError + + def init_metric_dict(self): + raise NotImplementedError + + def has_improved(self, m1, m2): + raise NotImplementedError + + +class NCModel(BaseModel): + """ + Base model for node classification task. + """ + + def __init__(self, args): + super(NCModel, self).__init__(args) + self.decoder = model2decoder(self.c, args) + if args.n_classes > 2: + self.f1_average = 'micro' + else: + self.f1_average = 'binary' + + self.weights = torch.Tensor([1.] * args.n_classes) + if not args.cuda == -1: + self.weights = self.weights.to(args.device) + + def decode(self, h, adj, idx): + output = self.decoder.decode(h, adj) + return F.log_softmax(output[idx], dim=1) + + + def compute_metrics(self, embeddings, data, split): + idx = data[f'idx_{split}'] + output = self.decode(embeddings, data['adj_train_norm'], idx) + loss = F.nll_loss(output, data['labels'][idx], self.weights) + acc, f1 , recall,precision,roc_auc = acc_f1(output, data['labels'][idx], average=self.f1_average) + metrics = {'loss': loss, 'acc': acc, 'f1': f1 , 'recall':recall,'precision':precision,'roc_auc':roc_auc} + return metrics + + def init_metric_dict(self): + return {'acc': -1, 'f1': -1} + + def has_improved(self, m1, m2): + return m1["f1"] < m2["f1"] \ No newline at end of file diff --git a/H2HGCN/models/decoders.py b/H2HGCN/models/decoders.py new file mode 100644 index 0000000000000000000000000000000000000000..cac75ec3f51abb35f1dcd5cfd17fa7655c00cf3a --- /dev/null +++ b/H2HGCN/models/decoders.py @@ -0,0 +1,42 @@ +"""Graph decoders.""" +import torch.nn as nn +import torch.nn.functional as F +from Ghypeddings.H2HGCN.layers.layers import Linear + +class Decoder(nn.Module): + """ + Decoder abstract class for node classification tasks. 
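# --- Sketch (standalone, not part of this diff): NCModel above combines log_softmax in
# --- decode() with nll_loss in compute_metrics(), which is numerically the same as
# --- cross_entropy applied to the raw decoder logits. Tensors below are hypothetical.
import torch
import torch.nn.functional as F

logits = torch.randn(5, 2)              # decoder output for 5 nodes, 2 classes
labels = torch.tensor([0, 1, 1, 0, 1])
loss_a = F.nll_loss(F.log_softmax(logits, dim=1), labels)
loss_b = F.cross_entropy(logits, labels)
assert torch.allclose(loss_a, loss_b)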
+ """ + + def __init__(self, c): + super(Decoder, self).__init__() + self.c = c + + def decode(self, x, adj): + if self.decode_adj: + input = (x, adj) + probs, _ = self.cls.forward(input) + else: + probs = self.cls.forward(x) + return probs + + +class MyDecoder(Decoder): + """ + Decoder abstract class for node classification tasks. + """ + + def __init__(self, c, args): + super(MyDecoder, self).__init__(c) + self.input_dim = args.num_centroid + self.output_dim = args.n_classes + act = lambda x: x + self.cls = Linear(args, self.input_dim, self.output_dim, 0.0, act, args.bias) + self.decode_adj = False + + def decode(self, x, adj): + h = x + return super(MyDecoder, self).decode(h, adj) + +model2decoder = MyDecoder + diff --git a/H2HGCN/models/encoders.py b/H2HGCN/models/encoders.py new file mode 100644 index 0000000000000000000000000000000000000000..e5ab9313c1506ffd7984ead39591098158f2dae1 --- /dev/null +++ b/H2HGCN/models/encoders.py @@ -0,0 +1,264 @@ +import numpy as np +import torch +import torch.nn as nn +import torch.nn.functional as F +import Ghypeddings.H2HGCN.utils.math_utils as pmath +import torch as th +from Ghypeddings.H2HGCN.utils import * +from Ghypeddings.H2HGCN.utils import pre_utils +from Ghypeddings.H2HGCN.utils.pre_utils import * +from Ghypeddings.H2HGCN.manifolds import * +from Ghypeddings.H2HGCN.layers.CentroidDistance import CentroidDistance + + +class H2HGCN(nn.Module): + + def __init__(self, args, logger): + super(H2HGCN, self).__init__() + self.debug = False + self.args = args + self.logger = logger + self.set_up_params() + self.activation = nn.SELU() + fd = args.feat_dim - 1 + self.linear = nn.Linear( + int(fd), int(args.dim), + ) + nn_init(self.linear, self.args.proj_init) + self.args.eucl_vars.append(self.linear) + + self.distance = CentroidDistance(args, logger, args.manifold) + + + def create_params(self): + """ + create the GNN params for a specific msg type + """ + msg_weight = [] + layer = self.args.num_layers if not self.args.tie_weight else 1 + for iii in range(layer): + M = th.zeros([self.args.dim-1, self.args.dim-1], requires_grad=True) + init_weight(M, 'orthogonal') + M = nn.Parameter(M) + self.args.stie_vars.append(M) + msg_weight.append(M) + return nn.ParameterList(msg_weight) + + def set_up_params(self): + """ + set up the params for all message types + """ + self.type_of_msg = 1 + + for i in range(0, self.type_of_msg): + setattr(self, "msg_%d_weight" % i, self.create_params()) + + def apply_activation(self, node_repr): + """ + apply non-linearity for different manifolds + """ + if self.args.select_manifold == "poincare": + return self.activation(node_repr) + elif self.args.select_manifold == "lorentz": + return self.args.manifold.from_poincare_to_lorentz( + self.activation(self.args.manifold.from_lorentz_to_poincare(node_repr)) + ) + + def split_graph_by_negative_edge(self, adj_mat, weight): + """ + Split the graph according to positive and negative edges. 
+ """ + mask = weight > 0 + neg_mask = weight < 0 + + pos_adj_mat = adj_mat * mask.long() + neg_adj_mat = adj_mat * neg_mask.long() + pos_weight = weight * mask.float() + neg_weight = -weight * neg_mask.float() + return pos_adj_mat, pos_weight, neg_adj_mat, neg_weight + + def split_graph_by_type(self, adj_mat, weight): + """ + split the graph according to edge type for multi-relational datasets + """ + multi_relation_adj_mat = [] + multi_relation_weight = [] + for relation in range(1, self.args.edge_type): + mask = (weight.int() == relation) + multi_relation_adj_mat.append(adj_mat * mask.long()) + multi_relation_weight.append(mask.float()) + return multi_relation_adj_mat, multi_relation_weight + + def split_input(self, adj_mat, weight): + return [adj_mat], [weight] + + def p2k(self, x, c): + denom = 1 + c * x.pow(2).sum(-1, keepdim=True) + return 2 * x / denom + + def k2p(self, x, c): + denom = 1 + torch.sqrt(1 - c * x.pow(2).sum(-1, keepdim=True)) + return x / denom + + def lorenz_factor(self, x, *, c=1.0, dim=-1, keepdim=False): + """ + Calculate Lorenz factors + """ + x_norm = x.pow(2).sum(dim=dim, keepdim=keepdim) + x_norm = torch.clamp(x_norm, 0, 0.9) + tmp = 1 / torch.sqrt(1 - c * x_norm) + return tmp + + def from_lorentz_to_poincare(self, x): + """ + Args: + u: [batch_size, d + 1] + """ + d = x.size(-1) - 1 + return x.narrow(-1, 1, d) / (x.narrow(-1, 0, 1) + 1) + + def h2p(self, x): + return self.from_lorentz_to_poincare(x) + + def from_poincare_to_lorentz(self, x, eps=1e-3): + """ + Args: + u: [batch_size, d] + """ + x_norm_square = x.pow(2).sum(-1, keepdim=True) + tmp = th.cat((1 + x_norm_square, 2 * x), dim=1) + tmp = tmp / (1 - x_norm_square) + return tmp + + def p2h(self, x): + return self.from_poincare_to_lorentz(x) + + def p2k(self, x, c=1.0): + denom = 1 + c * x.pow(2).sum(-1, keepdim=True) + return 2 * x / denom + + def k2p(self, x, c=1.0): + denom = 1 + torch.sqrt(1 - c * x.pow(2).sum(-1, keepdim=True)) + return x / denom + + def h2k(self, x): + tmp = x.narrow(-1, 1, x.size(-1)-1) / x.narrow(-1, 0, 1) + return tmp + + def k2h(self, x): + x_norm_square = x.pow(2).sum(-1, keepdim=True) + x_norm_square = torch.clamp(x_norm_square, max=0.9) + tmp = torch.ones((x.size(0),1)).cuda().to(self.args.device) + tmp1 = th.cat((tmp, x), dim=1) + tmp2 = 1.0 / torch.sqrt(1.0 - x_norm_square) + tmp3 = (tmp1 * tmp2) + return tmp3 + + + def hyperbolic_mean(self, y, node_num, max_neighbor, real_node_num, weight, dim=0, c=1.0, ): + ''' + y [node_num * max_neighbor, dim] + ''' + x = y[0:real_node_num*max_neighbor, :] + weight_tmp = weight.view(-1,1)[0:real_node_num*max_neighbor, :] + x = self.h2k(x) + + lamb = self.lorenz_factor(x, c=c, keepdim=True) + lamb = lamb * weight_tmp + lamb = lamb.view(real_node_num, max_neighbor, -1) + + x = x.view(real_node_num, max_neighbor, -1) + k_mean = (torch.sum(lamb * x, dim=1, keepdim=True) / (torch.sum(lamb, dim=1, keepdim=True))).squeeze() + h_mean = self.k2h(k_mean) + + virtual_mean = torch.cat((torch.tensor([[1.0]]), torch.zeros(1,y.size(-1)-1)), 1).cuda().to(self.args.device) + tmp = virtual_mean.repeat(node_num-real_node_num, 1) + + mean = torch.cat((h_mean, tmp), 0) + return mean + + def test_lor(self, A): + tmp1 = (A[:,0] * A[:,0]).view(-1) + tmp2 = A[:,1:] + tmp2 = th.diag(tmp2.mm(tmp2.transpose(0,1))) + return (tmp1 - tmp2) + + def retrieve_params(self, weight, step): + """ + Args: + weight: a list of weights + step: a certain layer + """ + layer_weight = th.cat((th.zeros((self.args.dim-1, 1)).cuda().to(self.args.device), weight[step]), dim=1) + 
tmp = th.zeros((1, self.args.dim)).cuda().to(self.args.device) + tmp[0,0] = 1 + layer_weight = th.cat((tmp, layer_weight), dim=0) + return layer_weight + + def aggregate_msg(self, node_repr, adj_mat, weight, layer_weight, mask): + """ + message passing for a specific message type. + """ + node_num, max_neighbor = adj_mat.shape[0], adj_mat.shape[1] + combined_msg = node_repr.clone() + + tmp = self.test_lor(node_repr) + msg = th.mm(node_repr, layer_weight) * mask + real_node_num = (mask>0).sum() + + # select out the neighbors of each node + neighbors = th.index_select(msg, 0, adj_mat.view(-1)) + combined_msg = self.hyperbolic_mean(neighbors, node_num, max_neighbor, real_node_num, weight) + return combined_msg + + def get_combined_msg(self, step, node_repr, adj_mat, weight, mask): + """ + perform message passing in the tangent space of x' + """ + gnn_layer = 0 if self.args.tie_weight else step + combined_msg = None + for relation in range(0, self.type_of_msg): + layer_weight = self.retrieve_params(getattr(self, "msg_%d_weight" % relation), gnn_layer) + aggregated_msg = self.aggregate_msg(node_repr, + adj_mat[relation], + weight[relation], + layer_weight, mask) + combined_msg = aggregated_msg if combined_msg is None else (combined_msg + aggregated_msg) + return combined_msg + + + def encode(self, node_repr, adj_list, weight): + node_repr = self.activation(self.linear(node_repr)) + adj_list, weight = self.split_input(adj_list, weight) + + mask = torch.ones((node_repr.size(0),1)).cuda().to(self.args.device) + node_repr = self.args.manifold.exp_map_zero(node_repr) + + for step in range(self.args.num_layers): + node_repr = node_repr * mask + tmp = node_repr + combined_msg = self.get_combined_msg(step, node_repr, adj_list, weight, mask) + combined_msg = (combined_msg) * mask + node_repr = combined_msg * mask + node_repr = self.apply_activation(node_repr) * mask + real_node_num = (mask>0).sum() + node_repr = self.args.manifold.normalize(node_repr) + _, node_centroid_sim = self.distance(node_repr, mask) + return node_centroid_sim.squeeze() + +class Encoder(nn.Module): + """ + Encoder abstract class. + """ + + def __init__(self, c): + super(Encoder, self).__init__() + self.c = c + + def encode(self, x, adj): + if self.encode_graph: + input = (x, adj) + output, _ = self.layers.forward(input) + else: + output = self.layers.forward(x) + return output diff --git a/H2HGCN/optimizers/__init__.py b/H2HGCN/optimizers/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..23027ab1847bcd1d4e3c46d7ae72fbfba2fc86b5 --- /dev/null +++ b/H2HGCN/optimizers/__init__.py @@ -0,0 +1 @@ +from torch.optim import Adam diff --git a/H2HGCN/optimizers/rsgd.py b/H2HGCN/optimizers/rsgd.py new file mode 100644 index 0000000000000000000000000000000000000000..968b97444edb76491fa39cdc65636cdf9fc6b432 --- /dev/null +++ b/H2HGCN/optimizers/rsgd.py @@ -0,0 +1,29 @@ +import torch as th +from torch.optim.optimizer import Optimizer, required +from Ghypeddings.H2HGCN.utils import * +import os +import math + +class RiemannianSGD(Optimizer): + """Riemannian stochastic gradient descent. + """ + def __init__(self, args, params, lr): + defaults = dict(lr=lr) + self.args = args + super(RiemannianSGD, self).__init__(params, defaults) + + def step(self, lr=None): + """ + Performs a single optimization step. 
+ """ + loss = None + for group in self.param_groups: + for p in group['params']: + if p.grad is None: + continue + d_p = p.grad.data + d_p = self.args.manifold.rgrad(p, d_p) + if lr is None: + lr = group['lr'] + p.data = self.args.manifold.exp_map_x(p, -lr * d_p) + return loss diff --git a/H2HGCN/utils/__init__.py b/H2HGCN/utils/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..7c3b5149888b0c78501eb36595aff1f4f4027b8c --- /dev/null +++ b/H2HGCN/utils/__init__.py @@ -0,0 +1 @@ +from Ghypeddings.H2HGCN.utils.pre_utils import * \ No newline at end of file diff --git a/H2HGCN/utils/data_utils.py b/H2HGCN/utils/data_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..f726ddff75b141d465f387292b13fa3fa25e6f2e --- /dev/null +++ b/H2HGCN/utils/data_utils.py @@ -0,0 +1,102 @@ +"""Data utils functions for pre-processing and data loading.""" +import os +import pickle as pkl +import sys +import networkx as nx +import numpy as np +import scipy.sparse as sp +import torch +from Ghypeddings.H2HGCN.utils.pre_utils import * + +def convert_hgnn_adj(adj): + hgnn_adj = [[i] for i in range(adj.shape[0])] + hgnn_weight = [[1] for i in range(adj.shape[0])] + for i in range(adj.shape[0]): + for j in range(adj.shape[1]): + if adj[i,j] == 1: + hgnn_adj[i].append(j) + hgnn_weight[i].append(1) + + max_len = max([len(i) for i in hgnn_adj]) + normalize_weight(hgnn_adj, hgnn_weight) + + hgnn_adj = pad_sequence(hgnn_adj, max_len) + hgnn_weight = pad_sequence(hgnn_weight, max_len) + hgnn_adj = np.array(hgnn_adj) + hgnn_weight = np.array(hgnn_weight) + return torch.from_numpy(hgnn_adj).cuda(), torch.from_numpy(hgnn_weight).cuda().float() + + +def process_data(args,adj,features,labels): + data = process_data_nc(args,adj,features,labels) + data['adj_train_norm'], data['features'] = process( + data['adj_train'], data['features'], args.normalize_adj, args.normalize_feats + ) + return data + + +def process(adj, features, normalize_adj, normalize_feats): + if sp.isspmatrix(features): + features = np.array(features.todense()) + if normalize_feats: + features = normalize(features) + features = torch.Tensor(features) + if normalize_adj: + adj = normalize(adj) + adj = sparse_mx_to_torch_sparse_tensor(adj) + return adj, features + +def normalize(mx): + """Row-normalize sparse matrix.""" + rowsum = np.array(mx.sum(1)) + r_inv = np.power(rowsum, -1).flatten() + r_inv[np.isinf(r_inv)] = 0. + r_mat_inv = sp.diags(r_inv) + mx = r_mat_inv.dot(mx) + return mx + +def sparse_mx_to_torch_sparse_tensor(sparse_mx): + """Convert a scipy sparse matrix to a torch sparse tensor.""" + sparse_mx = sparse_mx.tocoo() + indices = torch.from_numpy( + np.vstack((sparse_mx.row, sparse_mx.col)).astype(np.int64) + ) + values = torch.Tensor(sparse_mx.data) + shape = torch.Size(sparse_mx.shape) + return torch.sparse.FloatTensor(indices, values, shape) + +def augment(adj, features, normalize_feats=True): + deg = np.squeeze(np.sum(adj, axis=0).astype(int)) + deg[deg > 5] = 5 + deg_onehot = torch.tensor(np.eye(6)[deg], dtype=torch.float).squeeze() + const_f = torch.ones(features.size(0), 1) + features = torch.cat((features, deg_onehot, const_f), dim=1) + return features + +def split_data(labels, val_prop, test_prop, seed): + np.random.seed(seed) + nb_nodes = labels.shape[0] + all_idx = np.arange(nb_nodes) + pos_idx = labels.nonzero()[0] + neg_idx = (1. 
- labels).nonzero()[0] + np.random.shuffle(pos_idx) + np.random.shuffle(neg_idx) + pos_idx = pos_idx.tolist() + neg_idx = neg_idx.tolist() + nb_pos_neg = min(len(pos_idx), len(neg_idx)) + nb_val = round(val_prop * nb_pos_neg) + nb_test = round(test_prop * nb_pos_neg) + idx_val_pos, idx_test_pos, idx_train_pos = pos_idx[:nb_val], pos_idx[nb_val:nb_val + nb_test], pos_idx[ + nb_val + nb_test:] + idx_val_neg, idx_test_neg, idx_train_neg = neg_idx[:nb_val], neg_idx[nb_val:nb_val + nb_test], neg_idx[ + nb_val + nb_test:] + return idx_val_pos + idx_val_neg, idx_test_pos + idx_test_neg, idx_train_pos + idx_train_neg + + +def process_data_nc(args,adj,features,labels): + adj = sp.csr_matrix(adj) + hgnn_adj, hgnn_weight = convert_hgnn_adj(adj.todense()) + idx_val, idx_test, idx_train = split_data(labels, args.val_prop, args.test_prop, seed=args.seed) + labels = torch.LongTensor(labels) + data = {'adj_train': adj, 'features': features, 'labels': labels, 'idx_train': idx_train, 'idx_val': idx_val, 'idx_test': idx_test, 'hgnn_adj': hgnn_adj, 'hgnn_weight': hgnn_weight} + return data \ No newline at end of file diff --git a/H2HGCN/utils/eval_utils.py b/H2HGCN/utils/eval_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..4a17797a6916957f4b9f0962f77bfe15000ecb4e --- /dev/null +++ b/H2HGCN/utils/eval_utils.py @@ -0,0 +1,13 @@ +from sklearn.metrics import accuracy_score, f1_score, recall_score,precision_score,roc_auc_score + +def acc_f1(output, labels, average='binary'): + preds = output.max(1)[1].type_as(labels) + if preds.is_cuda: + preds = preds.cpu() + labels = labels.cpu() + accuracy = accuracy_score(labels,preds) + f1 = f1_score(labels,preds , average=average) + recall = recall_score(labels,preds) + precision = precision_score(labels,preds ) + roc_auc = roc_auc_score(labels,preds) + return accuracy, f1 , recall,precision, roc_auc \ No newline at end of file diff --git a/H2HGCN/utils/math_utils.py b/H2HGCN/utils/math_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..9cf278ed7ce59b97f4793f5def3218f3e830d473 --- /dev/null +++ b/H2HGCN/utils/math_utils.py @@ -0,0 +1,69 @@ +"""Math utils functions.""" + +import torch + + +def cosh(x, clamp=15): + return x.clamp(-clamp, clamp).cosh() + + +def sinh(x, clamp=15): + return x.clamp(-clamp, clamp).sinh() + + +def tanh(x, clamp=15): + return x.clamp(-clamp, clamp).tanh() + + +def arcosh(x): + return Arcosh.apply(x) + + +def arsinh(x): + return Arsinh.apply(x) + + +def artanh(x): + return Artanh.apply(x) + + +class Artanh(torch.autograd.Function): + @staticmethod + def forward(ctx, x): + x = x.clamp(-1 + 1e-15, 1 - 1e-15) + ctx.save_for_backward(x) + z = x.double() + return (torch.log_(1 + z).sub_(torch.log_(1 - z))).mul_(0.5).to(x.dtype) + + @staticmethod + def backward(ctx, grad_output): + input, = ctx.saved_tensors + return grad_output / (1 - input ** 2) + + +class Arsinh(torch.autograd.Function): + @staticmethod + def forward(ctx, x): + ctx.save_for_backward(x) + z = x.double() + return (z + torch.sqrt_(1 + z.pow(2))).clamp_min_(1e-15).log_().to(x.dtype) + + @staticmethod + def backward(ctx, grad_output): + input, = ctx.saved_tensors + return grad_output / (1 + input ** 2) ** 0.5 + + +class Arcosh(torch.autograd.Function): + @staticmethod + def forward(ctx, x): + x = x.clamp(min=1.0 + 1e-15) + ctx.save_for_backward(x) + z = x.double() + return (z + torch.sqrt_(z.pow(2) - 1)).clamp_min_(1e-15).log_().to(x.dtype) + + @staticmethod + def backward(ctx, grad_output): + input, = ctx.saved_tensors + return 
grad_output / (input ** 2 - 1) ** 0.5 + diff --git a/H2HGCN/utils/pre_utils.py b/H2HGCN/utils/pre_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..283e73035851b0e7bbe7a78b02c8eeb2257be7f7 --- /dev/null +++ b/H2HGCN/utils/pre_utils.py @@ -0,0 +1,167 @@ +from collections import defaultdict +import os +import pickle +import json +import torch.nn as nn +import torch as th +import torch.optim as optim +import numpy as np +import random +from Ghypeddings.H2HGCN.optimizers.rsgd import RiemannianSGD +import math +import subprocess +import random + +def set_seed(seed): + """ + Set the random seed + """ + random.seed(seed) + np.random.seed(seed) + th.manual_seed(seed) + th.cuda.manual_seed(seed) + th.cuda.manual_seed_all(seed) + +def th_dot(x, y, keepdim=True): + return th.sum(x * y, dim=1, keepdim=keepdim) + +def pad_sequence(data_list, maxlen, value=0): + return [row + [value] * (maxlen - len(row)) for row in data_list] + +def normalize_weight(adj_mat, weight): + degree = [1 / math.sqrt(sum(np.abs(w))) for w in weight] + for dst in range(len(adj_mat)): + for src_idx in range(len(adj_mat[dst])): + src = adj_mat[dst][src_idx] + weight[dst][src_idx] = degree[dst] * weight[dst][src_idx] * degree[src] + +def nn_init(nn_module, method='orthogonal'): + """ + Initialize a Sequential or Module object + Args: + nn_module: Sequential or Module + method: initialization method + """ + if method == 'none': + return + for param_name, _ in nn_module.named_parameters(): + if isinstance(nn_module, nn.Sequential): + # for a Sequential object, the param_name contains both id and param name + i, name = param_name.split('.', 1) + param = getattr(nn_module[int(i)], name) + else: + param = getattr(nn_module, param_name) + if param_name.find('weight') > -1: + init_weight(param, method) + elif param_name.find('bias') > -1: + nn.init.uniform_(param, -1e-4, 1e-4) + +def get_params(params_list, vars_list): + """ + Add parameters in vars_list to param_list + """ + for i in vars_list: + if issubclass(i.__class__, nn.Module): + params_list.extend(list(i.parameters())) + elif issubclass(i.__class__, nn.Parameter): + params_list.append(i) + else: + print("Encounter unknown objects") + exit(1) + +def categorize_params(args): + """ + Categorize parameters into hyperbolic ones and euclidean ones + """ + stiefel_params, euclidean_params = [], [] + get_params(euclidean_params, args.eucl_vars) + get_params(stiefel_params, args.stie_vars) + return stiefel_params, euclidean_params + +def get_activation(args): + if args.activation == 'leaky_relu': + return nn.LeakyReLU(args.leaky_relu) + elif args.activation == 'rrelu': + return nn.RReLU() + elif args.activation == 'relu': + return nn.ReLU() + elif args.activation == 'elu': + return nn.ELU() + elif args.activation == 'prelu': + return nn.PReLU() + elif args.activation == 'selu': + return nn.SELU() + +def init_weight(weight, method): + """ + Initialize parameters + Args: + weight: a Parameter object + method: initialization method + """ + if method == 'orthogonal': + nn.init.orthogonal_(weight) + elif method == 'xavier': + nn.init.xavier_uniform_(weight) + elif method == 'kaiming': + nn.init.kaiming_uniform_(weight) + elif method == 'none': + pass + else: + raise Exception('Unknown init method') + + +def get_stiefel_optimizer(args, params, lr_stie): + if args.stiefel_optimizer == 'rsgd': + optimizer = RiemannianSGD( + args, + params, + lr=lr_stie, + ) + elif args.stiefel_optimizer == 'ramsgrad': + optimizer = RiemannianAMSGrad( + args, + params, + lr=lr_stie, + 
) + else: + print("unsupported hyper optimizer") + exit(1) + return optimizer + +def get_lr_scheduler(args, optimizer): + if args.lr_scheduler == 'exponential': + return optim.lr_scheduler.ExponentialLR(optimizer, gamma=args.lr_gamma) + elif args.lr_scheduler == 'cosine': + return optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=30, eta_min=0) + elif args.lr_scheduler == 'cycle': + return optim.lr_scheduler.CyclicLR(optimizer, 0, max_lr=args.lr, step_size_up=20, cycle_momentum=False) + elif args.lr_scheduler == 'step': + return optim.lr_scheduler.StepLR( + optimizer, + step_size=int(args.step_lr_reduce_freq), + gamma=float(args.step_lr_gamma) + ) + elif args.lr_scheduler == 'none': + return NoneScheduler() + +def get_optimizer(args, params, lr): + if args.optimizer == 'sgd': + optimizer = optim.SGD(params, lr=lr, weight_decay=args.weight_decay) + elif args.optimizer == 'Adam': + optimizer = optim.Adam(params, lr=lr, weight_decay=args.weight_decay) + elif args.optimizer == 'amsgrad': + optimizer = optim.Adam(params, lr=lr, amsgrad=True, weight_decay=args.weight_decay) + return optimizer + +def set_up_optimizer_scheduler(hyperbolic, args, model, lr, lr_stie, pprint=True): + stiefel_params, euclidean_params = categorize_params(args) + #assert(len(list(model.parameters())) == len(stiefel_params) + len(euclidean_params)) + optimizer = get_optimizer(args, euclidean_params, lr) + lr_scheduler = get_lr_scheduler(args, optimizer) + if len(stiefel_params) > 0: + stiefel_optimizer = get_stiefel_optimizer(args, stiefel_params, lr_stie) + stiefel_lr_scheduler = get_lr_scheduler(args, stiefel_optimizer) + else: + stiefel_optimizer, stiefel_lr_scheduler = None, None + return optimizer, lr_scheduler, stiefel_optimizer, stiefel_lr_scheduler \ No newline at end of file diff --git a/H2HGCN/utils/train_utils.py b/H2HGCN/utils/train_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..71781f97d42cfae91a9caef4ada7b56ce7a4cbe1 --- /dev/null +++ b/H2HGCN/utils/train_utils.py @@ -0,0 +1,52 @@ +import os +import numpy as np +import torch +import torch.nn.functional as F +import torch.nn.modules.loss +import argparse + + +def format_metrics(metrics, split): + """Format metric in metric dict for logging.""" + return " ".join( + ["{}_{}: {:.4f}".format(split, metric_name, metric_val) for metric_name, metric_val in metrics.items()]) + + +def create_args(*args): + parser = argparse.ArgumentParser() + parser.add_argument('--dim', type=int, default=args[0]) + parser.add_argument('--c', type=int, default=args[1]) + parser.add_argument('--num_layers', type=int, default=args[2]) + parser.add_argument('--bias', type=bool, default=args[3]) + parser.add_argument('--act', type=str, default=args[4]) + parser.add_argument('--select_manifold', type=str, default=args[5]) + parser.add_argument('--num_centroid', type=int, default=args[6]) + parser.add_argument('--lr_stie', type=float, default=args[7]) + parser.add_argument('--stie_vars', nargs='+', default=args[8]) + parser.add_argument('--stiefel_optimizer', type=str, default=args[9]) + parser.add_argument('--eucl_vars', nargs='+', default=args[10]) + parser.add_argument('--grad_clip', type=float, default=args[11]) + parser.add_argument('--optimizer', type=str, default=args[12]) + parser.add_argument('--weight_decay', type=float, default=args[13]) + parser.add_argument('--lr', type=float, default=args[14]) + parser.add_argument('--lr_scheduler', type=str, default=args[15]) + parser.add_argument('--lr_gamma', type=float, default=args[16]) + 
parser.add_argument('--step_lr_gamma', type=float, default=args[17]) + parser.add_argument('--step_lr_reduce_freq', type=int, default=args[18]) + parser.add_argument('--proj_init', type=str, default=args[19]) + parser.add_argument('--tie_weight', type=bool, default=args[20]) + parser.add_argument('--cuda', type=int, default=args[21]) + parser.add_argument('--epochs', type=int, default=args[22]) + parser.add_argument('--min_epochs', type=int, default=args[23]) + parser.add_argument('--patience', type=int, default=args[24]) + parser.add_argument('--seed', type=int, default=args[25]) + parser.add_argument('--log_freq', type=int, default=args[26]) + parser.add_argument('--eval_freq', type=int, default=args[27]) + parser.add_argument('--val_prop', type=float, default=args[28]) + parser.add_argument('--test_prop', type=float, default=args[29]) + parser.add_argument('--double_precision', type=int, default=args[30]) + parser.add_argument('--dropout', type=float, default=args[31]) + parser.add_argument('--normalize_adj', type=bool, default=args[32]) + parser.add_argument('--normalize_feats', type=bool, default=args[33]) + flags, unknown = parser.parse_known_args() + return flags \ No newline at end of file diff --git a/HGCAE/.gitignore b/HGCAE/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..bee8a64b79a99590d5303307144172cfe824fbf7 --- /dev/null +++ b/HGCAE/.gitignore @@ -0,0 +1 @@ +__pycache__ diff --git a/HGCAE/__init__.py b/HGCAE/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..bfa83a015b9025ddbca2b7c1ed543c66fd3af3d9 --- /dev/null +++ b/HGCAE/__init__.py @@ -0,0 +1,2 @@ +from __future__ import print_function +from __future__ import division diff --git a/HGCAE/hgcae.py b/HGCAE/hgcae.py new file mode 100644 index 0000000000000000000000000000000000000000..58a36cf05c6aecdef212428f987424cb52dc19b3 --- /dev/null +++ b/HGCAE/hgcae.py @@ -0,0 +1,199 @@ +from Ghypeddings.HGCAE.models.base_models import LPModel +import logging +import torch +import numpy as np +import os +import time +from Ghypeddings.HGCAE.utils.train_utils import get_dir_name, format_metrics +from Ghypeddings.HGCAE.utils.data_utils import process_data +from Ghypeddings.HGCAE.utils.train_utils import create_args , perform_task +import Ghypeddings.HGCAE.optimizers as optimizers +from Ghypeddings.HGCAE.utils.data_utils import sparse_mx_to_torch_sparse_tensor + +class HGCAE(object): + def __init__(self, + adj, + features, + labels, + dim, + hidden_dim, + c=None, + num_layers=2, + bias=True, + act='relu', + grad_clip=None, + optimizer='RiemannianAdam', + weight_decay=0., + lr=0.01, + gamma=0.5, + lr_reduce_freq=500, + cuda=0, + epochs=50, + min_epochs=50, + patience=None, + seed=42, + log_freq=0, + eval_freq=1, + val_prop=.5, + test_prop=0.3, + double_precision=0, + dropout=0.01, + lambda_rec=1.0, + lambda_lp=1.0, + num_dec_layers=2, + use_att= True, + att_type= 'sparse_adjmask_dist', + att_logit='tanh', + beta = 0, + classifier=None, + clusterer = None, + normalize_adj=True, + normalize_feats=True + ): + + self.args = create_args(dim,hidden_dim,c,num_layers,bias,act,grad_clip,optimizer,weight_decay,lr,gamma,lr_reduce_freq,cuda,epochs,min_epochs,patience,seed,log_freq,eval_freq,val_prop,test_prop,double_precision,dropout,lambda_rec,lambda_lp,num_dec_layers,use_att,att_type,att_logit,beta,classifier,clusterer,normalize_adj,normalize_feats) + + self.args.n_nodes = adj.shape[0] + self.args.feat_dim = features.shape[1] + self.args.n_classes = len(np.unique(labels)) + self.data = 
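# --- Illustrative sketch (not part of this diff): constructing the HGCAE wrapper above on
# --- toy data. Everything below is hypothetical, and the import path is assumed from the
# --- package layout these files use. Note the default log_freq=0 appears to make the
# --- `(epoch + 1) % self.args.log_freq` checks in fit() divide by zero, so a positive
# --- value is passed; fit() additionally ends by calling perform_task (defined in
# --- train_utils, not shown in this hunk) on the tangent-space embeddings.
import numpy as np
from Ghypeddings.HGCAE.hgcae import HGCAE   # assumed module path

n = 20
adj = (np.random.rand(n, n) < 0.2).astype(float)
adj = np.maximum(adj, adj.T)
np.fill_diagonal(adj, 0)
features = np.random.rand(n, 8)
labels = np.random.randint(0, 2, size=n)

model = HGCAE(adj, features, labels, dim=4, hidden_dim=8, epochs=5, log_freq=1, cuda=0)
losses, acc, f1, recall, precision, roc_auc, train_time = model.fit()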
process_data(self.args,adj,features,labels) + + if(self.args.c == None): + self.args.c_trainable = 1 + self.args.c = 1.0 + + np.random.seed(self.args.seed) + torch.manual_seed(self.args.seed) + + if int(self.args.double_precision): + torch.set_default_dtype(torch.float64) + if int(self.args.cuda) >= 0: + torch.cuda.manual_seed(self.args.seed) + + self.args.device = 'cuda:' + str(self.args.cuda) if int(self.args.cuda) >= 0 else 'cpu' + self.args.patience = self.args.epochs if not self.args.patience else int(self.args.patience) + + if not self.args.lr_reduce_freq: + self.args.lr_reduce_freq = self.args.epochs + + self.args.nb_false_edges = len(self.data['train_edges_false']) + self.args.nb_edges = len(self.data['train_edges']) + st0 = np.random.get_state() + self.args.np_seed = st0 + np.random.set_state(self.args.np_seed) + + for x, val in self.data.items(): + if 'adj' in x: + self.data[x] = sparse_mx_to_torch_sparse_tensor(self.data[x]) + + self.model = LPModel(self.args) + + if self.args.cuda is not None and int(self.args.cuda) >= 0 : + os.environ['CUDA_VISIBLE_DEVICES'] = str(self.args.cuda) + self.model = self.model.to(self.args.device) + for x, val in self.data.items(): + if torch.is_tensor(self.data[x]): + self.data[x] = self.data[x].to(self.args.device) + + self.adj_train_enc = self.data['adj_train_enc'] + self.optimizer = getattr(optimizers, self.args.optimizer)(params=self.model.parameters(), lr=self.args.lr, + weight_decay=self.args.weight_decay) + self.lr_scheduler = torch.optim.lr_scheduler.StepLR( + self.optimizer, + step_size=int(self.args.lr_reduce_freq), + gamma=float(self.args.gamma) + ) + + self.best_emb = None + + + + def fit(self): + + logging.getLogger().setLevel(logging.INFO) + logging.info(f'Using: {self.args.device}') + logging.info(str(self.model)) + tot_params = sum([np.prod(p.size()) for p in self.model.parameters()]) + logging.info(f"Total number of parameters: {tot_params}") + + t_total = time.time() + counter = 0 + best_val_metrics = self.model.init_metric_dict() + + best_losses = [] + real_losses = [] + + for epoch in range(self.args.epochs): + t = time.time() + self.model.train() + self.optimizer.zero_grad() + embeddings = self.model.encode(self.data['features'], self.adj_train_enc) + train_metrics = self.model.compute_metrics(embeddings, self.data, 'train', epoch) + train_metrics['loss'].backward() + if self.args.grad_clip is not None: + max_norm = float(self.args.grad_clip) + all_params = list(self.model.parameters()) + for param in all_params: + torch.nn.utils.clip_grad_norm_(param, max_norm) + self.optimizer.step() + self.lr_scheduler.step() + + real_losses.append(train_metrics['loss'].item()) + if(len(best_losses) == 0): + best_losses.append(real_losses[0]) + elif (best_losses[-1] > real_losses[-1]): + best_losses.append(real_losses[-1]) + else: + best_losses.append(best_losses[-1]) + + with torch.no_grad(): + if (epoch + 1) % self.args.log_freq == 0: + logging.info(" ".join(['Epoch: {:04d}'.format(epoch + 1), + 'lr: {}'.format(self.lr_scheduler.get_lr()[0]), + format_metrics(train_metrics, 'train'), + 'time: {:.4f}s'.format(time.time() - t) + ])) + + if (epoch + 1) % self.args.eval_freq == 0: + self.model.eval() + embeddings = self.model.encode(self.data['features'], self.adj_train_enc) + val_metrics = self.model.compute_metrics(embeddings, self.data, 'val') + if (epoch + 1) % self.args.log_freq == 0: + logging.info(" ".join(['Epoch: {:04d}'.format(epoch + 1), format_metrics(val_metrics, 'val')])) + if self.model.has_improved(best_val_metrics, 
val_metrics): + self.best_emb = embeddings + best_val_metrics = val_metrics + counter = 0 + else: + counter += 1 + if counter == self.args.patience and epoch > self.args.min_epochs: + logging.info("Early stopping") + break + + logging.info("Training Finished!") + logging.info("Total time elapsed: {:.4f}s".format(time.time() - t_total)) + + X = self.model.manifold.logmap0(self.best_emb,self.model.encoder.curvatures[-1]).cpu().detach().numpy() + y = self.data['labels'].reshape(-1,1) + acc,f1,recall,precision,roc_auc = perform_task(self.args, X,y) + + return {'real':real_losses,'best':best_losses},acc,f1,recall,precision,roc_auc , time.time() - t_total + + def predict(self): + self.model.eval() + embeddings = self.model.encode(self.data['features'], self.adj_train_enc) + val_metrics = self.model.compute_metrics(embeddings, self.data, 'test') + data = self.model.manifold.logmap0(embeddings,self.model.encoder.curvatures[-1]).cpu().detach().numpy() + labels = self.data['labels'].reshape(-1,1) + acc,f1,recall,precision,roc_auc=perform_task(self.args,data,labels) + return val_metrics['loss'].item(),acc,f1,recall,precision,roc_auc + + + def save_embeddings(self,directory,prefix): + tb_embeddings_euc = self.model.manifold.logmap0(self.best_emb,self.model.encoder.curvatures[-1]) + for_classification_hyp = np.hstack((self.best_emb.cpu().detach().numpy(),self.data['labels'].reshape(-1,1))) + for_classification_euc = np.hstack((tb_embeddings_euc.cpu().detach().numpy(),self.data['labels'].reshape(-1,1))) + hyp_file_path = os.path.join(directory,f'{prefix}_embeddings_hyp.csv') + euc_file_path = os.path.join(directory,f'{prefix}_embeddings_euc.csv') + np.savetxt(hyp_file_path, for_classification_hyp, delimiter=',') + np.savetxt(euc_file_path, for_classification_euc, delimiter=',') \ No newline at end of file diff --git a/HGCAE/layers/__init__.py b/HGCAE/layers/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/HGCAE/layers/att_layers.py b/HGCAE/layers/att_layers.py new file mode 100644 index 0000000000000000000000000000000000000000..e99964c73d3eefa9a058c73d9bb5d53fa604839d --- /dev/null +++ b/HGCAE/layers/att_layers.py @@ -0,0 +1,80 @@ +"""Attention layers (some modules are copied from https://github.com/Diego999/pyGAT.)""" +import numpy as np +import torch +import torch.nn as nn +import torch.nn.functional as F + +def HypAggAtt(in_features, manifold, dropout, act=None, att_type=None, att_logit=None, beta=0): + att_logit = get_att_logit(att_logit, att_type) + return GeometricAwareHypAggAtt(in_features, manifold, dropout, lambda x: x, att_logit=att_logit, beta=beta) + +class GeometricAwareHypAggAtt(nn.Module): + def __init__(self, in_features, manifold, dropout, act, att_logit=torch.tanh, beta=0.): + super(GeometricAwareHypAggAtt, self).__init__() + self.dropout = dropout + self.att_logit=att_logit + self.special_spmm = SpecialSpmm() + + + self.m = manifold + self.beta = nn.Parameter(torch.Tensor([1e-6])) + self.con = nn.Parameter(torch.Tensor([1e-6])) + self.act = act + self.in_features = in_features + + def forward (self, x, adj, c=1): + n = x.size(0) + edge = adj._indices() + + assert not torch.isnan(self.beta).any() + edge_h = self.beta * self.m.sqdist(x[edge[0, :], :], x[edge[1, :], :], c) + self.con + + self.edge_h = edge_h + assert not torch.isnan(edge_h).any() + edge_e = self.att_logit(edge_h) + self.edge_e = edge_e + ones = torch.ones(size=(n, 1)) + if x.is_cuda: + ones = ones.to(x.device) + e_rowsum = 
self.special_spmm(edge, abs(edge_e), torch.Size([n, n]), ones) + 1e-10 + + return edge_e, e_rowsum + +class SpecialSpmmFunction(torch.autograd.Function): + """Special function for only sparse region backpropataion layer.""" + # generate sparse matrix from `indicex, values, shape` and matmul with b + # Previously, `AXW` computing did not need bp to `A`. + # To trian attention of `A`, now bp through sparse matrix needed. + @staticmethod + def forward(ctx, indices, values, shape, b): + assert indices.requires_grad == False + a = torch.sparse_coo_tensor(indices, values, shape, device=b.device) # make sparse matrix shaped of `NxN` + ctx.save_for_backward(a, b) # save sparse matrix for bp + ctx.N = shape[0] # number of nodes + return torch.matmul(a, b) + + @staticmethod + def backward(ctx, grad_output): + assert not torch.isnan(grad_output).any() + + # grad_output : Nxd gradient + # a : NxN adj(attention) matrix, b: Nxd node feature + a, b = ctx.saved_tensors + grad_values = grad_b = None + if ctx.needs_input_grad[1]: + grad_a_dense = grad_output.matmul(b.t()) + edge_idx = a._indices()[0, :] * ctx.N + a._indices()[1, :] # flattening (x,y) --> nx + y + grad_values = grad_a_dense.view(-1)[edge_idx] + if ctx.needs_input_grad[3]: + grad_b = a.t().matmul(grad_output) + return None, grad_values, None, grad_b + + +class SpecialSpmm(nn.Module): + def forward(self, indices, values, shape, b): + return SpecialSpmmFunction.apply(indices, values, shape, b) + +def get_att_logit(att_logit, att_type): + if att_logit: + att_logit = getattr(torch, att_logit) + return att_logit diff --git a/HGCAE/layers/hyp_layers.py b/HGCAE/layers/hyp_layers.py new file mode 100644 index 0000000000000000000000000000000000000000..c19b24150a0992f06790f510e4da5d1dcd516d50 --- /dev/null +++ b/HGCAE/layers/hyp_layers.py @@ -0,0 +1,232 @@ +""" +Hyperbolic layers. +Major codes of hyperbolic layers are from HGCN +""" +import math + +import torch +import torch.nn as nn +import torch.nn.functional as F +import torch.nn.init as init +from torch.nn.modules.module import Module +from torch.nn.parameter import Parameter + +from Ghypeddings.HGCAE.layers.att_layers import HypAggAtt, SpecialSpmm + + +def get_dim_act_curv(args): + """ + Helper function to get dimension and activation at every layer. + :param args: + :return: + """ + if not args.act: + act = lambda x: x + else: + act = getattr(F, args.act) + acts = [act] * (args.num_layers - 1) + + dims = [args.feat_dim] + # Check layer_num and hdden_dim match + if args.num_layers > 1: + hidden_dim = [args.hidden_dim for _ in range(args.num_layers -1)] + if args.num_layers != len(hidden_dim) + 1: + raise RuntimeError('Check dimension hidden:{}, num_layers:{}'.format(args.hidden_dim, args.num_layers) ) + dims = dims + hidden_dim + + dims += [args.dim] + acts += [act] + n_curvatures = args.num_layers + if args.c_trainable == 1: # NOTE : changed from # if args.c is None: + # create list of trainable curvature parameters + curvatures = [nn.Parameter(torch.Tensor([args.c]).to(args.device)) for _ in range(n_curvatures)] + else: + # fixed curvature + curvatures = [torch.tensor([args.c]) for _ in range(n_curvatures)] + if not args.cuda == -1: + curvatures = [curv.to(args.device) for curv in curvatures] + return dims, acts, curvatures + + + +class HNNLayer(nn.Module): + """ + Hyperbolic neural networks layer. 
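# --- Sketch (standalone, not part of this diff): the row-sum normalization trick used by
# --- the sparse attention above. Per-edge scores are placed in an NxN sparse matrix;
# --- multiplying it by a column of ones gives each row's attention mass (e_rowsum),
# --- which later divides the aggregated neighbor features.
import torch

edge = torch.tensor([[0, 0, 1, 2], [1, 2, 0, 0]])    # hypothetical 3-node edge list (row, col)
edge_e = torch.tensor([0.7, 0.3, 1.0, 1.0])          # per-edge attention scores
n = 3
att = torch.sparse_coo_tensor(edge, edge_e, (n, n)).coalesce()
e_rowsum = torch.sparse.mm(att, torch.ones(n, 1)) + 1e-10   # [n, 1] per-row mass
feats = torch.randn(n, 4)
out = torch.sparse.mm(att, feats) / e_rowsum                 # normalized aggregation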
+ """ + + def __init__(self, manifold, in_features, out_features, c_in, c_out, dropout, act, use_bias): + super(HNNLayer, self).__init__() + self.linear = HypLinear(manifold, in_features, out_features, c_in, dropout, use_bias) + self.hyp_act = HypAct(manifold, c_in, c_out, act) + + def forward(self, x): + h = self.linear.forward(x) + h = self.hyp_act.forward(h) + return h + + +class HyperbolicGraphConvolution(nn.Module): + """ + Hyperbolic graph convolution layer. + """ + + def __init__(self, manifold, in_features, out_features, c_in, c_out, dropout, act, use_bias, use_att, + att_type='sparse_adjmask_dist', att_logit=torch.exp, beta=0., decode=False): + super(HyperbolicGraphConvolution, self).__init__() + self.linear = HypLinear(manifold, in_features, out_features, c_in, dropout, use_bias) + self.agg = HypAgg(manifold, c_in, use_att, out_features, dropout, att_type=att_type, att_logit=att_logit, beta=beta, decode=decode) + self.hyp_act = HypAct(manifold, c_in, c_out, act) + self.decode = decode + + def forward(self, input): + x, adj = input + assert not torch.isnan(self.hyp_act.c_in).any() + self.hyp_act.c_in.data = torch.clamp_min(self.hyp_act.c_in,1e-12) + if self.hyp_act.c_out: + assert not torch.isnan(self.hyp_act.c_out).any() + self.hyp_act.c_out.data = torch.clamp_min(self.hyp_act.c_out,1e-12) + assert not torch.isnan(x).any() + h = self.linear.forward(x) + assert not torch.isnan(h).any() + h = self.agg.forward(h, adj, prev_x=x) + assert not torch.isnan(h).any() + h = self.hyp_act.forward(h) + assert not torch.isnan(h).any() + output = h, adj + return output + + +class HypLinear(nn.Module): + """ + Hyperbolic linear layer. + """ + + def __init__(self, manifold, in_features, out_features, c, dropout, use_bias): + super(HypLinear, self).__init__() + self.manifold = manifold + self.in_features = in_features + self.out_features = out_features + self.c = c + self.dropout = dropout + self.use_bias = use_bias + # self.bias = nn.Parameter(torch.Tensor(out_features)) + self.bias = nn.Parameter(torch.Tensor(1, out_features)) + self.weight = nn.Parameter(torch.Tensor(out_features, in_features)) + self.reset_parameters() + + def reset_parameters(self): + init.xavier_uniform_(self.weight, gain=math.sqrt(2)) + init.constant_(self.bias, 0) + + def forward(self, x): + drop_weight = F.dropout(self.weight, self.dropout, training=self.training) + mv = self.manifold.mobius_matvec(drop_weight, x, self.c) + res = self.manifold.proj(mv, self.c) + if self.use_bias: + bias = self.bias + hyp_bias = self.manifold.expmap0(bias, self.c) + hyp_bias = self.manifold.proj(hyp_bias, self.c) + res = self.manifold.mobius_add(res, hyp_bias, c=self.c) + res = self.manifold.proj(res, self.c) + return res + + def extra_repr(self): + return 'in_features={}, out_features={}, c={}'.format( + self.in_features, self.out_features, self.c + ) + + +class HypAgg(Module): + """ + Hyperbolic aggregation layer. 
+ """ + + def __init__(self, manifold, c, use_att, in_features, dropout, att_type='sparse_adjmask_dist', att_logit=None, beta=0, decode=False): + super(HypAgg, self).__init__() + self.manifold = manifold + self.c = c + self.use_att = use_att + + self.in_features = in_features + self.dropout = dropout + if use_att: + self.att = HypAggAtt(in_features, manifold, dropout, act=None, att_type=att_type, att_logit=att_logit, beta=beta) + self.att_type = att_type + + self.special_spmm = SpecialSpmm() + self.decode = decode + + def forward(self, x, adj, prev_x=None): + + if self.use_att: + dist = 'dist' in self.att_type + if dist: + if 'sparse' in self.att_type: + if self.decode: + # NOTE : AGG(prev_x) + edge_e, e_rowsum = self.att(prev_x, adj, self.c) # SparseAtt + else: + # NOTE : AGG(x) + edge_e, e_rowsum = self.att(x, adj, self.c) # SparseAtt + self.edge_e = edge_e + self.e_rowsum = e_rowsum + ## SparseAtt + x_tangent = self.manifold.logmap0(x, c=self.c) + N = x.size()[0] + edge = adj._indices() + support_t = self.special_spmm(edge, edge_e, torch.Size([N, N]), x_tangent) + assert not torch.isnan(support_t).any() + support_t = support_t.div(e_rowsum) + assert not torch.isnan(support_t).any() + output = self.manifold.proj(self.manifold.expmap0(support_t, c=self.c), c=self.c) + else: + adj = self.att(x, adj, self.c) # DenseAtt + x_tangent = self.manifold.logmap0(x, c=self.c) + support_t = torch.spmm(adj, x_tangent) + output = self.manifold.proj(self.manifold.expmap0(support_t, c=self.c), c=self.c) + else: + ## MLP attention + x_tangent = self.manifold.logmap0(x, c=self.c) + adj = self.att(x_tangent, adj) + support_t = torch.spmm(adj, x_tangent) + output = self.manifold.proj(self.manifold.expmap0(support_t, c=self.c), c=self.c) + else: + x_tangent = self.manifold.logmap0(x, c=self.c) + support_t = torch.spmm(adj, x_tangent) + output = self.manifold.proj(self.manifold.expmap0(support_t, c=self.c), c=self.c) + + return output + + def extra_repr(self): + return 'c={}, use_att={}, decode={}'.format( + self.c, self.use_att, self.decode + ) + + +class HypAct(Module): + """ + Hyperbolic activation layer. + """ + + def __init__(self, manifold, c_in, c_out, act): + super(HypAct, self).__init__() + self.manifold = manifold + self.c_in = c_in + self.c_out = c_out + self.act = act + + def forward(self, x): + if self.manifold.name == 'PoincareBall': + if self.c_out: + xt = self.manifold.activation(x, self.act, self.c_in, self.c_out) + return xt + else: + xt = self.manifold.logmap0(x, c=self.c_in) + return xt + else: + NotImplementedError("not implemented") + + def extra_repr(self): + return 'Manifold={},\n c_in={},\n act={},\n c_out={}'.format( + self.manifold.name, self.c_in, self.act.__name__, self.c_out + ) diff --git a/HGCAE/layers/layers.py b/HGCAE/layers/layers.py new file mode 100644 index 0000000000000000000000000000000000000000..d17b37d1f62390e982b94ee4e5450d8a5a0bf632 --- /dev/null +++ b/HGCAE/layers/layers.py @@ -0,0 +1,68 @@ +"""Euclidean layers.""" +import math + +import torch +import torch.nn as nn +import torch.nn.functional as F +from torch.nn.modules.module import Module +from torch.nn.parameter import Parameter + + +def get_dim_act(args): + """ + Helper function to get dimension and activation at every layer. 
+ :param args: + :return: + """ + if not args.act: + act = lambda x: x + else: + act = getattr(F, args.act) + acts = [act] * (args.num_layers - 1) + + dims = [args.feat_dim] + if args.num_layers > 1: + # Check layer_num and hdden_dim match + hidden_dim = [int(h) for h in args.hidden_dim.split(',')] + if args.num_layers != len(hidden_dim) + 1: + raise RuntimeError('Check dimension hidden:{}, num_laysers:{}'.format(args.hidden_dim, args.num_layers) ) + dims = dims + hidden_dim + + dims += [args.dim] + acts += [act] + return dims, acts + + +class Linear(Module): + """ + Simple Linear layer with dropout. + """ + + def __init__(self, in_features, out_features, dropout, act, use_bias): + super(Linear, self).__init__() + self.dropout = dropout + self.linear = nn.Linear(in_features, out_features, use_bias) + self.act = act + + def forward(self, x): + hidden = self.linear.forward(x) + hidden = F.dropout(hidden, self.dropout, training=self.training) + out = self.act(hidden) + return out + +''' +InnerProductDecdoer implemntation from: +https://github.com/zfjsail/gae-pytorch/blob/master/gae/model.py +''' +class InnerProductDecoder(nn.Module): + """Decoder for using inner product for prediction.""" + + def __init__(self, dropout=0, act=torch.sigmoid): + super(InnerProductDecoder, self).__init__() + self.dropout = dropout + self.act = act + + def forward(self, emb_in, emb_out): + cos_dist = emb_in * emb_out + probs = self.act(cos_dist.sum(1)) + return probs diff --git a/HGCAE/manifolds/__init__.py b/HGCAE/manifolds/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..ed1d71769d7412c7f0fa82c9425ee7fe449e9567 --- /dev/null +++ b/HGCAE/manifolds/__init__.py @@ -0,0 +1,7 @@ +''' +Major codes of hyperbolic layers are from HGCN +Refer Lorentz implementation from HGCN if you need. +''' +from Ghypeddings.HGCAE.manifolds.base import ManifoldParameter +from Ghypeddings.HGCAE.manifolds.euclidean import Euclidean +from Ghypeddings.HGCAE.manifolds.poincare import PoincareBall diff --git a/HGCAE/manifolds/base.py b/HGCAE/manifolds/base.py new file mode 100644 index 0000000000000000000000000000000000000000..805edd678d9e768f22a0dce3a6691bf8556ed53d --- /dev/null +++ b/HGCAE/manifolds/base.py @@ -0,0 +1,84 @@ +''' +Major codes of hyperbolic layers are from HGCN +''' +from torch.nn import Parameter + +class Manifold(object): + """ + Abstract class to define operations on a manifold. 
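InnerProductDecoder above scores a candidate edge as the sigmoid of the inner product of the two endpoint embeddings; pred_link_score in base_models.py later feeds it pairs gathered by edge index. A tiny usage sketch with fabricated embeddings:

import torch

emb = torch.randn(5, 8)                         # 5 node embeddings
edges = torch.tensor([[0, 1], [2, 4], [3, 3]])  # candidate (src, dst) pairs

emb_in, emb_out = emb[edges[:, 0]], emb[edges[:, 1]]
probs = torch.sigmoid((emb_in * emb_out).sum(dim=1))   # P(edge exists), shape (3,)
print(probs)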
+ """ + + def __init__(self): + super().__init__() + self.eps = 10e-8 + + def sqdist(self, p1, p2, c): + """Squared distance between pairs of points.""" + raise NotImplementedError + + def egrad2rgrad(self, p, dp, c): + """Converts Euclidean Gradient to Riemannian Gradients.""" + raise NotImplementedError + + def proj(self, p, c): + """Projects point p on the manifold.""" + raise NotImplementedError + + def proj_tan(self, u, p, c): + """Projects u on the tangent space of p.""" + raise NotImplementedError + + def proj_tan0(self, u, c): + """Projects u on the tangent space of the origin.""" + raise NotImplementedError + + def expmap(self, u, p, c): + """Exponential map of u at point p.""" + raise NotImplementedError + + def logmap(self, p1, p2, c): + """Logarithmic map of point p1 at point p2.""" + raise NotImplementedError + + def expmap0(self, u, c): + """Exponential map of u at the origin.""" + raise NotImplementedError + + def logmap0(self, p, c): + """Logarithmic map of point p at the origin.""" + raise NotImplementedError + + def mobius_add(self, x, y, c, dim=-1): + """Adds points x and y.""" + raise NotImplementedError + + def mobius_matvec(self, m, x, c): + """Performs hyperboic martrix-vector multiplication.""" + raise NotImplementedError + + def init_weights(self, w, c, irange=1e-5): + """Initializes random weigths on the manifold.""" + raise NotImplementedError + + def inner(self, p, c, u, v=None): + """Inner product for tangent vectors at point x.""" + raise NotImplementedError + + def ptransp(self, x, y, u, c): + """Parallel transport of u from x to y.""" + raise NotImplementedError + + +class ManifoldParameter(Parameter): + """ + Subclass of torch.nn.Parameter for Riemannian optimization. + """ + def __new__(cls, data, requires_grad, manifold, c): + return Parameter.__new__(cls, data, requires_grad) + + def __init__(self, data, requires_grad, manifold, c): + self.c = c + self.manifold = manifold + + def __repr__(self): + return '{} Parameter containing:\n'.format(self.manifold.name) + super(Parameter, self).__repr__() diff --git a/HGCAE/manifolds/euclidean.py b/HGCAE/manifolds/euclidean.py new file mode 100644 index 0000000000000000000000000000000000000000..c102023b24eebc91053be85984a8a295166e8c41 --- /dev/null +++ b/HGCAE/manifolds/euclidean.py @@ -0,0 +1,66 @@ +''' +Major codes of hyperbolic layers are from HGCN +''' +import torch +from Ghypeddings.HGCAE.manifolds.base import Manifold + + +class Euclidean(Manifold): + """ + Euclidean Manifold class. + """ + + def __init__(self): + super(Euclidean, self).__init__() + self.name = 'Euclidean' + + def normalize(self, p): + dim = p.size(-1) + p_norm = torch.renorm(p, 2, 0, 1.) 
+ return p_norm + + def sqdist(self, p1, p2, c): + return (p1 - p2).pow(2).sum(dim=-1) + + def egrad2rgrad(self, p, dp, c): + return dp + + def proj(self, p, c): + return p + + def proj_tan(self, u, p, c): + return u + + def proj_tan0(self, u, c): + return u + + def expmap(self, u, p, c): + return p + u + + def logmap(self, p1, p2, c): + return p2 - p1 + + def expmap0(self, u, c): + return u + + def logmap0(self, p, c): + return p + + def mobius_add(self, x, y, c, dim=-1): + return x + y + + def mobius_matvec(self, m, x, c): + mx = x @ m.transpose(-1, -2) + return mx + + def init_weights(self, w, c, irange=1e-5): + w.data.uniform_(-irange, irange) + return w + + def inner(self, p, c, u, v=None, keepdim=False): + if v is None: + v = u + return (u * v).sum(dim=-1, keepdim=keepdim) + + def ptransp(self, x, y, v, c): + return v diff --git a/HGCAE/manifolds/poincare.py b/HGCAE/manifolds/poincare.py new file mode 100644 index 0000000000000000000000000000000000000000..df06e38afaca4063ce8975527f286fb2397d8956 --- /dev/null +++ b/HGCAE/manifolds/poincare.py @@ -0,0 +1,136 @@ +''' +Major codes of hyperbolic layers are from HGCN +''' +import torch +from Ghypeddings.HGCAE.manifolds.base import Manifold +from torch.autograd import Function +from Ghypeddings.HGCAE.utils.math_utils import artanh, tanh + + +class PoincareBall(Manifold): + """ + PoicareBall Manifold class. + + We use the following convention: x0^2 + x1^2 + ... + xd^2 < 1 / c + + Note that 1/sqrt(c) is the Poincare ball radius. + + """ + + def __init__(self, ): + super(PoincareBall, self).__init__() + self.name = 'PoincareBall' + self.min_norm = 1e-15 + self.eps = {torch.float32: 4e-3, torch.float64: 1e-5} + + def sqdist(self, p1, p2, c): + sqrt_c = c ** 0.5 + dist_c = artanh( + sqrt_c * self.mobius_add(-p1, p2, c, dim=-1).norm(dim=-1, p=2, keepdim=False) + ) + dist = dist_c * 2 / sqrt_c + return dist ** 2 + + def _lambda_x(self, x, c): + x_sqnorm = torch.sum(x.data.pow(2), dim=-1, keepdim=True) + return 2 / (1. - c * x_sqnorm).clamp_min(self.min_norm) + + def egrad2rgrad(self, p, dp, c): + lambda_p = self._lambda_x(p, c) + dp /= lambda_p.pow(2) + return dp + + def proj(self, x, c): + norm = torch.clamp_min(x.norm(dim=-1, keepdim=True, p=2), self.min_norm) + maxnorm = (1 - self.eps[x.dtype]) / (c ** 0.5) + cond = norm > maxnorm + projected = x / norm * maxnorm + return torch.where(cond, projected, x) + + def proj_tan(self, u, p, c): + return u + + def proj_tan0(self, u, c): + return u + + def expmap(self, u, p, c): + sqrt_c = c ** 0.5 + u_norm = u.norm(dim=-1, p=2, keepdim=True).clamp_min(self.min_norm) + second_term = ( + tanh(sqrt_c / 2 * self._lambda_x(p, c) * u_norm) + * u + / (sqrt_c * u_norm) + ) + gamma_1 = self.mobius_add(p, second_term, c) + return gamma_1 + + def logmap(self, p1, p2, c): + sub = self.mobius_add(-p1, p2, c) + sub_norm = sub.norm(dim=-1, p=2, keepdim=True).clamp_min(self.min_norm) + lam = self._lambda_x(p1, c) + sqrt_c = c ** 0.5 + return 2 / sqrt_c / lam * artanh(sqrt_c * sub_norm) * sub / sub_norm + + def expmap0(self, u, c): + sqrt_c = c ** 0.5 + u_norm = torch.clamp_min(u.norm(dim=-1, p=2, keepdim=True), self.min_norm) + gamma_1 = tanh(sqrt_c * u_norm) * u / (sqrt_c * u_norm) + return gamma_1 + + def logmap0(self, p, c): + sqrt_c = c ** 0.5 + p_norm = p.norm(dim=-1, p=2, keepdim=True).clamp_min(self.min_norm) + scale = 1. 
/ sqrt_c * artanh(sqrt_c * p_norm) / p_norm + return scale * p + + def mobius_add(self, x, y, c, dim=-1): + x2 = x.pow(2).sum(dim=dim, keepdim=True) + y2 = y.pow(2).sum(dim=dim, keepdim=True) + xy = (x * y).sum(dim=dim, keepdim=True) + num = (1 + 2 * c * xy + c * y2) * x + (1 - c * x2) * y + denom = 1 + 2 * c * xy + c ** 2 * x2 * y2 + return num / denom.clamp_min(self.min_norm) + + def mobius_matvec(self, m, x, c): + sqrt_c = c ** 0.5 + x_norm = x.norm(dim=-1, keepdim=True, p=2).clamp_min(self.min_norm) + mx = x @ m.transpose(-1, -2) + mx_norm = mx.norm(dim=-1, keepdim=True, p=2).clamp_min(self.min_norm) + res_c = tanh(mx_norm / x_norm * artanh(sqrt_c * x_norm)) * mx / (mx_norm * sqrt_c) + cond = (mx == 0).prod(-1, keepdim=True, dtype=torch.uint8) + res_0 = torch.zeros(1, dtype=res_c.dtype, device=res_c.device) + res = torch.where(cond, res_0, res_c) + return res + + def init_weights(self, w, c, irange=1e-5): + w.data.uniform_(-irange, irange) + return w + + def _gyration(self, u, v, w, c, dim: int = -1): + u2 = u.pow(2).sum(dim=dim, keepdim=True) + v2 = v.pow(2).sum(dim=dim, keepdim=True) + uv = (u * v).sum(dim=dim, keepdim=True) + uw = (u * w).sum(dim=dim, keepdim=True) + vw = (v * w).sum(dim=dim, keepdim=True) + c2 = c ** 2 + a = -c2 * uw * v2 + c * vw + 2 * c2 * uv * vw + b = -c2 * vw * u2 - c * uw + d = 1 + 2 * c * uv + c2 * u2 * v2 + return w + 2 * (a * u + b * v) / d.clamp_min(self.min_norm) + + def inner(self, x, c, u, v=None, keepdim=False, dim=-1): + if v is None: + v = u + lambda_x = self._lambda_x(x, c) + return lambda_x ** 2 * (u * v).sum(dim=dim, keepdim=keepdim) + + def ptransp(self, x, y, u, c): + lambda_x = self._lambda_x(x, c) + lambda_y = self._lambda_x(y, c) + return self._gyration(y, -x, u, c) * lambda_x / lambda_y + + def activation(self, x, act, c_in, c_out): + x_act = act(x) + x_prev = self.logmap0(x_act, c_in) + x_next = self.expmap0(x_prev, c_out) + return x_next diff --git a/HGCAE/models/__init__.py b/HGCAE/models/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/HGCAE/models/base_models.py b/HGCAE/models/base_models.py new file mode 100644 index 0000000000000000000000000000000000000000..7d10fd9f134e8ffe44b5c5e58975f31723a081cd --- /dev/null +++ b/HGCAE/models/base_models.py @@ -0,0 +1,200 @@ +import Ghypeddings.HGCAE.models.encoders as encoders +import torch +import numpy as np +import torch.nn as nn +import torch.nn.functional as F +from Ghypeddings.HGCAE.models.decoders import model2decoder +from Ghypeddings.HGCAE.layers.layers import InnerProductDecoder +from sklearn.metrics import roc_auc_score, average_precision_score +from Ghypeddings.HGCAE.utils.eval_utils import acc_f1 +from sklearn import cluster +from sklearn.metrics import accuracy_score, normalized_mutual_info_score, adjusted_rand_score +import Ghypeddings.HGCAE.manifolds as manifolds +import Ghypeddings.HGCAE.models.encoders as encoders + +class BaseModel(nn.Module): + """ + Base model for graph embedding tasks. 
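The Möbius operations above satisfy a few identities that are useful as sanity checks: mobius_add(-p, p, c) is the origin, expmap0 and logmap0 are mutual inverses, and sqdist(p, p, c) is zero. A small numerical check of those properties for c = 1, re-implementing just the formulas quoted above:

import torch

def mobius_add(x, y, c):
    x2 = (x * x).sum(-1, keepdim=True)
    y2 = (y * y).sum(-1, keepdim=True)
    xy = (x * y).sum(-1, keepdim=True)
    num = (1 + 2 * c * xy + c * y2) * x + (1 - c * x2) * y
    den = 1 + 2 * c * xy + c ** 2 * x2 * y2
    return num / den.clamp_min(1e-15)

def expmap0(u, c):
    n = u.norm(dim=-1, keepdim=True).clamp_min(1e-15)
    return torch.tanh(c ** 0.5 * n) * u / (c ** 0.5 * n)

def logmap0(p, c):
    n = p.norm(dim=-1, keepdim=True).clamp_min(1e-15)
    return torch.atanh(c ** 0.5 * n) * p / (c ** 0.5 * n)

def sqdist(p1, p2, c):
    d = torch.atanh(c ** 0.5 * mobius_add(-p1, p2, c).norm(dim=-1)) * 2 / c ** 0.5
    return d ** 2

c = 1.0
u = 0.3 * torch.randn(10, 4)
p = expmap0(u, c)                                    # points inside the unit ball
assert torch.allclose(logmap0(p, c), u, atol=1e-5)   # logmap0 inverts expmap0
assert mobius_add(-p, p, c).abs().max() < 1e-5       # (-p) (+) p = 0
assert sqdist(p, p, c).abs().max() < 1e-9            # d(p, p) = 0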
+ """ + + def __init__(self, args): + super(BaseModel, self).__init__() + self.manifold_name = "PoincareBall" + if args.c is not None: + self.c = torch.tensor([args.c]) + if not args.cuda == -1: + self.c = self.c.to(args.device) + else: + self.c = nn.Parameter(torch.Tensor([1.])) + self.manifold = getattr(manifolds, self.manifold_name)() + self.nnodes = args.n_nodes + self.n_classes = args.n_classes + self.encoder = getattr(encoders, "HGCAE")(self.c, args) + self.num_layers=args.num_layers + + # Embedding c + self.hyperbolic_embedding = True if args.use_att else False + self.decoder_type = 'InnerProductDecoder' + self.dc = InnerProductDecoder(dropout=0, act=torch.sigmoid) + + + def encode(self, x, adj): + h = self.encoder.encode(x, adj) + return h + + def pred_link_score(self, h, idx): # for LP,REC + emb_in = h[idx[:, 0], :] + emb_out = h[idx[:, 1], :] + probs = self.dc.forward(emb_in, emb_out) + + return probs + + def decode(self, h, adj, idx): # REC + output = self.decoder.decode(h, adj) + return output + + + def eval_cluster(self, embeddings, data, split): + if self.hyperbolic_embedding: + emb_c = self.encoder.layers[-1].hyp_act.c_out + embeddings = self.manifold.logmap0(embeddings.to(emb_c.device), c=emb_c).cpu() + + idx = data[f'idx_{split}'] + n_classes = self.n_classes + + embeddings_to_cluster = embeddings[idx].detach().cpu().numpy() + # gt_label = data['labels'][idx].cpu().numpy() + gt_label = data['labels'] + + kmeans = cluster.KMeans(n_clusters=n_classes, algorithm='auto') + kmeans.fit(embeddings_to_cluster) + pred_label = kmeans.fit_predict(embeddings_to_cluster) + + from munkres import Munkres + def best_map(L1,L2): + #L1 should be the groundtruth labels and L2 should be the clustering labels we got + Label1 = np.unique(L1) + nClass1 = len(Label1) + Label2 = np.unique(L2) + nClass2 = len(Label2) + nClass = np.maximum(nClass1,nClass2) + G = np.zeros((nClass,nClass)) + for i in range(nClass1): + ind_cla1 = L1 == Label1[i] + ind_cla1 = ind_cla1.astype(float) + for j in range(nClass2): + ind_cla2 = L2 == Label2[j] + ind_cla2 = ind_cla2.astype(float) + G[i,j] = np.sum(ind_cla2 * ind_cla1) + m = Munkres() + index = m.compute(-G.T) + index = np.array(index) + c = index[:,1] + newL2 = np.zeros(L2.shape) + for i in range(nClass2): + newL2[L2 == Label2[i]] = Label1[c[i]] + return newL2 + + + def err_rate(gt_s, s): + c_x = best_map(gt_s, s) + err_x = np.sum(gt_s[:] !=c_x[:]) + missrate = err_x.astype(float) / (gt_s.shape[0]) + return missrate + + + acc = 1-err_rate(gt_label, pred_label) + # acc = accuracy_score(gt_label, pred_label) + nmi = normalized_mutual_info_score(gt_label, pred_label, average_method='arithmetic') + ari = adjusted_rand_score(gt_label, pred_label) + + metrics = { 'cluster_acc': acc, 'nmi': nmi, 'ari': ari} + return metrics, pred_label + + + def compute_metrics(self, embeddings, data, split, epoch=None): + raise NotImplementedError + + def init_metric_dict(self): + raise NotImplementedError + + def has_improved(self, m1, m2): + raise NotImplementedError + +class LPModel(BaseModel): + """ + Base model for link prediction task. 
+ """ + + def __init__(self, args): + super(LPModel, self).__init__(args) + self.nb_false_edges = args.nb_false_edges + self.positive_edge_samplig = True + if self.positive_edge_samplig: + self.nb_edges = min(args.nb_edges, 5000) # NOTE : be-aware too dense edges + else: + self.nb_edges = args.nb_edges + + if args.lambda_rec > 0: + self.num_dec_layers = args.num_dec_layers + self.lambda_rec = args.lambda_rec + c = self.encoder.curvatures if hasattr(self.encoder, 'curvatures') else args.c ### handle HNN + self.decoder = model2decoder(c, args, 'rec') + else: + self.lambda_rec = 0 + + if args.lambda_lp > 0: + self.lambda_lp = args.lambda_lp + else: + self.lambda_lp = 0 + + def compute_metrics(self, embeddings, data, split, epoch=None): + if split == 'train': + num_true_edges = data[f'{split}_edges'].shape[0] + if self.positive_edge_samplig and num_true_edges > self.nb_edges: + edges_true = data[f'{split}_edges'][np.random.randint(0, num_true_edges, self.nb_edges)] + else: + edges_true = data[f'{split}_edges'] + edges_false = data[f'{split}_edges_false'][np.random.randint(0, self.nb_false_edges, self.nb_edges)] + else: + edges_true = data[f'{split}_edges'] + edges_false = data[f'{split}_edges_false'] + + pos_scores = self.pred_link_score(embeddings, edges_true) + neg_scores = self.pred_link_score(embeddings, edges_false) + assert not torch.isnan(pos_scores).any() + assert not torch.isnan(neg_scores).any() + loss = F.binary_cross_entropy(pos_scores, torch.ones_like(pos_scores)) + loss += F.binary_cross_entropy(neg_scores, torch.zeros_like(neg_scores)) + if pos_scores.is_cuda: + pos_scores = pos_scores.cpu() + neg_scores = neg_scores.cpu() + labels = [1] * pos_scores.shape[0] + [0] * neg_scores.shape[0] + preds = list(pos_scores.data.numpy()) + list(neg_scores.data.numpy()) + roc = roc_auc_score(labels, preds) + ap = average_precision_score(labels, preds) + metrics = {'loss': loss, 'roc': roc, 'ap': ap} + + assert not torch.isnan(loss).any() + if self.lambda_rec: + idx = data['idx_all'] + recon = self.decode(embeddings, data['adj_train_dec'], idx) ## NOTE : adj + assert not torch.isnan(recon).any() + if self.num_dec_layers == self.num_layers: + target = data['features'][idx] + elif self.num_dec_layers == self.num_layers - 1: + target = self.encoder.features[0].detach()[idx] + else: + raise RuntimeError('num_dec_layers only support 1,2') + loss_rec = self.lambda_rec * torch.nn.functional.mse_loss(recon[idx], target , reduction='mean') + assert not torch.isnan(loss_rec).any() + loss_lp = loss * self.lambda_lp + metrics.update({'loss': loss_lp + loss_rec, 'loss_rec': loss_rec, 'loss_lp': loss_lp}) + + return metrics + + def init_metric_dict(self): + return {'roc': -1, 'ap': -1} + + def has_improved(self, m1, m2): + return 0.5 * (m1['roc'] + m1['ap']) < 0.5 * (m2['roc'] + m2['ap']) diff --git a/HGCAE/models/decoders.py b/HGCAE/models/decoders.py new file mode 100644 index 0000000000000000000000000000000000000000..72c46942397dc97d00f1980e6569399e2792a644 --- /dev/null +++ b/HGCAE/models/decoders.py @@ -0,0 +1,106 @@ +"""Graph decoders.""" +import Ghypeddings.HGCAE.manifolds as manifolds +import torch.nn as nn +import torch.nn.functional as F + + +import torch + + +class Decoder(nn.Module): + """ + Decoder abstract class + """ + + def __init__(self, c): + super(Decoder, self).__init__() + self.c = c + + def classify(self, x, adj): + ''' + output + - nc : probs + - rec : input_feat + ''' + if self.decode_adj: + input = (x, adj) + output, _ = self.classifier.forward(input) + else: + output = 
self.classifier.forward(x) + return output + + + def decode(self, x, adj): + ''' + output + - nc : probs + - rec : input_feat + ''' + if self.decode_adj: + input = (x, adj) + output, _ = self.decoder.forward(input) + else: + output = self.decoder.forward(x) + return output + + + +import Ghypeddings.HGCAE.layers.hyp_layers as hyp_layers +class HGCAEDecoder(Decoder): + """ + Decoder for HGCAE + """ + + def __init__(self, c, args, task): + super(HGCAEDecoder, self).__init__(c) + self.manifold = getattr(manifolds, 'PoincareBall')() + + assert args.num_layers > 0 + + dims, acts, _ = hyp_layers.get_dim_act_curv(args) + dims = dims[::-1] + acts = acts[::-1][:-1] + [lambda x: x] # Last layer without act + self.curvatures = self.c[::-1] + + encdec_share_curvature = False + if not encdec_share_curvature and args.num_layers == args.num_dec_layers: # do not share and enc-dec mirror-shape + num_c = len(self.curvatures) + self.curvatures = self.curvatures[:1] + if args.c_trainable == 1: + self.curvatures += [nn.Parameter(torch.Tensor([args.c]).to(args.device))] * (num_c - 1) + else: + self.curvatures += [torch.tensor([args.c])] * (num_c - 1) + if not args.cuda == -1: + self.curvatures = [curv.to(args.device) for curv in self.curvatures] + + + self.curvatures = self.curvatures[:-1] + [None] + + + hgc_layers = [] + num_dec_layers = args.num_dec_layers + for i in range(num_dec_layers): + c_in, c_out = self.curvatures[i], self.curvatures[i + 1] + in_dim, out_dim = dims[i], dims[i + 1] + act = acts[i] + hgc_layers.append( + hyp_layers.HyperbolicGraphConvolution( + self.manifold, in_dim, out_dim, c_in, c_out, args.dropout, act, args.bias, args.use_att, + att_type=args.att_type, att_logit=args.att_logit, beta=args.beta, decode=True + ) + ) + + self.decoder = nn.Sequential(*hgc_layers) + self.decode_adj = True + + # NOTE : self.c is fixed, not trainable + def classify(self, x, adj): + h = self.manifold.logmap0(x, c=self.c) + return super(HGCAEDecoder, self).classify(h, adj) + + def decode(self, x, adj): + output = super(HGCAEDecoder, self).decode(x, adj) + return output + +model2decoder = HGCAEDecoder + diff --git a/HGCAE/models/encoders.py b/HGCAE/models/encoders.py new file mode 100644 index 0000000000000000000000000000000000000000..6dfdc0f984d4018035118bd4b09b88900bcb0976 --- /dev/null +++ b/HGCAE/models/encoders.py @@ -0,0 +1,64 @@ +"""Graph encoders.""" +import Ghypeddings.HGCAE.manifolds as manifolds +import Ghypeddings.HGCAE.layers.hyp_layers as hyp_layers +import torch +import torch.nn as nn + + +class Encoder(nn.Module): + """ + Encoder abstract class. + """ + + def __init__(self, c, use_cnn=None): + super(Encoder, self).__init__() + self.c = c + + def encode(self, x, adj): + self.features = [] + if self.encode_graph: + input = (x, adj) + xx = input + for i in range(len(self.layers)): + out = self.layers[i].forward(xx) + self.features.append(out[0]) + xx = out + output , _ = xx + else: + output = self.layers.forward(x) + return output + +class HGCAE(Encoder): + """ + Hyperbolic Graph Convolutional Auto-Encoders. 
+ """ + + def __init__(self, c, args): #, use_cnn + super(HGCAE, self).__init__(c, use_cnn=True) + self.manifold = getattr(manifolds, "PoincareBall")() + assert args.num_layers > 0 + dims, acts, self.curvatures = hyp_layers.get_dim_act_curv(args) + if args.c_trainable == 1: + self.curvatures.append(nn.Parameter(torch.Tensor([args.c]).to(args.device))) + else: + self.curvatures.append(torch.tensor([args.c]).to(args.device)) + hgc_layers = [] + for i in range(len(dims) - 1): + c_in, c_out = self.curvatures[i], self.curvatures[i + 1] + in_dim, out_dim = dims[i], dims[i + 1] + act = acts[i] + + hgc_layers.append( + hyp_layers.HyperbolicGraphConvolution( + self.manifold, in_dim, out_dim, c_in, c_out, args.dropout, act, args.bias, args.use_att, + att_type=args.att_type, att_logit=args.att_logit, beta=args.beta + ) + ) + self.layers = nn.Sequential(*hgc_layers) + self.encode_graph = True + + def encode(self, x, adj): + x_hyp = self.manifold.proj( + self.manifold.expmap0(self.manifold.proj_tan0(x, self.curvatures[0]), c=self.curvatures[0]), + c=self.curvatures[0]) + return super(HGCAE, self).encode(x_hyp, adj) diff --git a/HGCAE/optimizers/__init__.py b/HGCAE/optimizers/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a1198f3d759b39b51aedfe5b2d92f068151a0fe7 --- /dev/null +++ b/HGCAE/optimizers/__init__.py @@ -0,0 +1,2 @@ +from torch.optim import Adam +from Ghypeddings.HGCAE.optimizers.radam import RiemannianAdam diff --git a/HGCAE/optimizers/radam.py b/HGCAE/optimizers/radam.py new file mode 100644 index 0000000000000000000000000000000000000000..b48cb6fe6f1a66a8b2103a49b207485a143df1f8 --- /dev/null +++ b/HGCAE/optimizers/radam.py @@ -0,0 +1,175 @@ +"""Riemannian adam optimizer geoopt implementation (https://github.com/geoopt/).""" +import torch.optim +from Ghypeddings.HGCAE.manifolds import Euclidean,ManifoldParameter + +_default_manifold = Euclidean() + + +class OptimMixin(object): + def __init__(self, *args, stabilize=None, **kwargs): + self._stabilize = stabilize + super().__init__(*args, **kwargs) + + def stabilize_group(self, group): + pass + + def stabilize(self): + """Stabilize parameters if they are off-manifold due to numerical reasons + """ + for group in self.param_groups: + self.stabilize_group(group) + + +def copy_or_set_(dest, source): + """ + A workaround to respect strides of :code:`dest` when copying :code:`source` + (https://github.com/geoopt/geoopt/issues/70) + Parameters + ---------- + dest : torch.Tensor + Destination tensor where to store new data + source : torch.Tensor + Source data to put in the new tensor + Returns + ------- + dest + torch.Tensor, modified inplace + """ + if dest.stride() != source.stride(): + return dest.copy_(source) + else: + return dest.set_(source) + + +class RiemannianAdam(OptimMixin, torch.optim.Adam): + r"""Riemannian Adam with the same API as :class:`torch.optim.Adam` + Parameters + ---------- + params : iterable + iterable of parameters to optimize or dicts defining + parameter groups + lr : float (optional) + learning rate (default: 1e-3) + betas : Tuple[float, float] (optional) + coefficients used for computing + running averages of gradient and its square (default: (0.9, 0.999)) + eps : float (optional) + term added to the denominator to improve + numerical stability (default: 1e-8) + weight_decay : float (optional) + weight decay (L2 penalty) (default: 0) + amsgrad : bool (optional) + whether to use the AMSGrad variant of this + algorithm from the paper `On the Convergence of Adam and Beyond`_ + (default: 
False) + Other Parameters + ---------------- + stabilize : int + Stabilize parameters if they are off-manifold due to numerical + reasons every ``stabilize`` steps (default: ``None`` -- no stabilize) + .. _On the Convergence of Adam and Beyond: + https://openreview.net/forum?id=ryQu7f-RZ + """ + + def step(self, closure=None): + """Performs a single optimization step. + Arguments + --------- + closure : callable (optional) + A closure that reevaluates the model + and returns the loss. + """ + loss = None + if closure is not None: + loss = closure() + with torch.no_grad(): + for group in self.param_groups: + if "step" not in group: + group["step"] = 0 + betas = group["betas"] + weight_decay = group["weight_decay"] + eps = group["eps"] + learning_rate = group["lr"] + amsgrad = group["amsgrad"] + for point in group["params"]: + grad = point.grad + if grad is None: + continue + + if isinstance(point, (ManifoldParameter)): + manifold = point.manifold + c = point.c + else: + manifold = _default_manifold + c = None + if grad.is_sparse: + raise RuntimeError( + "Riemannian Adam does not support sparse gradients yet (PR is welcome)" + ) + + state = self.state[point] + + # State initialization + if len(state) == 0: + state["step"] = 0 + # Exponential moving average of gradient values + state["exp_avg"] = torch.zeros_like(point) + # Exponential moving average of squared gradient values + state["exp_avg_sq"] = torch.zeros_like(point) + if amsgrad: + # Maintains max of all exp. moving avg. of sq. grad. values + state["max_exp_avg_sq"] = torch.zeros_like(point) + # make local variables for easy access + exp_avg = state["exp_avg"] + exp_avg_sq = state["exp_avg_sq"] + # actual step + grad.add_(weight_decay, point) + grad = manifold.egrad2rgrad(point, grad, c) + + exp_avg.mul_(betas[0]).add_(1 - betas[0], grad) + exp_avg_sq.mul_(betas[1]).add_( + 1 - betas[1], manifold.inner(point, c, grad, keepdim=True) + ) + if amsgrad: + max_exp_avg_sq = state["max_exp_avg_sq"] + # Maintains the maximum of all 2nd moment running avg. till now + torch.max(max_exp_avg_sq, exp_avg_sq, out=max_exp_avg_sq) + # Use the max. for normalizing running avg. 
of gradient + denom = max_exp_avg_sq.sqrt().add_(eps) + else: + denom = exp_avg_sq.sqrt().add_(eps) + group["step"] += 1 + bias_correction1 = 1 - betas[0] ** group["step"] + bias_correction2 = 1 - betas[1] ** group["step"] + step_size = ( + learning_rate * bias_correction2 ** 0.5 / bias_correction1 + ) + + # copy the state, we need it for retraction + # get the direction for ascend + direction = exp_avg / denom + # transport the exponential averaging to the new point + new_point = manifold.proj(manifold.expmap(-step_size * direction, point, c), c) + exp_avg_new = manifold.ptransp(point, new_point, exp_avg, c) + # use copy only for user facing point + copy_or_set_(point, new_point) + exp_avg.set_(exp_avg_new) + + group["step"] += 1 + if self._stabilize is not None and group["step"] % self._stabilize == 0: + self.stabilize_group(group) + return loss + + @torch.no_grad() + def stabilize_group(self, group): + for p in group["params"]: + if not isinstance(p, ManifoldParameter): + continue + state = self.state[p] + if not state: # due to None grads + continue + manifold = p.manifold + c = p.c + exp_avg = state["exp_avg"] + copy_or_set_(p, manifold.proj(p, c)) + exp_avg.set_(manifold.proj_tan(exp_avg, u, c)) diff --git a/HGCAE/utils/__init__.py b/HGCAE/utils/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/HGCAE/utils/data_utils.py b/HGCAE/utils/data_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..8739c8a56bc1e8bf6b7dad1e98e88ccce79a28d7 --- /dev/null +++ b/HGCAE/utils/data_utils.py @@ -0,0 +1,134 @@ +"""Data utils functions for pre-processing and data loading.""" +import os +import pickle as pkl +import sys + +import networkx as nx +import numpy as np +import scipy.sparse as sp +import torch + +from scipy import sparse +import logging + +import pandas as pd + +def process_data(args, adj , features, labels): + ## Load data + data = {'adj_train': sp.csr_matrix(adj), 'features': features, 'labels': labels} + adj = data['adj_train'] + + ## TAKES a lot of time + + adj_train, train_edges, train_edges_false, val_edges, val_edges_false, test_edges, test_edges_false = mask_edges( + adj, args.val_prop, args.test_prop, args.seed + ) + + ## TAKES a lot of time + data['adj_train'] = adj_train + data['train_edges'], data['train_edges_false'] = train_edges, train_edges_false + if args.val_prop + args.test_prop > 0: + data['val_edges'], data['val_edges_false'] = val_edges, val_edges_false + data['test_edges'], data['test_edges_false'] = test_edges, test_edges_false + all_info="" + + ## Adj matrix + adj = data['adj_train'] + data['adj_train_enc'], data['features'] = process( + data['adj_train'], data['features'], args.normalize_adj, args.normalize_feats + ) + + if args.lambda_rec: + data['adj_train_dec'] = rowwise_normalizing(data['adj_train']) + + adj_2hop = get_adj_2hop(adj) + data['adj_train_enc_2hop'] = symmetric_laplacian_smoothing(adj_2hop) + + # NOTE : Re-adjust labels + # Some data omit `0` class, thus n_classes are wrong with `max(labels)+1` + args.n_classes = int(data['labels'].max() + 1) + + data['idx_all'] = range(data['features'].shape[0]) + data_info = "Dataset {} Loaded : dimensions are adj:{}, edges:{}, features:{}, labels:{}\n".format( + 'ddos2019', data['adj_train'].shape, data['adj_train'].sum(), data['features'].shape, data['labels'].shape) + data['info'] = data_info + return data + +def process(adj, features, normalize_adj, normalize_feats): + if sp.isspmatrix(features): + 
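The RiemannianAdam step above keeps the usual Adam moments but replaces the Euclidean update with three manifold operations: convert the Euclidean gradient with egrad2rgrad, retract along the search direction with expmap, and parallel-transport the first moment to the new point. A schematic single step showing only that control flow, using a trivial Euclidean stand-in manifold (all three operations reduce to identity/addition); weight decay and AMSGrad are omitted:

import torch

class EuclideanOps:                       # stand-in manifold: all ops are trivial
    def egrad2rgrad(self, p, g, c): return g
    def expmap(self, u, p, c):      return p + u
    def ptransp(self, x, y, v, c):  return v
    def inner(self, p, c, u, keepdim=False): return (u * u).sum(-1, keepdim=keepdim)
    def proj(self, p, c):           return p

def riemannian_adam_step(point, grad, exp_avg, exp_avg_sq, step, m, c=None,
                         lr=1e-3, betas=(0.9, 0.999), eps=1e-8):
    grad = m.egrad2rgrad(point, grad, c)
    exp_avg.mul_(betas[0]).add_(grad, alpha=1 - betas[0])
    exp_avg_sq.mul_(betas[1]).add_(m.inner(point, c, grad, keepdim=True), alpha=1 - betas[1])
    denom = exp_avg_sq.sqrt().add_(eps)
    step_size = lr * (1 - betas[1] ** step) ** 0.5 / (1 - betas[0] ** step)
    direction = exp_avg / denom
    new_point = m.proj(m.expmap(-step_size * direction, point, c), c)   # retraction
    exp_avg.set_(m.ptransp(point, new_point, exp_avg, c))               # transport moment
    return new_point

p = torch.zeros(3)
print(riemannian_adam_step(p, torch.ones(3), torch.zeros(3), torch.zeros(1), 1, EuclideanOps()))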
features = np.array(features.todense()) + if normalize_feats: + features = normalize(features) + features = torch.Tensor(features) + if normalize_adj: + adj = normalize(adj + sp.eye(adj.shape[0])) + return adj, features + +def get_adj_2hop(adj): + adj_self = adj + sp.eye(adj.shape[0]) + adj_2hop = adj_self.dot(adj_self) + adj_2hop.data = np.clip(adj_2hop.data, 0, 1) + adj_2hop = adj_2hop - sp.eye(adj.shape[0]) - adj + return adj_2hop + +def normalize(mx): + """Row-normalize sparse matrix.""" + rowsum = np.array(mx.sum(1)) + r_inv = np.power(rowsum, -1).flatten() + r_inv[np.isinf(r_inv)] = 0. + r_mat_inv = sp.diags(r_inv) + mx = r_mat_inv.dot(mx) + return mx + +def symmetric_laplacian_smoothing(adj): + """Symmetrically normalize adjacency matrix.""" + adj = adj + sp.eye(adj.shape[0]) # self-loop + + adj = sp.coo_matrix(adj) + rowsum = np.array(adj.sum(1)) + d_inv_sqrt = np.power(rowsum, -0.5).flatten() + d_inv_sqrt[np.isinf(d_inv_sqrt)] = 0. + d_mat_inv_sqrt = sp.diags(d_inv_sqrt) + return adj.dot(d_mat_inv_sqrt).transpose().dot(d_mat_inv_sqrt).tocoo() + +def rowwise_normalizing(adj): + """Row-wise normalize adjacency matrix.""" + adj = adj + sp.eye(adj.shape[0]) # self-loop + adj = sp.coo_matrix(adj) + rowsum = np.array(adj.sum(1)) + d_inv = np.power(rowsum, -1.0).flatten() + d_inv[np.isinf(d_inv)] = 0. + d_mat_inv = sp.diags(d_inv) + return adj.dot(d_mat_inv).transpose().tocoo() + +def sparse_mx_to_torch_sparse_tensor(sparse_mx): + """Convert a scipy sparse matrix to a torch sparse tensor.""" + sparse_mx = sparse_mx.tocoo() + indices = torch.from_numpy( + np.vstack((sparse_mx.row, sparse_mx.col)).astype(np.int64) + ) + values = torch.Tensor(sparse_mx.data) + shape = torch.Size(sparse_mx.shape) + return torch.sparse.FloatTensor(indices, values, shape) + +def mask_edges(adj, val_prop, test_prop, seed): + np.random.seed(seed) # get tp edges + x, y = sp.triu(adj).nonzero() + pos_edges = np.array(list(zip(x, y))) + np.random.shuffle(pos_edges) + # get tn edges + x, y = sp.triu(sp.csr_matrix(1. 
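The two adjacency normalizations above differ only in the degree scaling: symmetric_laplacian_smoothing produces D^-1/2 (A+I) D^-1/2 for the encoder, while rowwise_normalizing produces a D^-1 (A+I)-style row scaling for the reconstruction decoder (both coincide with these forms when A is symmetric). A quick comparison on a small symmetric toy adjacency:

import numpy as np
import scipy.sparse as sp

adj = sp.csr_matrix(np.array([[0, 1, 0],
                              [1, 0, 1],
                              [0, 1, 0]], dtype=float))
a_hat = adj + sp.eye(3)                               # add self-loops
deg = np.asarray(a_hat.sum(1)).flatten()

d_inv_sqrt = sp.diags(np.power(deg, -0.5))
sym = d_inv_sqrt @ a_hat @ d_inv_sqrt                 # D^-1/2 (A+I) D^-1/2
row = sp.diags(1.0 / deg) @ a_hat                     # D^-1 (A+I)

print(sym.toarray().round(3))
print(row.toarray().round(3))                         # rows sum to 1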
- adj.toarray())).nonzero() # LONG + neg_edges = np.array(list(zip(x, y))) # EVEN LONGER + np.random.shuffle(neg_edges) # ALSO LONG + + m_pos = len(pos_edges) + n_val = int(m_pos * val_prop) + n_test = int(m_pos * test_prop) + val_edges, test_edges, train_edges = pos_edges[:n_val], pos_edges[n_val:n_test + n_val], pos_edges[n_test + n_val:] + val_edges_false, test_edges_false = neg_edges[:n_val], neg_edges[n_val:n_test + n_val] + train_edges_false = np.concatenate([neg_edges, val_edges, test_edges], axis=0) + adj_train = sp.csr_matrix((np.ones(train_edges.shape[0]), (train_edges[:, 0], train_edges[:, 1])), shape=adj.shape) + adj_train = adj_train + adj_train.T + return adj_train, torch.LongTensor(train_edges), torch.LongTensor(train_edges_false), torch.LongTensor(val_edges), \ + torch.LongTensor(val_edges_false), torch.LongTensor(test_edges), torch.LongTensor( + test_edges_false) diff --git a/HGCAE/utils/eval_utils.py b/HGCAE/utils/eval_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..e2793a673e8d9a19d78be82733c652e568cec985 --- /dev/null +++ b/HGCAE/utils/eval_utils.py @@ -0,0 +1,11 @@ +from sklearn.metrics import average_precision_score, accuracy_score, f1_score + +def acc_f1(output, labels, average='binary'): + preds = output.max(1)[1].type_as(labels) + if preds.is_cuda: + preds = preds.cpu() + labels = labels.cpu() + accuracy = accuracy_score(labels,preds) + f1 = f1_score(labels,preds, average=average) + return accuracy, f1 + diff --git a/HGCAE/utils/math_utils.py b/HGCAE/utils/math_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..56a0de2552fcc9ef35e0e933904f1b391d63f3ec --- /dev/null +++ b/HGCAE/utils/math_utils.py @@ -0,0 +1,70 @@ +''' +Code from HGCN (https://github.com/HazyResearch/hgcn/blob/master/utils/math_utils.py) +''' +import torch + + +def cosh(x, clamp=15): + return x.clamp(-clamp, clamp).cosh() + + +def sinh(x, clamp=15): + return x.clamp(-clamp, clamp).sinh() + + +def tanh(x, clamp=15): + return x.clamp(-clamp, clamp).tanh() + + +def arcosh(x): + return Arcosh.apply(x) + + +def arsinh(x): + return Arsinh.apply(x) + + +def artanh(x): + return Artanh.apply(x) + + +class Artanh(torch.autograd.Function): + @staticmethod + def forward(ctx, x): + x = x.clamp(-1 + 1e-15, 1 - 1e-15) + ctx.save_for_backward(x) + z = x.double() + return (torch.log_(1 + z).sub_(torch.log_(1 - z))).mul_(0.5).to(x.dtype) + + @staticmethod + def backward(ctx, grad_output): + input, = ctx.saved_tensors + return grad_output / (1 - input ** 2) + + +class Arsinh(torch.autograd.Function): + @staticmethod + def forward(ctx, x): + ctx.save_for_backward(x) + z = x.double() + return (z + torch.sqrt_(1 + z.pow(2))).clamp_min_(1e-15).log_().to(x.dtype) + + @staticmethod + def backward(ctx, grad_output): + input, = ctx.saved_tensors + return grad_output / (1 + input ** 2) ** 0.5 + + +class Arcosh(torch.autograd.Function): + @staticmethod + def forward(ctx, x): + x = x.clamp(min=1 + 1e-7) + ctx.save_for_backward(x) + z = x.double() + return (z + torch.sqrt_(z.pow(2) - 1)).clamp_min_(1e-15).log_().to(x.dtype) + + @staticmethod + def backward(ctx, grad_output): + input, = ctx.saved_tensors + return grad_output / (input ** 2 - 1) ** 0.5 + diff --git a/HGCAE/utils/train_utils.py b/HGCAE/utils/train_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..3d207f71cd919d8f446147eafcf48bdd4f91e141 --- /dev/null +++ b/HGCAE/utils/train_utils.py @@ -0,0 +1,199 @@ +import os +import sys + +import numpy as np +import torch +import 
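mask_edges above takes the upper triangle of the adjacency as positive edges, shuffles them, carves out val/test slices by proportion, and rebuilds a symmetric training adjacency from what remains. A tiny sketch of just that slicing arithmetic (negative-edge sampling and tensor conversion omitted):

import numpy as np
import scipy.sparse as sp

adj = sp.csr_matrix((np.ones(4), ([0, 0, 1, 2], [1, 2, 2, 3])), shape=(4, 4))
adj = adj + adj.T                                  # symmetric toy graph

x, y = sp.triu(adj).nonzero()                      # each undirected edge once
pos_edges = np.array(list(zip(x, y)))
np.random.shuffle(pos_edges)

val_prop, test_prop = 0.25, 0.25
n_val, n_test = int(len(pos_edges) * val_prop), int(len(pos_edges) * test_prop)
val, test, train = (pos_edges[:n_val],
                    pos_edges[n_val:n_val + n_test],
                    pos_edges[n_val + n_test:])

adj_train = sp.csr_matrix((np.ones(len(train)), (train[:, 0], train[:, 1])), shape=adj.shape)
adj_train = adj_train + adj_train.T                # re-symmetrize for training
print(len(train), len(val), len(test))             # 2 1 1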
torch.nn.functional as F +import torch.nn.modules.loss +import argparse + +def format_metrics(metrics, split): + """Format metric in metric dict for logging.""" + return " ".join( + ["{}_{}: {:.8f}".format(split, metric_name, metric_val) for metric_name, metric_val in metrics.items()]) + +def get_dir_name(models_dir): + """Gets a directory to save the model. + + If the directory already exists, then append a new integer to the end of + it. This method is useful so that we don't overwrite existing models + when launching new jobs. + + Args: + models_dir: The directory where all the models are. + + Returns: + The name of a new directory to save the training logs and model weights. + """ + if not os.path.exists(models_dir): + save_dir = os.path.join(models_dir, '0') + os.makedirs(save_dir) + else: + existing_dirs = np.array( + [ + d + for d in os.listdir(models_dir) + if os.path.isdir(os.path.join(models_dir, d)) + ] + ).astype(np.int) + if len(existing_dirs) > 0: + dir_id = str(existing_dirs.max() + 1) + else: + dir_id = "1" + save_dir = os.path.join(models_dir, dir_id) + os.makedirs(save_dir) + return save_dir + + +def add_flags_from_config(parser, config_dict): + """ + Adds a flag (and default value) to an ArgumentParser for each parameter in a config + """ + + def OrNone(default): + def func(x): + # Convert "none" to proper None object + if x.lower() == "none": + return None + # If default is None (and x is not None), return x without conversion as str + elif default is None: + return str(x) + # Otherwise, default has non-None type; convert x to that type + else: + return type(default)(x) + + return func + + for param in config_dict: + default, description = config_dict[param] + try: + if isinstance(default, dict): + parser = add_flags_from_config(parser, default) + elif isinstance(default, list): + if len(default) > 0: + # pass a list as argument + parser.add_argument( + f"--{param}", + action="append", + type=type(default[0]), + default=default, + help=description + ) + else: + pass + parser.add_argument(f"--{param}", action="append", default=default, help=description) + else: + pass + parser.add_argument(f"--{param}", type=OrNone(default), default=default, help=description) + except argparse.ArgumentError: + print( + f"Could not add flag for param {param} because it was already present." 
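add_flags_from_config above turns a {param: (default, help)} dict into argparse flags, with OrNone letting the literal string "none" override a typed default back to None. A compact sketch of that idea (the config dict here is invented for illustration, not one shipped with the repo):

import argparse

def or_none(default):
    def convert(x):
        if x.lower() == "none":
            return None
        return str(x) if default is None else type(default)(x)
    return convert

config = {"lr": (0.01, "learning rate"), "patience": (100, "early-stopping patience")}

parser = argparse.ArgumentParser()
for name, (default, help_text) in config.items():
    parser.add_argument(f"--{name}", type=or_none(default), default=default, help=help_text)

args = parser.parse_args(["--lr", "0.05", "--patience", "none"])
print(args.lr, args.patience)   # 0.05 None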
+ ) + return parser + + + +import subprocess +def check_gpustats(columns=None): + query = r'nvidia-smi --query-gpu=%s --format=csv,noheader' % ','.join(columns) + smi_output = subprocess.check_output(query, shell=True).decode().strip() + + gpustats = [] + for line in smi_output.split('\n'): + if not line: + continue + gpustat = line.split(',') + gpustats.append({k: v.strip() for k, v in zip(columns, gpustat)}) + + return gpustats + + +def assign_gpus(num_gpu, memory_threshold=1000): # (MiB) + os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" + + columns = ['index', 'memory.used'] + gpustats = {i['index']: i['memory.used'] for i in check_gpustats(columns)} + + + + available_gpus = [] + for gpu in sorted(gpustats.keys()): + if int(gpustats.get(gpu).split(' ')[0]) < memory_threshold: + available_gpus.append(gpu) + + if len(available_gpus) < num_gpu: + raise MemoryError('{} GPUs requested, but only {} available'.format(num_gpu, len(available_gpus))) + + gpus_to_assign = available_gpus[:num_gpu] + # os.environ['CUDA_VISIBLE_DEVICES'] = ','.join(gpus_to_assign) + return gpus_to_assign + + + +def create_args(*args): + parser = argparse.ArgumentParser() + parser.add_argument('--dim', type=int, default=args[0]) + parser.add_argument('--hidden_dim', type=int, default=args[1]) + parser.add_argument('--c', type=int, default=args[2]) + parser.add_argument('--num_layers', type=int, default=args[3]) + parser.add_argument('--bias', type=bool, default=args[4]) + parser.add_argument('--act', type=str, default=args[5]) + parser.add_argument('--grad_clip', type=float, default=args[6]) + parser.add_argument('--optimizer', type=str, default=args[7]) + parser.add_argument('--weight_decay', type=float, default=args[8]) + parser.add_argument('--lr', type=float, default=args[9]) + parser.add_argument('--gamma', type=float, default=args[10]) + parser.add_argument('--lr_reduce_freq', type=int, default=args[11]) + parser.add_argument('--cuda', type=int, default=args[12]) + parser.add_argument('--epochs', type=int, default=args[13]) + parser.add_argument('--min_epochs', type=int, default=args[14]) + parser.add_argument('--patience', type=int, default=args[15]) + parser.add_argument('--seed', type=int, default=args[16]) + parser.add_argument('--log_freq', type=int, default=args[17]) + parser.add_argument('--eval_freq', type=int, default=args[18]) + parser.add_argument('--val_prop', type=float, default=args[19]) + parser.add_argument('--test_prop', type=float, default=args[20]) + parser.add_argument('--double_precision', type=int, default=args[21]) + parser.add_argument('--dropout', type=float, default=args[22]) + parser.add_argument('--lambda_rec', type=float, default=args[23]) + parser.add_argument('--lambda_lp', type=float, default=args[24]) + parser.add_argument('--num_dec_layers', type=int, default=args[25]) + parser.add_argument('--use_att', type=bool, default=args[26]) + parser.add_argument('--att_type', type=str, default=args[27]) + parser.add_argument('--att_logit', type=str, default=args[28]) + parser.add_argument('--beta', type=float, default=args[29]) + parser.add_argument('--classifier', type=str, default=args[30]) + parser.add_argument('--clusterer', type=str, default=args[31]) + parser.add_argument('--normalize_adj', type=bool, default=args[32]) + parser.add_argument('--normalize_feats', type=bool, default=args[33]) + flags, unknown = parser.parse_known_args() + return flags + + + +from Ghypeddings.classifiers import * +def perform_task(args,X,y): + if(args.classifier and args.clusterer): + print('You have to 
chose one of them!') + sys.exit(1) + elif(args.classifier): + if(args.classifier == 'svm'): + return SVM(X,y,args.test_prop,args.seed) + elif(args.classifier == 'mlp'): + return mlp(X,y,1,10) + elif(args.classifier == 'decision tree'): + return decision_tree(X,y,args.test_prop,args.seed) + elif(args.classifier == 'random forest'): + return random_forest(X,y,args.test_prop,args.seed) + elif(args.classifier == 'adaboost'): + return adaboost(X,y,args.test_prop,args.seed) + elif(args.classifier == 'knn'): + return KNN(X,y,args.test_prop,args.seed) + elif(args.classifier == 'naive bayes'): + return naive_bayes(X,y,args.test_prop,args.seed) + else: + raise NotImplementedError + elif(args.clusterer): + pass + else: + return 99,99,99,99,99 \ No newline at end of file diff --git a/HGCN/.gitignore b/HGCN/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..ba0430d26c996e7f078385407f959c96c271087c --- /dev/null +++ b/HGCN/.gitignore @@ -0,0 +1 @@ +__pycache__/ \ No newline at end of file diff --git a/HGCN/__init__.py b/HGCN/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..bfa83a015b9025ddbca2b7c1ed543c66fd3af3d9 --- /dev/null +++ b/HGCN/__init__.py @@ -0,0 +1,2 @@ +from __future__ import print_function +from __future__ import division diff --git a/HGCN/hgcn.py b/HGCN/hgcn.py new file mode 100644 index 0000000000000000000000000000000000000000..81773945e3b9dfb04fc84d9fec7acb0b0db62b88 --- /dev/null +++ b/HGCN/hgcn.py @@ -0,0 +1,163 @@ +from __future__ import division +from __future__ import print_function + +import logging +import os +import time + +import numpy as np +import Ghypeddings.HGCN.optimizers as optimizers +import torch +from Ghypeddings.HGCN.models.base_models import NCModel +from Ghypeddings.HGCN.utils.data_utils import process_data +from Ghypeddings.HGCN.utils.train_utils import format_metrics +from Ghypeddings.HGCN.utils.train_utils import create_args +import warnings +warnings.filterwarnings('ignore') + + +class HGCN: + def __init__(self, + adj, + features, + labels, + dim, + c=None, + num_layers=2, + bias=True, + act='leaky_relu', + select_manifold='Hyperboloid', + grad_clip=None, + optimizer='RiemannianAdam', + weight_decay=0.01, + lr=.1, + gamma=0.5, + lr_reduce_freq=200, + cuda=0, + epochs=50, + min_epochs=50, + patience=None, + seed=42, + log_freq=0, + eval_freq=1, + val_prop=.3, + test_prop=0.3, + double_precision=0, + dropout=0.1, + use_att= True, + alpha=0.2, + local_agg = False, + normalize_adj=False, + normalize_feats=True + ): + + self.args = create_args(dim,c,num_layers,bias,act,select_manifold,grad_clip,optimizer,weight_decay,lr,gamma,lr_reduce_freq,cuda,epochs,min_epochs,patience,seed,log_freq,eval_freq,val_prop,test_prop,double_precision,dropout,use_att,alpha,local_agg,normalize_adj,normalize_feats) + self.args.n_nodes = adj.shape[0] + self.args.feat_dim = features.shape[1] + self.args.n_classes = len(np.unique(labels)) + self.data = process_data(self.args,adj,features,labels) + + np.random.seed(self.args.seed) + torch.manual_seed(self.args.seed) + if int(self.args.double_precision): + torch.set_default_dtype(torch.float64) + if int(self.args.cuda) >= 0: + torch.cuda.manual_seed(self.args.seed) + self.args.device = 'cuda:' + str(self.args.cuda) if int(self.args.cuda) >= 0 else 'cpu' + self.args.patience = self.args.epochs if not self.args.patience else int(self.args.patience) + if not self.args.lr_reduce_freq: + self.args.lr_reduce_freq = self.args.epochs + self.model = NCModel(self.args) + self.optimizer = 
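perform_task above hands the log-mapped embeddings X and labels y to a downstream classifier from Ghypeddings.classifiers and returns accuracy, F1, recall, precision and ROC-AUC. A rough stand-in built directly on scikit-learn, shown only to illustrate the shape of that step (an assumption about what those helpers do, not the repo's actual implementations):

import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import (accuracy_score, f1_score, recall_score,
                             precision_score, roc_auc_score)

def classify_embeddings(X, y, test_prop=0.3, seed=42):
    X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=test_prop, random_state=seed)
    clf = RandomForestClassifier(random_state=seed).fit(X_tr, y_tr)
    pred = clf.predict(X_te)
    return (accuracy_score(y_te, pred), f1_score(y_te, pred), recall_score(y_te, pred),
            precision_score(y_te, pred), roc_auc_score(y_te, clf.predict_proba(X_te)[:, 1]))

X = np.random.randn(200, 16)                 # stand-in for log-mapped embeddings
y = (X[:, 0] > 0).astype(int)                # binary labels
print(classify_embeddings(X, y))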
getattr(optimizers, self.args.optimizer)(params=self.model.parameters(), lr=self.args.lr,weight_decay=self.args.weight_decay) + self.lr_scheduler = torch.optim.lr_scheduler.StepLR( + self.optimizer, + step_size=int(self.args.lr_reduce_freq), + gamma=float(self.args.gamma) + ) + if self.args.cuda is not None and int(self.args.cuda) >= 0 : + os.environ['CUDA_VISIBLE_DEVICES'] = str(self.args.cuda) + self.model = self.model.to(self.args.device) + for x, val in self.data.items(): + if torch.is_tensor(self.data[x]): + self.data[x] = self.data[x].to(self.args.device) + self.best_emb = None + + def fit(self): + logging.getLogger().setLevel(logging.INFO) + logging.info(f'Using: {self.args.device}') + logging.info(str(self.model)) + tot_params = sum([np.prod(p.size()) for p in self.model.parameters()]) + logging.info(f"Total number of parameters: {tot_params}") + + t_total = time.time() + counter = 0 + best_val_metrics = self.model.init_metric_dict() + + best_losses = [] + real_losses = [] + + for epoch in range(self.args.epochs): + t = time.time() + self.model.train() + self.optimizer.zero_grad() + embeddings = self.model.encode(self.data['features'], self.data['adj_train_norm']) + train_metrics = self.model.compute_metrics(embeddings, self.data, 'train') + train_metrics['loss'].backward() + if self.args.grad_clip is not None: + max_norm = float(self.args.grad_clip) + all_params = list(self.model.parameters()) + for param in all_params: + torch.nn.utils.clip_grad_norm_(param, max_norm) + self.optimizer.step() + self.lr_scheduler.step() + + real_losses.append(train_metrics['loss'].item()) + if(len(best_losses) == 0): + best_losses.append(real_losses[0]) + elif (best_losses[-1] > real_losses[-1]): + best_losses.append(real_losses[-1]) + else: + best_losses.append(best_losses[-1]) + + if (epoch + 1) % self.args.log_freq == 0: + logging.info(" ".join(['Epoch: {:04d}'.format(epoch + 1), + 'lr: {}'.format(self.lr_scheduler.get_lr()[0]), + format_metrics(train_metrics, 'train'), + 'time: {:.4f}s'.format(time.time() - t) + ])) + + if (epoch + 1) % self.args.eval_freq == 0: + self.model.eval() + embeddings = self.model.encode(self.data['features'], self.data['adj_train_norm']) + val_metrics = self.model.compute_metrics(embeddings, self.data, 'val') + if (epoch + 1) % self.args.log_freq == 0: + logging.info(" ".join(['Epoch: {:04d}'.format(epoch + 1), format_metrics(val_metrics, 'val')])) + if self.model.has_improved(best_val_metrics, val_metrics): + self.best_emb = embeddings + best_val_metrics = val_metrics + counter = 0 + else: + counter += 1 + if counter == self.args.patience and epoch > self.args.min_epochs: + logging.info("Early stopping") + break + + logging.info("Training Finished!") + logging.info("Total time elapsed: {:.4f}s".format(time.time() - t_total)) + return {'real':real_losses,'best':best_losses},best_val_metrics['acc'],best_val_metrics['f1'],best_val_metrics['recall'],best_val_metrics['precision'],best_val_metrics['roc_auc'],time.time() - t_total + + def predict(self): + self.model.eval() + embeddings = self.model.encode(self.data['features'], self.data['adj_train_norm']) + val_metrics = self.model.compute_metrics(embeddings, self.data, 'test') + return val_metrics['loss'].item(),val_metrics['acc'],val_metrics['f1'],val_metrics['recall'],val_metrics['precision'],val_metrics['roc_auc'] + + def save_embeddings(self): + c = self.model.decoder.c + tb_embeddings_euc = self.manifold.proj_tan0(self.model.manifold.logmap0(self.best_emb,c),c) + for_classification_hyp = 
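The fit() loop above keeps two loss curves: real_losses records the raw training loss per epoch, while best_losses is its running minimum, so the returned 'best' curve is monotonically non-increasing. The same bookkeeping in isolation:

real_losses, best_losses = [], []
for loss in [0.9, 0.7, 0.8, 0.5, 0.6]:          # per-epoch training losses
    real_losses.append(loss)
    if not best_losses:
        best_losses.append(loss)
    else:
        best_losses.append(min(best_losses[-1], loss))
print(best_losses)                               # [0.9, 0.7, 0.7, 0.5, 0.5]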
np.hstack((self.best_emb.cpu().detach().numpy(),self.data['labels'].cpu().reshape(-1,1))) + for_classification_euc = np.hstack((tb_embeddings_euc.cpu().detach().numpy(),self.data['labels'].cpu().reshape(-1,1))) + hyp_file_path = os.path.join(os.getcwd(),'hgcn_embeddings_hyp.csv') + euc_file_path = os.path.join(os.getcwd(),'hgcn_embeddings_euc.csv') + np.savetxt(hyp_file_path, for_classification_hyp, delimiter=',') + np.savetxt(euc_file_path, for_classification_euc, delimiter=',') \ No newline at end of file diff --git a/HGCN/layers/__init__.py b/HGCN/layers/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/HGCN/layers/att_layers.py b/HGCN/layers/att_layers.py new file mode 100644 index 0000000000000000000000000000000000000000..8414d8d48dffa4dca79e38ebeacc54f480b4def1 --- /dev/null +++ b/HGCN/layers/att_layers.py @@ -0,0 +1,144 @@ +"""Attention layers (some modules are copied from https://github.com/Diego999/pyGAT.""" +import numpy as np +import torch +import torch.nn as nn +import torch.nn.functional as F + + +class DenseAtt(nn.Module): + def __init__(self, in_features, dropout): + super(DenseAtt, self).__init__() + self.dropout = dropout + self.linear = nn.Linear(2 * in_features, 1, bias=True) + self.in_features = in_features + + def forward (self, x, adj): + n = x.size(0) + # n x 1 x d + x_left = torch.unsqueeze(x, 1) + x_left = x_left.expand(-1, n, -1) + # 1 x n x d + x_right = torch.unsqueeze(x, 0) + x_right = x_right.expand(n, -1, -1) + + x_cat = torch.cat((x_left, x_right), dim=2) + att_adj = self.linear(x_cat).squeeze() + att_adj = F.sigmoid(att_adj) + att_adj = torch.mul(adj.to_dense(), att_adj) + return att_adj + + +class SpecialSpmmFunction(torch.autograd.Function): + """Special function for only sparse region backpropataion layer.""" + + @staticmethod + def forward(ctx, indices, values, shape, b): + assert indices.requires_grad == False + a = torch.sparse_coo_tensor(indices, values, shape) + ctx.save_for_backward(a, b) + ctx.N = shape[0] + return torch.matmul(a, b) + + @staticmethod + def backward(ctx, grad_output): + a, b = ctx.saved_tensors + grad_values = grad_b = None + if ctx.needs_input_grad[1]: + grad_a_dense = grad_output.matmul(b.t()) + edge_idx = a._indices()[0, :] * ctx.N + a._indices()[1, :] + grad_values = grad_a_dense.view(-1)[edge_idx] + if ctx.needs_input_grad[3]: + grad_b = a.t().matmul(grad_output) + return None, grad_values, None, grad_b + + +class SpecialSpmm(nn.Module): + def forward(self, indices, values, shape, b): + return SpecialSpmmFunction.apply(indices, values, shape, b) + + +class SpGraphAttentionLayer(nn.Module): + """ + Sparse version GAT layer, similar to https://arxiv.org/abs/1710.10903 + """ + + def __init__(self, in_features, out_features, dropout, alpha, activation): + super(SpGraphAttentionLayer, self).__init__() + self.in_features = in_features + self.out_features = out_features + self.alpha = alpha + + self.W = nn.Parameter(torch.zeros(size=(in_features, out_features))) + nn.init.xavier_normal_(self.W.data, gain=1.414) + + self.a = nn.Parameter(torch.zeros(size=(1, 2 * out_features))) + nn.init.xavier_normal_(self.a.data, gain=1.414) + + self.dropout = nn.Dropout(dropout) + self.leakyrelu = nn.LeakyReLU(self.alpha) + self.special_spmm = SpecialSpmm() + self.act = activation + + def forward(self, input, adj): + N = input.size()[0] + edge = adj._indices() + + h = torch.mm(input, self.W) + # h: N x out + assert not torch.isnan(h).any() + + # Self-attention on 
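DenseAtt above builds an N x N attention matrix by concatenating every pair of node features, passing each pair through a single linear layer with a sigmoid, and masking the scores with the dense adjacency. A self-contained sketch of that scoring on toy inputs:

import torch
import torch.nn as nn

n, d = 4, 8
x = torch.randn(n, d)                            # tangent-space node features
adj = torch.eye(n)                               # toy dense adjacency mask
linear = nn.Linear(2 * d, 1)

x_left = x.unsqueeze(1).expand(-1, n, -1)        # n x n x d
x_right = x.unsqueeze(0).expand(n, -1, -1)       # n x n x d
att = torch.sigmoid(linear(torch.cat((x_left, x_right), dim=2)).squeeze(-1))
att = att * adj                                  # keep scores only on existing edges
print(att.shape)                                 # torch.Size([4, 4])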
the nodes - Shared attention mechanism + edge_h = torch.cat((h[edge[0, :], :], h[edge[1, :], :]), dim=1).t() + # edge: 2*D x E + + edge_e = torch.exp(-self.leakyrelu(self.a.mm(edge_h).squeeze())) + assert not torch.isnan(edge_e).any() + # edge_e: E + + ones = torch.ones(size=(N, 1)) + if h.is_cuda: + ones = ones.cuda() + e_rowsum = self.special_spmm(edge, edge_e, torch.Size([N, N]), ones) + # e_rowsum: N x 1 + + edge_e = self.dropout(edge_e) + # edge_e: E + + h_prime = self.special_spmm(edge, edge_e, torch.Size([N, N]), h) + assert not torch.isnan(h_prime).any() + # h_prime: N x out + + h_prime = h_prime.div(e_rowsum) + # h_prime: N x out + assert not torch.isnan(h_prime).any() + return self.act(h_prime) + + def __repr__(self): + return self.__class__.__name__ + ' (' + str(self.in_features) + ' -> ' + str(self.out_features) + ')' + + +class GraphAttentionLayer(nn.Module): + def __init__(self, input_dim, output_dim, dropout, activation, alpha, nheads, concat): + """Sparse version of GAT.""" + super(GraphAttentionLayer, self).__init__() + self.dropout = dropout + self.output_dim = output_dim + self.attentions = [SpGraphAttentionLayer(input_dim, + output_dim, + dropout=dropout, + alpha=alpha, + activation=activation) for _ in range(nheads)] + self.concat = concat + for i, attention in enumerate(self.attentions): + self.add_module('attention_{}'.format(i), attention) + + def forward(self, input): + x, adj = input + x = F.dropout(x, self.dropout, training=self.training) + if self.concat: + h = torch.cat([att(x, adj) for att in self.attentions], dim=1) + else: + h_cat = torch.cat([att(x, adj).view((-1, self.output_dim, 1)) for att in self.attentions], dim=2) + h = torch.mean(h_cat, dim=2) + h = F.dropout(h, self.dropout, training=self.training) + return (h, adj) diff --git a/HGCN/layers/hyp_layers.py b/HGCN/layers/hyp_layers.py new file mode 100644 index 0000000000000000000000000000000000000000..0913411c986dbd2b70f2f8e8a5ce216e816cb2be --- /dev/null +++ b/HGCN/layers/hyp_layers.py @@ -0,0 +1,158 @@ +"""Hyperbolic layers.""" +import math + +import torch +import torch.nn as nn +import torch.nn.functional as F +import torch.nn.init as init +from torch.nn.modules.module import Module + +from Ghypeddings.HGCN.layers.att_layers import DenseAtt + + +def get_dim_act_curv(args): + """ + Helper function to get dimension and activation at every layer. + :param args: + :return: + """ + if not args.act: + act = lambda x: x + else: + act = getattr(F, args.act) + acts = [act] * (args.num_layers - 1) + dims = [args.feat_dim] + ([args.dim] * (args.num_layers - 1)) + n_curvatures = args.num_layers - 1 + if args.c is None: + # create list of trainable curvature parameters + curvatures = [nn.Parameter(torch.Tensor([1.])) for _ in range(n_curvatures)] + else: + # fixed curvature + curvatures = [torch.tensor([args.c]) for _ in range(n_curvatures)] + if not args.cuda == -1: + curvatures = [curv.to(args.device) for curv in curvatures] + return dims, acts, curvatures + + +class HyperbolicGraphConvolution(nn.Module): + """ + Hyperbolic graph convolution layer. 
+ """ + + def __init__(self, manifold, in_features, out_features, c_in, c_out, dropout, act, use_bias, use_att, local_agg): + super(HyperbolicGraphConvolution, self).__init__() + self.linear = HypLinear(manifold, in_features, out_features, c_in, dropout, use_bias) + self.agg = HypAgg(manifold, c_in, out_features, dropout, use_att, local_agg) + self.hyp_act = HypAct(manifold, c_in, c_out, act) + + def forward(self, input): + x, adj = input + h = self.linear.forward(x) + h = self.agg.forward(h, adj) + h = self.hyp_act.forward(h) + output = h, adj + return output + + +class HypLinear(nn.Module): + """ + Hyperbolic linear layer. + """ + + def __init__(self, manifold, in_features, out_features, c, dropout, use_bias): + super(HypLinear, self).__init__() + self.manifold = manifold + self.in_features = in_features + self.out_features = out_features + self.c = c + self.dropout = dropout + self.use_bias = use_bias + self.bias = nn.Parameter(torch.Tensor(out_features)) + self.weight = nn.Parameter(torch.Tensor(out_features, in_features)) + self.reset_parameters() + + def reset_parameters(self): + init.xavier_uniform_(self.weight, gain=math.sqrt(2)) + init.constant_(self.bias, 0) + + def forward(self, x): + drop_weight = F.dropout(self.weight, self.dropout, training=self.training) + mv = self.manifold.mobius_matvec(drop_weight, x, self.c) + res = self.manifold.proj(mv, self.c) + if self.use_bias: + bias = self.manifold.proj_tan0(self.bias.view(1, -1), self.c) + hyp_bias = self.manifold.expmap0(bias, self.c) + hyp_bias = self.manifold.proj(hyp_bias, self.c) + res = self.manifold.mobius_add(res, hyp_bias, c=self.c) + res = self.manifold.proj(res, self.c) + return res + + def extra_repr(self): + return 'in_features={}, out_features={}, c={}'.format( + self.in_features, self.out_features, self.c + ) + + +class HypAgg(Module): + """ + Hyperbolic aggregation layer. + """ + + def __init__(self, manifold, c, in_features, dropout, use_att, local_agg): + super(HypAgg, self).__init__() + self.manifold = manifold + self.c = c + + self.in_features = in_features + self.dropout = dropout + self.local_agg = local_agg + self.use_att = use_att + if self.use_att: + self.att = DenseAtt(in_features, dropout) + + def forward(self, x, adj): + x_tangent = self.manifold.logmap0(x, c=self.c) + if self.use_att: + if self.local_agg: + x_local_tangent = [] + for i in range(x.size(0)): + x_local_tangent.append(self.manifold.logmap(x[i], x, c=self.c)) + x_local_tangent = torch.stack(x_local_tangent, dim=0) + adj_att = self.att(x_tangent, adj) + att_rep = adj_att.unsqueeze(-1) * x_local_tangent + support_t = torch.sum(adj_att.unsqueeze(-1) * x_local_tangent, dim=1) + output = self.manifold.proj(self.manifold.expmap(x, support_t, c=self.c), c=self.c) + return output + else: + adj_att = self.att(x_tangent, adj) + support_t = torch.matmul(adj_att, x_tangent) + else: + support_t = torch.spmm(adj, x_tangent) + output = self.manifold.proj(self.manifold.expmap0(support_t, c=self.c), c=self.c) + return output + + def extra_repr(self): + return 'c={}'.format(self.c) + + +class HypAct(Module): + """ + Hyperbolic activation layer. 
+ """ + + def __init__(self, manifold, c_in, c_out, act): + super(HypAct, self).__init__() + self.manifold = manifold + self.c_in = c_in + self.c_out = c_out + self.act = act + + def forward(self, x): + xt = self.act(self.manifold.logmap0(x, c=self.c_in)) + xt = self.manifold.proj_tan0(xt, c=self.c_out) + return self.manifold.proj(self.manifold.expmap0(xt, c=self.c_out), c=self.c_out) + + def extra_repr(self): + return 'c_in={}, c_out={}'.format( + self.c_in, self.c_out + ) diff --git a/HGCN/layers/layers.py b/HGCN/layers/layers.py new file mode 100644 index 0000000000000000000000000000000000000000..c7682cfd33b7a7dcd723558c4722e93a92ff4510 --- /dev/null +++ b/HGCN/layers/layers.py @@ -0,0 +1,26 @@ +"""Euclidean layers.""" +import math + +import torch +import torch.nn as nn +import torch.nn.functional as F +from torch.nn.modules.module import Module +from torch.nn.parameter import Parameter + +class Linear(Module): + """ + Simple Linear layer with dropout. + """ + + def __init__(self, in_features, out_features, dropout, act, use_bias): + super(Linear, self).__init__() + self.dropout = dropout + self.linear = nn.Linear(in_features, out_features, use_bias) + self.act = act + + def forward(self, x): + hidden = self.linear.forward(x) + hidden = F.dropout(hidden, self.dropout, training=self.training) + out = self.act(hidden) + return out + \ No newline at end of file diff --git a/HGCN/manifolds/__init__.py b/HGCN/manifolds/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..bd4b8d81f23de1d855c70804d1e1fb9441cdc960 --- /dev/null +++ b/HGCN/manifolds/__init__.py @@ -0,0 +1,4 @@ +from Ghypeddings.HGCN.manifolds.base import ManifoldParameter +from Ghypeddings.HGCN.manifolds.hyperboloid import Hyperboloid +from Ghypeddings.HGCN.manifolds.euclidean import Euclidean +from Ghypeddings.HGCN.manifolds.poincare import PoincareBall diff --git a/HGCN/manifolds/base.py b/HGCN/manifolds/base.py new file mode 100644 index 0000000000000000000000000000000000000000..925d4a6b2a59dae47a3a8ca33a7dcdcb20e0f08e --- /dev/null +++ b/HGCN/manifolds/base.py @@ -0,0 +1,88 @@ +"""Base manifold.""" + +from torch.nn import Parameter + + +class Manifold(object): + """ + Abstract class to define operations on a manifold. 
+ """ + + def __init__(self): + super().__init__() + self.eps = 10e-8 + + def sqdist(self, p1, p2, c): + """Squared distance between pairs of points.""" + raise NotImplementedError + + def egrad2rgrad(self, p, dp, c): + """Converts Euclidean Gradient to Riemannian Gradients.""" + raise NotImplementedError + + def proj(self, p, c): + """Projects point p on the manifold.""" + raise NotImplementedError + + def proj_tan(self, u, p, c): + """Projects u on the tangent space of p.""" + raise NotImplementedError + + def proj_tan0(self, u, c): + """Projects u on the tangent space of the origin.""" + raise NotImplementedError + + def expmap(self, u, p, c): + """Exponential map of u at point p.""" + raise NotImplementedError + + def logmap(self, p1, p2, c): + """Logarithmic map of point p1 at point p2.""" + raise NotImplementedError + + def expmap0(self, u, c): + """Exponential map of u at the origin.""" + raise NotImplementedError + + def logmap0(self, p, c): + """Logarithmic map of point p at the origin.""" + raise NotImplementedError + + def mobius_add(self, x, y, c, dim=-1): + """Adds points x and y.""" + raise NotImplementedError + + def mobius_matvec(self, m, x, c): + """Performs hyperboic martrix-vector multiplication.""" + raise NotImplementedError + + def init_weights(self, w, c, irange=1e-5): + """Initializes random weigths on the manifold.""" + raise NotImplementedError + + def inner(self, p, c, u, v=None, keepdim=False): + """Inner product for tangent vectors at point x.""" + raise NotImplementedError + + def ptransp(self, x, y, u, c): + """Parallel transport of u from x to y.""" + raise NotImplementedError + + def ptransp0(self, x, u, c): + """Parallel transport of u from the origin to y.""" + raise NotImplementedError + + +class ManifoldParameter(Parameter): + """ + Subclass of torch.nn.Parameter for Riemannian optimization. + """ + def __new__(cls, data, requires_grad, manifold, c): + return Parameter.__new__(cls, data, requires_grad) + + def __init__(self, data, requires_grad, manifold, c): + self.c = c + self.manifold = manifold + + def __repr__(self): + return '{} Parameter containing:\n'.format(self.manifold.name) + super(Parameter, self).__repr__() diff --git a/HGCN/manifolds/euclidean.py b/HGCN/manifolds/euclidean.py new file mode 100644 index 0000000000000000000000000000000000000000..4ec5e38b7ff2c01ef8fc33337d26a08dd9d3cfa9 --- /dev/null +++ b/HGCN/manifolds/euclidean.py @@ -0,0 +1,67 @@ +"""Euclidean manifold.""" + +from Ghypeddings.HGCN.manifolds.base import Manifold + + +class Euclidean(Manifold): + """ + Euclidean Manifold class. + """ + + def __init__(self): + super(Euclidean, self).__init__() + self.name = 'Euclidean' + + def normalize(self, p): + dim = p.size(-1) + p.view(-1, dim).renorm_(2, 0, 1.) 
+ return p + + def sqdist(self, p1, p2, c): + return (p1 - p2).pow(2).sum(dim=-1) + + def egrad2rgrad(self, p, dp, c): + return dp + + def proj(self, p, c): + return p + + def proj_tan(self, u, p, c): + return u + + def proj_tan0(self, u, c): + return u + + def expmap(self, u, p, c): + return p + u + + def logmap(self, p1, p2, c): + return p2 - p1 + + def expmap0(self, u, c): + return u + + def logmap0(self, p, c): + return p + + def mobius_add(self, x, y, c, dim=-1): + return x + y + + def mobius_matvec(self, m, x, c): + mx = x @ m.transpose(-1, -2) + return mx + + def init_weights(self, w, c, irange=1e-5): + w.data.uniform_(-irange, irange) + return w + + def inner(self, p, c, u, v=None, keepdim=False): + if v is None: + v = u + return (u * v).sum(dim=-1, keepdim=keepdim) + + def ptransp(self, x, y, v, c): + return v + + def ptransp0(self, x, v, c): + return x + v diff --git a/HGCN/manifolds/hyperboloid.py b/HGCN/manifolds/hyperboloid.py new file mode 100644 index 0000000000000000000000000000000000000000..d0147001c24330e86264bbb009ff2a6a2c8986e0 --- /dev/null +++ b/HGCN/manifolds/hyperboloid.py @@ -0,0 +1,155 @@ +"""Hyperboloid manifold.""" + +import torch + +from Ghypeddings.HGCN.manifolds.base import Manifold +from Ghypeddings.HGCN.utils.math_utils import arcosh, cosh, sinh + + +class Hyperboloid(Manifold): + """ + Hyperboloid manifold class. + + We use the following convention: -x0^2 + x1^2 + ... + xd^2 = -K + + c = 1 / K is the hyperbolic curvature. + """ + + def __init__(self): + super(Hyperboloid, self).__init__() + self.name = 'Hyperboloid' + self.eps = {torch.float32: 1e-7, torch.float64: 1e-15} + self.min_norm = 1e-15 + self.max_norm = 1e6 + + def minkowski_dot(self, x, y, keepdim=True): + res = torch.sum(x * y, dim=-1) - 2 * x[..., 0] * y[..., 0] + if keepdim: + res = res.view(res.shape + (1,)) + return res + + def minkowski_norm(self, u, keepdim=True): + dot = self.minkowski_dot(u, u, keepdim=keepdim) + return torch.sqrt(torch.clamp(dot, min=self.eps[u.dtype])) + + def sqdist(self, x, y, c): + K = 1. / c + prod = self.minkowski_dot(x, y) + theta = torch.clamp(-prod / K, min=1.0 + self.eps[x.dtype]) + sqdist = K * arcosh(theta) ** 2 + # clamp distance to avoid nans in Fermi-Dirac decoder + return torch.clamp(sqdist, max=50.0) + + def proj(self, x, c): + K = 1. / c + d = x.size(-1) - 1 + y = x.narrow(-1, 1, d) + y_sqnorm = torch.norm(y, p=2, dim=1, keepdim=True) ** 2 + mask = torch.ones_like(x) + mask[:, 0] = 0 + vals = torch.zeros_like(x) + vals[:, 0:1] = torch.sqrt(torch.clamp(K + y_sqnorm, min=self.eps[x.dtype])) + return vals + mask * x + + def proj_tan(self, u, x, c): + K = 1. / c + d = x.size(-1) - 1 + ux = torch.sum(x.narrow(-1, 1, d) * u.narrow(-1, 1, d), dim=1, keepdim=True) + mask = torch.ones_like(u) + mask[:, 0] = 0 + vals = torch.zeros_like(u) + if(len(x.size()) == 1): + x = x.unsqueeze(0) + vals[:, 0:1] = ux / torch.clamp(x[:, 0:1], min=self.eps[x.dtype]) + return vals + mask * u + + def proj_tan0(self, u, c): + narrowed = u.narrow(-1, 0, 1) + vals = torch.zeros_like(u) + vals[:, 0:1] = narrowed + return u - vals + + def expmap(self, u, x, c): + K = 1. / c + sqrtK = K ** 0.5 + normu = self.minkowski_norm(u) + normu = torch.clamp(normu, max=self.max_norm) + theta = normu / sqrtK + theta = torch.clamp(theta, min=self.min_norm) + result = cosh(theta) * x + sinh(theta) * u / theta + return self.proj(result, c) + + def logmap(self, x, y, c): + K = 1. 
/ c + xy = torch.clamp(self.minkowski_dot(x, y) + K, max=-self.eps[x.dtype]) - K + u = y + xy * x * c + normu = self.minkowski_norm(u) + normu = torch.clamp(normu, min=self.min_norm) + dist = self.sqdist(x, y, c) ** 0.5 + result = dist * u / normu + return self.proj_tan(result, x, c) + + def expmap0(self, u, c): + K = 1. / c + sqrtK = K ** 0.5 + d = u.size(-1) - 1 + x = u.narrow(-1, 1, d).view(-1, d) + x_norm = torch.norm(x, p=2, dim=1, keepdim=True) + x_norm = torch.clamp(x_norm, min=self.min_norm) + theta = x_norm / sqrtK + res = torch.ones_like(u) + res[:, 0:1] = sqrtK * cosh(theta) + res[:, 1:] = sqrtK * sinh(theta) * x / x_norm + return self.proj(res, c) + + def logmap0(self, x, c): + K = 1. / c + sqrtK = K ** 0.5 + d = x.size(-1) - 1 + y = x.narrow(-1, 1, d).view(-1, d) + y_norm = torch.norm(y, p=2, dim=1, keepdim=True) + y_norm = torch.clamp(y_norm, min=self.min_norm) + res = torch.zeros_like(x) + theta = torch.clamp(x[:, 0:1] / sqrtK, min=1.0 + self.eps[x.dtype]) + res[:, 1:] = sqrtK * arcosh(theta) * y / y_norm + return res + + def mobius_add(self, x, y, c): + u = self.logmap0(y, c) + v = self.ptransp0(x, u, c) + return self.expmap(v, x, c) + + def mobius_matvec(self, m, x, c): + u = self.logmap0(x, c) + mu = u @ m.transpose(-1, -2) + return self.expmap0(mu, c) + + def ptransp(self, x, y, u, c): + logxy = self.logmap(x, y, c) + logyx = self.logmap(y, x, c) + sqdist = torch.clamp(self.sqdist(x, y, c), min=self.min_norm) + alpha = self.minkowski_dot(logxy, u) / sqdist + res = u - alpha * (logxy + logyx) + return self.proj_tan(res, y, c) + + def ptransp0(self, x, u, c): + K = 1. / c + sqrtK = K ** 0.5 + x0 = x.narrow(-1, 0, 1) + d = x.size(-1) - 1 + y = x.narrow(-1, 1, d) + y_norm = torch.clamp(torch.norm(y, p=2, dim=1, keepdim=True), min=self.min_norm) + y_normalized = y / y_norm + v = torch.ones_like(x) + v[:, 0:1] = - y_norm + v[:, 1:] = (sqrtK - x0) * y_normalized + alpha = torch.sum(y_normalized * u[:, 1:], dim=1, keepdim=True) / sqrtK + res = u - alpha * v + return self.proj_tan(res, x, c) + + def to_poincare(self, x, c): + K = 1. / c + sqrtK = K ** 0.5 + d = x.size(-1) - 1 + return sqrtK * x.narrow(-1, 1, d) / (x[:, 0:1] + sqrtK) + diff --git a/HGCN/manifolds/poincare.py b/HGCN/manifolds/poincare.py new file mode 100644 index 0000000000000000000000000000000000000000..601b5808980bfbb3dcff40c5354f13a1ca37e67c --- /dev/null +++ b/HGCN/manifolds/poincare.py @@ -0,0 +1,145 @@ +"""Poincare ball manifold.""" + +import torch + +from Ghypeddings.HGCN.manifolds.base import Manifold +from Ghypeddings.HGCN.utils.math_utils import artanh, tanh + + +class PoincareBall(Manifold): + """ + PoicareBall Manifold class. + + We use the following convention: x0^2 + x1^2 + ... + xd^2 < 1 / c + + Note that 1/sqrt(c) is the Poincare ball radius. + + """ + + def __init__(self, ): + super(PoincareBall, self).__init__() + self.name = 'PoincareBall' + self.min_norm = 1e-15 + self.eps = {torch.float32: 4e-3, torch.float64: 1e-5} + + def sqdist(self, p1, p2, c): + sqrt_c = c ** 0.5 + dist_c = artanh( + sqrt_c * self.mobius_add(-p1, p2, c, dim=-1).norm(dim=-1, p=2, keepdim=False) + ) + dist = dist_c * 2 / sqrt_c + return dist ** 2 + + def _lambda_x(self, x, c): + x_sqnorm = torch.sum(x.data.pow(2), dim=-1, keepdim=True) + return 2 / (1. 
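# Illustrative sketch, not part of the patch: points produced by the Hyperboloid
# expmap0 defined above satisfy the manifold constraint <x, x>_L = -K = -1/c.
import torch

hyp = Hyperboloid()
c = torch.tensor([1.0])
u = torch.zeros(3, 5)
u[:, 1:] = torch.randn(3, 4)        # tangent vector at the origin (x0 = 0)
x = hyp.expmap0(u, c)
print(hyp.minkowski_dot(x, x))      # ~ -1 for every row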
- c * x_sqnorm).clamp_min(self.min_norm) + + def egrad2rgrad(self, p, dp, c): + lambda_p = self._lambda_x(p, c) + dp /= lambda_p.pow(2) + return dp + + def proj(self, x, c): + norm = torch.clamp_min(x.norm(dim=-1, keepdim=True, p=2), self.min_norm) + maxnorm = (1 - self.eps[x.dtype]) / (c ** 0.5) + cond = norm > maxnorm + projected = x / norm * maxnorm + return torch.where(cond, projected, x) + + def proj_tan(self, u, p, c): + return u + + def proj_tan0(self, u, c): + return u + + def expmap(self, u, p, c): + sqrt_c = c ** 0.5 + u_norm = u.norm(dim=-1, p=2, keepdim=True).clamp_min(self.min_norm) + second_term = ( + tanh(sqrt_c / 2 * self._lambda_x(p, c) * u_norm) + * u + / (sqrt_c * u_norm) + ) + gamma_1 = self.mobius_add(p, second_term, c) + return gamma_1 + + def logmap(self, p1, p2, c): + sub = self.mobius_add(-p1, p2, c) + sub_norm = sub.norm(dim=-1, p=2, keepdim=True).clamp_min(self.min_norm) + lam = self._lambda_x(p1, c) + sqrt_c = c ** 0.5 + return 2 / sqrt_c / lam * artanh(sqrt_c * sub_norm) * sub / sub_norm + + def expmap0(self, u, c): + sqrt_c = c ** 0.5 + u_norm = torch.clamp_min(u.norm(dim=-1, p=2, keepdim=True), self.min_norm) + gamma_1 = tanh(sqrt_c * u_norm) * u / (sqrt_c * u_norm) + return gamma_1 + + def logmap0(self, p, c): + sqrt_c = c ** 0.5 + p_norm = p.norm(dim=-1, p=2, keepdim=True).clamp_min(self.min_norm) + scale = 1. / sqrt_c * artanh(sqrt_c * p_norm) / p_norm + return scale * p + + def mobius_add(self, x, y, c, dim=-1): + x2 = x.pow(2).sum(dim=dim, keepdim=True) + y2 = y.pow(2).sum(dim=dim, keepdim=True) + xy = (x * y).sum(dim=dim, keepdim=True) + num = (1 + 2 * c * xy + c * y2) * x + (1 - c * x2) * y + denom = 1 + 2 * c * xy + c ** 2 * x2 * y2 + return num / denom.clamp_min(self.min_norm) + + def mobius_matvec(self, m, x, c): + sqrt_c = c ** 0.5 + x_norm = x.norm(dim=-1, keepdim=True, p=2).clamp_min(self.min_norm) + mx = x @ m.transpose(-1, -2) + mx_norm = mx.norm(dim=-1, keepdim=True, p=2).clamp_min(self.min_norm) + res_c = tanh(mx_norm / x_norm * artanh(sqrt_c * x_norm)) * mx / (mx_norm * sqrt_c) + cond = (mx == 0).prod(-1, keepdim=True, dtype=torch.uint8) + res_0 = torch.zeros(1, dtype=res_c.dtype, device=res_c.device) + res = torch.where(cond, res_0, res_c) + return res + + def init_weights(self, w, c, irange=1e-5): + w.data.uniform_(-irange, irange) + return w + + def _gyration(self, u, v, w, c, dim: int = -1): + u2 = u.pow(2).sum(dim=dim, keepdim=True) + v2 = v.pow(2).sum(dim=dim, keepdim=True) + uv = (u * v).sum(dim=dim, keepdim=True) + uw = (u * w).sum(dim=dim, keepdim=True) + vw = (v * w).sum(dim=dim, keepdim=True) + c2 = c ** 2 + a = -c2 * uw * v2 + c * vw + 2 * c2 * uv * vw + b = -c2 * vw * u2 - c * uw + d = 1 + 2 * c * uv + c2 * u2 * v2 + return w + 2 * (a * u + b * v) / d.clamp_min(self.min_norm) + + def inner(self, x, c, u, v=None, keepdim=False): + if v is None: + v = u + lambda_x = self._lambda_x(x, c) + return lambda_x ** 2 * (u * v).sum(dim=-1, keepdim=keepdim) + + def ptransp(self, x, y, u, c): + lambda_x = self._lambda_x(x, c) + lambda_y = self._lambda_x(y, c) + return self._gyration(y, -x, u, c) * lambda_x / lambda_y + + def ptransp_(self, x, y, u, c): + lambda_x = self._lambda_x(x, c) + lambda_y = self._lambda_x(y, c) + return self._gyration(y, -x, u, c) * lambda_x / lambda_y + + def ptransp0(self, x, u, c): + lambda_x = self._lambda_x(x, c) + return 2 * u / lambda_x.clamp_min(self.min_norm) + + def to_hyperboloid(self, x, c): + K = 1./ c + sqrtK = K ** 0.5 + sqnorm = torch.norm(x, p=2, dim=1, keepdim=True) ** 2 + return sqrtK * 
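# Illustrative sketch, not part of the patch: distances in the Poincare ball
# grow without bound as a point approaches the boundary at radius 1/sqrt(c).
import torch

ball = PoincareBall()
c = torch.tensor(1.0)
origin = torch.zeros(1, 2)
near_boundary = torch.tensor([[0.99, 0.0]])
print(ball.sqdist(origin, near_boundary, c).sqrt())   # ~ 5.29, already large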
torch.cat([K + sqnorm, 2 * sqrtK * x], dim=1) / (K - sqnorm) + diff --git a/HGCN/models/__init__.py b/HGCN/models/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/HGCN/models/base_models.py b/HGCN/models/base_models.py new file mode 100644 index 0000000000000000000000000000000000000000..e9acf7a144744da2739339a436c9212629053479 --- /dev/null +++ b/HGCN/models/base_models.py @@ -0,0 +1,85 @@ +"""Base model class.""" + +import numpy as np +from sklearn.metrics import roc_auc_score, average_precision_score +import torch +import torch.nn as nn +import torch.nn.functional as F + +import Ghypeddings.HGCN.manifolds as manifolds +import Ghypeddings.HGCN.models.encoders as encoders +from Ghypeddings.HGCN.models.decoders import model2decoder +from Ghypeddings.HGCN.utils.eval_utils import acc_f1 + + +class BaseModel(nn.Module): + """ + Base model for graph embedding tasks. + """ + + def __init__(self, args): + super(BaseModel, self).__init__() + self.manifold_name = args.select_manifold + if args.c is not None: + self.c = torch.tensor([args.c]) + if not args.cuda == -1: + self.c = self.c.to(args.device) + else: + self.c = nn.Parameter(torch.Tensor([1.])) + self.manifold = getattr(manifolds, self.manifold_name)() + if self.manifold.name == 'Hyperboloid': + args.feat_dim = args.feat_dim + 1 + self.nnodes = args.n_nodes + self.encoder = getattr(encoders, 'HGCN')(self.c, args) + + def encode(self, x, adj): + if self.manifold.name == 'Hyperboloid': + o = torch.zeros_like(x) + x = torch.cat([o[:, 0:1], x], dim=1) + h = self.encoder.encode(x, adj) + return h + + def compute_metrics(self, embeddings, data, split): + raise NotImplementedError + + def init_metric_dict(self): + raise NotImplementedError + + def has_improved(self, m1, m2): + raise NotImplementedError + + +class NCModel(BaseModel): + """ + Base model for node classification task. + """ + + def __init__(self, args): + super(NCModel, self).__init__(args) + self.decoder = model2decoder(self.c, args) + if args.n_classes > 2: + self.f1_average = 'micro' + else: + self.f1_average = 'binary' + + self.weights = torch.Tensor([1.] * args.n_classes) + if not args.cuda == -1: + self.weights = self.weights.to(args.device) + + def decode(self, h, adj, idx): + output = self.decoder.decode(h, adj) + return F.log_softmax(output[idx], dim=1) + + def compute_metrics(self, embeddings, data, split): + idx = data[f'idx_{split}'] + output = self.decode(embeddings, data['adj_train_norm'], idx) + loss = F.nll_loss(output, data['labels'][idx], self.weights) + acc, f1,recall,precision,roc_auc = acc_f1(output, data['labels'][idx], average=self.f1_average) + metrics = {'loss': loss, 'acc': acc, 'f1': f1,'recall':recall,'precision':precision,'roc_auc':roc_auc} + return metrics + + def init_metric_dict(self): + return {'acc': -1, 'f1': -1} + + def has_improved(self, m1, m2): + return m1["f1"] < m2["f1"] diff --git a/HGCN/models/decoders.py b/HGCN/models/decoders.py new file mode 100644 index 0000000000000000000000000000000000000000..f20046bcaca78d98d08a8a94da17f6881347b0d2 --- /dev/null +++ b/HGCN/models/decoders.py @@ -0,0 +1,52 @@ +"""Graph decoders.""" +import Ghypeddings.HGCN.manifolds as manifolds +import torch.nn as nn +import torch.nn.functional as F + +from Ghypeddings.HGCN.layers.layers import Linear + + +class Decoder(nn.Module): + """ + Decoder abstract class for node classification tasks. 
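# Illustrative sketch, not part of the patch: with the Hyperboloid manifold the
# encoder input gains one leading zero column (hence args.feat_dim + 1 above),
# mirroring BaseModel.encode.
import torch

x = torch.randn(4, 3)                    # raw node features
o = torch.zeros_like(x)
x_in = torch.cat([o[:, 0:1], x], dim=1)  # shape 4 x 4, first column is zero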
+ """ + + def __init__(self, c): + super(Decoder, self).__init__() + self.c = c + + def decode(self, x, adj): + if self.decode_adj: + input = (x, adj) + probs, _ = self.cls.forward(input) + else: + probs = self.cls.forward(x) + return probs + + +class LinearDecoder(Decoder): + """ + MLP Decoder for Hyperbolic/Euclidean node classification models. + """ + + def __init__(self, c, args): + super(LinearDecoder, self).__init__(c) + self.manifold = getattr(manifolds, args.select_manifold)() + self.input_dim = args.dim + self.output_dim = args.n_classes + self.bias = args.bias + self.cls = Linear(self.input_dim, self.output_dim, args.dropout, lambda x: x, self.bias) + self.decode_adj = False + + def decode(self, x, adj): + h = self.manifold.proj_tan0(self.manifold.logmap0(x, c=self.c), c=self.c) + return super(LinearDecoder, self).decode(h, adj) + + def extra_repr(self): + return 'in_features={}, out_features={}, bias={}, c={}'.format( + self.input_dim, self.output_dim, self.bias, self.c + ) + + +model2decoder = LinearDecoder + diff --git a/HGCN/models/encoders.py b/HGCN/models/encoders.py new file mode 100644 index 0000000000000000000000000000000000000000..344b8dd35f7d76f0783daeddaa6243beb5393680 --- /dev/null +++ b/HGCN/models/encoders.py @@ -0,0 +1,58 @@ +"""Graph encoders.""" + +import numpy as np +import torch +import torch.nn as nn +import torch.nn.functional as F + +import Ghypeddings.HGCN.manifolds as manifolds +import Ghypeddings.HGCN.layers.hyp_layers as hyp_layers +import Ghypeddings.HGCN.utils.math_utils as pmath + + +class Encoder(nn.Module): + """ + Encoder abstract class. + """ + + def __init__(self, c): + super(Encoder, self).__init__() + self.c = c + + def encode(self, x, adj): + if self.encode_graph: + input = (x, adj) + output, _ = self.layers.forward(input) + else: + output = self.layers.forward(x) + return output + +class HGCN(Encoder): + """ + Hyperbolic-GCN. 
+ """ + + def __init__(self, c, args): + super(HGCN, self).__init__(c) + self.manifold = getattr(manifolds, args.select_manifold)() + assert args.num_layers > 1 + dims, acts, self.curvatures = hyp_layers.get_dim_act_curv(args) + self.curvatures.append(self.c) + hgc_layers = [] + for i in range(len(dims) - 1): + c_in, c_out = self.curvatures[i], self.curvatures[i + 1] + in_dim, out_dim = dims[i], dims[i + 1] + act = acts[i] + hgc_layers.append( + hyp_layers.HyperbolicGraphConvolution( + self.manifold, in_dim, out_dim, c_in, c_out, args.dropout, act, args.bias, args.use_att, args.local_agg + ) + ) + self.layers = nn.Sequential(*hgc_layers) + self.encode_graph = True + + def encode(self, x, adj): + x_tan = self.manifold.proj_tan0(x, self.curvatures[0]) + x_hyp = self.manifold.expmap0(x_tan, c=self.curvatures[0]) + x_hyp = self.manifold.proj(x_hyp, c=self.curvatures[0]) + return super(HGCN, self).encode(x_hyp, adj) diff --git a/HGCN/optimizers/__init__.py b/HGCN/optimizers/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..411e319d0d10a157da5a9e05a85f468983dcb4be --- /dev/null +++ b/HGCN/optimizers/__init__.py @@ -0,0 +1,2 @@ +from torch.optim import Adam +from Ghypeddings.HGCN.optimizers.radam import RiemannianAdam diff --git a/HGCN/optimizers/radam.py b/HGCN/optimizers/radam.py new file mode 100644 index 0000000000000000000000000000000000000000..c7033935d2acb22bb55679828d15564b17896e34 --- /dev/null +++ b/HGCN/optimizers/radam.py @@ -0,0 +1,172 @@ +"""Riemannian adam optimizer geoopt implementation (https://github.com/geoopt/).""" +import torch.optim +from Ghypeddings.HGCN.manifolds import Euclidean, ManifoldParameter + +_default_manifold = Euclidean() + +class OptimMixin(object): + def __init__(self, *args, stabilize=None, **kwargs): + self._stabilize = stabilize + super().__init__(*args, **kwargs) + + def stabilize_group(self, group): + pass + + def stabilize(self): + """Stabilize parameters if they are off-manifold due to numerical reasons + """ + for group in self.param_groups: + self.stabilize_group(group) + + +def copy_or_set_(dest, source): + """ + A workaround to respect strides of :code:`dest` when copying :code:`source` + (https://github.com/geoopt/geoopt/issues/70) + Parameters + ---------- + dest : torch.Tensor + Destination tensor where to store new data + source : torch.Tensor + Source data to put in the new tensor + Returns + ------- + dest + torch.Tensor, modified inplace + """ + if dest.stride() != source.stride(): + return dest.copy_(source) + else: + return dest.set_(source) + + +class RiemannianAdam(OptimMixin, torch.optim.Adam): + r"""Riemannian Adam with the same API as :class:`torch.optim.Adam` + Parameters + ---------- + params : iterable + iterable of parameters to optimize or dicts defining + parameter groups + lr : float (optional) + learning rate (default: 1e-3) + betas : Tuple[float, float] (optional) + coefficients used for computing + running averages of gradient and its square (default: (0.9, 0.999)) + eps : float (optional) + term added to the denominator to improve + numerical stability (default: 1e-8) + weight_decay : float (optional) + weight decay (L2 penalty) (default: 0) + amsgrad : bool (optional) + whether to use the AMSGrad variant of this + algorithm from the paper `On the Convergence of Adam and Beyond`_ + (default: False) + Other Parameters + ---------------- + stabilize : int + Stabilize parameters if they are off-manifold due to numerical + reasons every ``stabilize`` steps (default: ``None`` -- no stabilize) + .. 
_On the Convergence of Adam and Beyond: + https://openreview.net/forum?id=ryQu7f-RZ + """ + + def step(self, closure=None): + """Performs a single optimization step. + Arguments + --------- + closure : callable (optional) + A closure that reevaluates the model + and returns the loss. + """ + loss = None + if closure is not None: + loss = closure() + with torch.no_grad(): + for group in self.param_groups: + if "step" not in group: + group["step"] = 0 + betas = group["betas"] + weight_decay = group["weight_decay"] + eps = group["eps"] + learning_rate = group["lr"] + amsgrad = group["amsgrad"] + for point in group["params"]: + grad = point.grad + if grad is None: + continue + if isinstance(point, (ManifoldParameter)): + manifold = point.manifold + c = point.c + else: + manifold = _default_manifold + c = None + if grad.is_sparse: + raise RuntimeError( + "Riemannian Adam does not support sparse gradients yet (PR is welcome)" + ) + + state = self.state[point] + + # State initialization + if len(state) == 0: + state["step"] = 0 + # Exponential moving average of gradient values + state["exp_avg"] = torch.zeros_like(point) + # Exponential moving average of squared gradient values + state["exp_avg_sq"] = torch.zeros_like(point) + if amsgrad: + # Maintains max of all exp. moving avg. of sq. grad. values + state["max_exp_avg_sq"] = torch.zeros_like(point) + # make local variables for easy access + exp_avg = state["exp_avg"] + exp_avg_sq = state["exp_avg_sq"] + # actual step + grad.add_(weight_decay, point) + grad = manifold.egrad2rgrad(point, grad, c) + exp_avg.mul_(betas[0]).add_(1 - betas[0], grad) + exp_avg_sq.mul_(betas[1]).add_( + 1 - betas[1], manifold.inner(point, c, grad, keepdim=True) + ) + if amsgrad: + max_exp_avg_sq = state["max_exp_avg_sq"] + # Maintains the maximum of all 2nd moment running avg. till now + torch.max(max_exp_avg_sq, exp_avg_sq, out=max_exp_avg_sq) + # Use the max. for normalizing running avg. 
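# Illustrative usage sketch, not part of the patch: RiemannianAdam is used like
# torch.optim.Adam; plain tensors get the ordinary Adam update, while
# ManifoldParameter instances are updated via egrad2rgrad, expmap and parallel
# transport on their own manifold. The toy model below is a placeholder.
import torch
import torch.nn as nn

model = nn.Linear(4, 2)
opt = RiemannianAdam(model.parameters(), lr=1e-3, stabilize=10)
loss = model(torch.randn(8, 4)).pow(2).mean()
loss.backward()
opt.step()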
of gradient + denom = max_exp_avg_sq.sqrt().add_(eps) + else: + denom = exp_avg_sq.sqrt().add_(eps) + group["step"] += 1 + bias_correction1 = 1 - betas[0] ** group["step"] + bias_correction2 = 1 - betas[1] ** group["step"] + step_size = ( + learning_rate * bias_correction2 ** 0.5 / bias_correction1 + ) + + # copy the state, we need it for retraction + # get the direction for ascend + direction = exp_avg / denom + # transport the exponential averaging to the new point + new_point = manifold.proj(manifold.expmap(-step_size * direction, point, c), c) + exp_avg_new = manifold.ptransp(point, new_point, exp_avg, c) + # use copy only for user facing point + copy_or_set_(point, new_point) + exp_avg.set_(exp_avg_new) + + group["step"] += 1 + if self._stabilize is not None and group["step"] % self._stabilize == 0: + self.stabilize_group(group) + return loss + + @torch.no_grad() + def stabilize_group(self, group): + for p in group["params"]: + if not isinstance(p, ManifoldParameter): + continue + state = self.state[p] + if not state: # due to None grads + continue + manifold = p.manifold + c = p.c + exp_avg = state["exp_avg"] + copy_or_set_(p, manifold.proj(p, c)) + exp_avg.set_(manifold.proj_tan(exp_avg, u, c)) diff --git a/HGCN/utils/__init__.py b/HGCN/utils/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/HGCN/utils/data_utils.py b/HGCN/utils/data_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..b2a01103d7f5b862a001bb60af306db49eba9c3e --- /dev/null +++ b/HGCN/utils/data_utils.py @@ -0,0 +1,89 @@ +"""Data utils functions for pre-processing and data loading.""" +import os +import pickle as pkl +import sys + +import networkx as nx +import numpy as np +import scipy.sparse as sp +import torch +import pandas as pd + +from sklearn.preprocessing import MinMaxScaler + + +def process_data(args, adj,features,labels): + data = process_data_nc(args,adj,features,labels) + data['adj_train_norm'], data['features'] = process( + data['adj_train'], data['features'],args.normalize_adj,args.normalize_feats + ) + return data + + +def process(adj, features, normalize_adj, normalize_feats): + if sp.isspmatrix(features): + features = np.array(features.todense()) + if normalize_feats: + features = normalize(features) + features = torch.Tensor(features) + if normalize_adj: + adj = normalize(adj + sp.eye(adj.shape[0])) + adj = sparse_mx_to_torch_sparse_tensor(adj) + return adj, features + + +def normalize(mx): + """Row-normalize sparse matrix.""" + rowsum = np.array(mx.sum(1)) + r_inv = np.power(rowsum, -1).flatten() + r_inv[np.isinf(r_inv)] = 0. 
+ r_mat_inv = sp.diags(r_inv) + mx = r_mat_inv.dot(mx) + return mx + + +def sparse_mx_to_torch_sparse_tensor(sparse_mx): + """Convert a scipy sparse matrix to a torch sparse tensor.""" + sparse_mx = sparse_mx.tocoo() + indices = torch.from_numpy( + np.vstack((sparse_mx.row, sparse_mx.col)).astype(np.int64) + ) + values = torch.Tensor(sparse_mx.data) + shape = torch.Size(sparse_mx.shape) + return torch.sparse.FloatTensor(indices, values, shape) + + +def augment(adj, features, normalize_feats=True): + deg = np.squeeze(np.sum(adj, axis=0).astype(int)) + deg[deg > 5] = 5 + deg_onehot = torch.tensor(np.eye(6)[deg], dtype=torch.float).squeeze() + const_f = torch.ones(features.size(0), 1) + features = torch.cat((features, deg_onehot, const_f), dim=1) + return features + + +def split_data(labels, val_prop, test_prop, seed): + np.random.seed(seed) + nb_nodes = labels.shape[0] + all_idx = np.arange(nb_nodes) + pos_idx = labels.nonzero()[0] + neg_idx = (1. - labels).nonzero()[0] + np.random.shuffle(pos_idx) + np.random.shuffle(neg_idx) + pos_idx = pos_idx.tolist() + neg_idx = neg_idx.tolist() + nb_pos_neg = min(len(pos_idx), len(neg_idx)) + nb_val = round(val_prop * nb_pos_neg) + nb_test = round(test_prop * nb_pos_neg) + idx_val_pos, idx_test_pos, idx_train_pos = pos_idx[:nb_val], pos_idx[nb_val:nb_val + nb_test], pos_idx[ + nb_val + nb_test:] + idx_val_neg, idx_test_neg, idx_train_neg = neg_idx[:nb_val], neg_idx[nb_val:nb_val + nb_test], neg_idx[ + nb_val + nb_test:] + return idx_val_pos + idx_val_neg, idx_test_pos + idx_test_neg, idx_train_pos + idx_train_neg + + +def process_data_nc(args,adj,features,labels): + idx_val, idx_test, idx_train = split_data(labels, args.val_prop, args.test_prop, seed=args.seed) + labels = torch.LongTensor(labels) + data = {'adj_train': sp.csr_matrix(adj), 'features': features, 'labels': labels, 'idx_train': idx_train, 'idx_val': idx_val, 'idx_test': idx_test} + return data diff --git a/HGCN/utils/eval_utils.py b/HGCN/utils/eval_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..840a48bf45cc08944925411885698019442f5870 --- /dev/null +++ b/HGCN/utils/eval_utils.py @@ -0,0 +1,14 @@ +from sklearn.metrics import accuracy_score, f1_score, recall_score,precision_score,roc_auc_score + +def acc_f1(output, labels, average='binary'): + preds = output.max(1)[1].type_as(labels) + if preds.is_cuda: + preds = preds.cpu() + labels = labels.cpu() + accuracy = accuracy_score(labels, preds) + recall = recall_score(labels, preds) + precision = precision_score(labels, preds) + roc_auc = roc_auc_score(labels,preds ) + f1 = f1_score(labels, preds, average=average) + return accuracy, f1 , recall,precision,roc_auc + diff --git a/HGCN/utils/math_utils.py b/HGCN/utils/math_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..9cf278ed7ce59b97f4793f5def3218f3e830d473 --- /dev/null +++ b/HGCN/utils/math_utils.py @@ -0,0 +1,69 @@ +"""Math utils functions.""" + +import torch + + +def cosh(x, clamp=15): + return x.clamp(-clamp, clamp).cosh() + + +def sinh(x, clamp=15): + return x.clamp(-clamp, clamp).sinh() + + +def tanh(x, clamp=15): + return x.clamp(-clamp, clamp).tanh() + + +def arcosh(x): + return Arcosh.apply(x) + + +def arsinh(x): + return Arsinh.apply(x) + + +def artanh(x): + return Artanh.apply(x) + + +class Artanh(torch.autograd.Function): + @staticmethod + def forward(ctx, x): + x = x.clamp(-1 + 1e-15, 1 - 1e-15) + ctx.save_for_backward(x) + z = x.double() + return (torch.log_(1 + z).sub_(torch.log_(1 - z))).mul_(0.5).to(x.dtype) + + 
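# Illustrative sketch, not part of the patch: the clamped tanh/artanh helpers
# defined above are numerically stable inverses on (-1, 1).
import torch

x = torch.tensor([0.0, 0.5, 0.999])
print(torch.allclose(tanh(artanh(x)), x, atol=1e-4))   # True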
@staticmethod + def backward(ctx, grad_output): + input, = ctx.saved_tensors + return grad_output / (1 - input ** 2) + + +class Arsinh(torch.autograd.Function): + @staticmethod + def forward(ctx, x): + ctx.save_for_backward(x) + z = x.double() + return (z + torch.sqrt_(1 + z.pow(2))).clamp_min_(1e-15).log_().to(x.dtype) + + @staticmethod + def backward(ctx, grad_output): + input, = ctx.saved_tensors + return grad_output / (1 + input ** 2) ** 0.5 + + +class Arcosh(torch.autograd.Function): + @staticmethod + def forward(ctx, x): + x = x.clamp(min=1.0 + 1e-15) + ctx.save_for_backward(x) + z = x.double() + return (z + torch.sqrt_(z.pow(2) - 1)).clamp_min_(1e-15).log_().to(x.dtype) + + @staticmethod + def backward(ctx, grad_output): + input, = ctx.saved_tensors + return grad_output / (input ** 2 - 1) ** 0.5 + diff --git a/HGCN/utils/train_utils.py b/HGCN/utils/train_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..6e4385c5c977b1ea47ee9ffb6afe1d7f013c7fcc --- /dev/null +++ b/HGCN/utils/train_utils.py @@ -0,0 +1,45 @@ +import os + +import numpy as np +import torch +import torch.nn.functional as F +import torch.nn.modules.loss +import argparse + +def format_metrics(metrics, split): + """Format metric in metric dict for logging.""" + return " ".join( + ["{}_{}: {:.4f}".format(split, metric_name, metric_val) for metric_name, metric_val in metrics.items()]) + +def create_args(*args): + parser = argparse.ArgumentParser() + parser.add_argument('--dim', type=int, default=args[0]) + parser.add_argument('--c', type=int, default=args[1]) + parser.add_argument('--num_layers', type=int, default=args[2]) + parser.add_argument('--bias', type=bool, default=args[3]) + parser.add_argument('--act', type=str, default=args[4]) + parser.add_argument('--select_manifold', type=str, default=args[5]) + parser.add_argument('--grad_clip', type=float, default=args[6]) + parser.add_argument('--optimizer', type=str, default=args[7]) + parser.add_argument('--weight_decay', type=float, default=args[8]) + parser.add_argument('--lr', type=float, default=args[9]) + parser.add_argument('--gamma', type=float, default=args[10]) + parser.add_argument('--lr_reduce_freq', type=int, default=args[11]) + parser.add_argument('--cuda', type=int, default=args[12]) + parser.add_argument('--epochs', type=int, default=args[13]) + parser.add_argument('--min_epochs', type=int, default=args[14]) + parser.add_argument('--patience', type=int, default=args[15]) + parser.add_argument('--seed', type=int, default=args[16]) + parser.add_argument('--log_freq', type=int, default=args[17]) + parser.add_argument('--eval_freq', type=int, default=args[18]) + parser.add_argument('--val_prop', type=float, default=args[19]) + parser.add_argument('--test_prop', type=float, default=args[20]) + parser.add_argument('--double_precision', type=int, default=args[21]) + parser.add_argument('--dropout', type=float, default=args[22]) + parser.add_argument('--use_att', type=bool, default=args[23]) + parser.add_argument('--alpha', type=float, default=args[24]) + parser.add_argument('--local_agg', type=bool, default=args[25]) + parser.add_argument('--normalize_adj', type=bool, default=args[26]) + parser.add_argument('--normalize_feats', type=bool, default=args[27]) + flags, unknown = parser.parse_known_args() + return flags \ No newline at end of file diff --git a/HGNN/.gitignore b/HGNN/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..ba0430d26c996e7f078385407f959c96c271087c --- /dev/null +++ b/HGNN/.gitignore @@ -0,0 
+1 @@ +__pycache__/ \ No newline at end of file diff --git a/HGNN/__init__.py b/HGNN/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/HGNN/dataset/NodeClassificationDataset.py b/HGNN/dataset/NodeClassificationDataset.py new file mode 100644 index 0000000000000000000000000000000000000000..ef6a3c813c067a1564277d5509081dbe91489828 --- /dev/null +++ b/HGNN/dataset/NodeClassificationDataset.py @@ -0,0 +1,160 @@ +import numpy as np +import pickle as pkl +import networkx as nx +import scipy.sparse as sp +from scipy.sparse import save_npz, load_npz +from scipy.sparse.linalg import eigsh +import sys +from torch.utils.data import Dataset, DataLoader +from Ghypeddings.HGNN.utils import * +import pandas as pd +from sklearn.preprocessing import MinMaxScaler + +def parse_index_file(filename): + """Parse index file.""" + index = [] + for line in open(filename): + index.append(int(line.strip())) + return index + +def sample_mask(idx, l): + """Create mask.""" + mask = np.zeros(l) + mask[idx] = 1 + return np.array(mask, dtype=np.bool_) + +def preprocess_features(features): + """Row-normalize feature matrix and convert to tuple representation""" + rowsum = np.array(features.sum(1)).astype(float) + r_inv = np.power(rowsum, -1).flatten() + r_inv[np.isinf(r_inv)] = 0 + r_mat_inv = sp.diags(r_inv) + features = r_mat_inv.dot(features) + return features + +class NodeClassificationDataset(Dataset): + """ + Extend the Dataset class for graph datasets + """ + def __init__(self, args, logger,adj,features,labels): + self.args = args + self.process_data(adj,features,labels) + + def _filling_adjacency_numpy(self,data, N, source_ip_index, destination_ip_index): + try: + adjacency = np.zeros((N,N), dtype=bool) + except Exception as e: + print(f"An error occurred: {e}") + + source_ips = data[:, source_ip_index] + destination_ips = data[:, destination_ip_index] + mask = ((source_ips[:, np.newaxis] == source_ips) | (source_ips[:, np.newaxis] == destination_ips) | (destination_ips[:, np.newaxis] == source_ips) | (destination_ips[:, np.newaxis] == destination_ips)) + adjacency[mask] = True + adjacency = adjacency - np.eye(N) + return adjacency + + def compact_adjacency(self,adj): + max_neighbors = int(np.max(np.sum(adj, axis=1))) + shape = (adj.shape[0],max_neighbors) + c_adj = np.zeros(shape) + c_adj[:,:] = -1 + indices , neighbors = np.where(adj == 1) + + j=-1 + l = indices[0] + for i,k in zip(indices,neighbors): + if i == l: + j+=1 + else: + l=i + j=0 + c_adj[i,j]=int(k) + return c_adj + + def compact_weight_matrix(self,c_adj): + return np.where(c_adj >= 0, 1, 0) + + def one_hot_labels(self,y): + array = np.zeros((len(y),2)) + for i,j in zip(range(len(y)),y): + if j: + array[i,1]=1 + else: + array[i,0]=1 + + return array + + def split_data(self,labels, test_prop,val_prop): + np.random.seed(self.args.seed) + #nb_nodes = labels.shape[0] + #all_idx = np.arange(nb_nodes) + # pos_idx = labels.nonzero()[0] + # neg_idx = (1. 
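# Editorial note, not part of the patch: compact_adjacency above stores, per
# node, the indices of its neighbours padded with -1, and compact_weight_matrix
# turns that padding into a 0/1 mask. For example:
#   adj = [[0, 1, 1],       compact_adjacency(adj)     -> [[1,  2],
#          [1, 0, 0],                                       [0, -1],
#          [1, 0, 0]]                                       [0, -1]]
#   compact_weight_matrix(c_adj)                       -> [[1, 1], [1, 0], [1, 0]]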
- labels).nonzero()[0] + pos_idx = labels[:,1].nonzero()[0] + neg_idx = labels[:,0].nonzero()[0] + np.random.shuffle(pos_idx) + np.random.shuffle(neg_idx) + pos_idx = pos_idx.tolist() + neg_idx = neg_idx.tolist() + nb_pos_neg = min(len(pos_idx), len(neg_idx)) + nb_val = round(val_prop * nb_pos_neg) + nb_test = round(test_prop * nb_pos_neg) + idx_val_pos, idx_test_pos, idx_train_pos = pos_idx[:nb_val], pos_idx[nb_val:nb_val + nb_test], pos_idx[ + nb_val + nb_test:] + idx_val_neg, idx_test_neg, idx_train_neg = neg_idx[:nb_val], neg_idx[nb_val:nb_val + nb_test], neg_idx[ + nb_val + nb_test:] + return idx_val_pos + idx_val_neg, idx_test_pos + idx_test_neg, idx_train_pos + idx_train_neg + + def process_data(self, adj,features,labels): + + adj = self.compact_adjacency(adj) + weight = self.compact_weight_matrix(adj) + adj[adj == -1] = 0 + + labels = self.one_hot_labels(labels) + + idx_test, idx_train, idx_val = self.split_data(labels,self.args.test_prop,self.args.val_prop) + + train_mask = sample_mask(idx_train, labels.shape[0]) + val_mask = sample_mask(idx_val, labels.shape[0]) + test_mask = sample_mask(idx_test, labels.shape[0]) + + y_train = np.zeros(labels.shape) + y_val = np.zeros(labels.shape) + y_test = np.zeros(labels.shape) + y_train[train_mask, :] = labels[train_mask, :] + y_val[val_mask, :] = labels[val_mask, :] + y_test[test_mask, :] = labels[test_mask, :] + + self.adj = adj + self.weight = weight + + self.features = preprocess_features(features) if self.args.normalize_feats else features + self.features = features + assert np.isnan(features).any()== False + self.y_train = y_train + self.y_val = y_val + self.y_test = y_test + self.train_mask = train_mask.astype(int) + self.val_mask = val_mask.astype(int) + self.test_mask = test_mask.astype(int) + self.args.node_num = self.features.shape[0] + self.args.input_dim = self.features.shape[1] + self.args.num_class = y_train.shape[1] + + + def __len__(self): + return 1 + + def __getitem__(self, idx): + return { + 'adj': self.adj, + 'weight': self.weight, + 'features': self.features, + 'y_train' : self.y_train, + 'y_val' : self.y_val, + 'y_test' : self.y_test, + 'train_mask' : self.train_mask, + 'val_mask' : self.val_mask, + 'test_mask' : self.test_mask, + } diff --git a/HGNN/dataset/__init__.py b/HGNN/dataset/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/HGNN/gnn/RiemannianGNN.py b/HGNN/gnn/RiemannianGNN.py new file mode 100644 index 0000000000000000000000000000000000000000..67eecfcc3be8a8b836fa66f5a7f784fc792cd85b --- /dev/null +++ b/HGNN/gnn/RiemannianGNN.py @@ -0,0 +1,151 @@ +import torch as th +import torch.nn as nn +import torch.nn.functional as F +from Ghypeddings.HGNN.utils import * + +class RiemannianGNN(nn.Module): + + def __init__(self, args, logger, manifold): + super(RiemannianGNN, self).__init__() + self.args = args + self.logger = logger + self.manifold = manifold + self.set_up_params() + self.activation = get_activation(self.args) + self.dropout = nn.Dropout(self.args.dropout) + + def create_params(self): + """ + create the GNN params for a specific msg type + """ + msg_weight = [] + layer = self.args.num_layers if not self.args.tie_weight else 1 + for _ in range(layer): + # weight in euclidean space + if self.args.select_manifold == 'poincare': + M = th.zeros([self.args.dim, self.args.dim], requires_grad=True) + elif self.args.select_manifold == 'lorentz': # one degree of freedom less + M = th.zeros([self.args.dim, self.args.dim - 1], 
requires_grad=True) + init_weight(M, self.args.proj_init) + M = nn.Parameter(M) + self.args.eucl_vars.append(M) + msg_weight.append(M) + return nn.ParameterList(msg_weight) + + def set_up_params(self): + """ + set up the params for all message types + """ + self.type_of_msg = 1 + + for i in range(0, self.type_of_msg): + setattr(self, "msg_%d_weight" % i, self.create_params()) + + + def retrieve_params(self, weight, step): + """ + Args: + weight: a list of weights + step: a certain layer + """ + if self.args.select_manifold == 'poincare': + layer_weight = weight[step] + elif self.args.select_manifold == 'lorentz': # Ensure valid tangent vectors for (1, 0, ...) + layer_weight = th.cat((th.zeros((self.args.dim, 1)).cuda(), weight[step]), dim=1) + return layer_weight + + def apply_activation(self, node_repr): + """ + apply non-linearity for different manifolds + """ + if self.args.select_manifold == "poincare": + return self.activation(node_repr) + elif self.args.select_manifold == "lorentz": + return self.manifold.from_poincare_to_lorentz( + self.activation(self.manifold.from_lorentz_to_poincare(node_repr)) + ) + + def split_graph_by_negative_edge(self, adj_mat, weight): + """ + Split the graph according to positive and negative edges. + """ + mask = weight > 0 + neg_mask = weight < 0 + + pos_adj_mat = adj_mat * mask.long() + neg_adj_mat = adj_mat * neg_mask.long() + pos_weight = weight * mask.float() + neg_weight = -weight * neg_mask.float() + return pos_adj_mat, pos_weight, neg_adj_mat, neg_weight + + def split_graph_by_type(self, adj_mat, weight): + """ + split the graph according to edge type for multi-relational datasets + """ + multi_relation_adj_mat = [] + multi_relation_weight = [] + for relation in range(1, self.args.edge_type): + mask = (weight.int() == relation) + multi_relation_adj_mat.append(adj_mat * mask.long()) + multi_relation_weight.append(mask.float()) + return multi_relation_adj_mat, multi_relation_weight + + def split_input(self, adj_mat, weight): + """ + Split the adjacency matrix and weight matrix for multi-relational datasets + and datasets with enhanced inverse edges, e.g. Ethereum. + """ + return [adj_mat], [weight] + + def aggregate_msg(self, node_repr, adj_mat, weight, layer_weight, mask): + """ + message passing for a specific message type. 
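# Illustrative sketch, not part of the patch: how split_graph_by_negative_edge
# above separates a signed weight matrix into positive and negative parts.
import torch as th

adj = th.tensor([[0, 1], [1, 0]])
weight = th.tensor([[0.0, 2.0], [-3.0, 0.0]])
pos_mask, neg_mask = weight > 0, weight < 0
pos_adj, pos_w = adj * pos_mask.long(), weight * pos_mask.float()
neg_adj, neg_w = adj * neg_mask.long(), -weight * neg_mask.float()
# pos_adj keeps only edge (0, 1); neg_adj keeps only edge (1, 0) with weight 3.0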
+ """ + node_num, max_neighbor = adj_mat.size(0), adj_mat.size(1) + msg = th.mm(node_repr, layer_weight) * mask + # select out the neighbors of each node + neighbors = th.index_select(msg, 0, adj_mat.view(-1)) # [node_num * max_neighbor, embed_size] + neighbors = neighbors.view(node_num, max_neighbor, -1) + # weighted sum of the neighbors' representations + neighbors = weight.unsqueeze(2) * neighbors # [node_num, max_neighbor, embed_size] + combined_msg = th.sum(neighbors, dim=1) # [node_num, embed_size] + return combined_msg + + def get_combined_msg(self, step, node_repr, adj_mat, weight, mask): + """ + perform message passing in the tangent space of x' + """ + # use the first layer only if tying weights + gnn_layer = 0 if self.args.tie_weight else step + combined_msg = None + for relation in range(0, self.type_of_msg): + layer_weight = self.retrieve_params(getattr(self, "msg_%d_weight" % relation), gnn_layer) + aggregated_msg = self.aggregate_msg(node_repr, + adj_mat[relation], + weight[relation], + layer_weight, mask) + combined_msg = aggregated_msg if combined_msg is None else (combined_msg + aggregated_msg) + return combined_msg + + def forward(self, node_repr, adj_list, weight, mask): + """ + Args: + node_repr: [node_num, embed_size] + node_repr is in Euclidean space. + If node_repr is in hyperbolic space, invoke log_map_zero first. + adj_list: [node_num, max_neighbor] adjacency list + weight: [node_num, max_neighbor] weights of the adjacency list + mask: [node_num, 1] 1 denote real node, 0 padded node + Return: + [node_num, embed_size] in hyperbolic space + """ + # split the adjacency list and weights based on edge types + adj_list, weight = self.split_input(adj_list, weight) + # gnn layers + for step in range(self.args.num_layers): + node_repr = self.manifold.log_map_zero(node_repr) * mask if step > 0 else node_repr * mask + combined_msg = self.get_combined_msg(step, node_repr, adj_list, weight, mask) + combined_msg = self.dropout(combined_msg) * mask + node_repr = self.manifold.exp_map_zero(combined_msg) * mask + node_repr = self.apply_activation(node_repr) * mask + return node_repr diff --git a/HGNN/gnn/__init__.py b/HGNN/gnn/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..6c53e88b5c75fe891a396a8d629fae431dfe63d6 --- /dev/null +++ b/HGNN/gnn/__init__.py @@ -0,0 +1 @@ +from Ghypeddings.HGNN.gnn.RiemannianGNN import RiemannianGNN diff --git a/HGNN/hgnn.py b/HGNN/hgnn.py new file mode 100644 index 0000000000000000000000000000000000000000..4e10742634d05623691ec8e66de926a135ae2b9f --- /dev/null +++ b/HGNN/hgnn.py @@ -0,0 +1,70 @@ +from Ghypeddings.HGNN.task import * +from Ghypeddings.HGNN.utils import * +from Ghypeddings.HGNN.manifold import * +from Ghypeddings.HGNN.gnn import RiemannianGNN + +class HGNN: + def __init__(self, + adj, + features, + labels, + dim, + c=None, + num_layers=2, + bias=True, + act='leaky_relu', + alpha=0.2, + select_manifold='poincare', + num_centroid=100, + eucl_vars=[], + hyp_vars=[], + grad_clip=1.0, + optimizer='sgd', + weight_decay=0.05, + lr=0.01, + lr_scheduler='cosine', + lr_gamma=0.5, + lr_hyperbolic=0.1, + hyper_optimizer='ramsgrad', + proj_init='xavier', + tie_weight=True, + epochs=50, + patience=100, + seed=42, + log_freq=1, + eval_freq=1, + val_prop=0.5, + test_prop=0.3, + double_precision=0, + dropout=0.1, + normalize_adj=False, + normalize_feats=True): + + self.args = 
create_args(dim,c,num_layers,bias,act,alpha,select_manifold,num_centroid,eucl_vars,hyp_vars,grad_clip,optimizer,weight_decay,lr,lr_scheduler,lr_gamma,lr_hyperbolic,hyper_optimizer,proj_init,tie_weight,epochs,patience,seed,log_freq,eval_freq,val_prop,test_prop,double_precision,dropout,normalize_adj,normalize_feats) + + set_seed(self.args.seed) + self.logger = create_logger() + if self.args.select_manifold == 'lorentz': + self.args.dim += 1 + if self.args.select_manifold == 'lorentz': + self.manifold= LorentzManifold(self.args, self.logger) + elif self.args.select_manifold == 'poincare': + self.manifold= PoincareManifold(self.args,self.logger) + rgnn = RiemannianGNN(self.args, self.logger, self.manifold) + self.gnn = NodeClassificationTask(self.args, self.logger, rgnn, self.manifold, adj,features,labels) + + def fit(self): + return self.gnn.run_gnn() + + def predict(self): + return self.gnn.evaluate(self.gnn.loader, 'test', self.gnn.model, self.gnn.loss_function) + + def save_embeddings(self): + labels = np.argmax(th.squeeze(self.gnn.labels).numpy(),axis=1) + #tb_embeddings_euc = self.gnn.manifold.log_map_zero(self.gnn.early_stop.best_emb) + for_classification_hyp = np.hstack((self.gnn.early_stop.best_emb.cpu().detach().numpy(),labels.reshape(-1,1))) + #for_classification_euc = np.hstack((tb_embeddings_euc.cpu().detach().numpy(),labels.reshape(-1,1))) + hyp_file_path = os.path.join(os.getcwd(),'hgnn_embeddings_hyp.csv') + #euc_file_path = os.path.join(os.getcwd(),'hgnn_embeddings_euc.csv') + np.savetxt(hyp_file_path, for_classification_hyp, delimiter=',') + #np.savetxt(euc_file_path, for_classification_euc, delimiter=',') \ No newline at end of file diff --git a/HGNN/hyperbolic_module/CentroidDistance.py b/HGNN/hyperbolic_module/CentroidDistance.py new file mode 100644 index 0000000000000000000000000000000000000000..9d868cb98ea1d10da977fd7bb8b22d9f0cfb0853 --- /dev/null +++ b/HGNN/hyperbolic_module/CentroidDistance.py @@ -0,0 +1,54 @@ +import torch as th +import torch.nn as nn +import torch.nn.functional as F +from Ghypeddings.HGNN.utils import * + +class CentroidDistance(nn.Module): + """ + Implement a model that calculates the pairwise distances between node representations + and centroids + """ + def __init__(self, args, logger, manifold): + super(CentroidDistance, self).__init__() + self.args = args + self.logger = logger + self.manifold = manifold + + # centroid embedding + self.centroid_embedding = nn.Embedding( + args.num_centroid, args.dim, + sparse=False, + scale_grad_by_freq=False, + ) + self.manifold.init_embed(self.centroid_embedding) + args.hyp_vars.append(self.centroid_embedding) + + def forward(self, node_repr, mask): + """ + Args: + node_repr: [node_num, embed_size] + mask: [node_num, 1] 1 denote real node, 0 padded node + return: + graph_centroid_dist: [1, num_centroid] + node_centroid_dist: [1, node_num, num_centroid] + """ + node_num = node_repr.size(0) + + # broadcast and reshape node_repr to [node_num * num_centroid, embed_size] + node_repr = node_repr.unsqueeze(1).expand( + -1, + self.args.num_centroid, + -1).contiguous().view(-1, self.args.dim) + + # broadcast and reshape centroid embeddings to [node_num * num_centroid, embed_size] + centroid_repr = self.centroid_embedding(th.arange(self.args.num_centroid).cuda()) + centroid_repr = centroid_repr.unsqueeze(0).expand( + node_num, + -1, + -1).contiguous().view(-1, self.args.dim) + # get distance + node_centroid_dist = self.manifold.distance(node_repr, centroid_repr) + node_centroid_dist = node_centroid_dist.view(1, 
node_num, self.args.num_centroid) * mask + # average pooling over nodes + graph_centroid_dist = th.sum(node_centroid_dist, dim=1) / th.sum(mask) + return graph_centroid_dist, node_centroid_dist diff --git a/HGNN/hyperbolic_module/PoincareDistance.py b/HGNN/hyperbolic_module/PoincareDistance.py new file mode 100644 index 0000000000000000000000000000000000000000..4bc423409e286c13382d0a76bd97931f1c840a54 --- /dev/null +++ b/HGNN/hyperbolic_module/PoincareDistance.py @@ -0,0 +1,38 @@ +import torch as th +from torch.autograd import Function +import torch.nn as nn +import numpy as np +from torch.autograd import Function, Variable + +class PoincareDistance(Function): + @staticmethod + def grad(x, v, sqnormx, sqnormv, sqdist, eps): + alpha = (1 - sqnormx) + beta = (1 - sqnormv) + z = 1 + 2 * sqdist / (alpha * beta) + a = ((sqnormv - 2 * th.sum(x * v, dim=-1) + 1) / th.pow(alpha, 2))\ + .unsqueeze(-1).expand_as(x) + a = a * x - v / alpha.unsqueeze(-1).expand_as(v) + z = th.sqrt(th.pow(z, 2) - 1) + z = th.clamp(z * beta, min=eps).unsqueeze(-1) + return 4 * a / z.expand_as(x) + + @staticmethod + def forward(ctx, u, v, eps): + squnorm = th.clamp(th.sum(u * u, dim=-1), 0, 1 - eps) + sqvnorm = th.clamp(th.sum(v * v, dim=-1), 0, 1 - eps) + sqdist = th.sum(th.pow(u - v, 2), dim=-1) + ctx.eps = eps + ctx.save_for_backward(u, v, squnorm, sqvnorm, sqdist) + x = sqdist / ((1 - squnorm) * (1 - sqvnorm)) * 2 + 1 + # arcosh + z = th.sqrt(th.pow(x, 2) - 1) + return th.log(x + z) + + @staticmethod + def backward(ctx, g): + u, v, squnorm, sqvnorm, sqdist = ctx.saved_tensors + g = g.unsqueeze(-1) + gu = PoincareDistance.grad(u, v, squnorm, sqvnorm, sqdist, ctx.eps) + gv = PoincareDistance.grad(v, u, sqvnorm, squnorm, sqdist, ctx.eps) + return g.expand_as(gu) * gu, g.expand_as(gv) * gv, None diff --git a/HGNN/hyperbolic_module/__init__.py b/HGNN/hyperbolic_module/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/HGNN/manifold/LorentzManifold.py b/HGNN/manifold/LorentzManifold.py new file mode 100644 index 0000000000000000000000000000000000000000..c6e9bbcc36dc0769a486fa887b472a07f1ab1492 --- /dev/null +++ b/HGNN/manifold/LorentzManifold.py @@ -0,0 +1,165 @@ +import torch as th +import torch.nn as nn +import numpy as np +from torch.autograd import Function, Variable +from Ghypeddings.HGNN.utils import * + +_eps = 1e-10 + +class LorentzManifold: + + def __init__(self, args, logger, eps=1e-3, norm_clip=1, max_norm=1e3): + self.args = args + self.logger = logger + self.eps = eps + self.norm_clip = norm_clip + self.max_norm = max_norm + + @staticmethod + def ldot(u, v, keepdim=False): + """ + Lorentzian Scalar Product + Args: + u: [batch_size, d + 1] + v: [batch_size, d + 1] + Return: + keepdim: False [batch_size] + keepdim: True [batch_size, 1] + """ + d = u.size(1) - 1 + uv = u * v + uv = th.cat((-uv.narrow(1, 0, 1), uv.narrow(1, 1, d)), dim=1) + return th.sum(uv, dim=1, keepdim=keepdim) + + def from_lorentz_to_poincare(self, x): + """ + Args: + u: [batch_size, d + 1] + """ + d = x.size(-1) - 1 + return x.narrow(-1, 1, d) / (x.narrow(-1, 0, 1) + 1) + + def from_poincare_to_lorentz(self, x): + """ + Args: + u: [batch_size, d] + """ + x_norm_square = th_dot(x, x) + return th.cat((1 + x_norm_square, 2 * x), dim=1) / (1 - x_norm_square + self.eps) + + def distance(self, u, v): + d = -LorentzDot.apply(u, v) + return Acosh.apply(d, self.eps) + + def normalize(self, w): + """ + Normalize vector such that it is located on the hyperboloid + Args: 
+ w: [batch_size, d + 1] + """ + d = w.size(-1) - 1 + narrowed = w.narrow(-1, 1, d) + if self.max_norm: + narrowed = th.renorm(narrowed.view(-1, d), 2, 0, self.max_norm) + first = 1 + th.sum(th.pow(narrowed, 2), dim=-1, keepdim=True) + first = th.sqrt(first) + return th.cat((first, narrowed), dim=1) + + def init_embed(self, embed, irange=1e-2): + embed.weight.data.uniform_(-irange, irange) + embed.weight.data.copy_(self.normalize(embed.weight.data)) + + def rgrad(self, p, d_p): + """Riemannian gradient for hyperboloid""" + u = d_p + x = p + u.narrow(-1, 0, 1).mul_(-1) + u.addcmul_(self.ldot(x, u, keepdim=True).expand_as(x), x) + return d_p + + def exp_map_zero(self, v): + zeros = th.zeros_like(v) + zeros[:, 0] = 1 + return self.exp_map_x(zeros, v) + + def exp_map_x(self, p, d_p, d_p_normalize=True, p_normalize=True): + if d_p_normalize: + d_p = self.normalize_tan(p, d_p) + + ldv = self.ldot(d_p, d_p, keepdim=True) + nd_p = th.sqrt(th.clamp(ldv + self.eps, _eps)) + + t = th.clamp(nd_p, max=self.norm_clip) + newp = (th.cosh(t) * p) + (th.sinh(t) * d_p / nd_p) + + if p_normalize: + newp = self.normalize(newp) + return newp + + def normalize_tan(self, x_all, v_all): + d = v_all.size(1) - 1 + x = x_all.narrow(1, 1, d) + xv = th.sum(x * v_all.narrow(1, 1, d), dim=1, keepdim=True) + tmp = 1 + th.sum(th.pow(x_all.narrow(1, 1, d), 2), dim=1, keepdim=True) + tmp = th.sqrt(tmp) + return th.cat((xv / tmp, v_all.narrow(1, 1, d)), dim=1) + + def log_map_zero(self, y, i=-1): + zeros = th.zeros_like(y) + zeros[:, 0] = 1 + return self.log_map_x(zeros, y) + + def log_map_x(self, x, y, normalize=False): + """Logarithmic map on the Lorentz Manifold""" + xy = self.ldot(x, y).unsqueeze(-1) + tmp = th.sqrt(th.clamp(xy * xy - 1 + self.eps, _eps)) + v = Acosh.apply(-xy, self.eps) / ( + tmp + ) * th.addcmul(y, xy, x) + if normalize: + result = self.normalize_tan(x, v) + else: + result = v + return result + + def parallel_transport(self, x, y, v): + """Parallel transport for hyperboloid""" + v_ = v + x_ = x + y_ = y + + xy = self.ldot(x_, y_, keepdim=True).expand_as(x_) + vy = self.ldot(v_, y_, keepdim=True).expand_as(x_) + vnew = v_ + vy / (1 - xy) * (x_ + y_) + return vnew + + def metric_tensor(self, x, u, v): + return self.ldot(u, v, keepdim=True) + +class LorentzDot(Function): + @staticmethod + def forward(ctx, u, v): + ctx.save_for_backward(u, v) + return LorentzManifold.ldot(u, v) + + @staticmethod + def backward(ctx, g): + u, v = ctx.saved_tensors + g = g.unsqueeze(-1).expand_as(u).clone() + g.narrow(-1, 0, 1).mul_(-1) + return g * v, g * u + +class Acosh(Function): + @staticmethod + def forward(ctx, x, eps): + z = th.sqrt(th.clamp(x * x - 1 + eps, _eps)) + ctx.save_for_backward(z) + ctx.eps = eps + return th.log(x + z) + + @staticmethod + def backward(ctx, g): + z, = ctx.saved_tensors + z = th.clamp(z, min=ctx.eps) + z = g / z + return z, None diff --git a/HGNN/manifold/PoincareManifold.py b/HGNN/manifold/PoincareManifold.py new file mode 100644 index 0000000000000000000000000000000000000000..0a3c97c31eb609a62ffb675fcad4e865ef048fe1 --- /dev/null +++ b/HGNN/manifold/PoincareManifold.py @@ -0,0 +1,112 @@ +import torch as th +import torch.nn as nn +import numpy as np +from torch.autograd import Function, Variable +from Ghypeddings.HGNN.hyperbolic_module.PoincareDistance import PoincareDistance +from Ghypeddings.HGNN.utils import * + +class PoincareManifold: + + def __init__(self, args, logger, EPS=1e-5, PROJ_EPS=1e-5): + self.args = args + self.logger = logger + self.EPS = EPS + self.PROJ_EPS = PROJ_EPS + 
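Aside (sanity sketch only, not part of the diff): the LorentzManifold exp/log maps above can be checked numerically; points returned by exp_map_zero should satisfy the hyperboloid constraint <x, x>_L = -1 and map back to (approximately) the original tangent vector. The import path assumes the repository is installed as the Ghypeddings package, and args/logger are passed as None because these particular methods do not use them.

import torch as th
from Ghypeddings.HGNN.manifold.LorentzManifold import LorentzManifold

m = LorentzManifold(args=None, logger=None)      # args/logger are stored but unused by these methods
v = th.zeros(4, 6)
v[:, 1:] = 0.1 * th.randn(4, 5)                  # tangent vectors at the origin (time coordinate 0)
x = m.exp_map_zero(v)                            # points on the hyperboloid
print(m.ldot(x, x))                              # close to -1 for every row
print((m.log_map_zero(x) - v).abs().max())       # small, up to the eps smoothing inside the maps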
self.tanh = nn.Tanh() + + def normalize(self, x): + return clip_by_norm(x, (1. - self.PROJ_EPS)) + + def init_embed(self, embed, irange=1e-2): + embed.weight.data.uniform_(-irange, irange) + embed.weight.data.copy_(self.normalize(embed.weight.data)) + + def mob_add(self, u, v): + """ + Add two vectors in hyperbolic space + """ + v = v + self.EPS + th_dot_u_v = 2. * th_dot(u, v) + th_norm_u_sq = th_dot(u, u) + th_norm_v_sq = th_dot(v, v) + denominator = 1. + th_dot_u_v + th_norm_v_sq * th_norm_u_sq + result = (1. + th_dot_u_v + th_norm_v_sq) / (denominator + self.EPS) * u + \ + (1. - th_norm_u_sq) / (denominator + self.EPS) * v + return self.normalize(result) + + def distance(self, u, v): + return PoincareDistance.apply(u, v, 1e-5) + + def lambda_x(self, x): + """ + A conformal factor + """ + return 2. / (1 - th_dot(x, x)) + + def log_map_zero(self, y): + diff = y + self.EPS + norm_diff = th_norm(diff) + return 1. / th_atanh(norm_diff, self.EPS) / norm_diff * diff + + def log_map_x(self, x, y): + diff = self.mob_add(-x, y) + self.EPS + norm_diff = th_norm(diff) + lam = self.lambda_x(x) + return (( 2. / lam) * th_atanh(norm_diff, self.EPS) / norm_diff) * diff + + def metric_tensor(self, x, u, v): + """ + The metric tensor in hyperbolic space. + In-place operations for saving memory. (do not use this function in forward calls) + """ + u_dot_v = th_dot(u, v) + lambda_x = self.lambda_x(x) + lambda_x *= lambda_x + lambda_x *= u_dot_v + return lambda_x + + def exp_map_zero(self, v): + """ + Exp map from tangent space of zero to hyperbolic space + Args: + v: [batch_size, *] in tangent space + """ + v = v + self.EPS + norm_v = th_norm(v) # [batch_size, 1] + result = self.tanh(norm_v) / (norm_v) * v + return self.normalize(result) + + def exp_map_x(self, x, v): + """ + Exp map from tangent space of x to hyperbolic space + """ + v = v + self.EPS # Perturbe v to avoid dealing with v = 0 + norm_v = th_norm(v) + second_term = (self.tanh(self.lambda_x(x) * norm_v / 2) / norm_v) * v + return self.normalize(self.mob_add(x, second_term)) + + def gyr(self, u, v, w): + u_norm = th_dot(u, u) + v_norm = th_dot(v, v) + u_dot_w = th_dot(u, w) + v_dot_w = th_dot(v, w) + u_dot_v = th_dot(u, v) + A = - u_dot_w * v_norm + v_dot_w + 2 * u_dot_v * v_dot_w + B = - v_dot_w * u_norm - u_dot_w + D = 1 + 2 * u_dot_v + u_norm * v_norm + return w + 2 * (A * u + B * v) / (D + self.EPS) + + def parallel_transport(self, src, dst, v): + return self.lambda_x(src) / th.clamp(self.lambda_x(dst), min=self.EPS) * self.gyr(dst, -src, v) + + def rgrad(self, p, d_p): + """ + Function to compute Riemannian gradient from the + Euclidean gradient in the Poincare ball. 
+ Args: + p (Tensor): Current point in the ball + d_p (Tensor): Euclidean gradient at p + """ + p_sqnorm = th.sum(p.data ** 2, dim=-1, keepdim=True) + d_p = d_p * ((1 - p_sqnorm) ** 2 / 4.0).expand_as(d_p) + return d_p diff --git a/HGNN/manifold/__init__.py b/HGNN/manifold/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..ada909b5c9f1f0e0467f0b7b368874b627cf3751 --- /dev/null +++ b/HGNN/manifold/__init__.py @@ -0,0 +1,2 @@ +from Ghypeddings.HGNN.manifold.PoincareManifold import * +from Ghypeddings.HGNN.manifold.LorentzManifold import * diff --git a/HGNN/optimizer/__init__.py b/HGNN/optimizer/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/HGNN/optimizer/ramsgrad.py b/HGNN/optimizer/ramsgrad.py new file mode 100644 index 0000000000000000000000000000000000000000..c51d3d7cae72d995edf555dd36e2535770e14708 --- /dev/null +++ b/HGNN/optimizer/ramsgrad.py @@ -0,0 +1,74 @@ +""" +Implement a AMSGrad: https://openreview.net/pdf?id=r1eiqi09K7 +""" +import torch as th +from torch.optim.optimizer import Optimizer, required +import os +import math +import numpy as np + +class RiemannianAMSGrad(Optimizer): + """ + Riemannian AMS gradient descent. + Args: + params (iterable): iterable of parameters to optimize or dicts defining + parameter groups + lr (float): learning rate + """ + + def __init__(self, args, manifold,params, lr, betas=(0.9, 0.99), eps=1e-8): + self.args = args + self.manifold = manifold + defaults = dict(lr=lr, betas=betas, eps=eps) + super(RiemannianAMSGrad, self).__init__(params, defaults) + + def step(self, lr=None): + """Performs a single optimization step. + Arguments: + lr (float, optional): learning rate for the current update. + """ + loss = None + with th.no_grad(): + for group in self.param_groups: + for p in group['params']: + if p.grad is None: + continue + grad = p.grad.data + grad = self.manifold.rgrad(p, grad) + if lr is None: + lr = group['lr'] + + state = self.state[p] + + # State initialization + if len(state) == 0: + state['step'] = 0 + state['tau'] = th.zeros_like(p.data) + # Exponential moving average of gradient values + state['exp_avg'] = th.zeros_like(p.data) + # Exponential moving average of squared gradient values + state['exp_avg_sq'] = th.zeros_like(p.data) + # Maintains max of all exp. moving avg. of sq. grad. values + state['max_exp_avg_sq'] = th.zeros_like(p.data) + + exp_avg, exp_avg_sq, tau, max_exp_avg_sq = \ + state['exp_avg'], state['exp_avg_sq'], state['tau'], state['max_exp_avg_sq'] + + beta1, beta2 = group['betas'] + + state['step'] += 1 + + # Decay the first and second moment running average coefficient + exp_avg.data = beta1 * tau + (1 - beta1) * grad + exp_avg_sq.mul_(beta2).add_(1 - beta2, self.manifold.metric_tensor(p, grad, grad)) + th.max(max_exp_avg_sq, exp_avg_sq, out=max_exp_avg_sq) + # Use the max. for normalizing running avg. 
of gradient + denom = max_exp_avg_sq.sqrt().clamp_(min=group['eps']) + + step_size = group['lr'] + + p_original = p.clone() + before_proj = self.manifold.exp_map_x(p, (-step_size * exp_avg).div_(denom)) + p.data = self.manifold.normalize(before_proj) + tau.data = self.manifold.parallel_transport(p_original, p, exp_avg) + return loss diff --git a/HGNN/optimizer/rsgd.py b/HGNN/optimizer/rsgd.py new file mode 100644 index 0000000000000000000000000000000000000000..14da1fe8e2f72ae731947ea4ffab607626865c6b --- /dev/null +++ b/HGNN/optimizer/rsgd.py @@ -0,0 +1,43 @@ +import torch as th +from torch.optim.optimizer import Optimizer, required +from Ghypeddings.HGNN.utils import * +import os +import math + +class RiemannianSGD(Optimizer): + """Riemannian stochastic gradient descent. + Args: + params (iterable): iterable of parameters to optimize or dicts defining + parameter groups + rgrad (Function): Function to compute the Riemannian gradient from + an Euclidean gradient + retraction (Function): Function to update the parameters via a + retraction of the Riemannian gradient + lr (float): learning rate + """ + + def __init__(self, args, params, lr): + defaults = dict(lr=lr) + self.args = args + super(RiemannianSGD, self).__init__(params, defaults) + + def step(self, lr=None): + """ + Performs a single optimization step. + Arguments: + lr (float, optional): learning rate for the current update. + """ + loss = None + + for group in self.param_groups: + for p in group['params']: + if p.grad is None: + continue + d_p = p.grad.data + d_p = self.args.manifold.rgrad(p, d_p) + if lr is None: + lr = group['lr'] + p.data = self.args.manifold.normalize( + self.args.manifold.exp_map_x(p, -lr * d_p) + ) + return loss diff --git a/HGNN/task/BaseTask.py b/HGNN/task/BaseTask.py new file mode 100644 index 0000000000000000000000000000000000000000..2486800e402e2e1bc8a5b44559f5c259b53b605c --- /dev/null +++ b/HGNN/task/BaseTask.py @@ -0,0 +1,43 @@ +import numpy as np +from Ghypeddings.HGNN.utils import * +import torch as th +import torch.nn as nn +from torch.utils.data import Dataset, DataLoader +import torch.optim as optim +import torch.distributed as dist +from torch.utils.data.distributed import DistributedSampler + +class BaseTask(object): + """ + A base class that supports loading datasets, early stop and reporting statistics + """ + def __init__(self, args, logger, criterion='max'): + """ + criterion: min/max + """ + self.args = args + self.logger = logger + self.early_stop = EarlyStoppingCriterion(self.args.patience, criterion) + + def reset_epoch_stats(self, epoch, prefix): + """ + prefix: train/dev/test + """ + self.epoch_stats = { + 'prefix': prefix, + 'epoch': epoch, + 'loss': 0, + 'num_correct': 0, + 'num_total': 0, + } + + def update_epoch_stats(self, loss, score, label, is_regression=False): + with th.no_grad(): + self.epoch_stats['loss'] += loss.item() + self.epoch_stats['num_total'] += label.size(0) + if not is_regression: + self.epoch_stats['num_correct'] += th.sum(th.eq(th.argmax(score, dim=1), label)).item() + + def report_best(self): + self.logger.info("best val %.6f" + % (self.early_stop.best_dev_score)) diff --git a/HGNN/task/NodeClassification.py b/HGNN/task/NodeClassification.py new file mode 100644 index 0000000000000000000000000000000000000000..fd2ed241331ad6e1971dd5fdf441a6c6034e7f21 --- /dev/null +++ b/HGNN/task/NodeClassification.py @@ -0,0 +1,50 @@ +import torch as th +import torch.nn as nn +import torch.nn.functional as F +from Ghypeddings.HGNN.utils import * +from 
Ghypeddings.HGNN.hyperbolic_module.CentroidDistance import CentroidDistance + +class NodeClassification(nn.Module): + + def __init__(self, args, logger, rgnn, manifold): + super(NodeClassification, self).__init__() + self.args = args + self.logger = logger + self.manifold = manifold + self.c = nn.Parameter(th.Tensor([1.])) + + self.feature_linear = nn.Linear(self.args.input_dim, + self.args.dim + ) + nn_init(self.feature_linear, self.args.proj_init) + self.args.eucl_vars.append(self.feature_linear) + + self.distance = CentroidDistance(args, logger, manifold) + + self.rgnn = rgnn + self.output_linear = nn.Linear(self.args.num_centroid, + self.args.num_class + ) + nn_init(self.output_linear, self.args.proj_init) + self.args.eucl_vars.append(self.output_linear) + + self.log_softmax = nn.LogSoftmax(dim=1) + self.activation = get_activation(self.args) + + def forward(self, adj, weight, features): + """ + Args: + adj: the neighbor ids of each node [1, node_num, max_neighbor] + weight: the weight of each neighbor [1, node_num, max_neighbor] + features: [1, node_num, input_dim] + """ + assert adj.size(0) == 1 + adj, weight, features = adj.squeeze(0), weight.squeeze(0), features.squeeze(0) + node_repr = self.activation(self.feature_linear(features)) + assert th.isnan(node_repr).any().item() == False + mask = th.ones((self.args.node_num, 1)).cuda() # [node_num, 1] + node_repr = self.rgnn(node_repr, adj, weight, mask) # [node_num, embed_size] + + _, node_centroid_sim = self.distance(node_repr, mask) # [1, node_num, num_centroid] + class_logit = self.output_linear(node_centroid_sim.squeeze()) + return self.log_softmax(class_logit) , node_repr \ No newline at end of file diff --git a/HGNN/task/NodeClassificationTask.py b/HGNN/task/NodeClassificationTask.py new file mode 100644 index 0000000000000000000000000000000000000000..cdf9fc41662155c27661e355acca7d254ef59c3e --- /dev/null +++ b/HGNN/task/NodeClassificationTask.py @@ -0,0 +1,136 @@ +import torch as th +import torch.nn as nn +import torch.nn.functional as F +from Ghypeddings.HGNN.utils import * +from torch.utils.data import DataLoader +import torch.optim as optim +from Ghypeddings.HGNN.task.BaseTask import BaseTask +import numpy as np +from Ghypeddings.HGNN.dataset.NodeClassificationDataset import NodeClassificationDataset +from Ghypeddings.HGNN.task.NodeClassification import NodeClassification +import time +from sklearn.metrics import roc_auc_score,accuracy_score,f1_score,precision_score,recall_score + +def cross_entropy(log_prob, label, mask): + label, mask = label.squeeze(), mask.squeeze() + negative_log_prob = -th.sum(label * log_prob, dim=1) + return th.sum(mask * negative_log_prob, dim=0) / th.sum(mask) + +def get_accuracy(label, log_prob, mask): + lab = label.clone() + lab = lab.squeeze() + mask_copy = mask.clone().cpu().numpy()[0].astype(np.bool_) + pred_class = th.argmax(log_prob, dim=1).cpu().numpy()[mask_copy] + real_class = th.argmax(lab, dim=1).cpu().numpy()[mask_copy] + acc= accuracy_score(y_true=real_class,y_pred=pred_class) + f1= f1_score(y_true=real_class,y_pred=pred_class) + recall= recall_score(y_true=real_class,y_pred=pred_class) + precision= precision_score(y_true=real_class,y_pred=pred_class) + print(np.sum(real_class) , np.sum(pred_class)) + roc_auc = roc_auc_score(real_class,pred_class) + return acc,f1,recall,precision,roc_auc + +class NodeClassificationTask(BaseTask): + + def __init__(self, args, logger, rgnn, manifold,adj,features,labels): + super(NodeClassificationTask, self).__init__(args, logger, criterion='max') + 
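Aside (toy check, not part of the diff): the cross_entropy helper above averages the one-hot negative log-likelihood over masked nodes only, so on the unmasked rows it agrees with PyTorch's nll_loss. The tensors below are made up, and the helper is repeated inline so the snippet runs standalone.

import torch as th
import torch.nn.functional as F

def cross_entropy(log_prob, label, mask):                      # same formula as the helper above
    label, mask = label.squeeze(), mask.squeeze()
    negative_log_prob = -th.sum(label * log_prob, dim=1)
    return th.sum(mask * negative_log_prob, dim=0) / th.sum(mask)

log_prob = F.log_softmax(th.tensor([[2.0, 0.0], [0.0, 1.0], [0.5, 0.5]]), dim=1)
label = th.tensor([[1.0, 0.0], [0.0, 1.0], [1.0, 0.0]])        # one-hot targets
mask = th.tensor([1.0, 1.0, 0.0])                              # third node excluded from the loss
print(cross_entropy(log_prob, label, mask))                    # equals F.nll_loss(log_prob[:2], th.tensor([0, 1]))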
self.args = args + self.logger = logger + self.manifold = manifold + self.hyperbolic = True + self.rgnn = rgnn + self.loader = self.process_data(adj,features,labels) + self.model = NodeClassification(self.args, self.logger, self.rgnn, self.manifold).cuda() + self.loss_function = cross_entropy + + def forward(self, model, sample, loss_function): + scores , embeddings = model( + sample['adj'].cuda().long(), + sample['weight'].cuda().float(), + sample['features'].cuda().float(), + ) + loss = loss_function(scores, + sample['y_train'].cuda().float(), + sample['train_mask'].cuda().float()) + return scores, loss , embeddings + + def run_gnn(self): + loader = self.loader + model = self.model + loss_function = self.loss_function + + self.args.manifold = self.manifold + optimizer, lr_scheduler, hyperbolic_optimizer, hyperbolic_lr_scheduler = \ + set_up_optimizer_scheduler(self.hyperbolic, self.args, model,self.manifold) + self.labels = None + + best_losses = [] + real_losses = [] + + t_total = time.time() + for epoch in range(self.args.epochs): + model.train() + for i, sample in enumerate(loader): + model.zero_grad() + scores, loss , embeddings = self.forward(model, sample, loss_function) + loss.backward() + if self.args.grad_clip > 0.0: + th.nn.utils.clip_grad_norm_(model.parameters(), self.args.grad_clip) + optimizer.step() + if self.hyperbolic and len(self.args.hyp_vars) != 0: + hyperbolic_optimizer.step() + self.labels = sample['y_train'] + accuracy,f1,recall,precision,roc_auc = get_accuracy( + sample['y_train'].cuda().float(), + scores, + sample['train_mask'].cuda().float()) + + real_losses.append(loss.item()) + if(len(best_losses) == 0): + best_losses.append(real_losses[0]) + elif (best_losses[-1] > real_losses[-1]): + best_losses.append(real_losses[-1]) + else: + best_losses.append(best_losses[-1]) + + if (epoch + 1) % self.args.log_freq == 0: + self.logger.info("%s epoch %d: accuracy %.4f f1 %.4f recall %.4f precision %.4f roc_auc %.4f loss: %.4f \n" % ( + 'train', + epoch, + accuracy,f1,recall,precision,roc_auc,loss.item())) + + dev_loss, accuracy ,f1,recall,precision,roc_auc = self.evaluate(loader, 'val', model, loss_function) + + lr_scheduler.step() + + if self.hyperbolic and len(self.args.hyp_vars) != 0: + hyperbolic_lr_scheduler.step() + if not self.early_stop.step(dev_loss, epoch , embeddings): + break + + self.logger.info("Training Finished!") + self.logger.info("Total time elapsed: {:.4f}s".format(time.time() - t_total)) + + return {'real':real_losses,'best':best_losses}, accuracy,f1,recall,precision,roc_auc,time.time() - t_total + + def evaluate(self, data_loader, prefix, model, loss_function): + model.eval() + with th.no_grad(): + for i, sample in enumerate(data_loader): + scores, loss , _ = self.forward(model, sample, loss_function) + if prefix == 'val': + accuracy,f1,recall,precision,roc_auc = get_accuracy( + sample['y_val'].cuda().float(), + scores, + sample['val_mask'].cuda().float()) + elif prefix == 'test': + accuracy,f1,recall,precision,roc_auc = get_accuracy( + sample['y_test'].cuda().float(), + scores, + sample['test_mask'].cuda().float()) + + return loss.item(), accuracy,f1,recall,precision,roc_auc + + def process_data(self,adj,features,labels): + dataset = NodeClassificationDataset(self.args, self.logger,adj,features,labels) + return DataLoader(dataset, batch_size=1, shuffle=False, num_workers=0) diff --git a/HGNN/task/__init__.py b/HGNN/task/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e4bd57382df4059527d0f60eeb0b175ad5c5d2f1 --- 
/dev/null +++ b/HGNN/task/__init__.py @@ -0,0 +1 @@ +from Ghypeddings.HGNN.task.NodeClassificationTask import * \ No newline at end of file diff --git a/HGNN/utils/EarlyStoppingCriterion.py b/HGNN/utils/EarlyStoppingCriterion.py new file mode 100644 index 0000000000000000000000000000000000000000..7e57381bd8bce016e7d9d9e0f6739e725d7dc521 --- /dev/null +++ b/HGNN/utils/EarlyStoppingCriterion.py @@ -0,0 +1,51 @@ +class EarlyStoppingCriterion(object): + """ + Arguments: + patience (int): The maximum number of epochs with no improvement before early stopping should take place + mode (str, can only be 'max' or 'min'): To take the maximum or minimum of the score for optimization + min_delta (float, optional): Minimum change in the score to qualify as an improvement (default: 0.0) + """ + + def __init__(self, patience, mode, min_delta=0.0): + assert patience >= 0 + assert mode in {'min', 'max'} + assert min_delta >= 0.0 + self.patience = patience + self.mode = mode + self.min_delta = min_delta + + self._count = 0 + self.best_dev_score = None + self.best_epoch = None + self.is_improved = None + self.best_emb = None + + def step(self, cur_dev_score, epoch , embeddings): + """ + Checks if training should be continued given the current score. + + Arguments: + cur_dev_score (float): the current development score + cur_test_score (float): the current test score + Output: + bool: if training should be continued + """ + if self.best_dev_score is None: + self.best_dev_score = cur_dev_score + self.best_epoch = epoch + self.best_emb = embeddings + return True + else: + if self.mode == 'max': + self.is_improved = (cur_dev_score > self.best_dev_score + self.min_delta) + else: + self.is_improved = (cur_dev_score < self.best_dev_score - self.min_delta) + + if self.is_improved: + self._count = 0 + self.best_dev_score = cur_dev_score + self.best_epoch = epoch + self.best_emb = embeddings + else: + self._count += 1 + return self._count <= self.patience diff --git a/HGNN/utils/__init__.py b/HGNN/utils/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..b3da1a96d729c4cfe3eb9edc1a122debcdd215aa --- /dev/null +++ b/HGNN/utils/__init__.py @@ -0,0 +1,3 @@ +from Ghypeddings.HGNN.utils.utils import * +from Ghypeddings.HGNN.utils.EarlyStoppingCriterion import EarlyStoppingCriterion +from Ghypeddings.HGNN.utils.logger import * diff --git a/HGNN/utils/logger.py b/HGNN/utils/logger.py new file mode 100644 index 0000000000000000000000000000000000000000..6f55e772da8da3784679cb1499dcdf6a13368759 --- /dev/null +++ b/HGNN/utils/logger.py @@ -0,0 +1,54 @@ +import logging +import time +from datetime import timedelta +from Ghypeddings.HGNN.utils import make_dir + +class LogFormatter(): + + def __init__(self): + self.start_time = time.time() + + def format(self, record): + elapsed_seconds = round(record.created - self.start_time) + + prefix = "%s - %s - %s" % ( + record.levelname, + time.strftime('%x %X'), + timedelta(seconds=elapsed_seconds) + ) + message = record.getMessage() + message = message.replace('\n', '\n' + ' ' * (len(prefix) + 3)) + return "%s - %s" % (prefix, message) + +def create_logger(): + """ + Create a logger. 
+ """ + #make_dir('log') + # create log formatter + log_formatter = LogFormatter() + + # create file handler and set level to debug + # file_handler = logging.FileHandler(filepath, "a") + # file_handler.setLevel(logging.DEBUG) + # file_handler.setFormatter(log_formatter) + + # create console handler and set level to info + console_handler = logging.StreamHandler() + console_handler.setLevel(logging.INFO) + console_handler.setFormatter(log_formatter) + + # create logger and set level to debug + logger = logging.getLogger() + logger.handlers = [] + logger.setLevel(logging.DEBUG) + logger.propagate = False + #logger.addHandler(file_handler) + logger.addHandler(console_handler) + + # reset logger elapsed time + def reset_time(): + log_formatter.start_time = time.time() + logger.reset_time = reset_time + + return logger diff --git a/HGNN/utils/utils.py b/HGNN/utils/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..cae6a571d4b2491d1b025926a4b47d2b2fb481e6 --- /dev/null +++ b/HGNN/utils/utils.py @@ -0,0 +1,284 @@ +from collections import defaultdict +import os +import pickle +import json +import torch.nn as nn +import torch as th +import torch.optim as optim +import numpy as np +import random +from Ghypeddings.HGNN.optimizer.ramsgrad import RiemannianAMSGrad +from Ghypeddings.HGNN.optimizer.rsgd import RiemannianSGD +import math +import subprocess +import argparse + +def str2bool(v): + return v.lower() == "true" + +def make_dir(path): + if not os.path.exists(path): + try: + os.mkdir(path) + except: + pass + +def pickle_dump(file_name, content): + with open(file_name, 'wb') as out_file: + pickle.dump(content, out_file, pickle.HIGHEST_PROTOCOL) + +def pickle_load(file_name): + with open(file_name, 'rb') as f: + return pickle.load(f) + +def init_weight(weight, method): + """ + Initialize parameters + Args: + weight: a Parameter object + method: initialization method + """ + if method == 'orthogonal': + nn.init.orthogonal_(weight) + elif method == 'xavier': + nn.init.xavier_uniform_(weight) + elif method == 'kaiming': + nn.init.kaiming_uniform_(weight) + elif method == 'none': + pass + else: + raise Exception('Unknown init method') + + +def nn_init(nn_module, method='orthogonal'): + """ + Initialize a Sequential or Module object + Args: + nn_module: Sequential or Module + method: initialization method + """ + if method == 'none': + return + for param_name, _ in nn_module.named_parameters(): + if isinstance(nn_module, nn.Sequential): + # for a Sequential object, the param_name contains both id and param name + i, name = param_name.split('.', 1) + param = getattr(nn_module[int(i)], name) + else: + param = getattr(nn_module, param_name) + if param_name.find('weight') > -1: + init_weight(param, method) + elif param_name.find('bias') > -1: + nn.init.uniform_(param, -1e-4, 1e-4) + +class NoneScheduler: + def step(self): + pass + +def get_lr_scheduler(args, optimizer): + if args.lr_scheduler == 'exponential': + return optim.lr_scheduler.ExponentialLR(optimizer, gamma=args.lr_gamma) + elif args.lr_scheduler == 'cosine': + return optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=30, eta_min=0) + elif args.lr_scheduler == 'cycle': + return optim.lr_scheduler.CyclicLR(optimizer, 0, max_lr=args.lr, step_size_up=20, cycle_momentum=False) + elif args.lr_scheduler == 'none': + return NoneScheduler() + +def get_optimizer(args, params): + if args.optimizer == 'sgd': + optimizer = optim.SGD(params, lr=args.lr, weight_decay=args.weight_decay) + elif args.optimizer == 'adam': + optimizer 
= optim.Adam(params, lr=args.lr, weight_decay=args.weight_decay) + elif args.optimizer == 'amsgrad': + optimizer = optim.Adam(params, lr=args.lr, amsgrad=True, weight_decay=args.weight_decay) + return optimizer + +def get_hyperbolic_optimizer(args, manifold,params): + if args.hyper_optimizer == 'rsgd': + optimizer = RiemannianSGD( + args, + params, + lr=args.lr_hyperbolic, + ) + elif args.hyper_optimizer == 'ramsgrad': + optimizer = RiemannianAMSGrad( + args, + manifold, + params, + lr=args.lr_hyperbolic, + ) + else: + print("unsupported hyper optimizer") + exit(1) + return optimizer + +def set_seed(seed): + """ + Set the random seed + """ + random.seed(seed) + np.random.seed(seed) + th.manual_seed(seed) + th.cuda.manual_seed(seed) + th.cuda.manual_seed_all(seed) + +def pad_sequence(data_list, maxlen, value=0): + return [row + [value] * (maxlen - len(row)) for row in data_list] + +def normalize_weight(adj_mat, weight): + degree = [1 / math.sqrt(sum(np.abs(w))) for w in weight] + for dst in range(len(adj_mat)): + for src_idx in range(len(adj_mat[dst])): + src = adj_mat[dst][src_idx] + weight[dst][src_idx] = degree[dst] * weight[dst][src_idx] * degree[src] + +def set_up_distributed_training_multi_gpu(args): + #args.device_id = args.local_rank + args.device_id = 0 + th.cuda.set_device(args.device_id) + args.distributed_rank = args.device_id + th.distributed.init_process_group(backend='nccl', + init_method='env://') + +def save_model_weights(args, model, path): + """ + save model weights out to file + """ + if args.distributed_rank == 0: + make_dir(path) + th.save(model.state_dict(), os.path.join(path, args.name)) + +def load_model_weights(model, path): + """ + load saved weights + """ + model.load_state_dict(th.load(path)) + +def th_atanh(x, EPS): + values = th.min(x, th.Tensor([1.0 - EPS]).cuda()) + return 0.5 * (th.log(1 + values + EPS) - th.log(1 - values + EPS)) + +def th_norm(x, dim=1): + """ + Args + x: [batch size, dim] + Output: + [batch size, 1] + """ + if(len(x.shape) == 1): + x = x.unsqueeze(0) + return th.norm(x, 2, dim, keepdim=True) + +def th_dot(x, y, keepdim=True): + tmp = x*y + if(len(tmp.shape) == 1): + tmp = tmp.unsqueeze(0) + return th.sum(tmp, dim=1, keepdim=keepdim) + +def clip_by_norm(x, clip_norm): + return th.renorm(x, 2, 0, clip_norm) + +def get_params(params_list, vars_list): + """ + Add parameters in vars_list to param_list + """ + for i in vars_list: + if issubclass(i.__class__, nn.Module): + params_list.extend(list(i.parameters())) + elif issubclass(i.__class__, nn.Parameter): + params_list.append(i) + else: + print("Encounter unknown objects") + exit(1) + +def categorize_params(args): + """ + Categorize parameters into hyperbolic ones and euclidean ones + """ + hyperbolic_params, euclidean_params = [], [] + get_params(euclidean_params, args.eucl_vars) + get_params(hyperbolic_params, args.hyp_vars) + return hyperbolic_params, euclidean_params + +def get_activation(args): + if args.act == 'leaky_relu': + return nn.LeakyReLU(args.alpha) + elif args.act == 'rrelu': + return nn.RReLU() + elif args.act == 'relu': + return nn.ReLU() + elif args.act == 'elu': + return nn.ELU() + elif args.act == 'prelu': + return nn.PReLU() + elif args.act == 'selu': + return nn.SELU() + +def set_up_optimizer_scheduler(hyperbolic, args, model , manifold): + if hyperbolic: + hyperbolic_params, euclidean_params = categorize_params(args) + #assert(len(list(model.parameters())) == len(hyperbolic_params) + len(euclidean_params)) + optimizer = get_optimizer(args, euclidean_params) + 
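Aside (toy illustration, not part of the diff): normalize_weight above rescales each adjacency-list weight symmetrically, w_ij <- w_ij / sqrt(deg_i * deg_j), in place. The three-node graph below is made up, and the import assumes the repository is installed as the Ghypeddings package.

from Ghypeddings.HGNN.utils import normalize_weight

adj_mat = [[1, 2], [0], [0]]           # node 0 is adjacent to {1, 2}; nodes 1 and 2 to {0}
weight = [[1.0, 1.0], [1.0], [1.0]]    # unit edge weights
normalize_weight(adj_mat, weight)      # modifies `weight` in place
print(weight)                          # every entry becomes 1/sqrt(2) ~ 0.7071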
lr_scheduler = get_lr_scheduler(args, optimizer) + if len(hyperbolic_params) > 0: + hyperbolic_optimizer = get_hyperbolic_optimizer(args,manifold, hyperbolic_params) + hyperbolic_lr_scheduler = get_lr_scheduler(args, hyperbolic_optimizer) + else: + hyperbolic_optimizer, hyperbolic_lr_scheduler = None, None + return optimizer, lr_scheduler, hyperbolic_optimizer, hyperbolic_lr_scheduler + else: + optimizer = get_optimizer(args, model.parameters()) + lr_scheduler = get_lr_scheduler(args, optimizer) + return optimizer, lr_scheduler, None, None + +# reimplement clamp functions to avoid killing gradient during backpropagation +def clamp_max(x, max_value): + t = th.clamp(max_value - x.detach(), max=0) + return x + t + +def clamp_min(x, min_value): + t = th.clamp(min_value - x.detach(), min=0) + return x + t + +def one_hot_vec(length, pos): + vec = [0] * length + vec[pos] = 1 + return vec + + +def create_args(*args): + parser = argparse.ArgumentParser() + parser.add_argument('--dim', type=int, default=args[0]) + parser.add_argument('--c', type=int, default=args[1]) + parser.add_argument('--num_layers', type=int, default=args[2]) + parser.add_argument('--bias', type=bool, default=args[3]) + parser.add_argument('--act', type=str, default=args[4]) + parser.add_argument('--alpha', type=float, default=args[5]) + parser.add_argument('--select_manifold', type=str, default=args[6]) + parser.add_argument('--num_centroid', type=int, default=args[7]) + parser.add_argument('--eucl_vars', nargs='+', default=args[8]) + parser.add_argument('--hyp_vars', nargs='+', default=args[9]) + parser.add_argument('--grad_clip', type=float, default=args[10]) + parser.add_argument('--optimizer', type=str, default=args[11]) + parser.add_argument('--weight_decay', type=float, default=args[12]) + parser.add_argument('--lr', type=float, default=args[13]) + parser.add_argument('--lr_scheduler', type=str, default=args[14]) + parser.add_argument('--lr_gamma', type=float, default=args[15]) + parser.add_argument('--lr_hyperbolic', type=float, default=args[16]) + parser.add_argument('--hyper_optimizer', type=str, default=args[17]) + parser.add_argument('--proj_init', type=str, default=args[18]) + parser.add_argument('--tie_weight', type=bool, default=args[19]) + parser.add_argument('--epochs', type=int, default=args[20]) + parser.add_argument('--patience', type=int, default=args[21]) + parser.add_argument('--seed', type=int, default=args[22]) + parser.add_argument('--log_freq', type=int, default=args[23]) + parser.add_argument('--eval_freq', type=int, default=args[24]) + parser.add_argument('--val_prop', type=float, default=args[25]) + parser.add_argument('--test_prop', type=float, default=args[26]) + parser.add_argument('--double_precision', type=int, default=args[27]) + parser.add_argument('--dropout', type=float, default=args[28]) + parser.add_argument('--normalize_adj', type=bool, default=args[29]) + parser.add_argument('--normalize_feats', type=bool, default=args[30]) + flags, unknown = parser.parse_known_args() + return flags \ No newline at end of file diff --git a/PVAE/__init__.py b/PVAE/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/PVAE/distributions/__init__.py b/PVAE/distributions/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..360ae4e2832bd8984779dd74f26bce9f9df9266a --- /dev/null +++ b/PVAE/distributions/__init__.py @@ -0,0 +1,4 @@ +from Ghypeddings.PVAE.distributions.riemannian_normal import 
RiemannianNormal +from Ghypeddings.PVAE.distributions.hyperbolic_radius import HyperbolicRadius +from Ghypeddings.PVAE.distributions.wrapped_normal import WrappedNormal +from Ghypeddings.PVAE.distributions.hyperspherical_uniform import HypersphericalUniform diff --git a/PVAE/distributions/ars.py b/PVAE/distributions/ars.py new file mode 100644 index 0000000000000000000000000000000000000000..cdd7e7253c2aaf7590f1ee4368f41de55430eaac --- /dev/null +++ b/PVAE/distributions/ars.py @@ -0,0 +1,135 @@ +import torch + +infty = torch.tensor(float('Inf')) + +def diff(x): + return x[:, 1:] - x[:, :-1] + +class ARS(): + ''' + This class implements the Adaptive Rejection Sampling technique of Gilks and Wild '92. + Where possible, naming convention has been borrowed from this paper. + The PDF must be log-concave. + Currently does not exploit lower hull described in paper- which is fine for drawing + only small amount of samples at a time. + ''' + + def __init__(self, logpdf, grad_logpdf, device, xi, lb=-infty, ub=infty, use_lower=False, ns=50, **fargs): + ''' + initialize the upper (and if needed lower) hulls with the specified params + + Parameters + ========== + f: function that computes log(f(u,...)), for given u, where f(u) is proportional to the + density we want to sample from + fprima: d/du log(f(u,...)) + xi: ordered vector of starting points in wich log(f(u,...) is defined + to initialize the hulls + use_lower: True means the lower sqeezing will be used; which is more efficient + for drawing large numbers of samples + + + lb: lower bound of the domain + ub: upper bound of the domain + ns: maximum number of points defining the hulls + fargs: arguments for f and fprima + ''' + self.device = device + + self.lb = lb + self.ub = ub + + self.logpdf = logpdf + self.grad_logpdf = grad_logpdf + self.fargs = fargs + + #set limit on how many points to maintain on hull + self.ns = ns + self.xi = xi.to(self.device) # initialize x, the vector of absicassae at which the function h has been evaluated + self.B, self.K = self.xi.size() # hull size + self.h = torch.zeros(self.B, ns).to(self.device) + self.hprime = torch.zeros(self.B, ns).to(self.device) + self.x = torch.zeros(self.B, ns).to(self.device) + self.h[:, :self.K] = self.logpdf(self.xi, **self.fargs) + self.hprime[:, :self.K] = self.grad_logpdf(self.xi, **self.fargs) + self.x[:, :self.K] = self.xi + # Avoid under/overflow errors. the envelope and pdf are only + # proportional to the true pdf, so can choose any constant of proportionality. + self.offset = self.h.max(-1)[0].view(-1, 1) + self.h = self.h - self.offset + + # Derivative at first point in xi must be > 0 + # Derivative at last point in xi must be < 0 + if not (self.hprime[:, 0] > 0).all(): raise IOError('initial anchor points must span mode of PDF (left)') + if not (self.hprime[:, self.K-1] < 0).all(): raise IOError('initial anchor points must span mode of PDF (right)') + self.insert() + + + def sample(self, shape=torch.Size()): + ''' + Draw N samples and update upper and lower hulls accordingly + ''' + shape = shape if isinstance(shape, torch.Size) else torch.Size([shape]) + samples = torch.ones(self.B, *shape).to(self.device) + bool_mask = (torch.ones(self.B, *shape) == 1).to(self.device) + count = 0 + while bool_mask.sum() != 0: + count += 1 + xt, i = self.sampleUpper(shape) + ht = self.logpdf(xt, **self.fargs) + # hprimet = self.grad_logpdf(xt, **self.fargs) + ht = ht - self.offset + ut = self.h.gather(1, i) + (xt - self.x.gather(1, i)) * self.hprime.gather(1, i) + + # Accept sample? 
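Aside (usage sketch only, not part of the diff): the ARS sampler above only needs a log-concave log-density, its gradient, and anchor points that straddle the mode. The standard-normal target and anchor values below are made up for illustration, and the import assumes the Ghypeddings package layout.

import torch
from Ghypeddings.PVAE.distributions.ars import ARS

logpdf = lambda x: -0.5 * x.pow(2)            # standard normal up to a constant (log-concave)
grad_logpdf = lambda x: -x
xi = torch.tensor([[-1.0, 0.5, 1.0]])         # batch of 1; gradient positive left of the mode, negative right
sampler = ARS(logpdf, grad_logpdf, device='cpu', xi=xi, ns=20)
samples = sampler.sample(torch.Size([500]))   # shape [500, 1, 1] after the internal transpose/unsqueeze
print(samples.mean(), samples.std())          # roughly 0 and 1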
+ u = torch.rand(shape).to(self.device) + accept = u < torch.exp(ht - ut) + reject = ~accept + samples[bool_mask * accept] = xt[bool_mask * accept] + bool_mask[bool_mask * accept] = reject[bool_mask * accept] + # Update hull with new function evaluations + # if self.K < self.ns: + # nb_insert = self.ns - self.K + # self.insert(nb_insert, xt[:, :nb_insert], ht[:, :nb_insert], hprimet[:, :nb_insert]) + + return samples.t().unsqueeze(-1) + + + def insert(self, nbnew=0, xnew=None, hnew=None, hprimenew=None): + ''' + Update hulls with new point(s) if none given, just recalculate hull from existing x,h,hprime + # ''' + # if xnew is not None: + # self.x[:, self.K:self.K+nbnew] = xnew + # self.x, idx = self.x.sort() + # self.h[:, self.K:self.K+nbnew] = hnew + # self.h = self.h.gather(1, idx) + # self.hprime[:, self.K:self.K+nbnew] = hprimenew + # self.hprime = self.hprime.gather(1, idx) + + # self.K += xnew.size(-1) + + self.z = torch.zeros(self.B, self.K + 1).to(self.device) + self.z[:, 0] = self.lb; self.z[:, self.K] = self.ub + self.z[:, 1:self.K] = (diff(self.h[:, :self.K]) - diff(self.x[:, :self.K] * self.hprime[:, :self.K])) / -diff(self.hprime[:, :self.K]) + idx = [0]+list(range(self.K)) + self.u = self.h[:, idx] + self.hprime[:, idx] * (self.z-self.x[:, idx]) + + self.s = diff(torch.exp(self.u)) / self.hprime[:, :self.K] + self.s[self.hprime[:, :self.K] == 0.] = 0. # should be 0 when gradient is 0 + self.cs = torch.cat((torch.zeros(self.B, 1).to(self.device), torch.cumsum(self.s, dim=-1)), dim=-1) + self.cu = self.cs[:, -1] + + def sampleUpper(self, shape=torch.Size()): + ''' + Return a single value randomly sampled from the upper hull and index of segment + ''' + + u = torch.rand(self.B, *shape).to(self.device) + i = (self.cs/self.cu.unsqueeze(-1)).unsqueeze(-1) <= u.unsqueeze(1).expand(*self.cs.shape, *shape) + idx = i.sum(1) - 1 + + xt = self.x.gather(1, idx) + (-self.h.gather(1, idx) + torch.log(self.hprime.gather(1, idx)*(self.cu.unsqueeze(-1)*u - self.cs.gather(1, idx)) + + torch.exp(self.u.gather(1, idx)))) / self.hprime.gather(1, idx) + + return xt, idx diff --git a/PVAE/distributions/hyperbolic_radius.py b/PVAE/distributions/hyperbolic_radius.py new file mode 100644 index 0000000000000000000000000000000000000000..cf559a831fb0a963da2fe8f045a36ead6313abaf --- /dev/null +++ b/PVAE/distributions/hyperbolic_radius.py @@ -0,0 +1,295 @@ +import math +import torch +from torch.autograd import Function, grad +import torch.distributions as dist +from Ghypeddings.PVAE.utils import Constants, logsinh, log_sum_exp_signs, rexpand +from numbers import Number +from Ghypeddings.PVAE.distributions.ars import ARS + + +def cdf_r(value, scale, c, dim): + value = value.double() + scale = scale.double() + c = c.double() + + if dim == 2: + return 1 / torch.erf(c.sqrt() * scale / math.sqrt(2)) * .5 * \ + (2 * torch.erf(c.sqrt() * scale / math.sqrt(2)) + torch.erf((value - c.sqrt() * scale.pow(2)) / math.sqrt(2) / scale) - \ + torch.erf((c.sqrt() * scale.pow(2) + value) / math.sqrt(2) / scale)) + else: + device = value.device + + k_float = rexpand(torch.arange(dim), *value.size()).double().to(device) + dim = torch.tensor(dim).to(device).double() + + s1 = torch.lgamma(dim) - torch.lgamma(k_float + 1) - torch.lgamma(dim - k_float) \ + + (dim - 1 - 2 * k_float).pow(2) * c * scale.pow(2) / 2 \ + + torch.log( \ + torch.erf((value - (dim - 1 - 2 * k_float) * c.sqrt() * scale.pow(2)) / scale / math.sqrt(2)) \ + + torch.erf((dim - 1 - 2 * k_float) * c.sqrt() * scale / math.sqrt(2)) \ + ) + s2 = torch.lgamma(dim) - 
torch.lgamma(k_float + 1) - torch.lgamma(dim - k_float) \ + + (dim - 1 - 2 * k_float).pow(2) * c * scale.pow(2) / 2 \ + + torch.log1p(torch.erf((dim - 1 - 2 * k_float) * c.sqrt() * scale / math.sqrt(2))) + + signs = torch.tensor([1., -1.]).double().to(device).repeat(((int(dim)+1) // 2)*2)[:int(dim)] + signs = rexpand(signs, *value.size()) + + S1 = log_sum_exp_signs(s1, signs, dim=0) + S2 = log_sum_exp_signs(s2, signs, dim=0) + + output = torch.exp(S1 - S2) + zero_value_idx = value == 0. + output[zero_value_idx] = 0. + return output.float() + + +def grad_cdf_value_scale(value, scale, c, dim): + device = value.device + + dim = torch.tensor(int(dim)).to(device).double() + + signs = torch.tensor([1., -1.]).double().to(device).repeat(((int(dim)+1) // 2)*2)[:int(dim)] + signs = rexpand(signs, *value.size()) + k_float = rexpand(torch.arange(dim), *value.size()).double().to(device) + + log_arg1 = (dim - 1 - 2 * k_float).pow(2) * c * scale * \ + (\ + torch.erf((value - (dim - 1 - 2 * k_float) * c.sqrt() * scale.pow(2)) / scale / math.sqrt(2)) \ + + torch.erf((dim - 1 - 2 * k_float) * c.sqrt() * scale / math.sqrt(2)) \ + ) + + log_arg2 = math.sqrt(2 / math.pi) * ( \ + (dim - 1 - 2 * k_float) * c.sqrt() * torch.exp(-(dim - 1 - 2 * k_float).pow(2) * c * scale.pow(2) / 2) \ + - ((value / scale.pow(2) + (dim - 1 - 2 * k_float) * c.sqrt()) * torch.exp(-(value - (dim - 1 - 2 * k_float) * c.sqrt() * scale.pow(2)).pow(2) / (2 * scale.pow(2)))) \ + ) + + log_arg = log_arg1 + log_arg2 + sign_log_arg = torch.sign(log_arg) + + s = torch.lgamma(dim) - torch.lgamma(k_float + 1) - torch.lgamma(dim - k_float) \ + + (dim - 1 - 2 * k_float).pow(2) * c * scale.pow(2) / 2 \ + + torch.log(sign_log_arg * log_arg) + + log_grad_sum_sigma = log_sum_exp_signs(s, signs * sign_log_arg, dim=0) + grad_sum_sigma = torch.sum(signs * sign_log_arg * torch.exp(s), dim=0) + + s1 = torch.lgamma(dim) - torch.lgamma(k_float + 1) - torch.lgamma(dim - k_float) \ + + (dim - 1 - 2 * k_float).pow(2) * c * scale.pow(2) / 2 \ + + torch.log( \ + torch.erf((value - (dim - 1 - 2 * k_float) * c.sqrt() * scale.pow(2)) / scale / math.sqrt(2)) \ + + torch.erf((dim - 1 - 2 * k_float) * c.sqrt() * scale / math.sqrt(2)) \ + ) + + S1 = log_sum_exp_signs(s1, signs, dim=0) + grad_log_cdf_scale = grad_sum_sigma / S1.exp() + log_unormalised_prob = - value.pow(2) / (2 * scale.pow(2)) + (dim - 1) * logsinh(c.sqrt() * value) - (dim - 1) / 2 * c.log() + + with torch.autograd.enable_grad(): + scale = scale.float() + logZ = _log_normalizer_closed_grad.apply(scale, c, dim) + grad_logZ_scale = grad(logZ, scale, grad_outputs=torch.ones_like(scale)) + + grad_log_cdf_scale = - grad_logZ_scale[0] + 1 / scale + grad_log_cdf_scale.float() + cdf = cdf_r(value.double(), scale.double(), c.double(), int(dim)).float().squeeze(0) + grad_scale = cdf * grad_log_cdf_scale + + grad_value = (log_unormalised_prob.float() - logZ).exp() + return grad_value, grad_scale + + +class _log_normalizer_closed_grad(Function): + @staticmethod + def forward(ctx, scale, c, dim): + scale = scale.double() + c = c.double() + ctx.scale = scale.clone().detach() + ctx.c = c.clone().detach() + ctx.dim = dim + + device = scale.device + output = .5 * (Constants.logpi - Constants.log2) + scale.log() -(int(dim) - 1) * (c.log() / 2 + Constants.log2) + dim = torch.tensor(int(dim)).to(device).double() + + k_float = rexpand(torch.arange(int(dim)), *scale.size()).double().to(device) + s = torch.lgamma(dim) - torch.lgamma(k_float + 1) - torch.lgamma(dim - k_float) \ + + (dim - 1 - 2 * k_float).pow(2) * c * 
scale.pow(2) / 2 \ + + torch.log1p(torch.erf((dim - 1 - 2 * k_float) * c.sqrt() * scale / math.sqrt(2))) + signs = torch.tensor([1., -1.]).double().to(device).repeat(((int(dim)+1) // 2)*2)[:int(dim)] + signs = rexpand(signs, *scale.size()) + ctx.log_sum_term = log_sum_exp_signs(s, signs, dim=0) + output = output + ctx.log_sum_term + + return output.float() + + @staticmethod + def backward(ctx, grad_output): + grad_input = grad_output.clone() + + device = grad_input.device + scale = ctx.scale + c = ctx.c + dim = torch.tensor(int(ctx.dim)).to(device).double() + + k_float = rexpand(torch.arange(int(dim)), *scale.size()).double().to(device) + signs = torch.tensor([1., -1.]).double().to(device).repeat(((int(dim)+1) // 2)*2)[:int(dim)] + signs = rexpand(signs, *scale.size()) + + log_arg = (dim - 1 - 2 * k_float).pow(2) * c * scale * (1+torch.erf((dim - 1 - 2 * k_float) * c.sqrt() * scale / math.sqrt(2))) + \ + torch.exp(-(dim - 1 - 2 * k_float).pow(2) * c * scale.pow(2) / 2) * 2 / math.sqrt(math.pi) * (dim - 1 - 2 * k_float) * c.sqrt() / math.sqrt(2) + log_arg_signs = torch.sign(log_arg) + s = torch.lgamma(dim) - torch.lgamma(k_float + 1) - torch.lgamma(dim - k_float) \ + + (dim - 1 - 2 * k_float).pow(2) * c * scale.pow(2) / 2 \ + + torch.log(log_arg_signs * log_arg) + log_grad_sum_sigma = log_sum_exp_signs(s, log_arg_signs * signs, dim=0) + + grad_scale = torch.exp(log_grad_sum_sigma - ctx.log_sum_term) + grad_scale = 1 / ctx.scale + grad_scale + + grad_scale = (grad_input * grad_scale.float()).view(-1, *grad_input.shape).sum(0) + return (grad_scale, None, None) + + +class impl_rsample(Function): + @staticmethod + def forward(ctx, value, scale, c, dim): + ctx.scale = scale.clone().detach().double().requires_grad_(True) + ctx.value = value.clone().detach().double().requires_grad_(True) + ctx.c = c.clone().detach().double().requires_grad_(True) + ctx.dim = dim + return value + + @staticmethod + def backward(ctx, grad_output): + grad_input = grad_output.clone() + grad_cdf_value, grad_cdf_scale = grad_cdf_value_scale(ctx.value, ctx.scale, ctx.c, ctx.dim) + assert not torch.isnan(grad_cdf_value).any() + assert not torch.isnan(grad_cdf_scale).any() + grad_value_scale = -(grad_cdf_value).pow(-1) * grad_cdf_scale.expand(grad_input.shape) + grad_scale = (grad_input * grad_value_scale).view(-1, *grad_cdf_scale.shape).sum(0) + # grad_value_c = -(grad_cdf_value).pow(-1) * grad_cdf_c.expand(grad_input.shape) + # grad_c = (grad_input * grad_value_c).view(-1, *grad_cdf_c.shape).sum(0) + return (None, grad_scale, None, None) + + +class HyperbolicRadius(dist.Distribution): + support = dist.constraints.positive + has_rsample = True + + def __init__(self, dim, c, scale, ars=True, validate_args=None): + self.dim = dim + self.c = c + self.scale = scale + self.device = scale.device + self.ars = ars + if isinstance(scale, Number): + batch_shape = torch.Size() + else: + batch_shape = self.scale.size() + self.log_normalizer = self._log_normalizer() + if torch.isnan(self.log_normalizer).any() or torch.isinf(self.log_normalizer).any(): + print('nan or inf in log_normalizer', torch.cat((self.log_normalizer, self.scale), dim=1)) + raise + super(HyperbolicRadius, self).__init__(batch_shape) + + def rsample(self, sample_shape=torch.Size()): + value = self.sample(sample_shape) + return impl_rsample.apply(value, self.scale, self.c, self.dim) + + def sample(self, sample_shape=torch.Size()): + if sample_shape == torch.Size(): sample_shape=torch.Size([1]) + with torch.no_grad(): + mean = self.mean + stddev = self.stddev + if 
torch.isnan(stddev).any(): stddev[torch.isnan(stddev)] = self.scale[torch.isnan(stddev)] + if torch.isnan(mean).any(): mean[torch.isnan(mean)] = ((self.dim - 1) * self.scale.pow(2) * self.c.sqrt())[torch.isnan(mean)] + steps = torch.linspace(0.1, 3, 10).to(self.device) + steps = torch.cat((-steps.flip(0), steps)) + xi = [mean + s * torch.min(stddev, .95 * mean / 3) for s in steps] + xi = torch.cat(xi, dim=1) + ars = ARS(self.log_prob, self.grad_log_prob, self.device, xi=xi, ns=20, lb=0) + value = ars.sample(sample_shape) + return value + + def __while_loop(self, logM, proposal, sample_shape): + shape = self._extended_shape(sample_shape) + r, bool_mask = torch.ones(shape).to(self.device), (torch.ones(shape) == 1).to(self.device) + count = 0 + while bool_mask.sum() != 0: + count += 1 + r_ = proposal.sample(sample_shape).to(self.device) + u = torch.rand(shape).to(self.device) + log_ratio = self.log_prob(r_) - proposal.log_prob(r_) - logM + accept = log_ratio > torch.log(u) + reject = 1 - accept + r[bool_mask * accept] = r_[bool_mask * accept] + bool_mask[bool_mask * accept] = reject[bool_mask * accept] + return r + + def log_prob(self, value): + res = - value.pow(2) / (2 * self.scale.pow(2)) + (self.dim - 1) * logsinh(self.c.sqrt() * value) \ + - (self.dim - 1) / 2 * self.c.log() - self.log_normalizer#.expand(value.shape) + assert not torch.isnan(res).any() + return res + + def grad_log_prob(self, value): + res = - value / self.scale.pow(2) + (self.dim - 1) * self.c.sqrt() * torch.cosh(self.c.sqrt() * value) / torch.sinh(self.c.sqrt() * value) + return res + + def cdf(self, value): + return cdf_r(value, self.scale, self.c, self.dim) + + @property + def mean(self): + c = self.c.double() + scale = self.scale.double() + dim = torch.tensor(int(self.dim)).double().to(self.device) + signs = torch.tensor([1., -1.]).double().to(self.device).repeat(((self.dim+1) // 2)*2)[:self.dim].unsqueeze(-1).unsqueeze(-1).expand(self.dim, *self.scale.size()) + + k_float = rexpand(torch.arange(self.dim), *self.scale.size()).double().to(self.device) + s2 = torch.lgamma(dim) - torch.lgamma(k_float + 1) - torch.lgamma(dim - k_float) \ + + (dim - 1 - 2 * k_float).pow(2) * c * scale.pow(2) / 2 \ + + torch.log1p(torch.erf((dim - 1 - 2 * k_float) * c.sqrt() * scale / math.sqrt(2))) + S2 = log_sum_exp_signs(s2, signs, dim=0) + + log_arg = (dim - 1 - 2 * k_float) * c.sqrt() * scale.pow(2) * (1 + torch.erf((dim - 1 - 2 * k_float) * c.sqrt() * scale / math.sqrt(2))) + \ + torch.exp(-(dim - 1 - 2 * k_float).pow(2) * c * scale.pow(2) / 2) * scale * math.sqrt(2 / math.pi) + log_arg_signs = torch.sign(log_arg) + s1 = torch.lgamma(dim) - torch.lgamma(k_float + 1) - torch.lgamma(dim - k_float) \ + + (dim - 1 - 2 * k_float).pow(2) * c * scale.pow(2) / 2 \ + + torch.log(log_arg_signs * log_arg) + S1 = log_sum_exp_signs(s1, signs * log_arg_signs, dim=0) + + output = torch.exp(S1 - S2) + return output.float() + + @property + def variance(self): + c = self.c.double() + scale = self.scale.double() + dim = torch.tensor(int(self.dim)).double().to(self.device) + signs = torch.tensor([1., -1.]).double().to(self.device).repeat(((int(dim)+1) // 2)*2)[:int(dim)].unsqueeze(-1).unsqueeze(-1).expand(int(dim), *self.scale.size()) + + k_float = rexpand(torch.arange(self.dim), *self.scale.size()).double().to(self.device) + s2 = torch.lgamma(dim) - torch.lgamma(k_float + 1) - torch.lgamma(dim - k_float) \ + + (dim - 1 - 2 * k_float).pow(2) * c * scale.pow(2) / 2 \ + + torch.log1p(torch.erf((dim - 1 - 2 * k_float) * c.sqrt() * scale / math.sqrt(2))) + 
S2 = log_sum_exp_signs(s2, signs, dim=0) + + log_arg = (1 + (dim - 1 - 2 * k_float).pow(2) * c * scale.pow(2)) * (1 + torch.erf((dim - 1 - 2 * k_float) * c.sqrt() * scale / math.sqrt(2))) + \ + (dim - 1 - 2 * k_float) * c.sqrt() * torch.exp(-(dim - 1 - 2 * k_float).pow(2) * c * scale.pow(2) / 2) * scale * math.sqrt(2 / math.pi) + log_arg_signs = torch.sign(log_arg) + s1 = torch.lgamma(dim) - torch.lgamma(k_float + 1) - torch.lgamma(dim - k_float) \ + + (dim - 1 - 2 * k_float).pow(2) * c * scale.pow(2) / 2 \ + + 2 * scale.log() \ + + torch.log(log_arg_signs * log_arg) + S1 = log_sum_exp_signs(s1, signs * log_arg_signs, dim=0) + + output = torch.exp(S1 - S2) + output = output.float() - self.mean.pow(2) + return output + + @property + def stddev(self): return self.variance.sqrt() + + def _log_normalizer(self): return _log_normalizer_closed_grad.apply(self.scale, self.c, self.dim) diff --git a/PVAE/distributions/hyperspherical_uniform.py b/PVAE/distributions/hyperspherical_uniform.py new file mode 100644 index 0000000000000000000000000000000000000000..8a31f12840af77f161816e0c3b2cc8fdaede3020 --- /dev/null +++ b/PVAE/distributions/hyperspherical_uniform.py @@ -0,0 +1,42 @@ +import math +import torch +from torch.distributions.utils import _standard_normal + +class HypersphericalUniform(torch.distributions.Distribution): + """ source: https://github.com/nicola-decao/s-vae-pytorch/blob/master/hyperspherical_vae/distributions/von_mises_fisher.py """ + + support = torch.distributions.constraints.real + has_rsample = False + _mean_carrier_measure = 0 + + @property + def dim(self): + return self._dim + + def __init__(self, dim, device='cpu', validate_args=None): + super(HypersphericalUniform, self).__init__(torch.Size([dim]), validate_args=validate_args) + self._dim = dim + self._device = device + + def sample(self, shape=torch.Size()): + with torch.no_grad(): + return self.rsample(shape) + + def rsample(self, sample_shape=torch.Size()): + shape = torch.Size([*sample_shape, self._dim + 1]) + output = _standard_normal(shape, dtype=torch.float, device=self._device) + + return output / output.norm(dim=-1, keepdim=True) + + def entropy(self): + return self.__log_surface_area() + + def log_prob(self, x): + return - torch.ones(x.shape[:-1]).to(self._device) * self._log_normalizer() + + def _log_normalizer(self): + return self._log_surface_area().to(self._device) + + def _log_surface_area(self): + return math.log(2) + ((self._dim + 1) / 2) * math.log(math.pi) - torch.lgamma( + torch.Tensor([(self._dim + 1) / 2])) diff --git a/PVAE/distributions/riemannian_normal.py b/PVAE/distributions/riemannian_normal.py new file mode 100644 index 0000000000000000000000000000000000000000..ea59144a3c11fa2be735c4c581de0269d84948d8 --- /dev/null +++ b/PVAE/distributions/riemannian_normal.py @@ -0,0 +1,49 @@ +import torch +import torch.distributions as dist +from torch.distributions import constraints +from numbers import Number +from Ghypeddings.PVAE.distributions.hyperbolic_radius import HyperbolicRadius +from Ghypeddings.PVAE.distributions.hyperspherical_uniform import HypersphericalUniform + + +class RiemannianNormal(dist.Distribution): + arg_constraints = {'loc': dist.constraints.interval(-1, 1), 'scale': dist.constraints.positive} + support = dist.constraints.interval(-1, 1) + has_rsample = True + + @property + def mean(self): + return self.loc + + def __init__(self, loc, scale, manifold, validate_args=None): + assert not (torch.isnan(loc).any() or torch.isnan(scale).any()) + self.manifold = manifold + self.loc = loc + 
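Aside (quick check, not part of the diff): the HypersphericalUniform direction distribution above should return samples on the unit sphere with a constant log-density equal to minus the log surface area; for dim=2 that is -log(4*pi). The import assumes the Ghypeddings package layout.

import torch
from Ghypeddings.PVAE.distributions.hyperspherical_uniform import HypersphericalUniform

d = HypersphericalUniform(dim=2, device='cpu')   # uniform on S^2, embedded in R^3
x = d.sample(torch.Size([5]))                    # shape [5, 3]
print(x.norm(dim=-1))                            # all ones
print(d.log_prob(x))                             # constant, about -2.531 = -log(4*pi)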
self.manifold.assert_check_point_on_manifold(self.loc) + self.scale = scale.clamp(min=0.1, max=7.) + self.radius = HyperbolicRadius(manifold.dim, manifold.c, self.scale) + self.direction = HypersphericalUniform(manifold.dim - 1, device=loc.device) + if isinstance(loc, Number) and isinstance(scale, Number): + batch_shape = torch.Size() + else: + batch_shape = self.loc.size() + super(RiemannianNormal, self).__init__(batch_shape, validate_args=validate_args) + + def sample(self, shape=torch.Size()): + with torch.no_grad(): + return self.rsample(shape) + + def rsample(self, sample_shape=torch.Size()): + shape = self._extended_shape(sample_shape) + alpha = self.direction.sample(torch.Size([*shape[:-1]])) + radius = self.radius.rsample(sample_shape) + # u = radius * alpha / self.manifold.lambda_x(self.loc, keepdim=True) + # res = self.manifold.expmap(self.loc, u) + res = self.manifold.expmap_polar(self.loc, alpha, radius) + return res + + def log_prob(self, value): + loc = self.loc.expand(value.shape) + radius_sq = self.manifold.dist(loc, value, keepdim=True).pow(2) + res = - radius_sq / 2 / self.scale.pow(2) - self.direction._log_normalizer() - self.radius.log_normalizer + return res diff --git a/PVAE/distributions/wrapped_normal.py b/PVAE/distributions/wrapped_normal.py new file mode 100644 index 0000000000000000000000000000000000000000..29566d92498a14c25d860d8ba3450780282b70c1 --- /dev/null +++ b/PVAE/distributions/wrapped_normal.py @@ -0,0 +1,65 @@ +import torch +from torch.nn import functional as F +from torch.distributions import Normal, Independent +from numbers import Number +from torch.distributions.utils import _standard_normal, broadcast_all + + +class WrappedNormal(torch.distributions.Distribution): + + arg_constraints = {'loc': torch.distributions.constraints.real, + 'scale': torch.distributions.constraints.positive} + support = torch.distributions.constraints.real + has_rsample = True + _mean_carrier_measure = 0 + + @property + def mean(self): + return self.loc + + @property + def stddev(self): + raise NotImplementedError + + @property + def scale(self): + return F.softplus(self._scale) if self.softplus else self._scale + + def __init__(self, loc, scale, manifold, validate_args=None, softplus=False): + self.dtype = loc.dtype + self.softplus = softplus + self.loc, self._scale = broadcast_all(loc, scale) + self.manifold = manifold + self.manifold.assert_check_point_on_manifold(self.loc) + self.device = loc.device + if isinstance(loc, Number) and isinstance(scale, Number): + batch_shape, event_shape = torch.Size(), torch.Size() + else: + batch_shape = self.loc.shape[:-1] + event_shape = torch.Size([self.manifold.dim]) + super(WrappedNormal, self).__init__(batch_shape, event_shape, validate_args=validate_args) + + def sample(self, shape=torch.Size()): + with torch.no_grad(): + return self.rsample(shape) + + def rsample(self, sample_shape=torch.Size()): + shape = self._extended_shape(sample_shape) + v = self.scale * _standard_normal(shape, dtype=self.loc.dtype, device=self.loc.device) + self.manifold.assert_check_vector_on_tangent(self.manifold.zero, v) + v = v / self.manifold.lambda_x(self.manifold.zero, keepdim=True) + u = self.manifold.transp(self.manifold.zero, self.loc, v) + z = self.manifold.expmap(self.loc, u) + return z + + def log_prob(self, x): + shape = x.shape + loc = self.loc.unsqueeze(0).expand(x.shape[0], *self.batch_shape, self.manifold.coord_dim) + if len(shape) < len(loc.shape): x = x.unsqueeze(1) + v = self.manifold.logmap(loc, x) + v = self.manifold.transp(loc, 
self.manifold.zero, v) + u = v * self.manifold.lambda_x(self.manifold.zero, keepdim=True) + norm_pdf = Normal(torch.zeros_like(self.scale), self.scale).log_prob(u).sum(-1, keepdim=True) + logdetexp = self.manifold.logdetexp(loc, x, keepdim=True) + result = norm_pdf - logdetexp + return result diff --git a/PVAE/manifolds/__init__.py b/PVAE/manifolds/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..cd1d10798fac6c83bf106a00d054ff2073df7b52 --- /dev/null +++ b/PVAE/manifolds/__init__.py @@ -0,0 +1,4 @@ +from Ghypeddings.PVAE.manifolds.euclidean import Euclidean +from Ghypeddings.PVAE.manifolds.poincareball import PoincareBall + +__all__ = [Euclidean, PoincareBall] \ No newline at end of file diff --git a/PVAE/manifolds/euclidean.py b/PVAE/manifolds/euclidean.py new file mode 100644 index 0000000000000000000000000000000000000000..a0b362b7818847a4928b6cc647c90f95de6847fc --- /dev/null +++ b/PVAE/manifolds/euclidean.py @@ -0,0 +1,42 @@ +import torch +from geoopt.manifolds import Euclidean as EuclideanParent + + +class Euclidean(EuclideanParent): + + def __init__(self, dim, c=0.): + super().__init__(1) + self.register_buffer("dim", torch.as_tensor(dim, dtype=torch.int)) + self.register_buffer("c", torch.as_tensor(c, dtype=torch.get_default_dtype())) + + @property + def coord_dim(self): + return int(self.dim) + + @property + def device(self): + return self.c.device + + @property + def zero(self): + return torch.zeros(1, self.dim).to(self.device) + + def logdetexp(self, x, y, is_vector=False, keepdim=False): + result = torch.zeros(x.shape[:-1]).to(x) + if keepdim: result = result.unsqueeze(-1) + return result + + def expmap0(self, u): + return u + + def logmap0(self, u): + return u + + def proju0(self, u): + return self.proju(self.zero.expand_as(u), u) + + def transp0(self, x, u): + return self.transp(self.zero.expand_as(u), x, u) + + def lambda_x(self, x, *, keepdim=False, dim=-1): + return torch.ones_like(x.sum(dim=dim, keepdim=keepdim)) diff --git a/PVAE/manifolds/poincareball.py b/PVAE/manifolds/poincareball.py new file mode 100644 index 0000000000000000000000000000000000000000..924511de237cf5d038ef82d39b7be0e6cb30503a --- /dev/null +++ b/PVAE/manifolds/poincareball.py @@ -0,0 +1,84 @@ +import torch +from geoopt.manifolds import PoincareBall as PoincareBallParent +from geoopt.manifolds.stereographic.math import _lambda_x, arsinh, tanh + +MIN_NORM = 1e-15 + + +class PoincareBall(PoincareBallParent): + + def __init__(self, dim, c=1.0): + super().__init__(c) + self.register_buffer("dim", torch.as_tensor(dim, dtype=torch.int)) + + def proju0(self, u): + return self.proju(self.zero.expand_as(u), u) + + @property + def coord_dim(self): + return int(self.dim) + + @property + def device(self): + return self.c.device + + @property + def zero(self): + return torch.zeros(1, self.dim).to(self.device) + + def logdetexp(self, x, y, is_vector=False, keepdim=False): + d = self.norm(x, y, keepdim=keepdim) if is_vector else self.dist(x, y, keepdim=keepdim) + d[d == 0] = 1e-15 + return (self.dim - 1) * (torch.sinh(self.c.sqrt()*d) / self.c.sqrt() / d).log() + + def inner(self, x, u, v=None, *, keepdim=False, dim=-1): + if v is None: v = u + return _lambda_x(x, self.c, keepdim=keepdim, dim=dim) ** 2 * (u * v).sum( + dim=dim, keepdim=keepdim + ) + + def expmap_polar(self, x, u, r, dim: int = -1): + sqrt_c = self.c ** 0.5 + u_norm = u.norm(dim=dim, p=2, keepdim=True).clamp_min(MIN_NORM) + second_term = ( + tanh(sqrt_c / 2 * r) + * u + / (sqrt_c * u_norm) + ) + gamma_1 = 
self.mobius_add(x, second_term, dim=dim) + return gamma_1 + + def normdist2plane(self, x, a, p, keepdim: bool = False, signed: bool = False, dim: int = -1, norm: bool = False): + c = self.c + sqrt_c = c ** 0.5 + diff = self.mobius_add(-p, x, dim=dim) + diff_norm2 = diff.pow(2).sum(dim=dim, keepdim=keepdim).clamp_min(MIN_NORM) + sc_diff_a = (diff * a).sum(dim=dim, keepdim=keepdim) + if not signed: + sc_diff_a = sc_diff_a.abs() + a_norm = a.norm(dim=dim, keepdim=keepdim, p=2).clamp_min(MIN_NORM) + num = 2 * sqrt_c * sc_diff_a + denom = (1 - c * diff_norm2) * a_norm + res = arsinh(num / denom.clamp_min(MIN_NORM)) / sqrt_c + if norm: + res = res * a_norm# * self.lambda_x(a, dim=dim, keepdim=keepdim) + return res + + + +class PoincareBallExact(PoincareBall): + __doc__ = r""" + See Also + -------- + :class:`PoincareBall` + Notes + ----- + The implementation of retraction is an exact exponential map, this retraction will be used in optimization + """ + + retr_transp = PoincareBall.expmap_transp + transp_follow_retr = PoincareBall.transp_follow_expmap + retr = PoincareBall.expmap + + def extra_repr(self): + return "exact" diff --git a/PVAE/models/__init__.py b/PVAE/models/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..bdb822cc3ef52a1adbb1e24f356b3857d5479067 --- /dev/null +++ b/PVAE/models/__init__.py @@ -0,0 +1,2 @@ +from Ghypeddings.PVAE.models.tabular import Tabular +__all__ = [Tabular] \ No newline at end of file diff --git a/PVAE/models/architectures.py b/PVAE/models/architectures.py new file mode 100644 index 0000000000000000000000000000000000000000..92a049661f17533ce909b5b3f3fe4f1b79c53595 --- /dev/null +++ b/PVAE/models/architectures.py @@ -0,0 +1,180 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from numpy import prod +from Ghypeddings.PVAE.utils import Constants +from Ghypeddings.PVAE.ops.manifold_layers import GeodesicLayer, MobiusLayer, LogZero, ExpZero +from torch.nn.modules.module import Module + +def get_dim_act(args): + """ + Helper function to get dimension and activation at every layer. + :param args: + :return: + """ + if not args.act: + act = lambda x: x + else: + act = getattr(F, args.act) + acts = [act] * (args.num_layers - 1) + dims = [args.feat_dim] + ([args.hidden_dim] * (args.num_layers - 1)) + + return dims, acts + + +class Encoder(nn.Module): + """ + Encoder abstract class. + """ + + def __init__(self, c): + super(Encoder, self).__init__() + self.c = c + + def encode(self, x, adj): + input = (x, adj) + output, _ = self.layers.forward(input) + return output + +class GraphConvolution(Module): + """ + Simple GCN layer. + """ + + def __init__(self, in_features, out_features, dropout, act, use_bias): + super(GraphConvolution, self).__init__() + self.dropout = dropout + self.linear = nn.Linear(in_features, out_features, use_bias) + self.act = act + self.in_features = in_features + self.out_features = out_features + + def forward(self, input): + x, adj = input + hidden = self.linear.forward(x) + hidden = F.dropout(hidden, self.dropout, training=self.training) + if adj.is_sparse: + support = torch.spmm(adj, hidden) + else: + support = torch.mm(adj, hidden) + output = self.act(support), adj + return output + + def extra_repr(self): + return 'input_dim={}, output_dim={}'.format( + self.in_features, self.out_features + ) + +class GCN(Encoder): + """ + Graph Convolution Networks. 
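+ A stack of ``num_layers - 1`` GraphConvolution layers whose dimensions and activations come from get_dim_act(args); Encoder.encode pushes the (features, adj) pair through this stack.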
+ """ + + def __init__(self, c, args): + super(GCN, self).__init__(c) + assert args.num_layers > 0 + dims, acts = get_dim_act(args) + gc_layers = [] + for i in range(len(dims) - 1): + in_dim, out_dim = dims[i], dims[i + 1] + act = acts[i] + gc_layers.append(GraphConvolution(in_dim, out_dim, args.dropout, act, args.bias)) + self.layers = nn.Sequential(*gc_layers) + + +def extra_hidden_layer(hidden_dim, non_lin): + return nn.Sequential(nn.Linear(hidden_dim, hidden_dim), non_lin) + +class EncWrapped(nn.Module): + """ Usual encoder followed by an exponential map """ + def __init__(self,c,args, manifold, data_size, non_lin, num_hidden_layers, hidden_dim, prior_iso): + super(EncWrapped, self).__init__() + self.manifold = manifold + self.data_size = data_size + self.enc = GCN(c,args) + self.fc21 = nn.Linear(hidden_dim, manifold.coord_dim) + self.fc22 = nn.Linear(hidden_dim, manifold.coord_dim if not prior_iso else 1) + + def forward(self,adj,x): + e = self.enc.encode(x,adj) + mu = self.fc21(e) # flatten data + mu = self.manifold.expmap0(mu) + return mu, F.softplus(self.fc22(e)) + Constants.eta, self.manifold + + +class DecWrapped(nn.Module): + """ Usual encoder preceded by a logarithm map """ + def __init__(self, manifold, data_size, non_lin, num_hidden_layers, hidden_dim): + super(DecWrapped, self).__init__() + self.data_size = data_size + self.manifold = manifold + modules = [] + modules.append(nn.Sequential(nn.Linear(manifold.coord_dim, hidden_dim), non_lin)) + modules.extend([extra_hidden_layer(hidden_dim, non_lin) for _ in range(num_hidden_layers - 1)]) + self.dec = nn.Sequential(*modules) + # self.fc31 = nn.Linear(hidden_dim, prod(data_size)) + self.fc31 = nn.Linear(hidden_dim, data_size[1]) + + def forward(self, z): + z = self.manifold.logmap0(z) + d = self.dec(z) + # mu = self.fc31(d).view(*z.size()[:-1], *self.data_size) # reshape data + mu = self.fc31(d).view(*z.size()[:-1], 1, self.data_size[1]) + return mu, torch.ones_like(mu) + + +class DecGeo(nn.Module): + """ First layer is a Hypergyroplane followed by usual decoder """ + def __init__(self, manifold, data_size, non_lin, num_hidden_layers, hidden_dim): + super(DecGeo, self).__init__() + self.data_size = data_size + modules = [] + modules.append(nn.Sequential(GeodesicLayer(manifold.coord_dim, hidden_dim, manifold), non_lin)) + modules.extend([extra_hidden_layer(hidden_dim, non_lin) for _ in range(num_hidden_layers - 1)]) + self.dec = nn.Sequential(*modules) + self.fc31 = nn.Linear(hidden_dim, data_size[1]) + + def forward(self, z): + d = self.dec(z) + # mu = self.fc31(d).view(*z.size()[:-1], *self.data_size) # reshape data + mu = self.fc31(d).view(*z.size()[:-1], 1, self.data_size[1]) + return mu, torch.ones_like(mu) + + +class EncMob(nn.Module): + """ Last layer is a Mobius layers """ + def __init__(self,c,args, manifold, data_size, non_lin, num_hidden_layers, hidden_dim, prior_iso): + super(EncMob, self).__init__() + self.manifold = manifold + self.data_size = data_size + # modules = [] + # modules.append(nn.Sequential(nn.Linear(data_size[1], hidden_dim), non_lin)) + # modules.extend([extra_hidden_layer(hidden_dim, non_lin) for _ in range(num_hidden_layers - 1)]) + # self.enc = nn.Sequential(*modules) + self.enc = GCN(c,args) + self.fc21 = MobiusLayer(hidden_dim, manifold.coord_dim, manifold) + self.fc22 = nn.Linear(hidden_dim, manifold.coord_dim if not prior_iso else 1) + + def forward(self,adj,x): + #e = self.enc(x.view(*x.size()[:-len(self.data_size)], -1)) # flatten data + e = self.enc.encode(x,adj) + mu = self.fc21(e) # 
flatten data + mu = self.manifold.expmap0(mu) + return mu, F.softplus(self.fc22(e)) + Constants.eta, self.manifold + + +class DecMob(nn.Module): + """ First layer is a Mobius Matrix multiplication """ + def __init__(self, manifold, data_size, non_lin, num_hidden_layers, hidden_dim): + super(DecMob, self).__init__() + self.data_size = data_size + modules = [] + modules.append(nn.Sequential(MobiusLayer(manifold.coord_dim, hidden_dim, manifold), LogZero(manifold), non_lin)) + modules.extend([extra_hidden_layer(hidden_dim, non_lin) for _ in range(num_hidden_layers - 1)]) + self.dec = nn.Sequential(*modules) + self.fc31 = nn.Linear(hidden_dim, prod(data_size)) + + def forward(self, z): + d = self.dec(z) + mu = self.fc31(d).view(*z.size()[:-1], *self.data_size) # reshape data + return mu, torch.ones_like(mu) diff --git a/PVAE/models/tabular.py b/PVAE/models/tabular.py new file mode 100644 index 0000000000000000000000000000000000000000..2c5b4d571562880727795fea74d2e8560b793624 --- /dev/null +++ b/PVAE/models/tabular.py @@ -0,0 +1,36 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +import torch.distributions as dist +from torch.utils.data import DataLoader + +import math +from Ghypeddings.PVAE.models.vae import VAE + +from Ghypeddings.PVAE.distributions import RiemannianNormal, WrappedNormal +from torch.distributions import Normal +import Ghypeddings.PVAE.manifolds as manifolds +from Ghypeddings.PVAE.models.architectures import EncWrapped, DecWrapped, EncMob, DecMob, DecGeo +from Ghypeddings.PVAE.utils import get_activation + +class Tabular(VAE): + """ Derive a specific sub-class of a VAE for tabular data. """ + def __init__(self, params): + c = nn.Parameter(params.c * torch.ones(1), requires_grad=False) + manifold = getattr(manifolds, 'PoincareBall')(params.dim, c) + super(Tabular, self).__init__( + eval(params.prior), # prior distribution + eval(params.posterior), # posterior distribution + dist.Normal, # likelihood distribution + eval('Enc' + params.enc)(params.c,params,manifold, params.data_size, get_activation(params), params.num_layers, params.hidden_dim, params.prior_iso), + eval('Dec' + params.dec)(manifold, params.data_size, get_activation(params), params.num_layers, params.hidden_dim), + params + ) + self.manifold = manifold + self._pz_mu = nn.Parameter(torch.zeros(1, params.dim), requires_grad=False) + self._pz_logvar = nn.Parameter(torch.zeros(1, 1), requires_grad=params.learn_prior_std) + self.modelName = 'Tabular' + + @property + def pz_params(self): + return self._pz_mu.mul(1), F.softplus(self._pz_logvar).div(math.log(2)).mul(self.prior_std), self.manifold \ No newline at end of file diff --git a/PVAE/models/vae.py b/PVAE/models/vae.py new file mode 100644 index 0000000000000000000000000000000000000000..2cb79df5b30a7370118d1d77aa88f47c4b341e2f --- /dev/null +++ b/PVAE/models/vae.py @@ -0,0 +1,63 @@ +# Base VAE class definition + +import math +import torch +import torch.nn as nn +import torch.nn.functional as F +import torch.distributions as dist +from Ghypeddings.PVAE.utils import get_mean_param + +class VAE(nn.Module): + def __init__(self, prior_dist, posterior_dist, likelihood_dist, enc, dec, params): + super(VAE, self).__init__() + self.pz = prior_dist + self.px_z = likelihood_dist + self.qz_x = posterior_dist + self.enc = enc + self.dec = dec + self.modelName = None + self.params = params + self.data_size = params.data_size + self.prior_std = params.prior_std + + if self.px_z == dist.RelaxedBernoulli: + self.px_z.log_prob = lambda self, value: \ + 
-F.binary_cross_entropy_with_logits( + self.probs if value.dim() <= self.probs.dim() else self.probs.expand_as(value), + value.expand(self.batch_shape) if value.dim() <= self.probs.dim() else value, + reduction='none' + ) + + def generate(self, N, K): + self.eval() + with torch.no_grad(): + mean_pz = get_mean_param(self.pz_params) + mean = get_mean_param(self.dec(mean_pz)) + px_z_params = self.dec(self.pz(*self.pz_params).sample(torch.Size([N]))) + means = get_mean_param(px_z_params) + samples = self.px_z(*px_z_params).sample(torch.Size([K])) + + return mean, \ + means.view(-1, *means.size()[2:]), \ + samples.view(-1, *samples.size()[3:]) + + def reconstruct(self, data , edge_index): + self.eval() + with torch.no_grad(): + qz_x = self.qz_x(*self.enc(edge_index,data)) + px_z_params = self.dec(qz_x.rsample(torch.Size([1])).squeeze(0)) + + return get_mean_param(px_z_params) + + def forward(self, x , edge_index, K=1): + embeddings = self.enc(edge_index,x) + qz_x = self.qz_x(*embeddings) + zs = qz_x.rsample(torch.Size([K])) + px_z = self.px_z(*self.dec(zs)) + return qz_x, px_z, zs , embeddings + + @property + def pz_params(self): + return self._pz_mu.mul(1), F.softplus(self._pz_logvar).div(math.log(2)).mul(self.prior_std_scale) + + def init_last_layer_bias(self, dataset): pass diff --git a/PVAE/objectives.py b/PVAE/objectives.py new file mode 100644 index 0000000000000000000000000000000000000000..fd9afeabd8f589bb33659f7f7b1aae36264e4159 --- /dev/null +++ b/PVAE/objectives.py @@ -0,0 +1,46 @@ +import torch +import torch.distributions as dist +from numpy import prod +from Ghypeddings.PVAE.utils import has_analytic_kl, log_mean_exp +import torch.nn.functional as F + +def vae_objective(model, idx, x , graph, K=1, beta=1.0, components=False, analytical_kl=False, **kwargs): + """Computes E_{p(x)}[ELBO] """ + qz_x, px_z, zs , embeddings = model(x, graph,K) + _, B, D = zs.size() + flat_rest = torch.Size([*px_z.batch_shape[:2], -1]) + x = x.unsqueeze(0).unsqueeze(2) + lpx_z = px_z.log_prob(x.expand(px_z.batch_shape)).view(flat_rest).sum(-1) + pz = model.pz(*model.pz_params) + kld = dist.kl_divergence(qz_x, pz).unsqueeze(0).sum(-1) if \ + has_analytic_kl(type(qz_x), model.pz) and analytical_kl else \ + qz_x.log_prob(zs).sum(-1) - pz.log_prob(zs).sum(-1) + lpx_z_selected = lpx_z[:, idx] + kld_selected = kld[:, idx] + obj = -lpx_z_selected.mean(0).sum() + beta * kld_selected.mean(0).sum() + return (qz_x, px_z, lpx_z_selected, kld_selected, obj , embeddings) if components else obj + +def _iwae_objective_vec(model, x, K): + """Helper for IWAE estimate for log p_\theta(x) -- full vectorisation.""" + qz_x, px_z, zs = model(x, K) + flat_rest = torch.Size([*px_z.batch_shape[:2], -1]) + lpz = model.pz(*model.pz_params).log_prob(zs).sum(-1) + lpx_z = px_z.log_prob(x.expand(zs.size(0), *x.size())).view(flat_rest).sum(-1) + lqz_x = qz_x.log_prob(zs).sum(-1) + obj = lpz.squeeze(-1) + lpx_z.view(lpz.squeeze(-1).shape) - lqz_x.squeeze(-1) + return -log_mean_exp(obj).sum() + + +def iwae_objective(model, x, K): + """Computes an importance-weighted ELBO estimate for log p_\theta(x) + Iterates over the batch as necessary. 
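+ Each chunk is scored with the estimator log (1/K) sum_k p(z_k) p(x|z_k) / q(z_k|x), z_k ~ q(z|x), computed in _iwae_objective_vec.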
+ Appropriate negation (for minimisation) happens in the helper + """ + split_size = int(x.size(0) / (K * prod(x.size()) / (3e7))) # rough heuristic + if split_size >= x.size(0): + obj = _iwae_objective_vec(model, x, K) + else: + obj = 0 + for bx in x.split(split_size): + obj = obj + _iwae_objective_vec(model, bx, K) + return obj diff --git a/PVAE/ops/__init__.py b/PVAE/ops/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/PVAE/ops/manifold_layers.py b/PVAE/ops/manifold_layers.py new file mode 100644 index 0000000000000000000000000000000000000000..643d80fa75b50dc7a9e92ff9a3a7b305afb440cd --- /dev/null +++ b/PVAE/ops/manifold_layers.py @@ -0,0 +1,90 @@ +import math +import torch +from torch import nn +from torch.nn.parameter import Parameter +from torch.nn import init +from Ghypeddings.PVAE.manifolds import PoincareBall, Euclidean +from geoopt import ManifoldParameter + + +class RiemannianLayer(nn.Module): + def __init__(self, in_features, out_features, manifold, over_param, weight_norm): + super(RiemannianLayer, self).__init__() + self.in_features = in_features + self.out_features = out_features + self.manifold = manifold + + self._weight = Parameter(torch.Tensor(out_features, in_features)) + self.over_param = over_param + self.weight_norm = weight_norm + if self.over_param: + self._bias = ManifoldParameter(torch.Tensor(out_features, in_features), manifold=manifold) + else: + self._bias = Parameter(torch.Tensor(out_features, 1)) + self.reset_parameters() + + @property + def weight(self): + return self.manifold.transp0(self.bias, self._weight) # weight \in T_0 => weight \in T_bias + + @property + def bias(self): + if self.over_param: + return self._bias + else: + return self.manifold.expmap0(self._weight * self._bias) # reparameterisation of a point on the manifold + + def reset_parameters(self): + init.kaiming_normal_(self._weight, a=math.sqrt(5)) + fan_in, _ = init._calculate_fan_in_and_fan_out(self._weight) + bound = 4 / math.sqrt(fan_in) + init.uniform_(self._bias, -bound, bound) + if self.over_param: + with torch.no_grad(): self._bias.set_(self.manifold.expmap0(self._bias)) + + +class GeodesicLayer(RiemannianLayer): + def __init__(self, in_features, out_features, manifold, over_param=False, weight_norm=False): + super(GeodesicLayer, self).__init__(in_features, out_features, manifold, over_param, weight_norm) + + def forward(self, input): + input = input.unsqueeze(-2).expand(*input.shape[:-(len(input.shape) - 2)], self.out_features, self.in_features) + res = self.manifold.normdist2plane(input, self.bias, self.weight, + signed=True, norm=self.weight_norm) + return res + + +class Linear(nn.Linear): + def __init__(self, in_features, out_features, **kwargs): + super(Linear, self).__init__( + in_features, + out_features, + ) + + +class MobiusLayer(RiemannianLayer): + def __init__(self, in_features, out_features, manifold, over_param=False, weight_norm=False): + super(MobiusLayer, self).__init__(in_features, out_features, manifold, over_param, weight_norm) + + def forward(self, input): + res = self.manifold.mobius_matvec(self.weight, input) + return res + + +class ExpZero(nn.Module): + def __init__(self, manifold): + super(ExpZero, self).__init__() + self.manifold = manifold + + def forward(self, input): + return self.manifold.expmap0(input) + + +class LogZero(nn.Module): + def __init__(self, manifold): + super(LogZero, self).__init__() + self.manifold = manifold + + def forward(self, input): + return 
self.manifold.logmap0(input) + diff --git a/PVAE/pvae.py b/PVAE/pvae.py new file mode 100644 index 0000000000000000000000000000000000000000..9ed28d8c9b87c3cf4e033868c6148718d92d22c1 --- /dev/null +++ b/PVAE/pvae.py @@ -0,0 +1,183 @@ +import sys +sys.path.append(".") +sys.path.append("..") +import os +import datetime +from collections import defaultdict +import torch +from torch import optim +import numpy as np +import logging +import time + +from Ghypeddings.PVAE.utils import probe_infnan , process_data , create_args , perform_task +import Ghypeddings.PVAE.objectives as objectives +from Ghypeddings.PVAE.models import Tabular + +runId = datetime.datetime.now().isoformat().replace(':','_') +torch.backends.cudnn.benchmark = True + +class PVAE: + def __init__(self, + adj, + features, + labels, + dim, + hidden_dim, + num_layers=2, + c=1.0, + act='leaky_relu', + lr=0.05, + cuda=0, + epochs=100, + seed=42, + eval_freq=1, + val_prop=0.5, + test_prop=0.3, + dropout=0.1, + beta1=0.9, + beta2=.999, + K=20, + beta=.5, + analytical_kl=True, + posterior='WrappedNormal', + prior='WrappedNormal', + prior_iso=True, + prior_std=1., + learn_prior_std=True, + enc='Mob', + dec='Geo', + bias=True, + alpha=0.01, + classifier=None, + clusterer=None, + log_freq=0, + normalize_adj=False, + normalize_feats=True + ): + + self.args = create_args(dim,hidden_dim,num_layers,c,act,lr,cuda,epochs,seed,eval_freq,val_prop,test_prop,dropout,beta1,beta2,K,beta,analytical_kl,posterior,prior,prior_iso,prior_std,learn_prior_std,enc,dec,bias,alpha,classifier,clusterer,log_freq,normalize_adj,normalize_feats) + self.args.n_classes = len(np.unique(labels)) + self.args.feat_dim = features.shape[1] + self.data = process_data(self.args,adj,features,labels) + self.args.data_size = [adj.shape[0],self.args.feat_dim] + self.args.batch_size=1 + + if int(self.args.cuda) >= 0: + torch.cuda.manual_seed(self.args.seed) + self.args.device = 'cuda:' + str(self.args.cuda) if int(self.args.cuda) >= 0 else 'cpu' + else: + self.args.device = 'cpu' + + self.args.prior_iso = self.args.prior_iso or self.args.posterior == 'RiemannianNormal' + + # Choosing and saving a random seed for reproducibility + if self.args.seed == 0: self.args.seed = int(torch.randint(0, 2**32 - 1, (1,)).item()) + print('seed', self.args.seed) + torch.manual_seed(self.args.seed) + np.random.seed(self.args.seed) + torch.cuda.manual_seed_all(self.args.seed) + torch.manual_seed(self.args.seed) + torch.backends.cudnn.deterministic = True + self.model = Tabular(self.args).to(self.args.device) + self.optimizer = optim.Adam(self.model.parameters(), lr=self.args.lr, amsgrad=True, betas=(self.args.beta1, self.args.beta2)) + self.loss_function = getattr(objectives,'vae_objective') + + if self.args.cuda is not None and int(self.args.cuda) >= 0 : + os.environ['CUDA_VISIBLE_DEVICES'] = str(self.args.cuda) + self.model = self.model.to(self.args.device) + for x, val in self.data.items(): + if torch.is_tensor(self.data[x]): + self.data[x] = self.data[x].to(self.args.device) + + self.tb_embeddings = None + + + def fit(self): + + tot_params = sum([np.prod(p.size()) for p in self.model.parameters()]) + logging.info(f"Total number of parameters: {tot_params}") + + t_total = time.time() + agg = defaultdict(list) + b_loss, b_recon, b_kl , b_mlik , tb_loss = sys.float_info.max, sys.float_info.max ,sys.float_info.max,sys.float_info.max,sys.float_info.max + + best_losses = [] + real_losses = [] + + for epoch in range(self.args.epochs): + self.model.train() + self.optimizer.zero_grad() + + qz_x, px_z, lik, 
kl, loss , embeddings = self.loss_function(self.model,self.data['idx_train'], self.data['features'], self.data['adj_train'], K=self.args.K, beta=self.args.beta, components=True, analytical_kl=self.args.analytical_kl) + probe_infnan(loss, "Training loss:") + loss.backward() + self.optimizer.step() + + t_loss = loss.item() / len(self.data['idx_train']) + t_recon = -lik.mean(0).sum().item() / len(self.data['idx_train']) + t_kl = kl.sum(-1).mean(0).sum().item() / len(self.data['idx_train']) + + if(t_loss < b_loss): + b_loss = t_loss + b_recon = t_recon + b_kl = t_kl + + + agg['train_loss'].append(t_loss ) + agg['train_recon'].append(t_recon ) + agg['train_kl'].append(t_kl ) + + real_losses.append(t_recon) + if(len(best_losses) == 0): + best_losses.append(real_losses[0]) + elif (best_losses[-1] > real_losses[-1]): + best_losses.append(real_losses[-1]) + else: + best_losses.append(best_losses[-1]) + + if (epoch + 1) % self.args.log_freq == 0: + print('====> Epoch: {:03d} Loss: {:.2f} Recon: {:.2f} KL: {:.2f}'.format(epoch, agg['train_loss'][-1], agg['train_recon'][-1], agg['train_kl'][-1])) + + if (epoch + 1) % self.args.eval_freq == 0: + self.model.eval() + with torch.no_grad(): + qz_x, px_z, lik, kl, loss , embeddings= self.loss_function(self.model,self.data['idx_val'], self.data['features'],self.data['adj_train'], K=self.args.K, beta=self.args.beta, components=True) + tt_loss = loss.item() / len(self.data['idx_val']) + if(tt_loss < tb_loss): + tb_loss = tt_loss + self.tb_embeddings = embeddings[0] + + agg['test_loss'].append(tt_loss ) + print('====> Test loss: {:.4f}'.format(agg['test_loss'][-1])) + + + logging.info("Optimization Finished!") + logging.info("Total time elapsed: {:.4f}s".format(time.time() - t_total)) + print('====> Training: Best Loss: {:.2f} Best Recon: {:.2f} Best KL: {:.2f}'.format(b_loss,b_recon,b_kl)) + print('====> Testing: Best Loss: {:.2f}'.format(tb_loss)) + + X = self.model.manifold.logmap0(self.tb_embeddings).cpu().detach().numpy() + y = self.data['labels'].cpu().reshape(-1,1) + acc,f1,recall,precision,roc_auc=perform_task(self.args,X,y) + return {'real':real_losses,'best':best_losses},acc,f1,recall,precision,roc_auc,time.time() - t_total + + def predict(self): + self.model.eval() + with torch.no_grad(): + qz_x, px_z, lik, kl, loss , embeddings=self.loss_function(self.model,self.data['idx_test'], self.data['features'],self.data['adj_train'], K=self.args.K, beta=self.args.beta, components=True) + tt_loss = loss.item() / len(self.data['idx_test']) + data = self.model.manifold.logmap0(embeddings[0]).cpu().detach().numpy() + labels = self.data['labels'].reshape(-1,1).cpu() + acc,f1,recall,precision,roc_auc=perform_task(self.args,data,labels) + return abs(tt_loss) , acc, f1 , recall,precision,roc_auc + + + def save_embeddings(self,directory,prefix): + tb_embeddings_euc = self.model.manifold.logmap0(self.tb_embeddings) + for_classification_hyp = np.hstack((self.tb_embeddings.cpu().detach().numpy(),self.data['labels'].reshape(-1,1).cpu())) + for_classification_euc = np.hstack((tb_embeddings_euc.cpu().detach().numpy(),self.data['labels'].reshape(-1,1).cpu())) + hyp_file_path = os.path.join(directory,f'{prefix}_embeddings_hyp.csv') + euc_file_path = os.path.join(directory,f'{prefix}_embeddings_euc.csv') + np.savetxt(hyp_file_path, for_classification_hyp, delimiter=',') + np.savetxt(euc_file_path, for_classification_euc, delimiter=',') diff --git a/PVAE/utils.py b/PVAE/utils.py new file mode 100644 index 
0000000000000000000000000000000000000000..87b937ad038a97db9af279bb8b8363b3916432cf --- /dev/null +++ b/PVAE/utils.py @@ -0,0 +1,327 @@ +import sys +import math +import time +import os +import shutil +import torch +import torch.distributions as dist +from torch.autograd import Variable, Function, grad +from sklearn.preprocessing import MinMaxScaler +import pandas as pd +import numpy as np +import argparse +import torch.nn as nn +import scipy.sparse as sp + + +def lexpand(A, *dimensions): + """Expand tensor, adding new dimensions on left.""" + return A.expand(tuple(dimensions) + A.shape) + + +def rexpand(A, *dimensions): + """Expand tensor, adding new dimensions on right.""" + return A.view(A.shape + (1,)*len(dimensions)).expand(A.shape + tuple(dimensions)) + + +def assert_no_nan(name, g): + if torch.isnan(g).any(): raise Exception('nans in {}'.format(name)) + + +def assert_no_grad_nan(name, x): + if x.requires_grad: x.register_hook(lambda g: assert_no_nan(name, g)) + + +# Classes +class Constants(object): + eta = 1e-5 + log2 = math.log(2) + logpi = math.log(math.pi) + log2pi = math.log(2 * math.pi) + logceilc = 88 # largest cuda v s.t. exp(v) < inf + logfloorc = -104 # smallest cuda v s.t. exp(v) > 0 + invsqrt2pi = 1. / math.sqrt(2 * math.pi) + sqrthalfpi = math.sqrt(math.pi/2) + + +def logsinh(x): + # torch.log(sinh(x)) + return x + torch.log(1 - torch.exp(-2 * x)) - Constants.log2 + + +def logcosh(x): + # torch.log(cosh(x)) + return x + torch.log(1 + torch.exp(-2 * x)) - Constants.log2 + + +class Arccosh(Function): + # https://github.com/facebookresearch/poincare-embeddings/blob/master/model.py + @staticmethod + def forward(ctx, x): + ctx.z = torch.sqrt(x * x - 1) + return torch.log(x + ctx.z) + + @staticmethod + def backward(ctx, g): + z = torch.clamp(ctx.z, min=Constants.eta) + z = g / z + return z + + +class Arcsinh(Function): + @staticmethod + def forward(ctx, x): + ctx.z = torch.sqrt(x * x + 1) + return torch.log(x + ctx.z) + + @staticmethod + def backward(ctx, g): + z = torch.clamp(ctx.z, min=Constants.eta) + z = g / z + return z + + +# https://stackoverflow.com/questions/14906764/how-to-redirect-stdout-to-both-file-and-console-with-scripting +class Logger(object): + def __init__(self, filename): + self.terminal = sys.stdout + self.log = open(filename, "a") + + def write(self, message): + self.terminal.write(message) + self.log.write(message) + + def flush(self): + # this flush method is needed for python 3 compatibility. + # this handles the flush command by doing nothing. + # you might want to specify some extra behavior here. + pass + + +class Timer: + def __init__(self, name): + self.name = name + + def __enter__(self): + self.begin = time.time() + return self + + def __exit__(self, *args): + self.end = time.time() + self.elapsed = self.end - self.begin + self.elapsedH = time.gmtime(self.elapsed) + print('====> [{}] Time: {:7.3f}s or {}' + .format(self.name, + self.elapsed, + time.strftime("%H:%M:%S", self.elapsedH))) + + +# Functions +def save_vars(vs, filepath): + """ + Saves variables to the given filepath in a safe manner. + """ + if os.path.exists(filepath): + shutil.copyfile(filepath, '{}.old'.format(filepath)) + torch.save(vs, filepath) + + +def save_model(model, filepath): + """ + To load a saved model, simply use + `model.load_state_dict(torch.load('path-to-saved-model'))`. 
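+ save_vars first copies any existing file to `<filepath>.old`, so the previous checkpoint is preserved.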
+ """ + save_vars(model.state_dict(), filepath) + + +def log_mean_exp(value, dim=0, keepdim=False): + return log_sum_exp(value, dim, keepdim) - math.log(value.size(dim)) + + +def log_sum_exp(value, dim=0, keepdim=False): + m, _ = torch.max(value, dim=dim, keepdim=True) + value0 = value - m + if keepdim is False: + m = m.squeeze(dim) + return m + torch.log(torch.sum(torch.exp(value0), dim=dim, keepdim=keepdim)) + + +def log_sum_exp_signs(value, signs, dim=0, keepdim=False): + m, _ = torch.max(value, dim=dim, keepdim=True) + value0 = value - m + if keepdim is False: + m = m.squeeze(dim) + return m + torch.log(torch.sum(signs * torch.exp(value0), dim=dim, keepdim=keepdim)) + + +def get_mean_param(params): + """Return the parameter used to show reconstructions or generations. + For example, the mean for Normal, or probs for Bernoulli. + For Bernoulli, skip first parameter, as that's (scalar) temperature + """ + if params[0].dim() == 0: + return params[1] + # elif len(params) == 3: + # return params[1] + else: + return params[0] + + +def probe_infnan(v, name, extras={}): + nps = torch.isnan(v) + s = nps.sum().item() + if s > 0: + print('>>> {} >>>'.format(name)) + print(name, s) + print(v[nps]) + for k, val in extras.items(): + print(k, val, val.sum().item()) + quit() + + +def has_analytic_kl(type_p, type_q): + return (type_p, type_q) in torch.distributions.kl._KL_REGISTRY + + +def split_data(labels, test_prop,val_prop): + nb_nodes = labels.shape[0] + all_idx = np.arange(nb_nodes) + pos_idx = labels.nonzero()[0] + neg_idx = (1. - labels).nonzero()[0] + np.random.shuffle(pos_idx) + np.random.shuffle(neg_idx) + pos_idx = pos_idx.tolist() + neg_idx = neg_idx.tolist() + nb_pos_neg = min(len(pos_idx), len(neg_idx)) + nb_val = round(val_prop * nb_pos_neg) + nb_test = round(test_prop * nb_pos_neg) + idx_val_pos, idx_test_pos, idx_train_pos = pos_idx[:nb_val], pos_idx[nb_val:nb_val + nb_test], pos_idx[ + nb_val + nb_test:] + idx_val_neg, idx_test_neg, idx_train_neg = neg_idx[:nb_val], neg_idx[nb_val:nb_val + nb_test], neg_idx[ + nb_val + nb_test:] + return idx_val_pos + idx_val_neg, idx_test_pos + idx_test_neg, idx_train_pos + idx_train_neg + +def process_data(args, adj,features,labels): + data = process_data_nc(args,adj,features,labels) + data['adj_train'], data['features'] = process( + data['adj_train'], data['features'],args.normalize_adj,args.normalize_feats + ) + return data + +def process_data_nc(args,adj,features,labels): + idx_test, idx_train , idx_val= split_data(labels, args.test_prop,args.val_prop) + labels = torch.LongTensor(labels) + data = {'adj_train': sp.csr_matrix(adj), 'features': features, 'labels': labels, 'idx_train': idx_train, 'idx_test': idx_test , 'idx_val':idx_val} + return data + +def process(adj, features, normalize_adj, normalize_feats): + if sp.isspmatrix(features): + features = np.array(features.todense()) + if normalize_feats: + features = normalize(features) + features = torch.Tensor(features) + if normalize_adj: + adj = normalize(adj) + adj = sparse_mx_to_torch_sparse_tensor(adj) + return adj, features + + +def normalize(mx): + """Row-normalize sparse matrix.""" + rowsum = np.array(mx.sum(1)) + r_inv = np.power(rowsum, -1).flatten() + r_inv[np.isinf(r_inv)] = 0. 
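+ # all-zero rows give inf in r_inv; zeroing those entries leaves such rows unchanged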
+ r_mat_inv = sp.diags(r_inv) + mx = r_mat_inv.dot(mx) + return mx + + +def sparse_mx_to_torch_sparse_tensor(sparse_mx): + """Convert a scipy sparse matrix to a torch sparse tensor.""" + sparse_mx = sparse_mx.tocoo() + indices = torch.from_numpy( + np.vstack((sparse_mx.row, sparse_mx.col)).astype(np.int64) + ) + values = torch.Tensor(sparse_mx.data) + shape = torch.Size(sparse_mx.shape) + return torch.sparse.FloatTensor(indices, values, shape) + +def create_args(*args): + parser = argparse.ArgumentParser() + parser.add_argument('--dim', type=int, default=args[0]) + parser.add_argument('--hidden_dim', type=int, default=args[1]) + parser.add_argument('--num_layers', type=int, default=args[2]) + parser.add_argument('--c', type=int, default=args[3]) + parser.add_argument('--act', type=str, default=args[4]) + parser.add_argument('--lr', type=float, default=args[5]) + parser.add_argument('--cuda', type=int, default=args[6]) + parser.add_argument('--epochs', type=int, default=args[7]) + parser.add_argument('--seed', type=int, default=args[8]) + parser.add_argument('--eval_freq', type=int, default=args[9]) + parser.add_argument('--val_prop', type=float, default=args[10]) + parser.add_argument('--test_prop', type=float, default=args[11]) + parser.add_argument('--dropout', type=float, default=args[12]) + parser.add_argument('--beta1', type=float, default=args[13]) + parser.add_argument('--beta2', type=float, default=args[14]) + parser.add_argument('--K', type=int, default=args[15]) + parser.add_argument('--beta', type=float, default=args[16]) + parser.add_argument('--analytical_kl', type=bool, default=args[17]) + parser.add_argument('--posterior', type=str, default=args[18]) + parser.add_argument('--prior', type=str, default=args[19]) + parser.add_argument('--prior_iso', type=bool, default=args[20]) + parser.add_argument('--prior_std', type=float, default=args[21]) + parser.add_argument('--learn_prior_std', type=bool, default=args[22]) + parser.add_argument('--enc', type=str, default=args[23]) + parser.add_argument('--dec', type=str, default=args[24]) + parser.add_argument('--bias', type=bool, default=args[25]) + parser.add_argument('--alpha', type=float, default=args[26]) + parser.add_argument('--classifier', type=str, default=args[27]) + parser.add_argument('--clusterer', type=str, default=args[28]) + parser.add_argument('--log_freq', type=int, default=args[29]) + parser.add_argument('--normalize_adj', type=bool, default=args[30]) + parser.add_argument('--normalize_feats', type=bool, default=args[31]) + flags, unknown = parser.parse_known_args() + return flags + + +def get_activation(args): + if args.act == 'leaky_relu': + return nn.LeakyReLU(args.alpha) + elif args.act == 'rrelu': + return nn.RReLU() + elif args.act == 'relu': + return nn.ReLU() + elif args.act == 'elu': + return nn.ELU() + elif args.act == 'prelu': + return nn.PReLU() + elif args.act == 'selu': + return nn.SELU() + + +from Ghypeddings.classifiers import * +def perform_task(args,X,y): + if(args.classifier and args.clusterer): + print('You have to chose one of them!') + sys.exit(1) + elif(args.classifier): + if(args.classifier == 'svm'): + return SVM(X,y,args.test_prop,args.seed) + elif(args.classifier == 'mlp'): + return mlp(X,y,1,10) + elif(args.classifier == 'decision tree'): + return decision_tree(X,y,args.test_prop,args.seed) + elif(args.classifier == 'random forest'): + return random_forest(X,y,args.test_prop,args.seed) + elif(args.classifier == 'adaboost'): + return adaboost(X,y,args.test_prop,args.seed) + 
elif(args.classifier == 'knn'): + return KNN(X,y,args.test_prop,args.seed) + elif(args.classifier == 'naive bayes'): + return naive_bayes(X,y,args.test_prop,args.seed) + else: + raise NotImplementedError + elif(args.clusterer): + pass + else: + return 99,99,99,99,99 \ No newline at end of file diff --git a/Poincare/__init__.py b/Poincare/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..bfa83a015b9025ddbca2b7c1ed543c66fd3af3d9 --- /dev/null +++ b/Poincare/__init__.py @@ -0,0 +1,2 @@ +from __future__ import print_function +from __future__ import division diff --git a/Poincare/layers/__init__.py b/Poincare/layers/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/Poincare/layers/layers.py b/Poincare/layers/layers.py new file mode 100644 index 0000000000000000000000000000000000000000..94778f8a79b92f2383dddcb7a96fc60d0fad6b70 --- /dev/null +++ b/Poincare/layers/layers.py @@ -0,0 +1,43 @@ +"""Euclidean layers.""" +import math + +import torch +import torch.nn as nn +import torch.nn.functional as F +from torch.nn.modules.module import Module +from torch.nn.parameter import Parameter + + +def get_dim_act(args): + """ + Helper function to get dimension and activation at every layer. + :param args: + :return: + """ + if not args.act: + act = lambda x: x + else: + act = getattr(F, args.act) + acts = [act] * (args.num_layers - 1) + dims = [args.feat_dim] + ([args.dim] * (args.num_layers - 1)) + if args.task in ['lp', 'rec']: + dims += [args.dim] + acts += [act] + return dims, acts + +class Linear(Module): + """ + Simple Linear layer with dropout. + """ + + def __init__(self, in_features, out_features, dropout, act, use_bias): + super(Linear, self).__init__() + self.dropout = dropout + self.linear = nn.Linear(in_features, out_features, use_bias) + self.act = act + + def forward(self, x): + hidden = self.linear.forward(x) + hidden = F.dropout(hidden, self.dropout, training=self.training) + out = self.act(hidden) + return out diff --git a/Poincare/manifolds/__init__.py b/Poincare/manifolds/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..1ac57200dff3cf341b4148b750fe1ecadb88c620 --- /dev/null +++ b/Poincare/manifolds/__init__.py @@ -0,0 +1,3 @@ +from Ghypeddings.Poincare.manifolds.base import ManifoldParameter +from Ghypeddings.Poincare.manifolds.poincare import PoincareBall +from Ghypeddings.Poincare.manifolds.euclidean import Euclidean \ No newline at end of file diff --git a/Poincare/manifolds/base.py b/Poincare/manifolds/base.py new file mode 100644 index 0000000000000000000000000000000000000000..925d4a6b2a59dae47a3a8ca33a7dcdcb20e0f08e --- /dev/null +++ b/Poincare/manifolds/base.py @@ -0,0 +1,88 @@ +"""Base manifold.""" + +from torch.nn import Parameter + + +class Manifold(object): + """ + Abstract class to define operations on a manifold. 
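+ Every operation takes the curvature ``c`` as an explicit argument, so the same interface covers the Euclidean and PoincareBall implementations.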
+ """ + + def __init__(self): + super().__init__() + self.eps = 10e-8 + + def sqdist(self, p1, p2, c): + """Squared distance between pairs of points.""" + raise NotImplementedError + + def egrad2rgrad(self, p, dp, c): + """Converts Euclidean Gradient to Riemannian Gradients.""" + raise NotImplementedError + + def proj(self, p, c): + """Projects point p on the manifold.""" + raise NotImplementedError + + def proj_tan(self, u, p, c): + """Projects u on the tangent space of p.""" + raise NotImplementedError + + def proj_tan0(self, u, c): + """Projects u on the tangent space of the origin.""" + raise NotImplementedError + + def expmap(self, u, p, c): + """Exponential map of u at point p.""" + raise NotImplementedError + + def logmap(self, p1, p2, c): + """Logarithmic map of point p1 at point p2.""" + raise NotImplementedError + + def expmap0(self, u, c): + """Exponential map of u at the origin.""" + raise NotImplementedError + + def logmap0(self, p, c): + """Logarithmic map of point p at the origin.""" + raise NotImplementedError + + def mobius_add(self, x, y, c, dim=-1): + """Adds points x and y.""" + raise NotImplementedError + + def mobius_matvec(self, m, x, c): + """Performs hyperboic martrix-vector multiplication.""" + raise NotImplementedError + + def init_weights(self, w, c, irange=1e-5): + """Initializes random weigths on the manifold.""" + raise NotImplementedError + + def inner(self, p, c, u, v=None, keepdim=False): + """Inner product for tangent vectors at point x.""" + raise NotImplementedError + + def ptransp(self, x, y, u, c): + """Parallel transport of u from x to y.""" + raise NotImplementedError + + def ptransp0(self, x, u, c): + """Parallel transport of u from the origin to y.""" + raise NotImplementedError + + +class ManifoldParameter(Parameter): + """ + Subclass of torch.nn.Parameter for Riemannian optimization. + """ + def __new__(cls, data, requires_grad, manifold, c): + return Parameter.__new__(cls, data, requires_grad) + + def __init__(self, data, requires_grad, manifold, c): + self.c = c + self.manifold = manifold + + def __repr__(self): + return '{} Parameter containing:\n'.format(self.manifold.name) + super(Parameter, self).__repr__() diff --git a/Poincare/manifolds/euclidean.py b/Poincare/manifolds/euclidean.py new file mode 100644 index 0000000000000000000000000000000000000000..177ebb2bf8a03d211732408b84d5f5d8bbec962e --- /dev/null +++ b/Poincare/manifolds/euclidean.py @@ -0,0 +1,67 @@ +"""Euclidean manifold.""" + +from Ghypeddings.Poincare.manifolds.base import Manifold + + +class Euclidean(Manifold): + """ + Euclidean Manifold class. + """ + + def __init__(self): + super(Euclidean, self).__init__() + self.name = 'Euclidean' + + def normalize(self, p): + dim = p.size(-1) + p.view(-1, dim).renorm_(2, 0, 1.) 
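+ # in-place renorm_: each row is clipped to at most unit L2 norm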
+ return p + + def sqdist(self, p1, p2, c): + return (p1 - p2).pow(2).sum(dim=-1) + + def egrad2rgrad(self, p, dp, c): + return dp + + def proj(self, p, c): + return p + + def proj_tan(self, u, p, c): + return u + + def proj_tan0(self, u, c): + return u + + def expmap(self, u, p, c): + return p + u + + def logmap(self, p1, p2, c): + return p2 - p1 + + def expmap0(self, u, c): + return u + + def logmap0(self, p, c): + return p + + def mobius_add(self, x, y, c, dim=-1): + return x + y + + def mobius_matvec(self, m, x, c): + mx = x @ m.transpose(-1, -2) + return mx + + def init_weights(self, w, c, irange=1e-5): + w.data.uniform_(-irange, irange) + return w + + def inner(self, p, c, u, v=None, keepdim=False): + if v is None: + v = u + return (u * v).sum(dim=-1, keepdim=keepdim) + + def ptransp(self, x, y, v, c): + return v + + def ptransp0(self, x, v, c): + return x + v diff --git a/Poincare/manifolds/poincare.py b/Poincare/manifolds/poincare.py new file mode 100644 index 0000000000000000000000000000000000000000..3f52cee6ada9b4a2db8f7ce5051907979a08c023 --- /dev/null +++ b/Poincare/manifolds/poincare.py @@ -0,0 +1,145 @@ +"""Poincare ball manifold.""" + +import torch + +from Ghypeddings.Poincare.manifolds.base import Manifold +from Ghypeddings.Poincare.utils.math_utils import artanh, tanh + + +class PoincareBall(Manifold): + """ + PoicareBall Manifold class. + + We use the following convention: x0^2 + x1^2 + ... + xd^2 < 1 / c + + Note that 1/sqrt(c) is the Poincare ball radius. + + """ + + def __init__(self, ): + super(PoincareBall, self).__init__() + self.name = 'PoincareBall' + self.min_norm = 1e-15 + self.eps = {torch.float32: 4e-3, torch.float64: 1e-5} + + def sqdist(self, p1, p2, c): + sqrt_c = c ** 0.5 + dist_c = artanh( + sqrt_c * self.mobius_add(-p1, p2, c, dim=-1).norm(dim=-1, p=2, keepdim=False) + ) + dist = dist_c * 2 / sqrt_c + return dist ** 2 + + def _lambda_x(self, x, c): + x_sqnorm = torch.sum(x.data.pow(2), dim=-1, keepdim=True) + return 2 / (1. - c * x_sqnorm).clamp_min(self.min_norm) + + def egrad2rgrad(self, p, dp, c): + lambda_p = self._lambda_x(p, c) + dp /= lambda_p.pow(2) + return dp + + def proj(self, x, c): + norm = torch.clamp_min(x.norm(dim=-1, keepdim=True, p=2), self.min_norm) + maxnorm = (1 - self.eps[x.dtype]) / (c ** 0.5) + cond = norm > maxnorm + projected = x / norm * maxnorm + return torch.where(cond, projected, x) + + def proj_tan(self, u, p, c): + return u + + def proj_tan0(self, u, c): + return u + + def expmap(self, u, p, c): + sqrt_c = c ** 0.5 + u_norm = u.norm(dim=-1, p=2, keepdim=True).clamp_min(self.min_norm) + second_term = ( + tanh(sqrt_c / 2 * self._lambda_x(p, c) * u_norm) + * u + / (sqrt_c * u_norm) + ) + gamma_1 = self.mobius_add(p, second_term, c) + return gamma_1 + + def logmap(self, p1, p2, c): + sub = self.mobius_add(-p1, p2, c) + sub_norm = sub.norm(dim=-1, p=2, keepdim=True).clamp_min(self.min_norm) + lam = self._lambda_x(p1, c) + sqrt_c = c ** 0.5 + return 2 / sqrt_c / lam * artanh(sqrt_c * sub_norm) * sub / sub_norm + + def expmap0(self, u, c): + sqrt_c = c ** 0.5 + u_norm = torch.clamp_min(u.norm(dim=-1, p=2, keepdim=True), self.min_norm) + gamma_1 = tanh(sqrt_c * u_norm) * u / (sqrt_c * u_norm) + return gamma_1 + + def logmap0(self, p, c): + sqrt_c = c ** 0.5 + p_norm = p.norm(dim=-1, p=2, keepdim=True).clamp_min(self.min_norm) + scale = 1. 
/ sqrt_c * artanh(sqrt_c * p_norm) / p_norm + return scale * p + + def mobius_add(self, x, y, c, dim=-1): + x2 = x.pow(2).sum(dim=dim, keepdim=True) + y2 = y.pow(2).sum(dim=dim, keepdim=True) + xy = (x * y).sum(dim=dim, keepdim=True) + num = (1 + 2 * c * xy + c * y2) * x + (1 - c * x2) * y + denom = 1 + 2 * c * xy + c ** 2 * x2 * y2 + return num / denom.clamp_min(self.min_norm) + + def mobius_matvec(self, m, x, c): + sqrt_c = c ** 0.5 + x_norm = x.norm(dim=-1, keepdim=True, p=2).clamp_min(self.min_norm) + mx = x @ m.transpose(-1, -2) + mx_norm = mx.norm(dim=-1, keepdim=True, p=2).clamp_min(self.min_norm) + res_c = tanh(mx_norm / x_norm * artanh(sqrt_c * x_norm)) * mx / (mx_norm * sqrt_c) + cond = (mx == 0).prod(-1, keepdim=True, dtype=torch.uint8) + res_0 = torch.zeros(1, dtype=res_c.dtype, device=res_c.device) + res = torch.where(cond, res_0, res_c) + return res + + def init_weights(self, w, c, irange=1e-5): + w.data.uniform_(-irange, irange) + return w + + def _gyration(self, u, v, w, c, dim: int = -1): + u2 = u.pow(2).sum(dim=dim, keepdim=True) + v2 = v.pow(2).sum(dim=dim, keepdim=True) + uv = (u * v).sum(dim=dim, keepdim=True) + uw = (u * w).sum(dim=dim, keepdim=True) + vw = (v * w).sum(dim=dim, keepdim=True) + c2 = c ** 2 + a = -c2 * uw * v2 + c * vw + 2 * c2 * uv * vw + b = -c2 * vw * u2 - c * uw + d = 1 + 2 * c * uv + c2 * u2 * v2 + return w + 2 * (a * u + b * v) / d.clamp_min(self.min_norm) + + def inner(self, x, c, u, v=None, keepdim=False): + if v is None: + v = u + lambda_x = self._lambda_x(x, c) + return lambda_x ** 2 * (u * v).sum(dim=-1, keepdim=keepdim) + + def ptransp(self, x, y, u, c): + lambda_x = self._lambda_x(x, c) + lambda_y = self._lambda_x(y, c) + return self._gyration(y, -x, u, c) * lambda_x / lambda_y + + def ptransp_(self, x, y, u, c): + lambda_x = self._lambda_x(x, c) + lambda_y = self._lambda_x(y, c) + return self._gyration(y, -x, u, c) * lambda_x / lambda_y + + def ptransp0(self, x, u, c): + lambda_x = self._lambda_x(x, c) + return 2 * u / lambda_x.clamp_min(self.min_norm) + + def to_hyperboloid(self, x, c): + K = 1./ c + sqrtK = K ** 0.5 + sqnorm = torch.norm(x, p=2, dim=1, keepdim=True) ** 2 + return sqrtK * torch.cat([K + sqnorm, 2 * sqrtK * x], dim=1) / (K - sqnorm) + diff --git a/Poincare/models/__init__.py b/Poincare/models/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/Poincare/models/base_models.py b/Poincare/models/base_models.py new file mode 100644 index 0000000000000000000000000000000000000000..142b9371cf08248d096f0ab313dd70fa8707f768 --- /dev/null +++ b/Poincare/models/base_models.py @@ -0,0 +1,77 @@ +"""Base model class.""" + +import numpy as np +from sklearn.metrics import roc_auc_score, average_precision_score +import torch +import torch.nn as nn +import torch.nn.functional as F + +import Ghypeddings.Poincare.manifolds as manifolds +import Ghypeddings.Poincare.models.encoders as encoders +from Ghypeddings.Poincare.models.decoders import model2decoder +from Ghypeddings.Poincare.utils.eval_utils import acc_f1 + + +class BaseModel(nn.Module): + """ + Base model for graph embedding tasks. 
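+ Wraps a PoincareBall manifold and a Shallow lookup-table encoder; subclasses supply the decoder and the evaluation metrics.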
+ """ + + def __init__(self, args): + super(BaseModel, self).__init__() + self.manifold_name = 'PoincareBall' + self.c = torch.tensor([1.0]) + if not args.cuda == -1: + self.c = self.c.to(args.device) + self.manifold = getattr(manifolds, self.manifold_name)() + self.nnodes = args.n_nodes + self.encoder = getattr(encoders, 'Shallow')(self.c, args) + + def encode(self, x): + h = self.encoder.encode(x) + return h + + def compute_metrics(self, embeddings, data, split): + raise NotImplementedError + + def init_metric_dict(self): + raise NotImplementedError + + def has_improved(self, m1, m2): + raise NotImplementedError + + +class NCModel(BaseModel): + """ + Base model for node classification task. + """ + + def __init__(self, args): + super(NCModel, self).__init__(args) + self.decoder = model2decoder(1.0, args) + if args.n_classes > 2: + self.f1_average = 'micro' + else: + self.f1_average = 'binary' + + self.weights = torch.Tensor([1.] * args.n_classes) + if not args.cuda == -1: + self.weights = self.weights.to(args.device) + + def decode(self, h, idx): + output = self.decoder.decode(h) + return F.log_softmax(output[idx], dim=1) + + def compute_metrics(self, embeddings, data, split): + idx = data[f'idx_{split}'] + output = self.decode(embeddings, idx) + loss = F.nll_loss(output, data['labels'][idx], self.weights) + acc, f1,recall,precision,roc_auc = acc_f1(output, data['labels'][idx], average=self.f1_average) + metrics = {'loss': loss, 'acc': acc, 'f1': f1,'recall':recall,'precision':precision,'roc_auc':roc_auc} + return metrics + + def init_metric_dict(self): + return {'acc': -1, 'f1': -1} + + def has_improved(self, m1, m2): + return m1["f1"] < m2["f1"] \ No newline at end of file diff --git a/Poincare/models/decoders.py b/Poincare/models/decoders.py new file mode 100644 index 0000000000000000000000000000000000000000..8532b62830f9b8d0a050d64b23f2dc1b84ab8bd1 --- /dev/null +++ b/Poincare/models/decoders.py @@ -0,0 +1,46 @@ +"""Graph decoders.""" +import Ghypeddings.Poincare.manifolds as manifolds +import torch.nn as nn +import torch.nn.functional as F +from Ghypeddings.Poincare.layers.layers import Linear +import torch + +class Decoder(nn.Module): + """ + Decoder abstract class for node classification tasks. + """ + + def __init__(self, c): + super(Decoder, self).__init__() + self.c = c + + def decode(self, x): + probs = self.cls.forward(x) + return probs + + +class LinearDecoder(Decoder): + """ + MLP Decoder for Hyperbolic/Euclidean node classification models. 
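+ Embeddings are first mapped to the tangent space at the origin (logmap0 followed by proj_tan0) and then passed to a Euclidean Linear classifier.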
+ """ + + def __init__(self, c, args): + super(LinearDecoder, self).__init__(c) + self.manifold = getattr(manifolds, 'PoincareBall')() + self.input_dim = args.dim + args.feat_dim + self.output_dim = args.n_classes + self.bias = True + self.cls = Linear(self.input_dim, self.output_dim, args.dropout, lambda x: x, self.bias) + + def decode(self, x): + h = self.manifold.proj_tan0(self.manifold.logmap0(x, c=self.c), c=self.c) + return super(LinearDecoder, self).decode(h) + + def extra_repr(self): + return 'in_features={}, out_features={}, bias={}, c={}'.format( + self.input_dim, self.output_dim, self.bias, self.c + ) + + +model2decoder = LinearDecoder + diff --git a/Poincare/models/encoders.py b/Poincare/models/encoders.py new file mode 100644 index 0000000000000000000000000000000000000000..42e6504898f0f6e85db56f4fd597c467890e205a --- /dev/null +++ b/Poincare/models/encoders.py @@ -0,0 +1,42 @@ +"""Graph encoders.""" + +import numpy as np +import torch +import torch.nn as nn +import torch.nn.functional as F + +import Ghypeddings.Poincare.manifolds as manifolds + +class Encoder(nn.Module): + """ + Encoder abstract class. + """ + + def __init__(self, c): + super(Encoder, self).__init__() + self.c = c + + def encode(self, x): + pass + +class Shallow(Encoder): + """ + Shallow Embedding method. + Learns embeddings or loads pretrained embeddings and uses an MLP for classification. + """ + + def __init__(self, c, args): + super(Shallow, self).__init__(c) + self.manifold = getattr(manifolds, 'PoincareBall')() + weights = torch.Tensor(args.n_nodes, args.dim) + weights = self.manifold.init_weights(weights, self.c) + trainable = True + self.lt = manifolds.ManifoldParameter(weights, trainable, self.manifold, self.c) + self.all_nodes = torch.LongTensor(list(range(args.n_nodes))) + layers = [] + self.layers = nn.Sequential(*layers) + + def encode(self, x): + h = self.lt[self.all_nodes, :] + h = torch.cat((h, x), 1) + return h diff --git a/Poincare/optimizers/__init__.py b/Poincare/optimizers/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..6b0d929f33f4e20f83e7cc3ce87c9fa8fd359447 --- /dev/null +++ b/Poincare/optimizers/__init__.py @@ -0,0 +1,2 @@ +from torch.optim import Adam +from Ghypeddings.Poincare.optimizers.radam import RiemannianAdam diff --git a/Poincare/optimizers/radam.py b/Poincare/optimizers/radam.py new file mode 100644 index 0000000000000000000000000000000000000000..f4904422f52d271dc7de85ed3069ef9972f3015b --- /dev/null +++ b/Poincare/optimizers/radam.py @@ -0,0 +1,172 @@ +"""Riemannian adam optimizer geoopt implementation (https://github.com/geoopt/).""" +import torch.optim +from Ghypeddings.Poincare.manifolds import Euclidean, ManifoldParameter + +_default_manifold = Euclidean() + + +class OptimMixin(object): + def __init__(self, *args, stabilize=None, **kwargs): + self._stabilize = stabilize + super().__init__(*args, **kwargs) + + def stabilize_group(self, group): + pass + + def stabilize(self): + """Stabilize parameters if they are off-manifold due to numerical reasons + """ + for group in self.param_groups: + self.stabilize_group(group) + + +def copy_or_set_(dest, source): + """ + A workaround to respect strides of :code:`dest` when copying :code:`source` + (https://github.com/geoopt/geoopt/issues/70) + Parameters + ---------- + dest : torch.Tensor + Destination tensor where to store new data + source : torch.Tensor + Source data to put in the new tensor + Returns + ------- + dest + torch.Tensor, modified inplace + """ + if dest.stride() != source.stride(): 
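+ # strides differ: copy_ preserves dest's existing memory layout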
+ return dest.copy_(source) + else: + return dest.set_(source) + + +class RiemannianAdam(OptimMixin, torch.optim.Adam): + r"""Riemannian Adam with the same API as :class:`torch.optim.Adam` + Parameters + ---------- + params : iterable + iterable of parameters to optimize or dicts defining + parameter groups + lr : float (optional) + learning rate (default: 1e-3) + betas : Tuple[float, float] (optional) + coefficients used for computing + running averages of gradient and its square (default: (0.9, 0.999)) + eps : float (optional) + term added to the denominator to improve + numerical stability (default: 1e-8) + weight_decay : float (optional) + weight decay (L2 penalty) (default: 0) + amsgrad : bool (optional) + whether to use the AMSGrad variant of this + algorithm from the paper `On the Convergence of Adam and Beyond`_ + (default: False) + Other Parameters + ---------------- + stabilize : int + Stabilize parameters if they are off-manifold due to numerical + reasons every ``stabilize`` steps (default: ``None`` -- no stabilize) + .. _On the Convergence of Adam and Beyond: + https://openreview.net/forum?id=ryQu7f-RZ + """ + + def step(self, closure=None): + """Performs a single optimization step. + Arguments + --------- + closure : callable (optional) + A closure that reevaluates the model + and returns the loss. + """ + loss = None + if closure is not None: + loss = closure() + with torch.no_grad(): + for group in self.param_groups: + if "step" not in group: + group["step"] = 0 + betas = group["betas"] + weight_decay = group["weight_decay"] + eps = group["eps"] + learning_rate = group["lr"] + amsgrad = group["amsgrad"] + for point in group["params"]: + grad = point.grad + if grad is None: + continue + if isinstance(point, (ManifoldParameter)): + manifold = point.manifold + c = point.c + else: + manifold = _default_manifold + c = None + if grad.is_sparse: + raise RuntimeError( + "Riemannian Adam does not support sparse gradients yet (PR is welcome)" + ) + + state = self.state[point] + + # State initialization + if len(state) == 0: + state["step"] = 0 + # Exponential moving average of gradient values + state["exp_avg"] = torch.zeros_like(point) + # Exponential moving average of squared gradient values + state["exp_avg_sq"] = torch.zeros_like(point) + if amsgrad: + # Maintains max of all exp. moving avg. of sq. grad. values + state["max_exp_avg_sq"] = torch.zeros_like(point) + # make local variables for easy access + exp_avg = state["exp_avg"] + exp_avg_sq = state["exp_avg_sq"] + # actual step + grad.add_(weight_decay, point) + grad = manifold.egrad2rgrad(point, grad, c) + exp_avg.mul_(betas[0]).add_(1 - betas[0], grad) + exp_avg_sq.mul_(betas[1]).add_( + 1 - betas[1], manifold.inner(point, c, grad, keepdim=True) + ) + if amsgrad: + max_exp_avg_sq = state["max_exp_avg_sq"] + # Maintains the maximum of all 2nd moment running avg. till now + torch.max(max_exp_avg_sq, exp_avg_sq, out=max_exp_avg_sq) + # Use the max. for normalizing running avg. 
of gradient + denom = max_exp_avg_sq.sqrt().add_(eps) + else: + denom = exp_avg_sq.sqrt().add_(eps) + group["step"] += 1 + bias_correction1 = 1 - betas[0] ** group["step"] + bias_correction2 = 1 - betas[1] ** group["step"] + step_size = ( + learning_rate * bias_correction2 ** 0.5 / bias_correction1 + ) + # copy the state, we need it for retraction + # get the direction for ascend + direction = exp_avg / denom + # transport the exponential averaging to the new point + new_point = manifold.proj(manifold.expmap(-step_size * direction, point, c), c) + exp_avg_new = manifold.ptransp(point, new_point, exp_avg, c) + # use copy only for user facing point + copy_or_set_(point, new_point) + exp_avg.set_(exp_avg_new) + + group["step"] += 1 + if self._stabilize is not None and group["step"] % self._stabilize == 0: + self.stabilize_group(group) + return loss + + @torch.no_grad() + def stabilize_group(self, group): + for p in group["params"]: + if not isinstance(p, ManifoldParameter): + continue + state = self.state[p] + if not state: # due to None grads + continue + manifold = p.manifold + c = p.c + exp_avg = state["exp_avg"] + copy_or_set_(p, manifold.proj(p, c)) + exp_avg.set_(manifold.proj_tan(exp_avg, u, c)) diff --git a/Poincare/poincare.py b/Poincare/poincare.py new file mode 100644 index 0000000000000000000000000000000000000000..e28549492de0664fdd1aa02a717a2782e27f69f2 --- /dev/null +++ b/Poincare/poincare.py @@ -0,0 +1,155 @@ +from __future__ import division +from __future__ import print_function + +import logging +import os +import time + +import numpy as np +import Ghypeddings.Poincare.optimizers as optimizers +import torch +from Ghypeddings.Poincare.models.base_models import NCModel +from Ghypeddings.Poincare.utils.data_utils import process_data +from Ghypeddings.Poincare.utils.train_utils import format_metrics, create_args + + +class POINCARE: + def __init__(self, + adj, + features, + labels, + dim, + grad_clip=None, + weight_decay=0.01, + lr=0.1, + gamma=0.5, + lr_reduce_freq=500, + cuda=0, + epochs=50, + min_epochs=50, + patience=None, + seed=42, + log_freq=1, + eval_freq=1, + val_prop=0.5, + test_prop=0.3, + double_precision=0, + dropout=0.1, + normalize_adj=False, + normalize_feats=True): + self.args = create_args(dim,grad_clip,weight_decay,lr,gamma,lr_reduce_freq,cuda,epochs,min_epochs,patience,seed,log_freq,eval_freq,val_prop,test_prop,double_precision,dropout,normalize_adj,normalize_feats) + self.args.n_nodes = adj.shape[0] + self.args.feat_dim = features.shape[1] + self.args.n_classes = len(np.unique(labels)) + self.data = process_data(self.args,adj,features,labels) + + np.random.seed(self.args.seed) + torch.manual_seed(self.args.seed) + if int(self.args.double_precision): + torch.set_default_dtype(torch.float64) + if int(self.args.cuda) >= 0: + torch.cuda.manual_seed(self.args.seed) + self.args.device = 'cuda:' + str(self.args.cuda) if int(self.args.cuda) >= 0 else 'cpu' + self.args.patience = self.args.epochs if not self.args.patience else int(self.args.patience) + if not self.args.lr_reduce_freq: + self.args.lr_reduce_freq = self.args.epochs + self.model = NCModel(self.args) + self.optimizer = getattr(optimizers, 'RiemannianAdam')(params=self.model.parameters(), lr=self.args.lr, + weight_decay=self.args.weight_decay) + self.lr_scheduler = torch.optim.lr_scheduler.StepLR( + self.optimizer, + step_size=int(self.args.lr_reduce_freq), + gamma=float(self.args.gamma) + ) + + if self.args.cuda is not None and int(self.args.cuda) >= 0 : + os.environ['CUDA_VISIBLE_DEVICES'] = 
str(self.args.cuda) + self.model = self.model.to(self.args.device) + for x, val in self.data.items(): + if torch.is_tensor(self.data[x]): + self.data[x] = self.data[x].to(self.args.device) + self.best_emb = None + + + def fit(self): + + logging.getLogger().setLevel(logging.INFO) + logging.info(str(self.model)) + tot_params = sum([np.prod(p.size()) for p in self.model.parameters()]) + logging.info(f"Total number of parameters: {tot_params}") + + t_total = time.time() + counter = 0 + best_val_metrics = self.model.init_metric_dict() + + best_losses = [] + real_losses = [] + + for epoch in range(self.args.epochs): + t = time.time() + self.model.train() + self.optimizer.zero_grad() + embeddings = self.model.encode(self.data['features']) + assert not torch.isnan(embeddings).any() + train_metrics = self.model.compute_metrics(embeddings, self.data, 'train') + train_metrics['loss'].backward() + if self.args.grad_clip is not None: + max_norm = float(self.args.grad_clip) + all_params = list(self.model.parameters()) + for param in all_params: + torch.nn.utils.clip_grad_norm_(param, max_norm) + self.optimizer.step() + self.lr_scheduler.step() + + real_losses.append(train_metrics['loss'].item()) + if(len(best_losses) == 0): + best_losses.append(real_losses[0]) + elif (best_losses[-1] > real_losses[-1]): + best_losses.append(real_losses[-1]) + else: + best_losses.append(best_losses[-1]) + + + if (epoch + 1) % self.args.log_freq == 0: + logging.info(" ".join(['Epoch: {:04d}'.format(epoch + 1), + 'lr: {}'.format(self.lr_scheduler.get_lr()[0]), + format_metrics(train_metrics, 'train'), + 'time: {:.4f}s'.format(time.time() - t) + ])) + if (epoch + 1) % self.args.eval_freq == 0: + self.model.eval() + embeddings = self.model.encode(self.data['features']) + val_metrics = self.model.compute_metrics(embeddings, self.data, 'val') + + if (epoch + 1) % self.args.log_freq == 0: + logging.info(" ".join(['Epoch: {:04d}'.format(epoch + 1), format_metrics(val_metrics, 'val')])) + + if self.model.has_improved(best_val_metrics, val_metrics): + self.best_emb = embeddings + best_val_metrics = val_metrics + counter = 0 + else: + counter += 1 + if counter == self.args.patience and epoch > self.args.min_epochs: + logging.info("Early stopping") + break + + logging.info("Training Finished!") + logging.info("Total time elapsed: {:.4f}s".format(time.time() - t_total)) + + return {'real':real_losses,'best':best_losses},best_val_metrics['acc'],best_val_metrics['f1'],best_val_metrics['recall'],best_val_metrics['precision'],best_val_metrics['roc_auc'],time.time() - t_total + + def predict(self): + self.model.eval() + embeddings = self.model.encode(self.data['features']) + val_metrics = self.model.compute_metrics(embeddings, self.data, 'test') + return val_metrics['loss'].item(),val_metrics['acc'],val_metrics['f1'],val_metrics['recall'],val_metrics['precision'],val_metrics['roc_auc'] + + def save_embeddings(self): + tb_embeddings_euc = self.model.manifold.logmap0(self.best_emb,self.model.decoder.c) + for_classification_hyp = np.hstack((self.best_emb.cpu().detach().numpy(),self.data['labels'].cpu().reshape(-1,1))) + for_classification_euc = np.hstack((tb_embeddings_euc.cpu().detach().numpy(),self.data['labels'].cpu().reshape(-1,1))) + hyp_file_path = os.path.join(os.getcwd(),'poincare_embeddings_hyp.csv') + euc_file_path = os.path.join(os.getcwd(),'poincare_embeddings_euc.csv') + np.savetxt(hyp_file_path, for_classification_hyp, delimiter=',') + np.savetxt(euc_file_path, for_classification_euc, delimiter=',') \ No newline at end of file 
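For reference, a minimal usage sketch of the POINCARE wrapper defined above. This is illustrative and not part of the committed files: it assumes the Ghypeddings package is importable, that the CIC_DDoS2019 raw CSVs are available under datasets/examples/CICDDoS2019/original (or that cached .pkl samples exist), and the hyperparameter values (n_nodes, dim, lr, epochs, patience) are placeholders rather than tuned settings.

from Ghypeddings.Poincare.poincare import POINCARE
from Ghypeddings.datasets.datasets import CIC_DDoS2019

# Build (or reload) a small graph sample: adjacency matrix, node features, binary labels.
dataset = CIC_DDoS2019()
adj, features, labels = dataset.build(n_nodes=2000)   # assumes the raw CSVs are present
# adj, features, labels = dataset.load_samples()      # alternative: reuse cached .pkl files

# Placeholder hyperparameters; cuda=-1 forces CPU, pass a GPU index if one is available.
model = POINCARE(adj, features, labels, dim=16, lr=0.01, epochs=100, patience=20, cuda=-1)

# fit() returns the loss curves plus the best validation metrics and the elapsed time.
losses, acc, f1, recall, precision, roc_auc, elapsed = model.fit()

# predict() evaluates on the test split; save_embeddings() writes
# poincare_embeddings_hyp.csv / poincare_embeddings_euc.csv to the current directory.
test_loss, test_acc, test_f1, test_recall, test_precision, test_roc_auc = model.predict()
model.save_embeddings()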
diff --git a/Poincare/utils/__init__.py b/Poincare/utils/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/Poincare/utils/data_utils.py b/Poincare/utils/data_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..bc5c634801fe17a9231ff2f582dfcae159377ad3 --- /dev/null +++ b/Poincare/utils/data_utils.py @@ -0,0 +1,83 @@ +"""Data utils functions for pre-processing and data loading.""" +import os +import pickle as pkl +import sys + +import networkx as nx +import numpy as np +import scipy.sparse as sp +import torch + + +def process_data(args, adj,features,labels): + data = process_data_nc(args,adj,features,labels) + data['adj_train_norm'], data['features'] = process( + data['adj_train'], data['features'], args.normalize_adj,args.normalize_feats + ) + return data + +def process(adj, features, normalize_adj, normalize_feats): + if sp.isspmatrix(features): + features = np.array(features.todense()) + if normalize_feats: + features = normalize(features) + features = torch.Tensor(features) + if normalize_adj: + adj = normalize(adj + sp.eye(adj.shape[0])) + adj = sparse_mx_to_torch_sparse_tensor(adj) + return adj, features + + +def normalize(mx): + """Row-normalize sparse matrix.""" + rowsum = np.array(mx.sum(1)) + r_inv = np.power(rowsum, -1).flatten() + r_inv[np.isinf(r_inv)] = 0. + r_mat_inv = sp.diags(r_inv) + mx = r_mat_inv.dot(mx) + return mx + + +def sparse_mx_to_torch_sparse_tensor(sparse_mx): + """Convert a scipy sparse matrix to a torch sparse tensor.""" + sparse_mx = sparse_mx.tocoo() + indices = torch.from_numpy( + np.vstack((sparse_mx.row, sparse_mx.col)).astype(np.int64) + ) + values = torch.Tensor(sparse_mx.data) + shape = torch.Size(sparse_mx.shape) + return torch.sparse.FloatTensor(indices, values, shape) + + +def augment(adj, features, normalize_feats=True): + deg = np.squeeze(np.sum(adj, axis=0).astype(int)) + deg[deg > 5] = 5 + deg_onehot = torch.tensor(np.eye(6)[deg], dtype=torch.float).squeeze() + const_f = torch.ones(features.size(0), 1) + features = torch.cat((features, deg_onehot, const_f), dim=1) + return features + +def split_data(labels, val_prop, test_prop, seed): + np.random.seed(seed) + nb_nodes = labels.shape[0] + all_idx = np.arange(nb_nodes) + pos_idx = labels.nonzero()[0] + neg_idx = (1. 
- labels).nonzero()[0] + np.random.shuffle(pos_idx) + np.random.shuffle(neg_idx) + pos_idx = pos_idx.tolist() + neg_idx = neg_idx.tolist() + nb_pos_neg = min(len(pos_idx), len(neg_idx)) + nb_val = round(val_prop * nb_pos_neg) + nb_test = round(test_prop * nb_pos_neg) + idx_val_pos, idx_test_pos, idx_train_pos = pos_idx[:nb_val], pos_idx[nb_val:nb_val + nb_test], pos_idx[ + nb_val + nb_test:] + idx_val_neg, idx_test_neg, idx_train_neg = neg_idx[:nb_val], neg_idx[nb_val:nb_val + nb_test], neg_idx[ + nb_val + nb_test:] + return idx_val_pos + idx_val_neg, idx_test_pos + idx_test_neg, idx_train_pos + idx_train_neg + +def process_data_nc(args,adj,features,labels): + idx_val, idx_test, idx_train = split_data(labels, args.val_prop, args.test_prop, seed=args.seed) + labels = torch.LongTensor(labels) + data = {'adj_train': sp.csr_matrix(adj), 'features': features, 'labels': labels, 'idx_train': idx_train, 'idx_val': idx_val, 'idx_test': idx_test} + return data diff --git a/Poincare/utils/eval_utils.py b/Poincare/utils/eval_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..7494c5f3e618155257bfa1f6af2a2c91acd2f526 --- /dev/null +++ b/Poincare/utils/eval_utils.py @@ -0,0 +1,14 @@ +from sklearn.metrics import accuracy_score, f1_score,precision_score,recall_score,roc_auc_score + +def acc_f1(output, labels, average='binary'): + preds = output.max(1)[1].type_as(labels) + if preds.is_cuda: + preds = preds.cpu() + labels = labels.cpu() + accuracy = accuracy_score(labels,preds) + recall = recall_score(labels,preds) + precision = precision_score(labels,preds) + roc_auc = roc_auc_score(labels,preds) + f1 = f1_score(labels,preds, average=average) + return accuracy, f1,recall,precision,roc_auc + diff --git a/Poincare/utils/math_utils.py b/Poincare/utils/math_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..a2fee953984adca2f6f271db79f2b5624d9ad5bd --- /dev/null +++ b/Poincare/utils/math_utils.py @@ -0,0 +1,69 @@ +"""Math utils functions.""" + +import torch + + +def cosh(x, clamp=15): + return x.clamp(-clamp, clamp).cosh() + + +def sinh(x, clamp=15): + return x.clamp(-clamp, clamp).sinh() + + +def tanh(x, clamp=15): + return x.clamp(-clamp, clamp).tanh() + + +def arcosh(x): + return Arcosh.apply(x) + + +def arsinh(x): + return Arsinh.apply(x) + + +def artanh(x): + return Artanh.apply(x) + + +class Artanh(torch.autograd.Function): + @staticmethod + def forward(ctx, x): + x = x.clamp(-1 + 1e-7, 1 - 1e-7) + ctx.save_for_backward(x) + z = x.double() + return (torch.log_(1 + z).sub_(torch.log_(1 - z))).mul_(0.5).to(x.dtype) + + @staticmethod + def backward(ctx, grad_output): + input, = ctx.saved_tensors + return grad_output / (1 - input ** 2) + + +class Arsinh(torch.autograd.Function): + @staticmethod + def forward(ctx, x): + ctx.save_for_backward(x) + z = x.double() + return (z + torch.sqrt_(1 + z.pow(2))).clamp_min_(1e-7).log_().to(x.dtype) + + @staticmethod + def backward(ctx, grad_output): + input, = ctx.saved_tensors + return grad_output / (1 + input ** 2) ** 0.5 + + +class Arcosh(torch.autograd.Function): + @staticmethod + def forward(ctx, x): + x = x.clamp(min=1.0 + 1e-7) + ctx.save_for_backward(x) + z = x.double() + return (z + torch.sqrt_(z.pow(2) - 1)).clamp_min_(1e-7).log_().to(x.dtype) + + @staticmethod + def backward(ctx, grad_output): + input, = ctx.saved_tensors + return grad_output / (input ** 2 - 1) ** 0.5 + diff --git a/Poincare/utils/train_utils.py b/Poincare/utils/train_utils.py new file mode 100644 index 
0000000000000000000000000000000000000000..fb43e0d3044366d8c4d1c0bded82fa2b4e477edd --- /dev/null +++ b/Poincare/utils/train_utils.py @@ -0,0 +1,38 @@ +import os + +import numpy as np +import torch +import torch.nn.functional as F +import torch.nn.modules.loss +import argparse + + +def format_metrics(metrics, split): + """Format metric in metric dict for logging.""" + return " ".join( + ["{}_{}: {:.4f}".format(split, metric_name, metric_val) for metric_name, metric_val in metrics.items()]) + + +def create_args(*args): + parser = argparse.ArgumentParser() + parser.add_argument('--dim', type=int, default=args[0]) + parser.add_argument('--grad_clip', type=float, default=args[1]) + parser.add_argument('--weight_decay', type=float, default=args[2]) + parser.add_argument('--lr', type=float, default=args[3]) + parser.add_argument('--gamma', type=float, default=args[4]) + parser.add_argument('--lr_reduce_freq', type=int, default=args[5]) + parser.add_argument('--cuda', type=int, default=args[6]) + parser.add_argument('--epochs', type=int, default=args[7]) + parser.add_argument('--min_epochs', type=int, default=args[8]) + parser.add_argument('--patience', type=int, default=args[9]) + parser.add_argument('--seed', type=int, default=args[10]) + parser.add_argument('--log_freq', type=int, default=args[11]) + parser.add_argument('--eval_freq', type=int, default=args[12]) + parser.add_argument('--val_prop', type=float, default=args[13]) + parser.add_argument('--test_prop', type=float, default=args[14]) + parser.add_argument('--double_precision', type=int, default=args[15]) + parser.add_argument('--dropout', type=float, default=args[16]) + parser.add_argument('--normalize_adj', type=bool, default=args[17]) + parser.add_argument('--normalize_feats', type=bool, default=args[18]) + flags, unknown = parser.parse_known_args() + return flags \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000000000000000000000000000000000000..b0dec0fbdb6edb2deaadf67c24db44e9dd509930 --- /dev/null +++ b/README.md @@ -0,0 +1 @@ +# G-Hypeddings diff --git a/__init__.py b/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..ad005b655e398136449951499c1fdb39e547cd5c --- /dev/null +++ b/__init__.py @@ -0,0 +1,10 @@ +from Ghypeddings.H2HGCN.h2hgcn import H2HGCN +from Ghypeddings.HGCAE.hgcae import HGCAE +from Ghypeddings.HGCN.hgcn import HGCN +from Ghypeddings.HGNN.hgnn import HGNN +from Ghypeddings.Poincare.poincare import POINCARE +from Ghypeddings.PVAE.pvae import PVAE + +from Ghypeddings.datasets.datasets import CIC_DDoS2019 +from Ghypeddings.datasets.datasets import AWID3 +from Ghypeddings.datasets.datasets import TON_IoT \ No newline at end of file diff --git a/classifiers/__init__.py b/classifiers/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..74f7dd6b3457945bb91e5e4a35e11e1ff37cc23b --- /dev/null +++ b/classifiers/__init__.py @@ -0,0 +1,7 @@ +from Ghypeddings.classifiers.svm import SVM +from Ghypeddings.classifiers.mlp import mlp +from Ghypeddings.classifiers.decision_tree import decision_tree +from Ghypeddings.classifiers.random_forest import random_forest +from Ghypeddings.classifiers.adaboost import adaboost +from Ghypeddings.classifiers.knn import KNN +from Ghypeddings.classifiers.naive_bayes import naive_bayes \ No newline at end of file diff --git a/classifiers/adaboost.py b/classifiers/adaboost.py new file mode 100644 index 0000000000000000000000000000000000000000..c22a107ee4a296ff621ab57ba32249d3723bd992 --- 
/dev/null +++ b/classifiers/adaboost.py @@ -0,0 +1,16 @@ +from sklearn.ensemble import AdaBoostClassifier +from sklearn.model_selection import train_test_split +from sklearn.metrics import accuracy_score , f1_score , recall_score , precision_score , roc_auc_score + + +def adaboost(X,y,test_split,seed,n_estimators=10): + X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_split, random_state=seed) + ada_boost = AdaBoostClassifier(n_estimators=n_estimators, random_state=seed) + ada_boost.fit(X_train, y_train) + y_pred = ada_boost.predict(X_test) + accuracy = accuracy_score(y_test, y_pred) + f1 = f1_score(y_test, y_pred) + recall = recall_score(y_test, y_pred) + precision = precision_score(y_test, y_pred) + roc_auc = roc_auc_score(y_test, y_pred) + return accuracy,f1,recall,precision,roc_auc \ No newline at end of file diff --git a/classifiers/decision_tree.py b/classifiers/decision_tree.py new file mode 100644 index 0000000000000000000000000000000000000000..2ae89c09289a0c313a2fca52e068d93e7fb0d7cf --- /dev/null +++ b/classifiers/decision_tree.py @@ -0,0 +1,15 @@ +from sklearn.tree import DecisionTreeClassifier +from sklearn.model_selection import train_test_split +from sklearn.metrics import accuracy_score , f1_score , recall_score , precision_score , roc_auc_score + +def decision_tree(X,y,test_split,seed,max_depth=4): + X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_split, random_state=seed) + clf = DecisionTreeClassifier(max_depth=max_depth) + clf.fit(X_train, y_train) + y_pred = clf.predict(X_test) + accuracy = accuracy_score(y_test, y_pred) + f1 = f1_score(y_test, y_pred) + recall = recall_score(y_test, y_pred) + precision = precision_score(y_test, y_pred) + roc_auc = roc_auc_score(y_test, y_pred) + return accuracy,f1,recall,precision,roc_auc \ No newline at end of file diff --git a/classifiers/knn.py b/classifiers/knn.py new file mode 100644 index 0000000000000000000000000000000000000000..98a5b0c0f3547f3e43a31049d979e1c05d24a3dd --- /dev/null +++ b/classifiers/knn.py @@ -0,0 +1,15 @@ +from sklearn.model_selection import train_test_split +from sklearn.neighbors import KNeighborsClassifier +from sklearn.metrics import accuracy_score , f1_score , recall_score , precision_score , roc_auc_score + +def KNN(X,y,test_split,seed,k=20): + X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_split, random_state=seed) + knn = KNeighborsClassifier(n_neighbors=k) + knn.fit(X_train, y_train) + y_pred = knn.predict(X_test) + accuracy = accuracy_score(y_test, y_pred) + f1 = f1_score(y_test, y_pred) + recall = recall_score(y_test, y_pred) + precision = precision_score(y_test, y_pred) + roc_auc = roc_auc_score(y_test, y_pred) + return accuracy,f1,recall,precision,roc_auc \ No newline at end of file diff --git a/classifiers/mlp.py b/classifiers/mlp.py new file mode 100644 index 0000000000000000000000000000000000000000..44fe0d9ccc453eddd6690b0e909b45bf1aa390a4 --- /dev/null +++ b/classifiers/mlp.py @@ -0,0 +1,17 @@ +from sklearn.neural_network import MLPClassifier +import time +import numpy as np +from sklearn.model_selection import train_test_split +from sklearn.metrics import accuracy_score , f1_score , recall_score , precision_score , roc_auc_score + +def mlp(X,y,n_hidden_layers,hidden_dim,epochs=50,batch_size=64,test_split=.3,seed=42): + X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_split, random_state=seed) + mlp = MLPClassifier(hidden_layer_sizes=(n_hidden_layers, 
hidden_dim),learning_rate='adaptive',batch_size=batch_size ,activation='relu', solver='adam', max_iter=epochs, random_state=seed) + mlp.fit(X_train, y_train) + y_pred = mlp.predict(X_test) + accuracy = accuracy_score(y_test, y_pred) + f1 = f1_score(y_test, y_pred) + recall = recall_score(y_test, y_pred) + precision = precision_score(y_test, y_pred) + roc_auc = roc_auc_score(y_test, y_pred) + return accuracy,f1,recall,precision,roc_auc \ No newline at end of file diff --git a/classifiers/naive_bayes.py b/classifiers/naive_bayes.py new file mode 100644 index 0000000000000000000000000000000000000000..3a084829077a2b70c231f202f4231275725001c1 --- /dev/null +++ b/classifiers/naive_bayes.py @@ -0,0 +1,15 @@ +from sklearn.naive_bayes import GaussianNB +from sklearn.model_selection import train_test_split +from sklearn.metrics import accuracy_score , f1_score , recall_score , precision_score , roc_auc_score + +def naive_bayes(X,y,test_split,seed): + X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_split, random_state=seed) + clf = GaussianNB() + clf.fit(X_train, y_train) + y_pred = clf.predict(X_test) + accuracy = accuracy_score(y_test, y_pred) + f1 = f1_score(y_test, y_pred) + recall = recall_score(y_test, y_pred) + precision = precision_score(y_test, y_pred) + roc_auc = roc_auc_score(y_test, y_pred) + return accuracy,f1,recall,precision,roc_auc \ No newline at end of file diff --git a/classifiers/random_forest.py b/classifiers/random_forest.py new file mode 100644 index 0000000000000000000000000000000000000000..4d1c9441385a30e9a40bb815fca1d7783d1624a3 --- /dev/null +++ b/classifiers/random_forest.py @@ -0,0 +1,18 @@ +from sklearn.ensemble import RandomForestClassifier +from sklearn.model_selection import train_test_split +from sklearn.metrics import accuracy_score , f1_score , recall_score , precision_score , roc_auc_score + + + + +def random_forest(X,y,test_split,seed,n_estimators=10,max_depth=4,max_features='sqrt'): + X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_split, random_state=seed) + clf = RandomForestClassifier(max_features=max_features,n_estimators=n_estimators, max_depth=max_depth, random_state=seed) + clf.fit(X_train, y_train) + y_pred = clf.predict(X_test) + accuracy = accuracy_score(y_test, y_pred) + f1 = f1_score(y_test, y_pred) + recall = recall_score(y_test, y_pred) + precision = precision_score(y_test, y_pred) + roc_auc = roc_auc_score(y_test, y_pred) + return accuracy,f1,recall,precision,roc_auc \ No newline at end of file diff --git a/classifiers/svm.py b/classifiers/svm.py new file mode 100644 index 0000000000000000000000000000000000000000..523a92b9b32c2ba382480adab900cbb5e5e6966e --- /dev/null +++ b/classifiers/svm.py @@ -0,0 +1,23 @@ +from sklearn import svm +import sklearn.model_selection as model_selection +from sklearn.metrics import accuracy_score +from sklearn.metrics import accuracy_score , f1_score , recall_score , precision_score , roc_auc_score + + +def SVM(X,y,test_split,seed,kernel='rbf',gamma=.5,C=.1,degree=3,average='binary'): + X_train, X_test, y_train, y_test = model_selection.train_test_split(X, y, train_size=1-test_split, test_size=test_split, random_state=seed) + + if(kernel == 'rbf'): + model = svm.SVC(kernel='rbf', gamma=gamma, C=C).fit(X_train, y_train) + elif(kernel == 'poly'): + model = svm.SVC(kernel='poly', degree=degree, C=C).fit(X_train, y_train) + else: + raise NotImplementedError + + y_pred = model.predict(X_test) + accuracy = accuracy_score(y_test, y_pred) + f1 = f1_score(y_test, y_pred, 
average=average) + recall = recall_score(y_test, y_pred) + precision = precision_score(y_test, y_pred) + roc_auc = roc_auc_score(y_test, y_pred) + return accuracy,f1,recall,precision,roc_auc \ No newline at end of file diff --git a/clusterers/__init__.py b/clusterers/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/datasets/__init__.py b/datasets/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/datasets/datasets.py b/datasets/datasets.py new file mode 100644 index 0000000000000000000000000000000000000000..9e49310420190494ae7211c3e0381c656a9ade37 --- /dev/null +++ b/datasets/datasets.py @@ -0,0 +1,395 @@ +import os + +import pandas as pd +import numpy as np +from sklearn.preprocessing import MinMaxScaler +import pickle +import hashlib +from sklearn.preprocessing import LabelEncoder + +class Dataset: + def __init__(self,directory,adj_path,features_path,labels_path): + self.adj_path = adj_path + self.features_path = features_path + self.labels_path = labels_path + self.directory = directory + + def _get_files(self): + return [os.path.join(self.directory,file) for file in os.listdir(self.directory) if os.path.isfile(os.path.join(self.directory, file))] + + def save_samples(self,adj,features,labels): + with open(self.adj_path,'wb') as f: + pickle.dump(adj,f) + with open(self.features_path,'wb') as f: + pickle.dump(features,f) + with open(self.labels_path,'wb') as f: + pickle.dump(labels,f) + + def load_samples(self): + with open(self.adj_path,'rb') as f: + adj = pickle.load(f) + with open(self.features_path,'rb') as f: + features = pickle.load(f) + with open(self.labels_path,'rb') as f: + labels = pickle.load(f) + print('features:',features.shape) + return adj,features,labels + +class CIC_DDoS2019(Dataset): + def __init__(self): + super().__init__( + directory=os.path.join(os.path.dirname(os.path.abspath(__file__)),'examples','CICDDoS2019','original'), + features_path=os.path.join(os.path.dirname(os.path.abspath(__file__)),'examples','CICDDoS2019','features.pkl'), + adj_path=os.path.join(os.path.dirname(os.path.abspath(__file__)),'examples','CICDDoS2019','adjacency.pkl'), + labels_path= os.path.join(os.path.dirname(os.path.abspath(__file__)),'examples','CICDDoS2019','labels.pkl') + ) + self.n_classes = 2 + + def build(self,n_nodes): + df = self._create_file_bc(n_nodes) + columns_to_exclude = ['Unnamed: 0', 'Flow ID', ' Source IP', ' Destination IP', ' Timestamp', 'SimillarHTTP'] + df = df.dropna(subset=df.columns.difference(columns_to_exclude)) + for column in df.columns: + max_value = df.loc[df[column] != np.inf, column].max() + min_value = df.loc[df[column] != -np.inf, column].min() + df.loc[df[column] == np.inf, column] = max_value + df.loc[df[column] == -np.inf, column] = min_value + data = df.to_numpy() + N = data.shape[0] + labels = np.where(data[:,87] == 'BENIGN', 0,1) + adj = self._filling_adjacency_numpy(data, N, 2, 4) + columns_to_exclude.append(' Label') + df.drop(columns_to_exclude, axis=1, inplace=True) + features = df.to_numpy() + scaler = MinMaxScaler() + features = scaler.fit_transform(features) + return adj, features, labels + + def _load_file(self,path,max_per_class,list_classes=[]): + df = pd.read_csv(path,low_memory=False) + if(len(list_classes)): + df = df[df[' Label'].isin(list_classes)] + df = df.groupby([' Label']).apply(lambda x: x.sample(max_per_class)).reset_index(drop=True) + return df + + def 
_create_file_bc(self,n_nodes): + file_paths = self._get_files() + max_per_class = int(n_nodes / (self.n_classes * len(file_paths))) +1 + dfs = [] + for path in file_paths: + class_name = path.split('\\')[-1].split('.')[0] + list_classes = ['BENIGN',class_name] + df = self._load_file(path,max_per_class,list_classes) + dfs.append(df) + print('finishing loading the file : {}'.format(path)) + df = pd.concat(dfs, ignore_index=True) + df = df.sample(frac=1).reset_index(drop=True) + print(df[' Label'].value_counts()) + return df + + def _filling_adjacency_numpy(self,data, N, source_ip_index, destination_ip_index): + try: + adjacency = np.zeros((N,N), dtype=bool) + except Exception as e: + print(f"An error occurred: {e}") + source_ips = data[:, source_ip_index] + destination_ips = data[:, destination_ip_index] + mask = ((source_ips[:, np.newaxis] == source_ips) | (source_ips[:, np.newaxis] == destination_ips) | (destination_ips[:, np.newaxis] == source_ips) | (destination_ips[:, np.newaxis] == destination_ips)) + adjacency[mask] = True + return adjacency + +class AWID3(Dataset): + def __init__(self): + super().__init__( + directory=os.path.join(os.path.dirname(os.path.abspath(__file__)),'examples','AWID3','original'), + features_path=os.path.join(os.path.dirname(os.path.abspath(__file__)),'examples','AWID3','features.pkl'), + adj_path=os.path.join(os.path.dirname(os.path.abspath(__file__)),'examples','AWID3','adjacency.pkl'), + labels_path= os.path.join(os.path.dirname(os.path.abspath(__file__)),'examples','AWID3','labels.pkl') + ) + self.n_classes = 2 + + def _hex_to_int(self,hex_string): + if('.' in hex_string): + print(hex_string) + hex_string = int(float(hex_string)) + return hex_string + return int(str(hex_string), 16) + + def _hash_value(self,old_value): + return hash(str(old_value))%1e19 + + def _encode_llc(self,old_value): + return len(str(old_value).split('-')) + + + def _encode_multiple_hex(self,old_value): + words = str(old_value).split('-') + return sum([self._hex_to_int(self._month_to_string(word)) for word in words]) + + def _encode_checksum_status(self,old_value): + words = str(old_value) + if '2' in words or '02' in words: + return 2 + else: + return 0 + + def _encode_to_binary(self,old_value): + words = str(old_value) + if '1' in words or 'Jan' in words: + return 1 + else: + return 0 + + def _month_to_string(self, month): + if month == 'Jan': + return '1' + elif month == 'Feb': + return '2' + elif month == 'Mar': + return '3' + elif month == 'Apr': + return '4' + elif month == 'May': + return '5' + elif month == 'Jun': + return '6' + elif month == 'Jul': + return '7' + elif month == 'Aug': + return '8' + elif month == 'Sep': + return '9' + elif month == 'Oct': + return '10' + elif month == 'Nov': + return '11' + elif month == 'Dec': + return '12' + else: + return month + + def _encode_to_avg(self,old_value): + if type(old_value) in [int,float]: + return old_value + if 'e-' in old_value: + words = float(old_value) + return words + else: + words = str(old_value).split('-') + words = [float(self._month_to_string(i)) for i in words] + return np.sum(words) + + def _encode_antsignal(self,old_value): + if type(old_value) in [int,float]: + return old_value + if 'e-' in old_value: + words = float(old_value) + return words + else: + words = str(old_value).split('-') + words = [-1*float(self._month_to_string(i)) for i in words if i != ''] + return np.sum(words) + + def _encode_http_request_method(self,old_value): + return hash(str(old_value))%100 + + def _encode_tls_protocol(self,old_value): 
+ words = str(old_value) + if 'http2' in words: + return 1 + elif 'over' in words: + return 2 + else: + return 0 + + def _encode_ip_version(self,old_value): + words = str(old_value) + if '4' in words or '04' in words or 'Apr' in words: + return 4 + else: + return 6 + + def _encode_ip_protocol(self,old_value): + words = str(old_value) + if '17' in words: + return 17 + elif '6': + return 6 + elif '2': + return 2 + else: + return 0 + + def _process_data(self,df): + + df.drop(['frame.number','frame.time','wlan_rsna_eapol.keydes.data','wlan_rsna_eapol.keydes.nonce','wlan.country_info.code','wlan.country_info.fnm','wlan.ssid','wlan.tag','wlan.tag.length','tcp.ack','tcp.ack_raw','tcp.seq','tcp.seq_raw','dns.id','http.date','http.file_data','http.location','http.request.line','http.request.uri.path','http.request.uri.query','http.request.uri.query.parameter','http.request.version','http.response.code.desc','http.response.line','http.response.phrase','http.response.version','http.response_for.uri','http.server','json.value.string','json.key','tls.handshake.extensions_key_share_group','tls.handshake.session_ticket_length','tls.handshake.version','tls.record.version','tls.handshake.extension.type','http.host','dns.a','dhcp.option.router','dhcp.option.dhcp_server_id','dhcp.option.broadcast_address','dhcp.ip.server','dhcp.ip.relay','wlan.bssid','wlan.da','wlan.ra','wlan.sa','wlan.ta','arp.src.hw_mac','arp.dst.hw_mac','arp.dst.proto_ipv4','arp.src.proto_ipv4','dhcp.hw.mac_addr','dhcp.id','dhcp.ip.client','frame.time_delta','radiotap.mactime','wlan_radio.timestamp','dns.flags.authoritative','smb2.msg_id','smb2.pid','smb2.fid','smb2.sesid','http.last_modified','smb2.tid','http.referer','smb.server_component','smb2.filename','smb2.previous_sesid','nbss.continuation_data','tcp.checksum','data.data','tcp.payload','udp.payload','dns.qry.name','dns.resp.name','http.request.full_uri','http.content_type','smb2.acct','smb2.domain','smb2.host'], axis=1, inplace=True) + + to_binary = ['radiotap.present.tsft','tcp.flags.syn','tcp.flags.ack','tcp.flags.fin','tcp.analysis','tcp.analysis.flags','tcp.flags.push','tcp.flags.reset','tcp.analysis.retransmission','dns.retransmit_request'] + for b in to_binary: + df[b] = df[b].apply(self._encode_to_binary) + + to_hex = ['radiotap.rxflags','wlan.analysis.kck','wlan.analysis.kek','wlan.rsn.ie.gtk.key','wlan.rsn.ie.igtk.key','wlan.rsn.ie.pmkid','wlan.fc.ds','arp.proto.type','nbss.type','smb2.buffer_code','smb2.protocol_id','smb2.data_offset','smb2.session_flags'] + for h in to_hex: + df[h] = df[h].apply(self._encode_multiple_hex) + df[h] = df[h].astype(np.float64) + + to_avg = ['ip.ttl','data.len','tcp.dstport','tcp.srcport','udp.dstport','udp.srcport','tcp.option_len','udp.length','dns.count.add_rr','dns.count.answers','dns.count.auth_rr','dns.count.labels','dns.count.queries','dns.flags.checkdisable','dns.flags.opcode','dns.flags.response','dns.qry.name.len','dns.resp.ttl','dns.resp.len.1','tls.record.content_type','tcp.time_relative','udp.time_delta','udp.time_relative','tcp.analysis.rto_frame','tcp.time_delta','http.content_length','smb2.cmd','smb2.header_len'] + for a in to_avg: + df[a] = df[a].apply(self._encode_to_avg) + + # to_hash = ['data.data','tcp.payload','udp.payload','dns.qry.name','dns.resp.name','http.content_type','http.request.full_uri'] + # for hh in to_hash: + # df[hh] = df[hh].apply(self._hash_value) + + encoder = LabelEncoder() + string_to_int = ['arp','dhcp','mdns','dns','ssdp','http.connection','nbns','ldap'] + for i in string_to_int: + df[i] = 
encoder.fit_transform(df[i]) + + df['llc'] = df['llc'].apply(self._encode_llc) + df['tcp.checksum.status'] = df['tcp.checksum.status'].apply(self._encode_checksum_status) + df['dhcp.cookie'] = df['dhcp.cookie'].apply(lambda x: 0 if x == '0' else 1) + df['http.request.method'] = df['http.request.method'].apply(self._encode_http_request_method) + df['tls.app_data_proto'] = df['tls.app_data_proto'].apply(self._encode_tls_protocol) + df['ip.version'] = df['ip.version'].apply(self._encode_ip_version) + df['ip.proto'] = df['ip.proto'].apply(self._encode_ip_protocol) + df['radiotap.dbm_antsignal'] = df['radiotap.dbm_antsignal'].apply(self._encode_antsignal) + df['Label'] = df['Label'].apply(lambda x: 0 if x == 'Normal' else 1) + + single_value_columns = df.columns[df.nunique() == 1] + df.drop(columns=single_value_columns,axis=1,inplace=True) + df = df.sample(frac=1, random_state=self.seed) + return df + + def _filling_adjacency_numpy(self,data): + N = data.shape[0] + try: + adjacency = np.zeros((N,N), dtype=bool) + except Exception as e: + print(f"An error occurred: {e}") + source_ips = data['ip.src'].to_numpy() + destination_ips = data['ip.dst'].to_numpy() + mask = ((source_ips[:, np.newaxis] == source_ips) | (source_ips[:, np.newaxis] == destination_ips) | (destination_ips[:, np.newaxis] == source_ips) | (destination_ips[:, np.newaxis] == destination_ips)) + adjacency[mask] = True + return adjacency + + def _load_file(self,path,max_per_class,sample=False): + df = pd.read_csv(path,low_memory=False) + if(sample): + real_min = df['Label'].value_counts().min() + print(real_min) + df = df.groupby(['Label']).apply(lambda x: x.sample(min(max_per_class,real_min))).reset_index(drop=True) + return df + + def _create_file_bc(self,n_nodes): + file_paths = self._get_files() + max_per_class = int(n_nodes / (self.n_classes * len(file_paths))) +1 + dfs = [] + for path in file_paths: + df = self._load_file(path,max_per_class,sample=True) + dfs.append(df) + print('finishing loading the file : {}'.format(path)) + df = pd.concat(dfs, ignore_index=True) + df = df.sample(frac=1).reset_index(drop=True) + print(df['Label'].value_counts()) + return df + + def build(self,n_nodes): + df = self._create_file_bc(n_nodes) + df['ip.dst'] =df['ip.dst'].astype(str) + df['ip.src'] =df['ip.src'].astype(str) + condition_ip_dst = (df['ip.dst'] == 'nan') ## this is a property of certain attacks + df.loc[condition_ip_dst,'ip.dst'] = '-1' + condition_ip_src = (df['ip.src'] == 'nan') ## this is a property of certain attacks + df.loc[condition_ip_src,'ip.src'] = '-1' + df = df.fillna(0) + df = self._process_data(df) + adj = self._filling_adjacency_numpy(df) + df.drop(['ip.src','ip.dst'],axis=1,inplace=True) + labels = df['Label'].to_numpy() + labels = labels.astype(np.bool_) + df.drop(['Label'],axis=1,inplace=True) + features = df.to_numpy() + scaler = MinMaxScaler() + features = scaler.fit_transform(features) + print("features:",features.shape) + return adj,features,labels + +class TON_IoT(Dataset): + def __init__(self): + super().__init__( + directory=os.path.join(os.path.dirname(os.path.abspath(__file__)),'examples','TON_IOT','original'), + features_path=os.path.join(os.path.dirname(os.path.abspath(__file__)),'examples','TON_IOT','features.pkl'), + adj_path=os.path.join(os.path.dirname(os.path.abspath(__file__)),'examples','TON_IOT','adjacency.pkl'), + labels_path= os.path.join(os.path.dirname(os.path.abspath(__file__)),'examples','TON_IOT','labels.pkl') + ) + self.n_classes = 2 + + def _hash_string_to_int(self,inp): + input_string = 
str(inp) + hash_object = hashlib.sha1(input_string.encode()) + hashed_hex = hash_object.hexdigest() + hashed_int = int(hashed_hex, 16) + return hashed_int + + def _filling_adjacency_numpy(self,data): + N = data.shape[0] + try: + adjacency = np.zeros((N,N), dtype=bool) + except Exception as e: + print(f"An error occurred: {e}") + source_ips = data['src_ip'].to_numpy() + destination_ips = data['dst_ip'].to_numpy() + mask = ((source_ips[:, np.newaxis] == source_ips) | (source_ips[:, np.newaxis] == destination_ips) | (destination_ips[:, np.newaxis] == source_ips) | (destination_ips[:, np.newaxis] == destination_ips)) + adjacency[mask] = True + return adjacency + + def _load_file(self,path,max_per_class,sample=False): + df = pd.read_csv(path,low_memory=False) + if(sample): + real_min = df['label'].value_counts().min() + print(real_min) + df = df.groupby(['label']).apply(lambda x: x.sample(min(max_per_class,real_min))).reset_index(drop=True) + return df + + def _create_file_bc(self,n_nodes): + file_paths = self._get_files() + max_per_class = int(n_nodes / (self.n_classes * len(file_paths))) +1 + dfs = [] + for path in file_paths: + df = self._load_file(path,max_per_class,sample=True) + dfs.append(df) + print('finishing loading the file : {}'.format(path)) + df = pd.concat(dfs, ignore_index=True) + df = df.sample(frac=1).reset_index(drop=True) + print(df['label'].value_counts()) + return df + + def build(self,n_nodes): + df = self._create_file_bc(n_nodes) + # remove type from the drop list and sample from the type to perform multiclass classification + df = df.groupby(['label']).apply(lambda x: x.sample(int(self.n_nodes / 2)).reset_index(drop=True)) + df.drop(columns=['ts','type'],inplace=True) + encoder = LabelEncoder() + string_to_int = ['proto','ssl_subject','ssl_issuer','http_referrer','service','conn_state','dns_AA','dns_RD','dns_RA','dns_rejected','ssl_version','ssl_cipher','ssl_resumed','ssl_established','http_method','http_version','http_orig_mime_types','http_resp_mime_types','weird_addl','weird_notice','http_uri','dns_query','http_user_agent','weird_name'] + for i in string_to_int: + df[i] = encoder.fit_transform(df[i]) + # text_to_int = ['http_uri','dns_query','http_user_agent','weird_name'] + # for j in text_to_int: + # df[j] = df[j].apply(self._hash_string_to_int) + # df[j] = df[j].astype(np.float64) + df['src_bytes'] = df['src_bytes'].apply(lambda x: 0 if x == '0.0.0.0' else x) + df['src_bytes'] = df['src_bytes'].astype(np.int64) + df['http_trans_depth'] = df['http_trans_depth'].apply(lambda x: 0 if x == '-' else x) + df['http_trans_depth'] = df['http_trans_depth'].astype(np.int64) + + adj = self._filling_adjacency_numpy(df) + df.drop(['src_ip','dst_ip'],axis=1,inplace=True) + labels = df['label'].to_numpy() + labels = labels.astype(np.bool_) + df.drop(['label'],axis=1,inplace=True) + features = df.to_numpy() + scaler = MinMaxScaler() + features = scaler.fit_transform(features) + print("features:",features.shape) + return adj,features,labels \ No newline at end of file diff --git a/datasets/examples/AWID3/adjacency.pkl b/datasets/examples/AWID3/adjacency.pkl new file mode 100644 index 0000000000000000000000000000000000000000..e02e23fb6a62540606b81948cfeac0db4d458af4 Binary files /dev/null and b/datasets/examples/AWID3/adjacency.pkl differ diff --git a/datasets/examples/AWID3/features.pkl b/datasets/examples/AWID3/features.pkl new file mode 100644 index 0000000000000000000000000000000000000000..08f5bbabd1c222fc8b1a0c9e8b37ffc129ecb6d3 Binary files /dev/null and 
b/datasets/examples/AWID3/features.pkl differ diff --git a/datasets/examples/AWID3/labels.pkl b/datasets/examples/AWID3/labels.pkl new file mode 100644 index 0000000000000000000000000000000000000000..255f6cf165c48c5f367365836ce1147c197b8422 Binary files /dev/null and b/datasets/examples/AWID3/labels.pkl differ diff --git a/datasets/examples/CICDDoS2019/adjacency.pkl b/datasets/examples/CICDDoS2019/adjacency.pkl new file mode 100644 index 0000000000000000000000000000000000000000..317d1c9c7e9cbc3f94a1c1b39010b02abff4d67c Binary files /dev/null and b/datasets/examples/CICDDoS2019/adjacency.pkl differ diff --git a/datasets/examples/CICDDoS2019/features.pkl b/datasets/examples/CICDDoS2019/features.pkl new file mode 100644 index 0000000000000000000000000000000000000000..6e76ec06afa8b0a85d491cfeb8aef5f0631abed3 Binary files /dev/null and b/datasets/examples/CICDDoS2019/features.pkl differ diff --git a/datasets/examples/CICDDoS2019/labels.pkl b/datasets/examples/CICDDoS2019/labels.pkl new file mode 100644 index 0000000000000000000000000000000000000000..81205142e3ac95f45b0e8865eec5cd4e78645f36 Binary files /dev/null and b/datasets/examples/CICDDoS2019/labels.pkl differ diff --git a/datasets/examples/TON_IOT/adjacency.pkl b/datasets/examples/TON_IOT/adjacency.pkl new file mode 100644 index 0000000000000000000000000000000000000000..629011f0607e25fa38147f523af6367cb06dcae6 Binary files /dev/null and b/datasets/examples/TON_IOT/adjacency.pkl differ diff --git a/datasets/examples/TON_IOT/features.pkl b/datasets/examples/TON_IOT/features.pkl new file mode 100644 index 0000000000000000000000000000000000000000..31290456dc9659885e7f9a6b3c3abc4fef4325e5 Binary files /dev/null and b/datasets/examples/TON_IOT/features.pkl differ diff --git a/datasets/examples/TON_IOT/labels.pkl b/datasets/examples/TON_IOT/labels.pkl new file mode 100644 index 0000000000000000000000000000000000000000..f7d1f85af5d254ce8d63032d3be24769fb6f5914 Binary files /dev/null and b/datasets/examples/TON_IOT/labels.pkl differ diff --git a/datasets/utils.py b/datasets/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..cab0b352d0ee12f4fbb580e29583e2e676adfb81 --- /dev/null +++ b/datasets/utils.py @@ -0,0 +1,32 @@ +import os +import pickle as pkl +import sys +import time +import scipy.sparse as sp +import networkx as nx +import numpy as np +from tqdm import tqdm + +def hyperbolicity(adj, num_samples): + curr_time = time.time() + hyps = [] + G = nx.from_numpy_array(adj) + for i in tqdm(range(num_samples)): + node_tuple = np.random.choice(G.nodes(), 4, replace=False) + s = [] + try: + d01 = nx.shortest_path_length(G, source=node_tuple[0], target=node_tuple[1], weight=None) + d23 = nx.shortest_path_length(G, source=node_tuple[2], target=node_tuple[3], weight=None) + d02 = nx.shortest_path_length(G, source=node_tuple[0], target=node_tuple[2], weight=None) + d13 = nx.shortest_path_length(G, source=node_tuple[1], target=node_tuple[3], weight=None) + d03 = nx.shortest_path_length(G, source=node_tuple[0], target=node_tuple[3], weight=None) + d12 = nx.shortest_path_length(G, source=node_tuple[1], target=node_tuple[2], weight=None) + s.append(d01 + d23) + s.append(d02 + d13) + s.append(d03 + d12) + s.sort() + hyps.append((s[-1] - s[-2]) / 2) + except Exception as e: + continue + print('Time for hyp: ', time.time() - curr_time) + return max(hyps) diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..864bc7ec3743fdd6d8c14e512d5809354dc33aa9 Binary files /dev/null 
and b/requirements.txt differ
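The shallow classifiers introduced in this diff (SVM, random_forest, adaboost, knn, naive_bayes, decision_tree) share a common (X, y, test_split, seed) interface and each return (accuracy, f1, recall, precision, roc_auc), so embeddings exported by any of the models can be re-evaluated offline. A minimal sketch follows; it assumes a poincare_embeddings_hyp.csv file already produced by save_embeddings() (last column holds the labels), and the split ratio and seed are arbitrary choices.

import numpy as np
from Ghypeddings.classifiers import SVM, random_forest

# Embedding matrix with labels appended as the last column, as written by save_embeddings().
data = np.loadtxt('poincare_embeddings_hyp.csv', delimiter=',')
X, y = data[:, :-1], data[:, -1].astype(int)

# Each call returns (accuracy, f1, recall, precision, roc_auc) on a held-out split.
print('random forest:', random_forest(X, y, test_split=0.3, seed=42))
print('svm (rbf)    :', SVM(X, y, test_split=0.3, seed=42, kernel='rbf'))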