__pycache__/
from __future__ import division
from __future__ import print_function
import logging
import os
import time
import numpy as np
import torch
from Ghypeddings.H2HGCN.models.base_models import NCModel
from Ghypeddings.H2HGCN.utils.data_utils import process_data
from Ghypeddings.H2HGCN.utils.train_utils import format_metrics, create_args
from Ghypeddings.H2HGCN.utils.pre_utils import *
import warnings
warnings.filterwarnings('ignore')
class H2HGCN:
def __init__(self,
adj,
features,
labels,
dim,
c=None,
num_layers=2,
bias=True,
act='leaky_relu',
select_manifold='lorentz',
num_centroid=10,
lr_stie=0.009,
stie_vars=[],
stiefel_optimizer='rsgd',
eucl_vars=[],
grad_clip=None,
optimizer='Adam',
weight_decay=0.01,
lr=0.01,
lr_scheduler='step',
lr_gamma=0.5,
step_lr_gamma=0.1,
step_lr_reduce_freq=500,
proj_init='xavier',
tie_weight=True,
cuda=0,
epochs=50,
min_epochs=50,
patience=None,
seed=42,
log_freq=1,
eval_freq=1,
val_prop=0.15,
test_prop=0.15,
double_precision=0,
dropout=0.1,
normalize_adj=False,
normalize_feats=True
):
self.args = create_args(dim,c,num_layers,bias,act,select_manifold,num_centroid,lr_stie,stie_vars,stiefel_optimizer,eucl_vars,grad_clip,optimizer,weight_decay,lr,lr_scheduler,lr_gamma,step_lr_gamma,step_lr_reduce_freq,proj_init,tie_weight,cuda,epochs,min_epochs,patience,seed,log_freq,eval_freq,val_prop,test_prop,double_precision,dropout,normalize_adj,normalize_feats)
self.args.n_nodes = adj.shape[0]
self.args.feat_dim = features.shape[1]
self.args.n_classes = len(np.unique(labels))
self.data = process_data(self.args,adj,features,labels)
if int(self.args.double_precision):
torch.set_default_dtype(torch.float64)
self.args.device = 'cuda:' + str(self.args.cuda) if int(self.args.cuda) >= 0 else 'cpu'
self.args.patience = self.args.epochs if not self.args.patience else int(self.args.patience)
self.model = NCModel(self.args)
self.optimizer, self.lr_scheduler, self.stiefel_optimizer, self.stiefel_lr_scheduler = set_up_optimizer_scheduler(True, self.args, self.model, self.args.lr, self.args.lr_stie)
if self.args.cuda is not None and int(self.args.cuda) >= 0 :
os.environ['CUDA_VISIBLE_DEVICES'] = str(self.args.cuda)
self.model = self.model.to(self.args.device)
for x, val in self.data.items():
if torch.is_tensor(self.data[x]):
self.data[x] = self.data[x].to(self.args.device)
self.best_emb = None
def fit(self):
logging.getLogger().setLevel(logging.INFO)
logging.info(f'Using: {self.args.device}')
tot_params = sum([np.prod(p.size()) for p in self.model.parameters()])
logging.info(f"Total number of parameters: {tot_params}")
t_total = time.time()
counter = 0
best_val_metrics = self.model.init_metric_dict()
best_losses = []
real_losses = []
train_losses = []
for epoch in range(self.args.epochs):
t = time.time()
self.model.train()
self.optimizer.zero_grad()
self.stiefel_optimizer.zero_grad()
embeddings = self.model.encode(self.data['features'], self.data['hgnn_adj'], self.data['hgnn_weight'])
train_metrics = self.model.compute_metrics(embeddings, self.data, 'train')
train_metrics['loss'].backward()
if self.args.grad_clip is not None:
max_norm = float(self.args.grad_clip)
all_params = list(self.model.parameters())
for param in all_params:
torch.nn.utils.clip_grad_norm_(param, max_norm)
self.optimizer.step()
self.stiefel_optimizer.step()
self.lr_scheduler.step()
self.stiefel_lr_scheduler.step()
train_losses.append(train_metrics['loss'].item())
if(len(best_losses) == 0):
best_losses.append(train_losses[0])
elif (best_losses[-1] > train_losses[-1]):
best_losses.append(train_losses[-1])
else:
best_losses.append(best_losses[-1])
if (epoch + 1) % self.args.log_freq == 0:
logging.info(" ".join(['Epoch: {:04d}'.format(epoch + 1),
'lr: {:04f}, stie_lr: {:04f}'.format(self.lr_scheduler.get_lr()[0], self.stiefel_lr_scheduler.get_lr()[0]),
format_metrics(train_metrics, 'train'),
'time: {:.4f}s'.format(time.time() - t)
]))
if (epoch + 1) % self.args.eval_freq == 0:
self.model.eval()
embeddings = self.model.encode(self.data['features'], self.data['hgnn_adj'], self.data['hgnn_weight'])
val_metrics = self.model.compute_metrics(embeddings, self.data, 'val')
real_losses.append(val_metrics['loss'].item())
if (epoch + 1) % self.args.log_freq == 0:
logging.info(" ".join(['Epoch: {:04d}'.format(epoch + 1), format_metrics(val_metrics, 'val')]))
if self.model.has_improved(best_val_metrics, val_metrics):
self.best_emb = embeddings
best_val_metrics = val_metrics
counter = 0
else:
counter += 1
if counter == self.args.patience and epoch > self.args.min_epochs:
logging.info("Early stopping")
break
logging.info("Training Finished!")
logging.info("Total time elapsed: {:.4f}s".format(time.time() - t_total))
return {'val':real_losses,'best':best_losses,'train':train_losses},best_val_metrics['acc'],best_val_metrics['f1'],best_val_metrics['recall'],best_val_metrics['precision'],best_val_metrics['roc_auc'],time.time() - t_total
def predict(self):
self.model.eval()
embeddings = self.model.encode(self.data['features'], self.data['hgnn_adj'], self.data['hgnn_weight'])
val_metrics = self.model.compute_metrics(embeddings, self.data, 'test')
logging.info(" ".join([format_metrics(val_metrics, 'test')]))
return val_metrics['loss'].item(),val_metrics['acc'],val_metrics['f1'],val_metrics['recall'],val_metrics['precision'],val_metrics['roc_auc']
def save_embeddings(self):
#tb_embeddings_euc = self.model.manifold.log_map_zero(self.best_emb)
for_classification_hyp = np.hstack((self.best_emb.cpu().detach().numpy(),self.data['labels'].cpu().reshape(-1,1)))
#for_classification_euc = np.hstack((tb_embeddings_euc.cpu().detach().numpy(),self.data['labels'].cpu().reshape(-1,1)))
hyp_file_path = os.path.join(os.getcwd(),'h2hgcn_embeddings_hyp.csv')
#euc_file_path = os.path.join(os.getcwd(),'h2hgcn_embeddings_euc.csv')
np.savetxt(hyp_file_path, for_classification_hyp, delimiter=',')
#np.savetxt(euc_file_path, for_classification_euc, delimiter=',')
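# --- Usage sketch (not part of the original commit) ---
# A minimal, hedged example of driving the H2HGCN wrapper above end to end on a tiny
# synthetic graph. The random adjacency / feature / label arrays are placeholders, not
# repository data, and the run assumes the packaged utilities (create_args and
# set_up_optimizer_scheduler from pre_utils, not shown in this commit) behave as
# imported above and that a CPU-only run (cuda=-1) is acceptable.
if __name__ == "__main__":
    rng = np.random.default_rng(0)
    n = 100
    adj = (rng.random((n, n)) < 0.05).astype(np.float64)
    adj = np.maximum(adj, adj.T)              # symmetrize: undirected graph
    np.fill_diagonal(adj, 0)
    features = rng.random((n, 16)).astype(np.float32)
    labels = rng.integers(0, 2, size=n)       # binary node labels

    clf = H2HGCN(adj, features, labels, dim=16, epochs=5, cuda=-1)
    losses, acc, f1, recall, precision, roc_auc, seconds = clf.fit()
    print(clf.predict())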
import torch as th
import torch.nn as nn
import torch.nn.functional as F
from Ghypeddings.H2HGCN.utils import *
class CentroidDistance(nn.Module):
"""
Computes the pairwise distances between node representations and a set of
learnable centroid embeddings.
"""
def __init__(self, args, logger, manifold):
super(CentroidDistance, self).__init__()
self.args = args
self.logger = logger
self.manifold = manifold
self.debug = False
# centroid embedding
self.centroid_embedding = nn.Embedding(
args.num_centroid, args.dim,
sparse=False,
scale_grad_by_freq=False,
)
nn_init(self.centroid_embedding, self.args.proj_init)
args.eucl_vars.append(self.centroid_embedding)
def forward(self, node_repr, mask):
"""
Args:
node_repr: [node_num, dim]
mask: [node_num, 1]; 1 denotes a real node, 0 a padded node
return:
graph_centroid_dist: [1, num_centroid]
node_centroid_dist: [1, node_num, num_centroid]
"""
node_num = node_repr.size(0)
# broadcast and reshape node_repr to [node_num * num_centroid, dim]
node_repr = node_repr.unsqueeze(1).expand(
-1,
self.args.num_centroid,
-1).contiguous().view(-1, self.args.dim)
# broadcast and reshape centroid embeddings to [node_num * num_centroid, dim]
centroid_repr = self.manifold.exp_map_zero(self.centroid_embedding(th.arange(self.args.num_centroid).to(self.args.device)))
centroid_repr = centroid_repr.unsqueeze(0).expand(
node_num,
-1,
-1).contiguous().view(-1, self.args.dim)
# get distance
node_centroid_dist = self.manifold.distance(node_repr, centroid_repr)
node_centroid_dist = node_centroid_dist.view(1, node_num, self.args.num_centroid)
# average pooling over nodes
graph_centroid_dist = th.sum(node_centroid_dist, dim=1) / th.sum(mask)
return graph_centroid_dist, node_centroid_dist
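# --- Shape-check sketch (not part of the original commit) ---
# A stand-alone illustration of the broadcast-and-reshape pattern used in
# CentroidDistance.forward above, with a plain Euclidean distance instead of the
# manifold distance so it runs without the package utilities. Shapes mirror the
# method; the sizes are arbitrary.
if __name__ == "__main__":
    node_num, num_centroid, dim = 5, 3, 4
    node_repr = th.randn(node_num, dim)
    centroids = th.randn(num_centroid, dim)

    nodes_flat = node_repr.unsqueeze(1).expand(-1, num_centroid, -1).contiguous().view(-1, dim)
    cents_flat = centroids.unsqueeze(0).expand(node_num, -1, -1).contiguous().view(-1, dim)

    node_centroid_dist = (nodes_flat - cents_flat).norm(dim=-1).view(1, node_num, num_centroid)
    print(node_centroid_dist.shape)  # torch.Size([1, 5, 3])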
import math
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.modules.module import Module
from torch.nn.parameter import Parameter
class Linear(Module):
"""
Simple Linear layer with dropout.
"""
def __init__(self, args, in_features, out_features, dropout, act, use_bias):
super(Linear, self).__init__()
self.dropout = dropout
self.linear = nn.Linear(in_features, out_features, use_bias)
self.act = act
args.eucl_vars.append(self.linear)
def forward(self, x):
hidden = self.linear.forward(x)
hidden = F.dropout(hidden, self.dropout, training=self.training)
out = self.act(hidden)
return out
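# --- Usage sketch (not part of the original commit) ---
# A hedged example of the Linear layer above. The args object only needs the
# eucl_vars list that the constructor appends to, so a SimpleNamespace stands in
# for the real argument parser output.
if __name__ == "__main__":
    from types import SimpleNamespace
    args = SimpleNamespace(eucl_vars=[])
    layer = Linear(args, in_features=8, out_features=3, dropout=0.1, act=F.relu, use_bias=True)
    print(layer(torch.randn(4, 8)).shape)  # torch.Size([4, 3])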
"""Lorentz manifold."""
import torch
import torch as th
import torch.nn as nn
import numpy as np
from torch.autograd import Function, Variable
import torch
from Ghypeddings.H2HGCN.utils import *
from Ghypeddings.H2HGCN.utils.pre_utils import *
from Ghypeddings.H2HGCN.manifolds import *
from Ghypeddings.H2HGCN.utils.math_utils import arcosh, cosh, sinh
_eps = 1e-10
class LorentzManifold:
def __init__(self, args, eps=1e-3, norm_clip=1, max_norm=1e3):
self.args = args
self.eps = eps
self.norm_clip = norm_clip
self.max_norm = max_norm
def minkowski_dot(self, x, y, keepdim=True):
res = torch.sum(x * y, dim=-1) - 2 * x[..., 0] * y[..., 0]
if keepdim:
res = res.view(res.shape + (1,))
return res
def sqdist(self, x, y, c):
K = 1. / c
prod = self.minkowski_dot(x, y)
eps = {torch.float32: 1e-7, torch.float64: 1e-15}
theta = torch.clamp(-prod / K, min=1.0 + eps[x.dtype])
sqdist = K * arcosh(theta) ** 2
return torch.clamp(sqdist, max=50.0)
@staticmethod
def ldot(u, v, keepdim=False):
"""
Lorentzian Scalar Product
Args:
u: [batch_size, d + 1]
v: [batch_size, d + 1]
Return:
keepdim: False [batch_size]
keepdim: True [batch_size, 1]
"""
d = u.size(1) - 1
uv = u * v
uv = th.cat((-uv.narrow(1, 0, 1), uv.narrow(1, 1, d)), dim=1)
return th.sum(uv, dim=1, keepdim=keepdim)
def from_lorentz_to_poincare(self, x):
"""
Args:
x: [batch_size, d + 1]
"""
d = x.size(-1) - 1
return x.narrow(-1, 1, d) / (x.narrow(-1, 0, 1) + 1)
def from_poincare_to_lorentz(self, x):
"""
Args:
x: [batch_size, d]
"""
x_norm_square = th_dot(x, x)
return th.cat((1 + x_norm_square, 2 * x), dim=1) / (1 - x_norm_square + self.eps)
def distance(self, u, v):
d = -LorentzDot.apply(u, v)
dis = Acosh.apply(d, self.eps)
return dis
def normalize(self, w):
"""
Normalize a vector so that it lies on the Lorentz manifold (hyperboloid)
Args:
w: [batch_size, d + 1]
"""
d = w.size(-1) - 1
narrowed = w.narrow(-1, 1, d)
if self.max_norm:
narrowed = th.renorm(narrowed.view(-1, d), 2, 0, self.max_norm)
first = 1 + th.sum(th.pow(narrowed, 2), dim=-1, keepdim=True)
first = th.sqrt(first)
tmp = th.cat((first, narrowed), dim=1)
return tmp
def init_embed(self, embed, irange=1e-2):
embed.weight.data.uniform_(-irange, irange)
embed.weight.data.copy_(self.normalize(embed.weight.data))
def rgrad(self, p, d_p):
"""Riemannian gradient for Lorentz"""
u = d_p
x = p
u.narrow(-1, 0, 1).mul_(-1)
u.addcmul_(self.ldot(x, u, keepdim=True).expand_as(x), x)
return d_p
def exp_map_zero(self, v):
zeros = th.zeros_like(v)
zeros[:, 0] = 1
return self.exp_map_x(zeros, v)
def exp_map_x(self, p, d_p, d_p_normalize=True, p_normalize=True):
if d_p_normalize:
d_p = self.normalize_tan(p, d_p)
ldv = self.ldot(d_p, d_p, keepdim=True)
nd_p = th.sqrt(th.clamp(ldv + self.eps, _eps))
t = th.clamp(nd_p, max=self.norm_clip)
newp = (th.cosh(t) * p) + (th.sinh(t) * d_p / nd_p)
if p_normalize:
newp = self.normalize(newp)
return newp
def normalize_tan(self, x_all, v_all):
d = v_all.size(1) - 1
x = x_all.narrow(1, 1, d)
xv = th.sum(x * v_all.narrow(1, 1, d), dim=1, keepdim=True)
tmp = 1 + th.sum(th.pow(x_all.narrow(1, 1, d), 2), dim=1, keepdim=True)
tmp = th.sqrt(tmp)
return th.cat((xv / tmp, v_all.narrow(1, 1, d)), dim=1)
def log_map_zero(self, y, i=-1):
zeros = th.zeros_like(y)
zeros[:, 0] = 1
return self.log_map_x(zeros, y)
def log_map_x(self, x, y, normalize=False):
"""Logarithmic map on the Lorentz Manifold"""
xy = self.ldot(x, y).unsqueeze(-1)
tmp = th.sqrt(th.clamp(xy * xy - 1 + self.eps, _eps))
v = Acosh.apply(-xy, self.eps) / tmp * th.addcmul(y, xy, x)
if normalize:
result = self.normalize_tan(x, v)
else:
result = v
return result
def parallel_transport(self, x, y, v):
"""Parallel transport for Lorentz"""
v_ = v
x_ = x
y_ = y
xy = self.ldot(x_, y_, keepdim=True).expand_as(x_)
vy = self.ldot(v_, y_, keepdim=True).expand_as(x_)
vnew = v_ + vy / (1 - xy) * (x_ + y_)
return vnew
def metric_tensor(self, x, u, v):
return self.ldot(u, v, keepdim=True)
class LorentzDot(Function):
@staticmethod
def forward(ctx, u, v):
ctx.save_for_backward(u, v)
return LorentzManifold.ldot(u, v)
@staticmethod
def backward(ctx, g):
u, v = ctx.saved_tensors
g = g.unsqueeze(-1).expand_as(u).clone()
g.narrow(-1, 0, 1).mul_(-1)
return g * v, g * u
class Acosh(Function):
@staticmethod
def forward(ctx, x, eps):
z = th.sqrt(th.clamp(x * x - 1 + eps, _eps))
ctx.save_for_backward(z)
ctx.eps = eps
xz = x + z
tmp = th.log(xz)
return tmp
@staticmethod
def backward(ctx, g):
z, = ctx.saved_tensors
z = th.clamp(z, min=ctx.eps)
z = g / z
return z, None
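# --- Sanity-check sketch (not part of the original commit) ---
# A hedged check that exp_map_zero maps tangent vectors onto the hyperboloid
# <x, x>_L = -1 and that the distance of a point to itself is close to 0. The
# manifold only stores `args`, so passing None is assumed to be safe for these calls.
if __name__ == "__main__":
    manifold = LorentzManifold(args=None)
    v = th.randn(4, 6) * 0.1              # small tangent vectors at the origin
    x = manifold.exp_map_zero(v)          # points on the hyperboloid
    print(LorentzManifold.ldot(x, x))     # expected to be close to -1 for every row
    print(manifold.distance(x, x))        # expected to be close to 0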
import torch as th
import torch.nn as nn
import numpy as np
from torch.autograd import Function, Variable
from Ghypeddings.clusterers.utils import *
_eps = 1e-10
class StiefelManifold:
def __init__(self, args, logger, eps=1e-3, norm_clip=1, max_norm=1e3):
self.args = args
self.logger = logger
self.eps = eps
self.norm_clip = norm_clip
self.max_norm = max_norm
def normalize(self, w):
return w
def init_embed(self, embed, irange=1e-2):
embed.weight.data.uniform_(-irange, irange)
embed.weight.data.copy_(self.normalize(embed.weight.data))
def symmetric(self, A):
return 0.5 * (A + A.t())
def rgrad(self, A, B):
out = B - A.mm(self.symmetric(A.transpose(0,1).mm(B)))
return out
def exp_map_x(self, A, ref):
data = A + ref
Q, R = data.qr()
# Fix the sign ambiguity of the QR decomposition: flip the columns of Q whose corresponding diagonal entry of R is negative, so the retraction is unique
sign = (R.diag().sign() + 0.5).sign().diag()
out = Q.mm(sign)
return out
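# --- Sanity-check sketch (not part of the original commit) ---
# A hedged check that the QR-based retraction exp_map_x above returns a matrix with
# orthonormal columns. StiefelManifold only stores args and logger, so None is
# assumed to be acceptable for this stand-alone call.
if __name__ == "__main__":
    manifold = StiefelManifold(args=None, logger=None)
    A = th.linalg.qr(th.randn(5, 3))[0]   # an orthonormal starting point
    step = 0.1 * th.randn(5, 3)           # an arbitrary update direction
    B = manifold.exp_map_x(A, step)
    print(B.t().mm(B))                    # expected to be close to the identity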
from Ghypeddings.H2HGCN.manifolds.LorentzManifold import LorentzManifold
import numpy as np
from sklearn.metrics import roc_auc_score, average_precision_score
import torch
import torch.nn as nn
import torch.nn.functional as F
import Ghypeddings.H2HGCN.models.encoders as encoders
from Ghypeddings.H2HGCN.models.encoders import H2HGCN
from Ghypeddings.H2HGCN.models.decoders import model2decoder
from Ghypeddings.H2HGCN.utils.eval_utils import acc_f1
from Ghypeddings.H2HGCN.manifolds import LorentzManifold
class BaseModel(nn.Module):
"""
Base model for graph embedding tasks.
"""
def __init__(self, args):
super(BaseModel, self).__init__()
self.c = torch.Tensor([1.]).to(args.device)
args.manifold = self.manifold = LorentzManifold(args)
args.feat_dim = args.feat_dim + 1
# add 1 for Lorentz as the degree of freedom is d - 1 with d dimensions
args.dim = args.dim + 1
self.nnodes = args.n_nodes
self.encoder = H2HGCN(args, 1)
def encode(self, x, hgnn_adj, hgnn_weight):
h = self.encoder.encode(x, hgnn_adj, hgnn_weight)
return h
def compute_metrics(self, embeddings, data, split):
raise NotImplementedError
def init_metric_dict(self):
raise NotImplementedError
def has_improved(self, m1, m2):
raise NotImplementedError
class NCModel(BaseModel):
"""
Base model for node classification task.
"""
def __init__(self, args):
super(NCModel, self).__init__(args)
self.decoder = model2decoder(self.c, args)
if args.n_classes > 2:
self.f1_average = 'micro'
else:
self.f1_average = 'binary'
self.weights = torch.Tensor([1.] * args.n_classes)
if not args.cuda == -1:
self.weights = self.weights.to(args.device)
def decode(self, h, adj, idx):
output = self.decoder.decode(h, adj)
return F.log_softmax(output[idx], dim=1)
def compute_metrics(self, embeddings, data, split):
idx = data[f'idx_{split}']
output = self.decode(embeddings, data['adj_train_norm'], idx)
loss = F.nll_loss(output, data['labels'][idx], self.weights)
acc, f1 , recall,precision,roc_auc = acc_f1(output, data['labels'][idx], average=self.f1_average)
metrics = {'loss': loss, 'acc': acc, 'f1': f1 , 'recall':recall,'precision':precision,'roc_auc':roc_auc}
return metrics
def init_metric_dict(self):
return {'acc': -1, 'f1': -1}
def has_improved(self, m1, m2):
return m1["f1"] < m2["f1"]
"""Graph decoders."""
import torch.nn as nn
import torch.nn.functional as F
from Ghypeddings.H2HGCN.layers.layers import Linear
class Decoder(nn.Module):
"""
Decoder abstract class for node classification tasks.
"""
def __init__(self, c):
super(Decoder, self).__init__()
self.c = c
def decode(self, x, adj):
if self.decode_adj:
input = (x, adj)
probs, _ = self.cls.forward(input)
else:
probs = self.cls.forward(x)
return probs
class MyDecoder(Decoder):
"""
Centroid-distance decoder for node classification: a single linear layer applied to the node-to-centroid distances.
"""
def __init__(self, c, args):
super(MyDecoder, self).__init__(c)
self.input_dim = args.num_centroid
self.output_dim = args.n_classes
act = lambda x: x
self.cls = Linear(args, self.input_dim, self.output_dim, args.dropout, act, args.bias)
self.decode_adj = False
def decode(self, x, adj):
h = x
return super(MyDecoder, self).decode(h, adj)
model2decoder = MyDecoder
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import Ghypeddings.H2HGCN.utils.math_utils as pmath
import torch as th
from Ghypeddings.H2HGCN.utils import *
from Ghypeddings.H2HGCN.utils import pre_utils
from Ghypeddings.H2HGCN.utils.pre_utils import *
from Ghypeddings.H2HGCN.manifolds import *
from Ghypeddings.H2HGCN.layers.CentroidDistance import CentroidDistance
class H2HGCN(nn.Module):
def __init__(self, args, logger):
super(H2HGCN, self).__init__()
self.debug = False
self.args = args
self.logger = logger
self.set_up_params()
self.activation = nn.SELU()
fd = args.feat_dim - 1
self.linear = nn.Linear(
int(fd), int(args.dim),
)
nn_init(self.linear, self.args.proj_init)
self.args.eucl_vars.append(self.linear)
self.distance = CentroidDistance(args, logger, args.manifold)
def create_params(self):
"""
create the GNN params for a specific msg type
"""
msg_weight = []
layer = self.args.num_layers if not self.args.tie_weight else 1
for iii in range(layer):
M = th.zeros([self.args.dim-1, self.args.dim-1], requires_grad=True)
init_weight(M, 'orthogonal')
M = nn.Parameter(M)
self.args.stie_vars.append(M)
msg_weight.append(M)
return nn.ParameterList(msg_weight)
def set_up_params(self):
"""
set up the params for all message types
"""
self.type_of_msg = 1
for i in range(0, self.type_of_msg):
setattr(self, "msg_%d_weight" % i, self.create_params())
def apply_activation(self, node_repr):
"""
apply non-linearity for different manifolds
"""
if self.args.select_manifold == "poincare":
return self.activation(node_repr)
elif self.args.select_manifold == "lorentz":
return self.args.manifold.from_poincare_to_lorentz(
self.activation(self.args.manifold.from_lorentz_to_poincare(node_repr))
)
def split_graph_by_negative_edge(self, adj_mat, weight):
"""
Split the graph according to positive and negative edges.
"""
mask = weight > 0
neg_mask = weight < 0
pos_adj_mat = adj_mat * mask.long()
neg_adj_mat = adj_mat * neg_mask.long()
pos_weight = weight * mask.float()
neg_weight = -weight * neg_mask.float()
return pos_adj_mat, pos_weight, neg_adj_mat, neg_weight
def split_graph_by_type(self, adj_mat, weight):
"""
split the graph according to edge type for multi-relational datasets
"""
multi_relation_adj_mat = []
multi_relation_weight = []
for relation in range(1, self.args.edge_type):
mask = (weight.int() == relation)
multi_relation_adj_mat.append(adj_mat * mask.long())
multi_relation_weight.append(mask.float())
return multi_relation_adj_mat, multi_relation_weight
def split_input(self, adj_mat, weight):
return [adj_mat], [weight]
def lorenz_factor(self, x, *, c=1.0, dim=-1, keepdim=False):
"""
Calculate Lorentz (gamma) factors for points in the Klein model
"""
x_norm = x.pow(2).sum(dim=dim, keepdim=keepdim)
x_norm = torch.clamp(x_norm, 0, 0.9)
tmp = 1 / torch.sqrt(1 - c * x_norm)
return tmp
def from_lorentz_to_poincare(self, x):
"""
Args:
x: [batch_size, d + 1]
"""
d = x.size(-1) - 1
return x.narrow(-1, 1, d) / (x.narrow(-1, 0, 1) + 1)
def h2p(self, x):
return self.from_lorentz_to_poincare(x)
def from_poincare_to_lorentz(self, x, eps=1e-3):
"""
Args:
x: [batch_size, d]
"""
x_norm_square = x.pow(2).sum(-1, keepdim=True)
tmp = th.cat((1 + x_norm_square, 2 * x), dim=1)
tmp = tmp / (1 - x_norm_square + eps)
return tmp
def p2h(self, x):
return self.from_poincare_to_lorentz(x)
def p2k(self, x, c=1.0):
denom = 1 + c * x.pow(2).sum(-1, keepdim=True)
return 2 * x / denom
def k2p(self, x, c=1.0):
denom = 1 + torch.sqrt(1 - c * x.pow(2).sum(-1, keepdim=True))
return x / denom
def h2k(self, x):
tmp = x.narrow(-1, 1, x.size(-1)-1) / x.narrow(-1, 0, 1)
return tmp
def k2h(self, x):
x_norm_square = x.pow(2).sum(-1, keepdim=True)
x_norm_square = torch.clamp(x_norm_square, max=0.9)
tmp = torch.ones((x.size(0), 1)).to(self.args.device)
tmp1 = th.cat((tmp, x), dim=1)
tmp2 = 1.0 / torch.sqrt(1.0 - x_norm_square)
tmp3 = (tmp1 * tmp2)
return tmp3
def hyperbolic_mean(self, y, node_num, max_neighbor, real_node_num, weight, dim=0, c=1.0, ):
'''
y [node_num * max_neighbor, dim]
'''
x = y[0:real_node_num*max_neighbor, :]
weight_tmp = weight.view(-1,1)[0:real_node_num*max_neighbor, :]
x = self.h2k(x)
lamb = self.lorenz_factor(x, c=c, keepdim=True)
lamb = lamb * weight_tmp
lamb = lamb.view(real_node_num, max_neighbor, -1)
x = x.view(real_node_num, max_neighbor, -1)
k_mean = (torch.sum(lamb * x, dim=1, keepdim=True) / (torch.sum(lamb, dim=1, keepdim=True))).squeeze()
h_mean = self.k2h(k_mean)
virtual_mean = torch.cat((torch.tensor([[1.0]]), torch.zeros(1, y.size(-1)-1)), 1).to(self.args.device)
tmp = virtual_mean.repeat(node_num-real_node_num, 1)
mean = torch.cat((h_mean, tmp), 0)
return mean
def test_lor(self, A):
tmp1 = (A[:,0] * A[:,0]).view(-1)
tmp2 = A[:,1:]
tmp2 = th.diag(tmp2.mm(tmp2.transpose(0,1)))
return (tmp1 - tmp2)
def retrieve_params(self, weight, step):
"""
Args:
weight: a list of weights
step: a certain layer
"""
layer_weight = th.cat((th.zeros((self.args.dim-1, 1)).to(self.args.device), weight[step]), dim=1)
tmp = th.zeros((1, self.args.dim)).to(self.args.device)
tmp[0,0] = 1
layer_weight = th.cat((tmp, layer_weight), dim=0)
return layer_weight
def aggregate_msg(self, node_repr, adj_mat, weight, layer_weight, mask):
"""
message passing for a specific message type.
"""
node_num, max_neighbor = adj_mat.shape[0], adj_mat.shape[1]
combined_msg = node_repr.clone()
tmp = self.test_lor(node_repr)
msg = th.mm(node_repr, layer_weight) * mask
real_node_num = (mask>0).sum()
# select out the neighbors of each node
neighbors = th.index_select(msg, 0, adj_mat.view(-1))
combined_msg = self.hyperbolic_mean(neighbors, node_num, max_neighbor, real_node_num, weight)
return combined_msg
def get_combined_msg(self, step, node_repr, adj_mat, weight, mask):
"""
perform message passing in the tangent space of x'
"""
gnn_layer = 0 if self.args.tie_weight else step
combined_msg = None
for relation in range(0, self.type_of_msg):
layer_weight = self.retrieve_params(getattr(self, "msg_%d_weight" % relation), gnn_layer)
aggregated_msg = self.aggregate_msg(node_repr,
adj_mat[relation],
weight[relation],
layer_weight, mask)
combined_msg = aggregated_msg if combined_msg is None else (combined_msg + aggregated_msg)
return combined_msg
def encode(self, node_repr, adj_list, weight):
node_repr = self.activation(self.linear(node_repr))
adj_list, weight = self.split_input(adj_list, weight)
mask = torch.ones((node_repr.size(0), 1)).to(self.args.device)
node_repr = self.args.manifold.exp_map_zero(node_repr)
for step in range(self.args.num_layers):
node_repr = node_repr * mask
tmp = node_repr
combined_msg = self.get_combined_msg(step, node_repr, adj_list, weight, mask)
combined_msg = (combined_msg) * mask
node_repr = combined_msg * mask
node_repr = self.apply_activation(node_repr) * mask
real_node_num = (mask>0).sum()
node_repr = self.args.manifold.normalize(node_repr)
_, node_centroid_sim = self.distance(node_repr, mask)
return node_centroid_sim.squeeze()
class Encoder(nn.Module):
"""
Encoder abstract class.
"""
def __init__(self, c):
super(Encoder, self).__init__()
self.c = c
def encode(self, x, adj):
if self.encode_graph:
input = (x, adj)
output, _ = self.layers.forward(input)
else:
output = self.layers.forward(x)
return output
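# --- Round-trip sketch (not part of the original commit) ---
# A hedged, stand-alone restatement of the h2k / k2h coordinate changes used by
# H2HGCN.hyperbolic_mean above (hyperboloid -> Klein model and back), checking that
# the round trip is close to the identity. The helper functions below are local
# re-implementations of those formulas, not the class methods themselves.
if __name__ == "__main__":
    from Ghypeddings.H2HGCN.manifolds import LorentzManifold

    def h2k_demo(x):
        # Klein coordinates: divide the space-like part by the time-like coordinate
        return x.narrow(-1, 1, x.size(-1) - 1) / x.narrow(-1, 0, 1)

    def k2h_demo(x):
        # lift Klein coordinates back onto the hyperboloid
        x_norm_square = torch.clamp(x.pow(2).sum(-1, keepdim=True), max=0.9)
        ones = torch.ones((x.size(0), 1))
        return torch.cat((ones, x), dim=1) / torch.sqrt(1.0 - x_norm_square)

    manifold = LorentzManifold(args=None)
    x = manifold.exp_map_zero(torch.randn(4, 6) * 0.1)
    print((k2h_demo(h2k_demo(x)) - x).abs().max())  # expected to be small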
from torch.optim import Adam
import torch as th
from torch.optim.optimizer import Optimizer, required
from Ghypeddings.H2HGCN.utils import *
import os
import math
class RiemannianSGD(Optimizer):
"""Riemannian stochastic gradient descent.
"""
def __init__(self, args, params, lr):
defaults = dict(lr=lr)
self.args = args
super(RiemannianSGD, self).__init__(params, defaults)
def step(self, lr=None):
"""
Performs a single optimization step.
"""
loss = None
for group in self.param_groups:
for p in group['params']:
if p.grad is None:
continue
d_p = p.grad.data
d_p = self.args.manifold.rgrad(p, d_p)
if lr is None:
lr = group['lr']
p.data = self.args.manifold.exp_map_x(p, -lr * d_p)
return loss
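# --- Usage sketch (not part of the original commit) ---
# A hedged example of driving RiemannianSGD with the Stiefel manifold from this
# repository. The optimizer reads the manifold off `args`, so a SimpleNamespace is
# used as a stand-in for the real argument parser output; the module path
# Ghypeddings.H2HGCN.manifolds.StiefelManifold is assumed from the file layout.
if __name__ == "__main__":
    import torch.nn as nn
    from types import SimpleNamespace
    from Ghypeddings.H2HGCN.manifolds.StiefelManifold import StiefelManifold

    args = SimpleNamespace()
    args.manifold = StiefelManifold(args, logger=None)

    W = nn.Parameter(th.linalg.qr(th.randn(5, 3))[0])  # orthonormal initial point
    opt = RiemannianSGD(args, [W], lr=0.1)

    loss = W.sum() ** 2
    loss.backward()
    opt.step()
    print(W.t().mm(W))  # columns should stay (approximately) orthonormal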
from Ghypeddings.H2HGCN.utils.pre_utils import *
"""Data utils functions for pre-processing and data loading."""
import os
import pickle as pkl
import sys
import networkx as nx
import numpy as np
import scipy.sparse as sp
import torch
from Ghypeddings.H2HGCN.utils.pre_utils import *
def convert_hgnn_adj(adj):
hgnn_adj = [[i] for i in range(adj.shape[0])]
hgnn_weight = [[1] for i in range(adj.shape[0])]
for i in range(adj.shape[0]):
for j in range(adj.shape[1]):
if adj[i,j] == 1:
hgnn_adj[i].append(j)
hgnn_weight[i].append(1)
max_len = max([len(i) for i in hgnn_adj])
normalize_weight(hgnn_adj, hgnn_weight)
hgnn_adj = pad_sequence(hgnn_adj, max_len)
hgnn_weight = pad_sequence(hgnn_weight, max_len)
hgnn_adj = np.array(hgnn_adj)
hgnn_weight = np.array(hgnn_weight)
return torch.from_numpy(hgnn_adj), torch.from_numpy(hgnn_weight).float()
def process_data(args,adj,features,labels):
data = process_data_nc(args,adj,features,labels)
data['adj_train_norm'], data['features'] = process(
data['adj_train'], data['features'], args.normalize_adj, args.normalize_feats
)
return data
def process(adj, features, normalize_adj, normalize_feats):
if sp.isspmatrix(features):
features = np.array(features.todense())
if normalize_feats:
features = normalize(features)
features = torch.Tensor(features)
if normalize_adj:
adj = normalize(adj)
adj = sparse_mx_to_torch_sparse_tensor(adj)
return adj, features
def normalize(mx):
"""Row-normalize sparse matrix."""
rowsum = np.array(mx.sum(1))
r_inv = np.power(rowsum, -1).flatten()
r_inv[np.isinf(r_inv)] = 0.
r_mat_inv = sp.diags(r_inv)
mx = r_mat_inv.dot(mx)
return mx
def sparse_mx_to_torch_sparse_tensor(sparse_mx):
"""Convert a scipy sparse matrix to a torch sparse tensor."""
sparse_mx = sparse_mx.tocoo()
indices = torch.from_numpy(
np.vstack((sparse_mx.row, sparse_mx.col)).astype(np.int64)
)
values = torch.Tensor(sparse_mx.data)
shape = torch.Size(sparse_mx.shape)
return torch.sparse.FloatTensor(indices, values, shape)
def augment(adj, features, normalize_feats=True):
deg = np.squeeze(np.sum(adj, axis=0).astype(int))
deg[deg > 5] = 5
deg_onehot = torch.tensor(np.eye(6)[deg], dtype=torch.float).squeeze()
const_f = torch.ones(features.size(0), 1)
features = torch.cat((features, deg_onehot, const_f), dim=1)
return features
def split_data(labels, val_prop, test_prop, seed):
np.random.seed(seed)
nb_nodes = labels.shape[0]
all_idx = np.arange(nb_nodes)
pos_idx = labels.nonzero()[0]
neg_idx = (1. - labels).nonzero()[0]
np.random.shuffle(pos_idx)
np.random.shuffle(neg_idx)
pos_idx = pos_idx.tolist()
neg_idx = neg_idx.tolist()
nb_pos_neg = min(len(pos_idx), len(neg_idx))
nb_val = round(val_prop * nb_pos_neg)
nb_test = round(test_prop * nb_pos_neg)
idx_val_pos, idx_test_pos, idx_train_pos = pos_idx[:nb_val], pos_idx[nb_val:nb_val + nb_test], pos_idx[nb_val + nb_test:]
idx_val_neg, idx_test_neg, idx_train_neg = neg_idx[:nb_val], neg_idx[nb_val:nb_val + nb_test], neg_idx[nb_val + nb_test:]
return idx_val_pos + idx_val_neg, idx_test_pos + idx_test_neg, idx_train_pos + idx_train_neg
def process_data_nc(args,adj,features,labels):
adj = sp.csr_matrix(adj)
hgnn_adj, hgnn_weight = convert_hgnn_adj(adj.todense())
idx_val, idx_test, idx_train = split_data(labels, args.val_prop, args.test_prop, seed=args.seed)
labels = torch.LongTensor(labels)
data = {'adj_train': adj, 'features': features, 'labels': labels, 'idx_train': idx_train, 'idx_val': idx_val, 'idx_test': idx_test, 'hgnn_adj': hgnn_adj, 'hgnn_weight': hgnn_weight}
return data
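# --- Usage sketch (not part of the original commit) ---
# A hedged illustration of split_data above: it balances the split by the size of the
# smaller class and returns validation, test and train index lists in that order.
# The label array here is arbitrary, not repository data.
if __name__ == "__main__":
    labels_demo = np.array([0, 1] * 20)
    idx_val, idx_test, idx_train = split_data(labels_demo, val_prop=0.15, test_prop=0.15, seed=42)
    print(len(idx_val), len(idx_test), len(idx_train))  # 6 6 28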
from sklearn.metrics import accuracy_score, f1_score, recall_score,precision_score,roc_auc_score
def acc_f1(output, labels, average='binary'):
preds = output.max(1)[1].type_as(labels)
if preds.is_cuda:
preds = preds.cpu()
labels = labels.cpu()
accuracy = accuracy_score(labels,preds)
f1 = f1_score(labels,preds , average=average)
recall = recall_score(labels,preds)
precision = precision_score(labels,preds )
roc_auc = roc_auc_score(labels,preds)
return accuracy, f1 , recall,precision, roc_auc
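# --- Usage sketch (not part of the original commit) ---
# A hedged example of acc_f1 above on toy class scores. Note that roc_auc is computed
# from the hard argmax predictions, not from class probabilities.
if __name__ == "__main__":
    import torch
    output = torch.tensor([[0.9, 0.1], [0.2, 0.8], [0.6, 0.4], [0.3, 0.7]])
    labels = torch.tensor([0, 1, 1, 0])
    print(acc_f1(output, labels, average='binary'))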
"""Math utils functions."""
import torch
def cosh(x, clamp=15):
return x.clamp(-clamp, clamp).cosh()
def sinh(x, clamp=15):
return x.clamp(-clamp, clamp).sinh()
def tanh(x, clamp=15):
return x.clamp(-clamp, clamp).tanh()
def arcosh(x):
return Arcosh.apply(x)
def arsinh(x):
return Arsinh.apply(x)
def artanh(x):
return Artanh.apply(x)
class Artanh(torch.autograd.Function):
@staticmethod
def forward(ctx, x):
x = x.clamp(-1 + 1e-15, 1 - 1e-15)
ctx.save_for_backward(x)
z = x.double()
return (torch.log_(1 + z).sub_(torch.log_(1 - z))).mul_(0.5).to(x.dtype)
@staticmethod
def backward(ctx, grad_output):
input, = ctx.saved_tensors
return grad_output / (1 - input ** 2)
class Arsinh(torch.autograd.Function):
@staticmethod
def forward(ctx, x):
ctx.save_for_backward(x)
z = x.double()
return (z + torch.sqrt_(1 + z.pow(2))).clamp_min_(1e-15).log_().to(x.dtype)
@staticmethod
def backward(ctx, grad_output):
input, = ctx.saved_tensors
return grad_output / (1 + input ** 2) ** 0.5
class Arcosh(torch.autograd.Function):
@staticmethod
def forward(ctx, x):
x = x.clamp(min=1.0 + 1e-15)
ctx.save_for_backward(x)
z = x.double()
return (z + torch.sqrt_(z.pow(2) - 1)).clamp_min_(1e-15).log_().to(x.dtype)
@staticmethod
def backward(ctx, grad_output):
input, = ctx.saved_tensors
return grad_output / (input ** 2 - 1) ** 0.5
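# --- Sanity-check sketch (not part of the original commit) ---
# A hedged comparison of the custom Arcosh / Artanh autograd functions above with the
# built-in torch ops, for inputs safely inside their domains.
if __name__ == "__main__":
    x = torch.tensor([1.5, 2.0, 10.0], requires_grad=True)
    print(arcosh(x), torch.acosh(x))   # values should match closely
    arcosh(x).sum().backward()
    print(x.grad)                       # should equal 1 / sqrt(x^2 - 1)

    y = torch.tensor([-0.5, 0.0, 0.5])
    print(artanh(y), torch.atanh(y))    # values should match closely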