diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..c18dd8d83ceed1806b50b0aaa46beb7e335fff13
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+__pycache__/
diff --git a/H2HGCN/.gitignore b/H2HGCN/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..ba0430d26c996e7f078385407f959c96c271087c
--- /dev/null
+++ b/H2HGCN/.gitignore
@@ -0,0 +1 @@
+__pycache__/
\ No newline at end of file
diff --git a/H2HGCN/__init__.py b/H2HGCN/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/H2HGCN/h2hgcn.py b/H2HGCN/h2hgcn.py
new file mode 100644
index 0000000000000000000000000000000000000000..3db3c025b7b5cf85c6fe37fb21b6518135bcc6c8
--- /dev/null
+++ b/H2HGCN/h2hgcn.py
@@ -0,0 +1,160 @@
+from __future__ import division
+from __future__ import print_function
+import logging
+import os
+import time
+import numpy as np
+import torch
+from Ghypeddings.H2HGCN.models.base_models import NCModel
+from Ghypeddings.H2HGCN.utils.data_utils import process_data
+from Ghypeddings.H2HGCN.utils.train_utils import format_metrics, create_args
+from Ghypeddings.H2HGCN.utils.pre_utils import *
+import warnings
+warnings.filterwarnings('ignore')
+
+class H2HGCN:
+    def __init__(self,
+                adj,
+                features,
+                labels,
+                dim,
+                c=None,
+                num_layers=2,
+                bias=True,
+                act='leaky_relu',
+                select_manifold='lorentz',
+                num_centroid=100,
+                lr_stie=1,
+                stie_vars=None,
+                stiefel_optimizer='rsgd',
+                eucl_vars=None,
+                grad_clip=None,
+                optimizer='Adam',
+                weight_decay=0.1,
+                lr=1,
+                lr_scheduler='step',
+                lr_gamma=.5,
+                step_lr_gamma=0.99,
+                step_lr_reduce_freq=20,
+                proj_init='xavier',
+                tie_weight=True,
+                cuda=0,
+                epochs=50,
+                min_epochs=50,
+                patience=None,
+                seed=42,
+                log_freq=1,
+                eval_freq=1,
+                val_prop=.3,
+                test_prop=0.3,
+                double_precision=0,
+                dropout=0.1,
+                normalize_adj=False,
+                normalize_feats=True
+                ):
+        
+        self.args = create_args(dim,c,num_layers,bias,act,select_manifold,num_centroid,lr_stie,stie_vars,stiefel_optimizer,eucl_vars,grad_clip,optimizer,weight_decay,lr,lr_scheduler,lr_gamma,step_lr_gamma,step_lr_reduce_freq,proj_init,tie_weight,cuda,epochs,min_epochs,patience,seed,log_freq,eval_freq,val_prop,test_prop,double_precision,dropout,normalize_adj,normalize_feats)
+        # give each instance fresh lists instead of sharing a mutable default
+        self.args.stie_vars = stie_vars if stie_vars is not None else []
+        self.args.eucl_vars = eucl_vars if eucl_vars is not None else []
+
+        self.args.n_nodes = adj.shape[0]
+        self.args.feat_dim = features.shape[1]
+        self.args.n_classes = len(np.unique(labels))
+        self.data = process_data(self.args,adj,features,labels)
+
+        if int(self.args.double_precision):
+            torch.set_default_dtype(torch.float64)
+
+        self.args.device = 'cuda:' + str(self.args.cuda) if int(self.args.cuda) >= 0 else 'cpu'
+        self.args.patience = self.args.epochs if not self.args.patience else int(self.args.patience)
+        self.model = NCModel(self.args)
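+        # two optimizers run side by side: a Euclidean one (Adam/SGD) for the
+        # regular parameters and Riemannian SGD on the Stiefel manifold for
+        # the orthogonal message-passing weights, each with its own scheduler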
+        self.optimizer, self.lr_scheduler, self.stiefel_optimizer, self.stiefel_lr_scheduler = set_up_optimizer_scheduler(True, self.args, self.model, self.args.lr, self.args.lr_stie)
+        
+        if self.args.cuda is not None and int(self.args.cuda) >= 0 :
+            os.environ['CUDA_VISIBLE_DEVICES'] = str(self.args.cuda)
+            self.model = self.model.to(self.args.device)
+            for x, val in self.data.items():
+                if torch.is_tensor(self.data[x]):
+                    self.data[x] = self.data[x].to(self.args.device)
+        self.best_emb = None
+
+
+    def fit(self):
+        logging.getLogger().setLevel(logging.INFO)
+        logging.info(f'Using: {self.args.device}')
+        tot_params = sum([np.prod(p.size()) for p in self.model.parameters()])
+        logging.info(f"Total number of parameters: {tot_params}")
+        
+        t_total = time.time()
+        counter = 0
+        best_val_metrics = self.model.init_metric_dict()
+
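+        # per-epoch training loss and its running minimum; both lists are
+        # returned by fit() so callers can plot loss curves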
+        best_losses = []
+        real_losses = []
+
+        for epoch in range(self.args.epochs):       
+            t = time.time()
+            self.model.train()
+            self.optimizer.zero_grad()
+            self.stiefel_optimizer.zero_grad()
+            embeddings = self.model.encode(self.data['features'], self.data['hgnn_adj'], self.data['hgnn_weight']) 
+            train_metrics = self.model.compute_metrics(embeddings, self.data, 'train')
+            train_metrics['loss'].backward()
+            if self.args.grad_clip is not None:
+                max_norm = float(self.args.grad_clip)
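+                # clip each parameter's gradient norm separately rather than
+                # the global norm over all parameters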
+                all_params = list(self.model.parameters())
+                for param in all_params:
+                    torch.nn.utils.clip_grad_norm_(param, max_norm)
+            self.optimizer.step()
+            self.stiefel_optimizer.step()
+            self.lr_scheduler.step()
+            self.stiefel_lr_scheduler.step()
+
+            real_losses.append(train_metrics['loss'].item())
+            if not best_losses:
+                best_losses.append(real_losses[0])
+            elif best_losses[-1] > real_losses[-1]:
+                best_losses.append(real_losses[-1])
+            else:
+                best_losses.append(best_losses[-1])
+
+            if (epoch + 1) % self.args.log_freq == 0:
+                logging.info(" ".join(['Epoch: {:04d}'.format(epoch + 1),
+                                    'lr: {:04f}, stie_lr: {:04f}'.format(self.lr_scheduler.get_lr()[0], self.stiefel_lr_scheduler.get_lr()[0]),
+                                    format_metrics(train_metrics, 'train'),
+                                    'time: {:.4f}s'.format(time.time() - t)
+                                    ]))
+                
+            if (epoch + 1) % self.args.eval_freq == 0:
+                self.model.eval()
+                embeddings = self.model.encode(self.data['features'], self.data['hgnn_adj'], self.data['hgnn_weight'])
+                val_metrics = self.model.compute_metrics(embeddings, self.data, 'val')
+                if (epoch + 1) % self.args.log_freq == 0:
+                    logging.info(" ".join(['Epoch: {:04d}'.format(epoch + 1), format_metrics(val_metrics, 'val')]))
+                if self.model.has_improved(best_val_metrics, val_metrics):
+                    self.best_emb = embeddings
+                    best_val_metrics = val_metrics
+                    counter = 0
+                else:
+                    counter += 1
+                    if counter == self.args.patience and epoch > self.args.min_epochs:
+                        logging.info("Early stopping")
+                        break
+
+        logging.info("Training Finished!")
+        logging.info("Total time elapsed: {:.4f}s".format(time.time() - t_total))
+        return {'real':real_losses,'best':best_losses},best_val_metrics['acc'],best_val_metrics['f1'],best_val_metrics['recall'],best_val_metrics['precision'],best_val_metrics['roc_auc'],time.time() - t_total
+
+    def predict(self):
+        self.model.eval()
+        embeddings = self.model.encode(self.data['features'], self.data['hgnn_adj'], self.data['hgnn_weight'])
+        val_metrics = self.model.compute_metrics(embeddings, self.data, 'test')
+        logging.info(" ".join([format_metrics(val_metrics, 'test')]))
+        return val_metrics['loss'].item(),val_metrics['acc'],val_metrics['f1'],val_metrics['recall'],val_metrics['precision'],val_metrics['roc_auc']
+    
+    def save_embeddings(self):
+        #tb_embeddings_euc = self.model.manifold.log_map_zero(self.best_emb)
+        for_classification_hyp = np.hstack((self.best_emb.cpu().detach().numpy(),self.data['labels'].cpu().numpy().reshape(-1,1)))
+        #for_classification_euc = np.hstack((tb_embeddings_euc.cpu().detach().numpy(),self.data['labels'].cpu().reshape(-1,1)))
+        hyp_file_path = os.path.join(os.getcwd(),'h2hgcn_embeddings_hyp.csv')
+        #euc_file_path = os.path.join(os.getcwd(),'h2hgcn_embeddings_euc.csv')
+        np.savetxt(hyp_file_path, for_classification_hyp, delimiter=',')
+        #np.savetxt(euc_file_path, for_classification_euc, delimiter=',')
diff --git a/H2HGCN/layers/CentroidDistance.py b/H2HGCN/layers/CentroidDistance.py
new file mode 100644
index 0000000000000000000000000000000000000000..546447492330997a479f47d34b3ad22094d45288
--- /dev/null
+++ b/H2HGCN/layers/CentroidDistance.py
@@ -0,0 +1,56 @@
+import torch as th
+import torch.nn as nn
+import torch.nn.functional as F
+from Ghypeddings.H2HGCN.utils import *
+
+class CentroidDistance(nn.Module):
+    """
+    Computes the pairwise distances between node representations and a set of
+    learnable centroids.
+    """
+    def __init__(self, args, logger, manifold):
+        super(CentroidDistance, self).__init__()
+        self.args = args
+        self.logger = logger
+        self.manifold = manifold
+        self.debug = False
+
+        # centroid embedding
+        self.centroid_embedding = nn.Embedding(
+            args.num_centroid, args.dim,
+            sparse=False,
+            scale_grad_by_freq=False,
+        )
+        nn_init(self.centroid_embedding, self.args.proj_init)
+        args.eucl_vars.append(self.centroid_embedding)
+
+    def forward(self, node_repr, mask):
+        """
+        Args:
+            node_repr: [node_num, dim] 
+            mask: [node_num, 1], where 1 denotes a real node and 0 a padded node
+        return:
+            graph_centroid_dist: [1, num_centroid]
+            node_centroid_dist: [1, node_num, num_centroid]
+        """
+        node_num = node_repr.size(0)
+
+        # broadcast and reshape node_repr to [node_num * num_centroid, dim]
+        node_repr =  node_repr.unsqueeze(1).expand(
+                                                -1,
+                                                self.args.num_centroid,
+                                                -1).contiguous().view(-1, self.args.dim)
+
+        # broadcast and reshape centroid embeddings to [node_num * num_centroid, dim]
+        centroid_repr = self.manifold.exp_map_zero(self.centroid_embedding(th.arange(self.args.num_centroid).to(self.args.device)))
+        centroid_repr = centroid_repr.unsqueeze(0).expand(
+                                                node_num,
+                                                -1,
+                                                -1).contiguous().view(-1, self.args.dim) 
+        # get distance
+        node_centroid_dist = self.manifold.distance(node_repr, centroid_repr) 
+        node_centroid_dist = node_centroid_dist.view(1, node_num, self.args.num_centroid) 
+        # average pooling over nodes
+        graph_centroid_dist = th.sum(node_centroid_dist, dim=1) / th.sum(mask)
+        return graph_centroid_dist, node_centroid_dist
+
diff --git a/H2HGCN/layers/__init__.py b/H2HGCN/layers/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..8b137891791fe96927ad78e64b0aad7bded08bdc
--- /dev/null
+++ b/H2HGCN/layers/__init__.py
@@ -0,0 +1 @@
+
diff --git a/H2HGCN/layers/layers.py b/H2HGCN/layers/layers.py
new file mode 100644
index 0000000000000000000000000000000000000000..48d5c1f2799dbc0cb8dc5fbbc4b8236de4dc9abf
--- /dev/null
+++ b/H2HGCN/layers/layers.py
@@ -0,0 +1,24 @@
+import math
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from torch.nn.modules.module import Module
+from torch.nn.parameter import Parameter
+
+class Linear(Module):
+    """
+    Simple Linear layer with dropout.
+    """
+
+    def __init__(self, args, in_features, out_features, dropout, act, use_bias):
+        super(Linear, self).__init__()
+        self.dropout = dropout
+        self.linear = nn.Linear(in_features, out_features, use_bias)
+        self.act = act
+        args.eucl_vars.append(self.linear)
+
+    def forward(self, x):
+        hidden = self.linear.forward(x)
+        hidden = F.dropout(hidden, self.dropout, training=self.training)
+        out = self.act(hidden)
+        return out
\ No newline at end of file
diff --git a/H2HGCN/manifolds/LorentzManifold.py b/H2HGCN/manifolds/LorentzManifold.py
new file mode 100644
index 0000000000000000000000000000000000000000..1ae351ce44b283e97f1bceb23eaf2bbcb9fa791b
--- /dev/null
+++ b/H2HGCN/manifolds/LorentzManifold.py
@@ -0,0 +1,194 @@
+"""Lorentz manifold."""
+import torch
+import torch as th
+import torch.nn as nn
+import numpy as np
+from torch.autograd import Function
+from Ghypeddings.H2HGCN.utils import *
+from Ghypeddings.H2HGCN.utils.pre_utils import *
+from Ghypeddings.H2HGCN.manifolds import *
+from Ghypeddings.H2HGCN.utils.math_utils import arcosh, cosh, sinh 
+
+_eps = 1e-10
+
+class LorentzManifold:
+
+    def __init__(self, args, eps=1e-3, norm_clip=1, max_norm=1e3):
+        self.args = args
+        self.eps = eps
+        self.norm_clip = norm_clip
+        self.max_norm = max_norm
+
+    def minkowski_dot(self, x, y, keepdim=True):
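+        # Lorentzian inner product with signature (-, +, ..., +): the plain sum
+        # contributes +x0*y0, so subtracting 2*x0*y0 negates the time component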
+        res = torch.sum(x * y, dim=-1) - 2 * x[..., 0] * y[..., 0]
+        if keepdim:
+            res = res.view(res.shape + (1,))
+        return res
+
+
+    def sqdist(self, x, y, c):
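+        # squared hyperbolic distance d(x, y)^2 = K * arcosh(-<x, y>_L / K)^2,
+        # clamping the arcosh argument to >= 1 + eps for numerical stability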
+        K = 1. / c
+        prod = self.minkowski_dot(x, y)
+        eps = {torch.float32: 1e-7, torch.float64: 1e-15}
+        theta = torch.clamp(-prod / K, min=1.0 + eps[x.dtype])
+        sqdist = K * arcosh(theta) ** 2
+        return torch.clamp(sqdist, max=50.0)
+
+
+    @staticmethod
+    def ldot(u, v, keepdim=False):
+        """
+        Lorentzian Scalar Product
+        Args:
+            u: [batch_size, d + 1]
+            v: [batch_size, d + 1]
+        Return:
+            [batch_size] if keepdim is False, else [batch_size, 1]
+        """
+        d = u.size(1) - 1
+        uv = u * v
+        uv = th.cat((-uv.narrow(1, 0, 1), uv.narrow(1, 1, d)), dim=1) 
+        return th.sum(uv, dim=1, keepdim=keepdim)
+
+    def from_lorentz_to_poincare(self, x):
+        """
+        Args:
+            x: [batch_size, d + 1]
+        """
+        d = x.size(-1) - 1
+        return x.narrow(-1, 1, d) / (x.narrow(-1, 0, 1) + 1)
+
+    def from_poincare_to_lorentz(self, x):
+        """
+        Args:
+            x: [batch_size, d]
+        """
+        x_norm_square = th_dot(x, x)
+        return th.cat((1 + x_norm_square, 2 * x), dim=1) / (1 - x_norm_square + self.eps)
+
+    def distance(self, u, v):
+        d = -LorentzDot.apply(u, v)
+        dis = Acosh.apply(d, self.eps)
+        return dis
+
+    def normalize(self, w):
+        """
+        Normalize a vector so that it lies on the Lorentz manifold (hyperboloid)
+        Args:
+            w: [batch_size, d + 1]
+        """
+        d = w.size(-1) - 1
+        narrowed = w.narrow(-1, 1, d)
+        if self.max_norm:
+            narrowed = th.renorm(narrowed.view(-1, d), 2, 0, self.max_norm)
+        first = 1 + th.sum(th.pow(narrowed, 2), dim=-1, keepdim=True)
+        first = th.sqrt(first)
+        tmp = th.cat((first, narrowed), dim=1)
+        return tmp
+
+    def init_embed(self, embed, irange=1e-2):
+        embed.weight.data.uniform_(-irange, irange)
+        embed.weight.data.copy_(self.normalize(embed.weight.data))
+
+    def rgrad(self, p, d_p):
+        """Riemannian gradient for Lorentz"""
+        u = d_p
+        x = p
+        u.narrow(-1, 0, 1).mul_(-1)
+        u.addcmul_(self.ldot(x, u, keepdim=True).expand_as(x), x)
+        return d_p
+
+    def exp_map_zero(self, v):
+        zeros = th.zeros_like(v)
+        zeros[:, 0] = 1
+        return self.exp_map_x(zeros, v)
+
+    def exp_map_x(self, p, d_p, d_p_normalize=True, p_normalize=True):
+        if d_p_normalize:
+            d_p = self.normalize_tan(p, d_p)
+
+        ldv = self.ldot(d_p, d_p, keepdim=True)
+        nd_p = th.sqrt(th.clamp(ldv + self.eps, _eps))
+
+        t = th.clamp(nd_p, max=self.norm_clip)
+        newp = (th.cosh(t) * p) + (th.sinh(t) * d_p / nd_p)
+
+        if p_normalize:
+            newp = self.normalize(newp)
+        return newp
+
+    def normalize_tan(self, x_all, v_all):
+        d = v_all.size(1) - 1
+        x = x_all.narrow(1, 1, d)
+        xv = th.sum(x * v_all.narrow(1, 1, d), dim=1, keepdim=True)
+        tmp = 1 + th.sum(th.pow(x_all.narrow(1, 1, d), 2), dim=1, keepdim=True)
+        tmp = th.sqrt(tmp)
+        return th.cat((xv / tmp, v_all.narrow(1, 1, d)), dim=1)
+
+    def log_map_zero(self, y, i=-1):
+        zeros = th.zeros_like(y)
+        zeros[:, 0] = 1
+        return self.log_map_x(zeros, y)
+
+    def log_map_x(self, x, y, normalize=False):
+        """Logarithmic map on the Lorentz Manifold"""
+        xy = self.ldot(x, y).unsqueeze(-1)
+        tmp = th.sqrt(th.clamp(xy * xy - 1 + self.eps, _eps))
+        v = Acosh.apply(-xy, self.eps) / (
+            tmp
+        ) * th.addcmul(y, xy, x)
+        if normalize:
+            result = self.normalize_tan(x, v)
+        else:
+            result = v
+        return result
+
+    def parallel_transport(self, x, y, v):
+        """Parallel transport for Lorentz"""
+        v_ = v
+        x_ = x
+        y_ = y
+
+        xy = self.ldot(x_, y_, keepdim=True).expand_as(x_)
+        vy = self.ldot(v_, y_, keepdim=True).expand_as(x_)
+        vnew = v_ + vy / (1 - xy) * (x_ + y_)
+        return vnew
+
+    def metric_tensor(self, x, u, v):
+        return self.ldot(u, v, keepdim=True)
+
+
+
+class LorentzDot(Function):
+    @staticmethod
+    def forward(ctx, u, v):
+        ctx.save_for_backward(u, v)
+        return LorentzManifold.ldot(u, v)
+
+    @staticmethod
+    def backward(ctx, g):
+        u, v = ctx.saved_tensors
+        g = g.unsqueeze(-1).expand_as(u).clone()
+        g.narrow(-1, 0, 1).mul_(-1)
+        return g * v, g * u
+
+class Acosh(Function):
+    @staticmethod
+    def forward(ctx, x, eps): 
+        z = th.sqrt(th.clamp(x * x - 1 + eps, _eps))
+        ctx.save_for_backward(z)
+        ctx.eps = eps
+        xz = x + z
+        tmp = th.log(xz)
+        return tmp
+
+    @staticmethod
+    def backward(ctx, g):
+        z, = ctx.saved_tensors
+        z = th.clamp(z, min=ctx.eps)
+        z = g / z
+        return z, None
+
+
diff --git a/H2HGCN/manifolds/StiefelManifold.py b/H2HGCN/manifolds/StiefelManifold.py
new file mode 100644
index 0000000000000000000000000000000000000000..42f141a9b76c4d8539b8fd6a6a0a14606f119184
--- /dev/null
+++ b/H2HGCN/manifolds/StiefelManifold.py
@@ -0,0 +1,41 @@
+import torch as th
+import torch.nn as nn
+import numpy as np
+from torch.autograd import Function, Variable
+from Ghypeddings.H2HGCN.utils import *
+
+_eps = 1e-10
+
+class StiefelManifold:
+
+    def __init__(self, args, logger, eps=1e-3, norm_clip=1, max_norm=1e3):
+        self.args = args
+        self.logger = logger
+        self.eps = eps
+        self.norm_clip = norm_clip
+        self.max_norm = max_norm
+
+    def normalize(self, w):
+        return w
+
+    def init_embed(self, embed, irange=1e-2):
+        embed.weight.data.uniform_(-irange, irange)
+        embed.weight.data.copy_(self.normalize(embed.weight.data))
+
+    def symmetric(self, A):
+        return 0.5 * (A + A.t())
+
+    def rgrad(self, A, B):
+        out = B - A.mm(self.symmetric(A.transpose(0,1).mm(B)))
+        return out
+
+    def exp_map_x(self, A, ref):
+        data = A + ref
+        Q, R = th.linalg.qr(data)
+        # make the factorization unique by forcing the diagonal of R to be
+        # positive (the +0.5 maps zero entries to +1), flipping the sign of
+        # the matching columns of Q
+        sign = (R.diag().sign() + 0.5).sign().diag()
+        out = Q.mm(sign)
+        return out
+
+
+
diff --git a/H2HGCN/manifolds/__init__.py b/H2HGCN/manifolds/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..99a7356c05028872305abad28f5ac410681319a4
--- /dev/null
+++ b/H2HGCN/manifolds/__init__.py
@@ -0,0 +1 @@
+from Ghypeddings.H2HGCN.manifolds.LorentzManifold import LorentzManifold
\ No newline at end of file
diff --git a/H2HGCN/models/__init__.py b/H2HGCN/models/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/H2HGCN/models/base_models.py b/H2HGCN/models/base_models.py
new file mode 100644
index 0000000000000000000000000000000000000000..a0a235cfac0a099d484449fca494e894411ee20a
--- /dev/null
+++ b/H2HGCN/models/base_models.py
@@ -0,0 +1,76 @@
+import numpy as np
+from sklearn.metrics import roc_auc_score, average_precision_score
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import Ghypeddings.H2HGCN.models.encoders as encoders
+from Ghypeddings.H2HGCN.models.encoders import H2HGCN
+from Ghypeddings.H2HGCN.models.decoders import model2decoder
+from Ghypeddings.H2HGCN.utils.eval_utils import acc_f1
+from Ghypeddings.H2HGCN.manifolds import LorentzManifold
+
+ 
+class BaseModel(nn.Module):
+    """
+    Base model for graph embedding tasks.
+    """
+
+    def __init__(self, args):
+        super(BaseModel, self).__init__()
+        self.c = torch.Tensor([1.]).to(args.device)
+        args.manifold = self.manifold = LorentzManifold(args)
+        args.feat_dim = args.feat_dim + 1
+        # add 1 for Lorentz as the degree of freedom is d - 1 with d dimensions
+        args.dim = args.dim + 1
+        self.nnodes = args.n_nodes
+        self.encoder = H2HGCN(args, 1)
+
+    def encode(self, x, hgnn_adj, hgnn_weight):
+        h = self.encoder.encode(x, hgnn_adj, hgnn_weight)
+        return h
+
+    def compute_metrics(self, embeddings, data, split):
+        raise NotImplementedError
+
+    def init_metric_dict(self):
+        raise NotImplementedError
+
+    def has_improved(self, m1, m2):
+        raise NotImplementedError
+
+
+class NCModel(BaseModel):
+    """
+    Base model for node classification task.
+    """
+
+    def __init__(self, args):
+        super(NCModel, self).__init__(args)
+        self.decoder = model2decoder(self.c, args)
+        if args.n_classes > 2:
+            self.f1_average = 'micro'
+        else:
+            self.f1_average = 'binary'
+        
+        self.weights = torch.Tensor([1.] * args.n_classes)
+        if int(args.cuda) >= 0:
+            self.weights = self.weights.to(args.device)
+
+    def decode(self, h, adj, idx):
+        output = self.decoder.decode(h, adj)
+        return F.log_softmax(output[idx], dim=1)
+
+
+    def compute_metrics(self, embeddings, data, split):
+        idx = data[f'idx_{split}']
+        output = self.decode(embeddings, data['adj_train_norm'], idx)
+        loss = F.nll_loss(output, data['labels'][idx], self.weights)
+        acc, f1, recall, precision, roc_auc = acc_f1(output, data['labels'][idx], average=self.f1_average)
+        metrics = {'loss': loss, 'acc': acc, 'f1': f1, 'recall': recall, 'precision': precision, 'roc_auc': roc_auc}
+        return metrics
+
+    def init_metric_dict(self):
+        return {'acc': -1, 'f1': -1}
+
+    def has_improved(self, m1, m2):
+        return m1["f1"] < m2["f1"]
\ No newline at end of file
diff --git a/H2HGCN/models/decoders.py b/H2HGCN/models/decoders.py
new file mode 100644
index 0000000000000000000000000000000000000000..cac75ec3f51abb35f1dcd5cfd17fa7655c00cf3a
--- /dev/null
+++ b/H2HGCN/models/decoders.py
@@ -0,0 +1,42 @@
+"""Graph decoders."""
+import torch.nn as nn
+import torch.nn.functional as F
+from Ghypeddings.H2HGCN.layers.layers import Linear
+
+class Decoder(nn.Module):
+    """
+    Decoder abstract class for node classification tasks.
+    """
+
+    def __init__(self, c):
+        super(Decoder, self).__init__()
+        self.c = c
+
+    def decode(self, x, adj):
+        if self.decode_adj:
+            input = (x, adj)
+            probs, _ = self.cls.forward(input)
+        else:
+            probs = self.cls.forward(x)
+        return probs
+
+
+class MyDecoder(Decoder):
+    """
+    Concrete decoder: a linear classifier over centroid-distance features.
+    """
+
+    def __init__(self, c, args):
+        super(MyDecoder, self).__init__(c)
+        self.input_dim = args.num_centroid
+        self.output_dim = args.n_classes
+        act = lambda x: x
+        self.cls = Linear(args, self.input_dim, self.output_dim, 0.0, act, args.bias)
+        self.decode_adj = False
+
+    def decode(self, x, adj):
+        h = x
+        return super(MyDecoder, self).decode(h, adj)
+
+model2decoder = MyDecoder
+
diff --git a/H2HGCN/models/encoders.py b/H2HGCN/models/encoders.py
new file mode 100644
index 0000000000000000000000000000000000000000..e5ab9313c1506ffd7984ead39591098158f2dae1
--- /dev/null
+++ b/H2HGCN/models/encoders.py
@@ -0,0 +1,264 @@
+import numpy as np
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import Ghypeddings.H2HGCN.utils.math_utils as pmath
+import torch as th
+from Ghypeddings.H2HGCN.utils import *
+from Ghypeddings.H2HGCN.utils import pre_utils
+from Ghypeddings.H2HGCN.utils.pre_utils import *
+from Ghypeddings.H2HGCN.manifolds import *
+from Ghypeddings.H2HGCN.layers.CentroidDistance import CentroidDistance
+
+
+class H2HGCN(nn.Module):
+
+    def __init__(self, args, logger):
+        super(H2HGCN, self).__init__()
+        self.debug = False
+        self.args = args
+        self.logger = logger
+        self.set_up_params()
+        self.activation = nn.SELU()
+        fd = args.feat_dim - 1
+        self.linear = nn.Linear(
+                int(fd), int(args.dim),
+        )
+        nn_init(self.linear, self.args.proj_init)
+        self.args.eucl_vars.append(self.linear)	
+
+        self.distance = CentroidDistance(args, logger, args.manifold)
+
+
+    def create_params(self):
+        """
+        create the GNN params for a specific msg type
+        """
+        msg_weight = []
+        layer = self.args.num_layers if not self.args.tie_weight else 1
+        for iii in range(layer):
+            M = th.zeros([self.args.dim-1, self.args.dim-1], requires_grad=True)
+            init_weight(M, 'orthogonal')
+            M = nn.Parameter(M)
+            self.args.stie_vars.append(M)
+            msg_weight.append(M)
+        return nn.ParameterList(msg_weight)
+
+    def set_up_params(self):
+        """
+        set up the params for all message types
+        """
+        self.type_of_msg = 1
+
+        for i in range(0, self.type_of_msg):
+            setattr(self, "msg_%d_weight" % i, self.create_params())
+
+    def apply_activation(self, node_repr):
+        """
+        apply non-linearity for different manifolds
+        """
+        if self.args.select_manifold == "poincare":
+            return self.activation(node_repr)
+        elif self.args.select_manifold == "lorentz":
+            return self.args.manifold.from_poincare_to_lorentz(
+                self.activation(self.args.manifold.from_lorentz_to_poincare(node_repr))
+            )
+
+    def split_graph_by_negative_edge(self, adj_mat, weight):
+        """
+        Split the graph according to positive and negative edges.
+        """
+        mask = weight > 0
+        neg_mask = weight < 0
+
+        pos_adj_mat = adj_mat * mask.long()
+        neg_adj_mat = adj_mat * neg_mask.long()
+        pos_weight = weight * mask.float()
+        neg_weight = -weight * neg_mask.float()
+        return pos_adj_mat, pos_weight, neg_adj_mat, neg_weight
+
+    def split_graph_by_type(self, adj_mat, weight):
+        """
+        split the graph according to edge type for multi-relational datasets
+        """
+        multi_relation_adj_mat = []
+        multi_relation_weight = []
+        for relation in range(1, self.args.edge_type):
+            mask = (weight.int() == relation)
+            multi_relation_adj_mat.append(adj_mat * mask.long())
+            multi_relation_weight.append(mask.float())
+        return multi_relation_adj_mat, multi_relation_weight
+
+    def split_input(self, adj_mat, weight):
+        return [adj_mat], [weight]
+
+    def lorenz_factor(self, x, *, c=1.0, dim=-1, keepdim=False):
+        """
+            Calculate Lorentz factors
+        """
+        x_norm = x.pow(2).sum(dim=dim, keepdim=keepdim)
+        x_norm = torch.clamp(x_norm, 0, 0.9)
+        tmp = 1 / torch.sqrt(1 - c * x_norm)
+        return tmp
+     
+    def from_lorentz_to_poincare(self, x):
+        """
+        Args:
+            x: [batch_size, d + 1]
+        """
+        d = x.size(-1) - 1
+        return x.narrow(-1, 1, d) / (x.narrow(-1, 0, 1) + 1)
+
+    def h2p(self, x):
+        return self.from_lorentz_to_poincare(x)
+
+    def from_poincare_to_lorentz(self, x, eps=1e-3):
+        """
+        Args:
+            x: [batch_size, d]
+        """
+        x_norm_square = x.pow(2).sum(-1, keepdim=True)
+        tmp = th.cat((1 + x_norm_square, 2 * x), dim=1)
+        tmp = tmp / (1 - x_norm_square + eps)
+        return tmp
+
+    def p2h(self, x):
+        return  self.from_poincare_to_lorentz(x)
+
+    def p2k(self, x, c=1.0):
+        denom = 1 + c * x.pow(2).sum(-1, keepdim=True)
+        return 2 * x / denom
+
+    def k2p(self, x, c=1.0):
+        denom = 1 + torch.sqrt(1 - c * x.pow(2).sum(-1, keepdim=True))
+        return x / denom
+
+    def h2k(self, x):
+        tmp = x.narrow(-1, 1, x.size(-1)-1) / x.narrow(-1, 0, 1)
+        return tmp
+        
+    def k2h(self, x):
+        x_norm_square = x.pow(2).sum(-1, keepdim=True)
+        x_norm_square = torch.clamp(x_norm_square, max=0.9)
+        tmp = torch.ones((x.size(0),1)).to(self.args.device)
+        tmp1 = th.cat((tmp, x), dim=1)
+        tmp2 = 1.0 / torch.sqrt(1.0 - x_norm_square)
+        tmp3 = (tmp1 * tmp2)
+        return tmp3 
+
+
+    def hyperbolic_mean(self, y, node_num, max_neighbor, real_node_num, weight, dim=0, c=1.0, ):
+        '''
+        y [node_num * max_neighbor, dim]
+        '''
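+        # Einstein midpoint: map to Klein coordinates, average with the Lorentz
+        # factors (times the edge weights) as weights, then map back to the
+        # hyperboloid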
+        x = y[0:real_node_num*max_neighbor, :]
+        weight_tmp = weight.view(-1,1)[0:real_node_num*max_neighbor, :]
+        x = self.h2k(x)
+        
+        lamb = self.lorenz_factor(x, c=c, keepdim=True)
+        lamb = lamb  * weight_tmp 
+        lamb = lamb.view(real_node_num, max_neighbor, -1)
+
+        x = x.view(real_node_num, max_neighbor, -1) 
+        k_mean = (torch.sum(lamb * x, dim=1, keepdim=True) / (torch.sum(lamb, dim=1, keepdim=True))).squeeze()
+        h_mean = self.k2h(k_mean)
+
+        virtual_mean = torch.cat((torch.tensor([[1.0]]), torch.zeros(1,y.size(-1)-1)), 1).to(self.args.device)
+        tmp = virtual_mean.repeat(node_num-real_node_num, 1)
+
+        mean = torch.cat((h_mean, tmp), 0)
+        return mean	
+
+    def test_lor(self, A):
+        tmp1 = (A[:,0] * A[:,0]).view(-1)
+        tmp2 = A[:,1:]
+        tmp2 = th.diag(tmp2.mm(tmp2.transpose(0,1)))
+        return (tmp1 - tmp2)
+
+    def retrieve_params(self, weight, step):
+        """
+        Args:
+            weight: a list of weights
+            step: a certain layer
+        """
+        layer_weight = th.cat((th.zeros((self.args.dim-1, 1)).to(self.args.device), weight[step]), dim=1)
+        tmp = th.zeros((1, self.args.dim)).to(self.args.device)
+        tmp[0,0] = 1
+        layer_weight = th.cat((tmp, layer_weight), dim=0)
+        return layer_weight
+
+    def aggregate_msg(self, node_repr, adj_mat, weight, layer_weight, mask):
+        """
+        message passing for a specific message type.
+        """
+        node_num, max_neighbor = adj_mat.shape[0], adj_mat.shape[1] 
+
+        msg = th.mm(node_repr, layer_weight) * mask
+        real_node_num = (mask>0).sum()
+        
+        # select out the neighbors of each node
+        neighbors = th.index_select(msg, 0, adj_mat.view(-1))
+        combined_msg = self.hyperbolic_mean(neighbors, node_num, max_neighbor, real_node_num, weight)
+        return combined_msg 
+
+    def get_combined_msg(self, step, node_repr, adj_mat, weight, mask):
+        """
+        perform message passing in the tangent space of x'
+        """
+        gnn_layer = 0 if self.args.tie_weight else step
+        combined_msg = None
+        for relation in range(0, self.type_of_msg):
+            layer_weight = self.retrieve_params(getattr(self, "msg_%d_weight" % relation), gnn_layer)
+            aggregated_msg = self.aggregate_msg(node_repr,
+                                                adj_mat[relation],
+                                                weight[relation],
+                                                layer_weight, mask)
+            combined_msg = aggregated_msg if combined_msg is None else (combined_msg + aggregated_msg)
+        return combined_msg
+
+
+    def encode(self, node_repr, adj_list, weight):
+        node_repr = self.activation(self.linear(node_repr))
+        adj_list, weight = self.split_input(adj_list, weight)
+        
+        mask = torch.ones((node_repr.size(0),1)).to(self.args.device)
+        node_repr = self.args.manifold.exp_map_zero(node_repr)
+
+        for step in range(self.args.num_layers):
+            node_repr = node_repr * mask
+            combined_msg = self.get_combined_msg(step, node_repr, adj_list, weight, mask)
+            node_repr = combined_msg * mask
+            node_repr = self.apply_activation(node_repr) * mask
+            node_repr = self.args.manifold.normalize(node_repr)
+        _, node_centroid_sim = self.distance(node_repr, mask) 
+        return node_centroid_sim.squeeze()
+
+class Encoder(nn.Module):
+    """
+    Encoder abstract class.
+    """
+
+    def __init__(self, c):
+        super(Encoder, self).__init__()
+        self.c = c
+
+    def encode(self, x, adj):
+        if self.encode_graph:
+            input = (x, adj)
+            output, _ = self.layers.forward(input)
+        else:
+            output = self.layers.forward(x)
+        return output
diff --git a/H2HGCN/optimizers/__init__.py b/H2HGCN/optimizers/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..23027ab1847bcd1d4e3c46d7ae72fbfba2fc86b5
--- /dev/null
+++ b/H2HGCN/optimizers/__init__.py
@@ -0,0 +1 @@
+from torch.optim import Adam
diff --git a/H2HGCN/optimizers/rsgd.py b/H2HGCN/optimizers/rsgd.py
new file mode 100644
index 0000000000000000000000000000000000000000..968b97444edb76491fa39cdc65636cdf9fc6b432
--- /dev/null
+++ b/H2HGCN/optimizers/rsgd.py
@@ -0,0 +1,29 @@
+import torch as th
+from torch.optim.optimizer import Optimizer
+from Ghypeddings.H2HGCN.utils import *
+import os
+import math
+
+class RiemannianSGD(Optimizer):
+    """Riemannian stochastic gradient descent.
+    """
+    def __init__(self, args, params, lr):
+        defaults = dict(lr=lr)
+        self.args = args
+        super(RiemannianSGD, self).__init__(params, defaults)
+
+    def step(self, lr=None):
+        """
+        Performs a single optimization step.
+        """
+        loss = None
+        for group in self.param_groups:
+            for p in group['params']:
+                if p.grad is None:
+                    continue
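+                # Euclidean gradient -> Riemannian gradient, then retract onto
+                # the manifold with the exponential map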
+                d_p = p.grad.data
+                d_p = self.args.manifold.rgrad(p, d_p)
+                if lr is None:
+                    lr = group['lr']
+                p.data = self.args.manifold.exp_map_x(p, -lr * d_p)
+        return loss
diff --git a/H2HGCN/utils/__init__.py b/H2HGCN/utils/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..7c3b5149888b0c78501eb36595aff1f4f4027b8c
--- /dev/null
+++ b/H2HGCN/utils/__init__.py
@@ -0,0 +1 @@
+from Ghypeddings.H2HGCN.utils.pre_utils import *
\ No newline at end of file
diff --git a/H2HGCN/utils/data_utils.py b/H2HGCN/utils/data_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..f726ddff75b141d465f387292b13fa3fa25e6f2e
--- /dev/null
+++ b/H2HGCN/utils/data_utils.py
@@ -0,0 +1,102 @@
+"""Data utils functions for pre-processing and data loading."""
+import os
+import pickle as pkl
+import sys
+import networkx as nx
+import numpy as np
+import scipy.sparse as sp
+import torch
+from Ghypeddings.H2HGCN.utils.pre_utils import *
+
+def convert_hgnn_adj(adj):
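+    """
+    Convert a dense adjacency matrix to fixed-width neighbor/weight lists:
+    row i holds a self-loop followed by the neighbors of i, padded with zeros
+    to the maximum list length.
+    """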
+    hgnn_adj = [[i] for i in range(adj.shape[0])]
+    hgnn_weight = [[1] for i in range(adj.shape[0])]
+    for i in range(adj.shape[0]):
+        for j in range(adj.shape[1]):
+            if adj[i,j] == 1:
+                hgnn_adj[i].append(j)
+                hgnn_weight[i].append(1)
+
+    max_len = max([len(i) for i in hgnn_adj])
+    normalize_weight(hgnn_adj, hgnn_weight)
+ 
+    hgnn_adj = pad_sequence(hgnn_adj, max_len)
+    hgnn_weight = pad_sequence(hgnn_weight, max_len)
+    hgnn_adj = np.array(hgnn_adj)
+    hgnn_weight = np.array(hgnn_weight)
+    # tensors are returned on CPU; the caller moves them to the right device
+    return torch.from_numpy(hgnn_adj), torch.from_numpy(hgnn_weight).float()
+
+
+def process_data(args,adj,features,labels):
+    data = process_data_nc(args,adj,features,labels)
+    data['adj_train_norm'], data['features'] = process(
+            data['adj_train'], data['features'], args.normalize_adj, args.normalize_feats
+    )
+    return data
+
+
+def process(adj, features, normalize_adj, normalize_feats):
+    if sp.isspmatrix(features):
+        features = np.array(features.todense())
+    if normalize_feats:
+        features = normalize(features)
+    features = torch.Tensor(features)
+    if normalize_adj:
+        adj = normalize(adj)
+    adj = sparse_mx_to_torch_sparse_tensor(adj)
+    return adj, features
+
+def normalize(mx):
+    """Row-normalize sparse matrix."""
+    rowsum = np.array(mx.sum(1))
+    r_inv = np.power(rowsum, -1).flatten()
+    r_inv[np.isinf(r_inv)] = 0.
+    r_mat_inv = sp.diags(r_inv)
+    mx = r_mat_inv.dot(mx)
+    return mx
+
+def sparse_mx_to_torch_sparse_tensor(sparse_mx):
+    """Convert a scipy sparse matrix to a torch sparse tensor."""
+    sparse_mx = sparse_mx.tocoo()
+    indices = torch.from_numpy(
+            np.vstack((sparse_mx.row, sparse_mx.col)).astype(np.int64)
+    )
+    values = torch.Tensor(sparse_mx.data)
+    shape = torch.Size(sparse_mx.shape)
+    return torch.sparse.FloatTensor(indices, values, shape)
+
+def augment(adj, features, normalize_feats=True):
+    deg = np.squeeze(np.sum(adj, axis=0).astype(int))
+    deg[deg > 5] = 5
+    deg_onehot = torch.tensor(np.eye(6)[deg], dtype=torch.float).squeeze()
+    const_f = torch.ones(features.size(0), 1)
+    features = torch.cat((features, deg_onehot, const_f), dim=1)
+    return features
+
+def split_data(labels, val_prop, test_prop, seed):
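+    """
+    Class-aware split: positive and negative indices are shuffled separately;
+    val/test sizes are computed from the size of the smaller class and drawn
+    from each class, with the remainder of both classes going to train.
+    """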
+    np.random.seed(seed)
+    nb_nodes = labels.shape[0]
+    all_idx = np.arange(nb_nodes)
+    pos_idx = labels.nonzero()[0]
+    neg_idx = (1. - labels).nonzero()[0]
+    np.random.shuffle(pos_idx)
+    np.random.shuffle(neg_idx)
+    pos_idx = pos_idx.tolist()
+    neg_idx = neg_idx.tolist()
+    nb_pos_neg = min(len(pos_idx), len(neg_idx))
+    nb_val = round(val_prop * nb_pos_neg)
+    nb_test = round(test_prop * nb_pos_neg)
+    idx_val_pos, idx_test_pos, idx_train_pos = pos_idx[:nb_val], pos_idx[nb_val:nb_val + nb_test], pos_idx[
+                                                                                                   nb_val + nb_test:]
+    idx_val_neg, idx_test_neg, idx_train_neg = neg_idx[:nb_val], neg_idx[nb_val:nb_val + nb_test], neg_idx[
+                                                                                                   nb_val + nb_test:]
+    return idx_val_pos + idx_val_neg, idx_test_pos + idx_test_neg, idx_train_pos + idx_train_neg
+
+
+def process_data_nc(args,adj,features,labels):
+    adj = sp.csr_matrix(adj)
+    hgnn_adj, hgnn_weight = convert_hgnn_adj(adj.todense())
+    idx_val, idx_test, idx_train = split_data(labels, args.val_prop, args.test_prop, seed=args.seed)
+    labels = torch.LongTensor(labels)
+    data = {'adj_train': adj, 'features': features, 'labels': labels, 'idx_train': idx_train, 'idx_val': idx_val, 'idx_test': idx_test, 'hgnn_adj': hgnn_adj, 'hgnn_weight': hgnn_weight}
+    return data
\ No newline at end of file
diff --git a/H2HGCN/utils/eval_utils.py b/H2HGCN/utils/eval_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..4a17797a6916957f4b9f0962f77bfe15000ecb4e
--- /dev/null
+++ b/H2HGCN/utils/eval_utils.py
@@ -0,0 +1,13 @@
+from sklearn.metrics import accuracy_score, f1_score, recall_score,precision_score,roc_auc_score
+ 
+def acc_f1(output, labels, average='binary'):
+    preds = output.max(1)[1].type_as(labels)
+    if preds.is_cuda:
+        preds = preds.cpu()
+        labels = labels.cpu()
+    accuracy = accuracy_score(labels, preds)
+    f1 = f1_score(labels, preds, average=average)
+    recall = recall_score(labels, preds)
+    precision = precision_score(labels, preds)
+    roc_auc = roc_auc_score(labels, preds)
+    return accuracy, f1, recall, precision, roc_auc
\ No newline at end of file
diff --git a/H2HGCN/utils/math_utils.py b/H2HGCN/utils/math_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..9cf278ed7ce59b97f4793f5def3218f3e830d473
--- /dev/null
+++ b/H2HGCN/utils/math_utils.py
@@ -0,0 +1,69 @@
+"""Math utils functions."""
+
+import torch
+
+
+def cosh(x, clamp=15):
+    return x.clamp(-clamp, clamp).cosh()
+
+
+def sinh(x, clamp=15):
+    return x.clamp(-clamp, clamp).sinh()
+
+
+def tanh(x, clamp=15):
+    return x.clamp(-clamp, clamp).tanh()
+
+
+def arcosh(x):
+    return Arcosh.apply(x)
+
+
+def arsinh(x):
+    return Arsinh.apply(x)
+
+
+def artanh(x):
+    return Artanh.apply(x)
+
+
+class Artanh(torch.autograd.Function):
+    @staticmethod
+    def forward(ctx, x):
+        x = x.clamp(-1 + 1e-15, 1 - 1e-15)
+        ctx.save_for_backward(x)
+        z = x.double()
+        return (torch.log_(1 + z).sub_(torch.log_(1 - z))).mul_(0.5).to(x.dtype)
+
+    @staticmethod
+    def backward(ctx, grad_output):
+        input, = ctx.saved_tensors
+        return grad_output / (1 - input ** 2)
+
+
+class Arsinh(torch.autograd.Function):
+    @staticmethod
+    def forward(ctx, x):
+        ctx.save_for_backward(x)
+        z = x.double()
+        return (z + torch.sqrt_(1 + z.pow(2))).clamp_min_(1e-15).log_().to(x.dtype)
+
+    @staticmethod
+    def backward(ctx, grad_output):
+        input, = ctx.saved_tensors
+        return grad_output / (1 + input ** 2) ** 0.5
+
+
+class Arcosh(torch.autograd.Function):
+    @staticmethod
+    def forward(ctx, x):
+        x = x.clamp(min=1.0 + 1e-15)
+        ctx.save_for_backward(x)
+        z = x.double()
+        return (z + torch.sqrt_(z.pow(2) - 1)).clamp_min_(1e-15).log_().to(x.dtype)
+
+    @staticmethod
+    def backward(ctx, grad_output):
+        input, = ctx.saved_tensors
+        return grad_output / (input ** 2 - 1) ** 0.5
+
diff --git a/H2HGCN/utils/pre_utils.py b/H2HGCN/utils/pre_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..283e73035851b0e7bbe7a78b02c8eeb2257be7f7
--- /dev/null
+++ b/H2HGCN/utils/pre_utils.py
@@ -0,0 +1,167 @@
+from collections import defaultdict
+import os
+import pickle
+import json
+import torch.nn as nn
+import torch as th
+import torch.optim as optim
+import numpy as np
+import random
+from Ghypeddings.H2HGCN.optimizers.rsgd import RiemannianSGD
+import math
+import subprocess
+
+def set_seed(seed):
+    """
+    Set the random seed
+    """
+    random.seed(seed)
+    np.random.seed(seed)
+    th.manual_seed(seed)
+    th.cuda.manual_seed(seed)
+    th.cuda.manual_seed_all(seed)
+
+def th_dot(x, y, keepdim=True):
+    return th.sum(x * y, dim=1, keepdim=keepdim)
+
+def pad_sequence(data_list, maxlen, value=0):
+    return [row + [value] * (maxlen - len(row)) for row in data_list]
+
+def normalize_weight(adj_mat, weight):
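+    # symmetric degree normalization, in place: w_ij <- w_ij / sqrt(d_i * d_j),
+    # where d is the sum of absolute edge weights per node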
+    degree = [1 / math.sqrt(sum(np.abs(w))) for w in weight]
+    for dst in range(len(adj_mat)):
+        for src_idx in range(len(adj_mat[dst])):
+            src = adj_mat[dst][src_idx]
+            weight[dst][src_idx] = degree[dst] * weight[dst][src_idx] * degree[src]
+
+def nn_init(nn_module, method='orthogonal'):
+    """
+    Initialize a Sequential or Module object
+    Args:
+        nn_module: Sequential or Module
+        method: initialization method
+    """
+    if method == 'none':
+        return
+    for param_name, _ in nn_module.named_parameters():
+        if isinstance(nn_module, nn.Sequential):
+            # for a Sequential object, the param_name contains both id and param name
+            i, name = param_name.split('.', 1)
+            param = getattr(nn_module[int(i)], name)
+        else:
+            param = getattr(nn_module, param_name)
+        if param_name.find('weight') > -1:
+            init_weight(param, method)
+        elif param_name.find('bias') > -1:
+            nn.init.uniform_(param, -1e-4, 1e-4)
+
+def get_params(params_list, vars_list):
+    """
+    Add parameters in vars_list to param_list
+    """
+    for i in vars_list:
+        if issubclass(i.__class__, nn.Module):
+            params_list.extend(list(i.parameters()))
+        elif issubclass(i.__class__, nn.Parameter):
+            params_list.append(i)
+        else:
+            print("Encounter unknown objects")
+            exit(1)
+
+def categorize_params(args):
+    """
+    Categorize parameters into Stiefel ones and Euclidean ones
+    """
+    stiefel_params, euclidean_params = [], []
+    get_params(euclidean_params, args.eucl_vars)
+    get_params(stiefel_params, args.stie_vars)
+    return stiefel_params, euclidean_params
+
+def get_activation(args):
+    if args.activation == 'leaky_relu':
+        return nn.LeakyReLU(args.leaky_relu)
+    elif args.activation == 'rrelu':
+        return nn.RReLU()
+    elif args.activation == 'relu':
+        return nn.ReLU()
+    elif args.activation == 'elu':
+        return nn.ELU()
+    elif args.activation == 'prelu':
+        return nn.PReLU()
+    elif args.activation == 'selu':
+        return nn.SELU()
+
+def init_weight(weight, method):
+    """
+    Initialize parameters
+    Args:
+        weight: a Parameter object
+        method: initialization method 
+    """
+    if method == 'orthogonal':
+        nn.init.orthogonal_(weight)
+    elif method == 'xavier':
+        nn.init.xavier_uniform_(weight)
+    elif method == 'kaiming':
+        nn.init.kaiming_uniform_(weight)
+    elif method == 'none':
+        pass
+    else:
+        raise Exception('Unknown init method')
+
+
+def get_stiefel_optimizer(args, params, lr_stie):
+    if args.stiefel_optimizer == 'rsgd':
+        optimizer = RiemannianSGD(
+            args,
+            params,
+            lr=lr_stie,
+        )
+    elif args.stiefel_optimizer == 'ramsgrad':
+        # no RiemannianAMSGrad implementation ships with this package, so fail
+        # with a clear error instead of a NameError at call time
+        raise NotImplementedError("stiefel_optimizer='ramsgrad' is not available")
+    else:
+        print("unsupported hyper optimizer")
+        exit(1)        
+    return optimizer
+
+def get_lr_scheduler(args, optimizer):
+    if args.lr_scheduler == 'exponential':
+        return optim.lr_scheduler.ExponentialLR(optimizer, gamma=args.lr_gamma)
+    elif args.lr_scheduler == 'cosine':
+        return optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=30, eta_min=0)
+    elif args.lr_scheduler == 'cycle':
+        return optim.lr_scheduler.CyclicLR(optimizer, 0, max_lr=args.lr, step_size_up=20, cycle_momentum=False)
+    elif args.lr_scheduler == 'step':
+        return optim.lr_scheduler.StepLR(
+        optimizer,
+        step_size=int(args.step_lr_reduce_freq),
+        gamma=float(args.step_lr_gamma)
+    )
+    elif args.lr_scheduler == 'none':
+        # keep the learning rate constant: a LambdaLR with factor 1.0 is a no-op
+        return optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda epoch: 1.0)
+
+def get_optimizer(args, params, lr):
+    if args.optimizer == 'sgd':
+        optimizer = optim.SGD(params, lr=lr, weight_decay=args.weight_decay)
+    elif args.optimizer == 'Adam':
+        optimizer = optim.Adam(params, lr=lr, weight_decay=args.weight_decay)
+    elif args.optimizer == 'amsgrad':
+        optimizer = optim.Adam(params, lr=lr, amsgrad=True, weight_decay=args.weight_decay)
+    return optimizer
+
+def set_up_optimizer_scheduler(hyperbolic, args, model, lr, lr_stie, pprint=True):
+    stiefel_params, euclidean_params = categorize_params(args)
+    #assert(len(list(model.parameters())) == len(stiefel_params) + len(euclidean_params))
+    optimizer = get_optimizer(args, euclidean_params, lr)
+    lr_scheduler = get_lr_scheduler(args, optimizer)
+    if len(stiefel_params) > 0:
+        stiefel_optimizer = get_stiefel_optimizer(args, stiefel_params, lr_stie)
+        stiefel_lr_scheduler = get_lr_scheduler(args, stiefel_optimizer)
+    else:
+        stiefel_optimizer, stiefel_lr_scheduler = None, None
+    return optimizer, lr_scheduler, stiefel_optimizer, stiefel_lr_scheduler
\ No newline at end of file
diff --git a/H2HGCN/utils/train_utils.py b/H2HGCN/utils/train_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..71781f97d42cfae91a9caef4ada7b56ce7a4cbe1
--- /dev/null
+++ b/H2HGCN/utils/train_utils.py
@@ -0,0 +1,52 @@
+import os
+import numpy as np
+import torch
+import torch.nn.functional as F
+import torch.nn.modules.loss
+import argparse
+
+
+def format_metrics(metrics, split):
+    """Format metric in metric dict for logging."""
+    return " ".join(
+            ["{}_{}: {:.4f}".format(split, metric_name, metric_val) for metric_name, metric_val in metrics.items()])
+
+
+def create_args(*args):
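+    # argparse is used here only as a namespace builder: every value is wired
+    # in as a default, and parse_known_args() tolerates unrelated CLI flags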
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--dim', type=int, default=args[0])
+    parser.add_argument('--c', type=int, default=args[1])
+    parser.add_argument('--num_layers', type=int, default=args[2])
+    parser.add_argument('--bias', type=bool, default=args[3])
+    parser.add_argument('--act', type=str, default=args[4])
+    parser.add_argument('--select_manifold', type=str, default=args[5])
+    parser.add_argument('--num_centroid', type=int, default=args[6])
+    parser.add_argument('--lr_stie', type=float, default=args[7])
+    parser.add_argument('--stie_vars', nargs='+', default=args[8])
+    parser.add_argument('--stiefel_optimizer', type=str, default=args[9])
+    parser.add_argument('--eucl_vars', nargs='+', default=args[10])
+    parser.add_argument('--grad_clip', type=float, default=args[11])
+    parser.add_argument('--optimizer', type=str, default=args[12])
+    parser.add_argument('--weight_decay', type=float, default=args[13])
+    parser.add_argument('--lr', type=float, default=args[14])
+    parser.add_argument('--lr_scheduler', type=str, default=args[15])
+    parser.add_argument('--lr_gamma', type=float, default=args[16])
+    parser.add_argument('--step_lr_gamma', type=float, default=args[17])
+    parser.add_argument('--step_lr_reduce_freq', type=int, default=args[18])
+    parser.add_argument('--proj_init', type=str, default=args[19])
+    parser.add_argument('--tie_weight', type=bool, default=args[20])
+    parser.add_argument('--cuda', type=int, default=args[21])
+    parser.add_argument('--epochs', type=int, default=args[22])
+    parser.add_argument('--min_epochs', type=int, default=args[23])
+    parser.add_argument('--patience', type=int, default=args[24])
+    parser.add_argument('--seed', type=int, default=args[25])
+    parser.add_argument('--log_freq', type=int, default=args[26])
+    parser.add_argument('--eval_freq', type=int, default=args[27])
+    parser.add_argument('--val_prop', type=float, default=args[28])
+    parser.add_argument('--test_prop', type=float, default=args[29])
+    parser.add_argument('--double_precision', type=int, default=args[30])
+    parser.add_argument('--dropout', type=float, default=args[31])
+    parser.add_argument('--normalize_adj', type=bool, default=args[32])
+    parser.add_argument('--normalize_feats', type=bool, default=args[33])
+    flags, unknown = parser.parse_known_args()
+    return flags
\ No newline at end of file
diff --git a/HGCAE/.gitignore b/HGCAE/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..bee8a64b79a99590d5303307144172cfe824fbf7
--- /dev/null
+++ b/HGCAE/.gitignore
@@ -0,0 +1 @@
+__pycache__
diff --git a/HGCAE/__init__.py b/HGCAE/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..bfa83a015b9025ddbca2b7c1ed543c66fd3af3d9
--- /dev/null
+++ b/HGCAE/__init__.py
@@ -0,0 +1,2 @@
+from __future__ import print_function
+from __future__ import division
diff --git a/HGCAE/hgcae.py b/HGCAE/hgcae.py
new file mode 100644
index 0000000000000000000000000000000000000000..58a36cf05c6aecdef212428f987424cb52dc19b3
--- /dev/null
+++ b/HGCAE/hgcae.py
@@ -0,0 +1,199 @@
+from Ghypeddings.HGCAE.models.base_models import LPModel
+import logging
+import torch
+import numpy as np
+import os
+import time
+from Ghypeddings.HGCAE.utils.train_utils import get_dir_name, format_metrics
+from Ghypeddings.HGCAE.utils.data_utils import process_data
+from Ghypeddings.HGCAE.utils.train_utils import create_args , perform_task
+import Ghypeddings.HGCAE.optimizers as optimizers
+from Ghypeddings.HGCAE.utils.data_utils import sparse_mx_to_torch_sparse_tensor 
+
+class HGCAE(object):
+    def __init__(self, 
+                adj,
+                features,
+                labels,
+                dim,
+                hidden_dim,
+                c=None,
+                num_layers=2,
+                bias=True,
+                act='relu',
+                grad_clip=None,
+                optimizer='RiemannianAdam',
+                weight_decay=0.,
+                lr=0.01,
+                gamma=0.5,
+                lr_reduce_freq=500,
+                cuda=0,
+                epochs=50,
+                min_epochs=50,
+                patience=None,
+                seed=42,
+                log_freq=0,
+                eval_freq=1,
+                val_prop=.5,
+                test_prop=0.3,
+                double_precision=0,
+                dropout=0.01,
+                lambda_rec=1.0,
+                lambda_lp=1.0,
+                num_dec_layers=2,
+                use_att= True,
+                att_type= 'sparse_adjmask_dist',
+                att_logit='tanh',
+                beta = 0,
+                classifier=None,
+                clusterer = None,
+                normalize_adj=True,
+                normalize_feats=True
+                ):
+        
+        self.args = create_args(dim,hidden_dim,c,num_layers,bias,act,grad_clip,optimizer,weight_decay,lr,gamma,lr_reduce_freq,cuda,epochs,min_epochs,patience,seed,log_freq,eval_freq,val_prop,test_prop,double_precision,dropout,lambda_rec,lambda_lp,num_dec_layers,use_att,att_type,att_logit,beta,classifier,clusterer,normalize_adj,normalize_feats)
+
+        self.args.n_nodes = adj.shape[0]
+        self.args.feat_dim = features.shape[1]
+        self.args.n_classes = len(np.unique(labels))
+        self.data = process_data(self.args,adj,features,labels)
+
+        if self.args.c is None:
+            self.args.c_trainable = 1
+            self.args.c = 1.0
+        
+        np.random.seed(self.args.seed)
+        torch.manual_seed(self.args.seed)
+
+        if int(self.args.double_precision):
+            torch.set_default_dtype(torch.float64)
+        if int(self.args.cuda) >= 0:
+            torch.cuda.manual_seed(self.args.seed)
+
+        self.args.device = 'cuda:' + str(self.args.cuda) if int(self.args.cuda) >= 0 else 'cpu'
+        self.args.patience = self.args.epochs if not self.args.patience else int(self.args.patience)
+
+        if not self.args.lr_reduce_freq:
+            self.args.lr_reduce_freq = self.args.epochs
+
+        self.args.nb_false_edges = len(self.data['train_edges_false'])
+        self.args.nb_edges = len(self.data['train_edges'])
+        st0 = np.random.get_state()
+        self.args.np_seed = st0
+        np.random.set_state(self.args.np_seed)
+
+        for x, val in self.data.items():
+            if 'adj' in x:
+                self.data[x] = sparse_mx_to_torch_sparse_tensor(self.data[x])
+
+        self.model = LPModel(self.args)
+
+        if self.args.cuda is not None and int(self.args.cuda) >= 0 :
+            os.environ['CUDA_VISIBLE_DEVICES'] = str(self.args.cuda)
+            self.model = self.model.to(self.args.device)
+            for x, val in self.data.items():
+                if torch.is_tensor(self.data[x]):
+                    self.data[x] = self.data[x].to(self.args.device)
+
+        self.adj_train_enc = self.data['adj_train_enc']
+        self.optimizer = getattr(optimizers, self.args.optimizer)(params=self.model.parameters(), lr=self.args.lr,
+                                                        weight_decay=self.args.weight_decay)
+        self.lr_scheduler = torch.optim.lr_scheduler.StepLR(
+            self.optimizer,
+            step_size=int(self.args.lr_reduce_freq),
+            gamma=float(self.args.gamma)
+        )
+
+        self.best_emb = None
+
+
+
+    def fit(self):
+
+        logging.getLogger().setLevel(logging.INFO)
+        logging.info(f'Using: {self.args.device}')
+        logging.info(str(self.model))
+        tot_params = sum([np.prod(p.size()) for p in self.model.parameters()])
+        logging.info(f"Total number of parameters: {tot_params}")
+
+        t_total = time.time()
+        counter = 0
+        best_val_metrics = self.model.init_metric_dict()
+
+        best_losses = []
+        real_losses = []
+
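+        # Main training loop: encode with the hyperbolic GCN encoder, compute the
+        # link-prediction (+ optional reconstruction) loss, backprop, and step the
+        # optimizer and LR scheduler.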
+        for epoch in range(self.args.epochs):
+            t = time.time()
+            self.model.train()
+            self.optimizer.zero_grad()
+            embeddings = self.model.encode(self.data['features'], self.adj_train_enc)
+            train_metrics = self.model.compute_metrics(embeddings, self.data, 'train', epoch)
+            train_metrics['loss'].backward()
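+            # Optional gradient clipping: clips each parameter tensor's norm individually.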
+            if self.args.grad_clip is not None:
+                max_norm = float(self.args.grad_clip)
+                all_params = list(self.model.parameters())
+                for param in all_params:
+                    torch.nn.utils.clip_grad_norm_(param, max_norm)
+            self.optimizer.step()
+            self.lr_scheduler.step()
+
+            real_losses.append(train_metrics['loss'].item())
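+            # Track the running minimum of the training loss alongside the raw loss.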
+            if not best_losses:
+                best_losses.append(real_losses[0])
+            elif best_losses[-1] > real_losses[-1]:
+                best_losses.append(real_losses[-1])
+            else:
+                best_losses.append(best_losses[-1])
+
+            with torch.no_grad():
+                if self.args.log_freq and (epoch + 1) % self.args.log_freq == 0:
+                    logging.info(" ".join(['Epoch: {:04d}'.format(epoch + 1),
+                                           'lr: {}'.format(self.lr_scheduler.get_last_lr()[0]),
+                                           format_metrics(train_metrics, 'train'),
+                                           'time: {:.4f}s'.format(time.time() - t)
+                                           ]))
+                    
+                if (epoch + 1) % self.args.eval_freq == 0:
+                    self.model.eval()
+                    embeddings = self.model.encode(self.data['features'], self.adj_train_enc)
+                    val_metrics = self.model.compute_metrics(embeddings, self.data, 'val')
+                    if self.args.log_freq and (epoch + 1) % self.args.log_freq == 0:
+                        logging.info(" ".join(['Epoch: {:04d}'.format(epoch + 1), format_metrics(val_metrics, 'val')]))
+                    if self.model.has_improved(best_val_metrics, val_metrics):
+                        self.best_emb = embeddings
+                        best_val_metrics = val_metrics
+                        counter = 0
+                    else:
+                        counter += 1
+                        if counter == self.args.patience and epoch > self.args.min_epochs:
+                            logging.info("Early stopping")
+                            break
+
+        logging.info("Training Finished!")
+        logging.info("Total time elapsed: {:.4f}s".format(time.time() - t_total))
+
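+        # Map the best hyperbolic embeddings back to the tangent space at the origin
+        # before running the Euclidean downstream task (classification/clustering).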
+        X = self.model.manifold.logmap0(self.best_emb, self.model.encoder.curvatures[-1]).cpu().detach().numpy()
+        y = self.data['labels'].reshape(-1, 1)
+        acc, f1, recall, precision, roc_auc = perform_task(self.args, X, y)
+
+        return {'real': real_losses, 'best': best_losses}, acc, f1, recall, precision, roc_auc, time.time() - t_total
+
+    def predict(self):
+        self.model.eval()
+        embeddings = self.model.encode(self.data['features'], self.adj_train_enc)
+        val_metrics = self.model.compute_metrics(embeddings, self.data, 'test')
+        data = self.model.manifold.logmap0(embeddings, self.model.encoder.curvatures[-1]).cpu().detach().numpy()
+        labels = self.data['labels'].reshape(-1, 1)
+        acc, f1, recall, precision, roc_auc = perform_task(self.args, data, labels)
+        return val_metrics['loss'].item(), acc, f1, recall, precision, roc_auc
+
+
+    def save_embeddings(self, directory, prefix):
+        tb_embeddings_euc = self.model.manifold.logmap0(self.best_emb, self.model.encoder.curvatures[-1])
+        for_classification_hyp = np.hstack((self.best_emb.cpu().detach().numpy(), self.data['labels'].reshape(-1, 1)))
+        for_classification_euc = np.hstack((tb_embeddings_euc.cpu().detach().numpy(), self.data['labels'].reshape(-1, 1)))
+        hyp_file_path = os.path.join(directory, f'{prefix}_embeddings_hyp.csv')
+        euc_file_path = os.path.join(directory, f'{prefix}_embeddings_euc.csv')
+        np.savetxt(hyp_file_path, for_classification_hyp, delimiter=',')
+        np.savetxt(euc_file_path, for_classification_euc, delimiter=',')
\ No newline at end of file
diff --git a/HGCAE/layers/__init__.py b/HGCAE/layers/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/HGCAE/layers/att_layers.py b/HGCAE/layers/att_layers.py
new file mode 100644
index 0000000000000000000000000000000000000000..e99964c73d3eefa9a058c73d9bb5d53fa604839d
--- /dev/null
+++ b/HGCAE/layers/att_layers.py
@@ -0,0 +1,80 @@
+"""Attention layers (some modules are copied from https://github.com/Diego999/pyGAT.)"""
+import numpy as np
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+def HypAggAtt(in_features, manifold, dropout, act=None, att_type=None, att_logit=None, beta=0):
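+    # Thin factory around the geometric-aware sparse attention; `act` and
+    # `att_type` are accepted for interface compatibility but not used here.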
+    att_logit = get_att_logit(att_logit, att_type)
+    return GeometricAwareHypAggAtt(in_features, manifold, dropout, lambda x: x, att_logit=att_logit, beta=beta)
+
+class GeometricAwareHypAggAtt(nn.Module):
+    def __init__(self, in_features, manifold, dropout, act, att_logit=torch.tanh, beta=0.):
+        super(GeometricAwareHypAggAtt, self).__init__()
+        self.dropout = dropout
+        self.att_logit=att_logit
+        self.special_spmm = SpecialSpmm()
+
+
+        self.m = manifold
+        self.beta = nn.Parameter(torch.Tensor([1e-6]))
+        self.con = nn.Parameter(torch.Tensor([1e-6]))
+        self.act = act
+        self.in_features = in_features
+
+    def forward(self, x, adj, c=1):
+        n = x.size(0)
+        edge = adj._indices()
+
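+        # Edge logits: learned scale (beta) times the squared hyperbolic distance
+        # between the two endpoints of each edge, plus a learned constant (con).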
+        assert not torch.isnan(self.beta).any()
+        edge_h = self.beta * self.m.sqdist(x[edge[0, :], :], x[edge[1, :], :], c) + self.con
+
+        self.edge_h = edge_h
+        assert not torch.isnan(edge_h).any()
+        edge_e = self.att_logit(edge_h)
+        self.edge_e = edge_e
+        ones = torch.ones(size=(n, 1))
+        if x.is_cuda:
+            ones = ones.to(x.device)
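+        # Row sums of |attention weights|; the epsilon guards against division by zero.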
+        e_rowsum = self.special_spmm(edge, abs(edge_e), torch.Size([n, n]), ones) + 1e-10
+
+        return edge_e, e_rowsum
+
+class SpecialSpmmFunction(torch.autograd.Function):
+    """Special function for only sparse region backpropataion layer."""
+    # generate sparse matrix from `indicex, values, shape` and matmul with b
+    # Previously, `AXW` computing did not need bp to `A`.
+    # To trian attention of `A`, now bp through sparse matrix needed.
+    @staticmethod
+    def forward(ctx, indices, values, shape, b):
+        assert indices.requires_grad == False
+        a = torch.sparse_coo_tensor(indices, values, shape, device=b.device) # make sparse matrix shaped of `NxN` 
+        ctx.save_for_backward(a, b) # save sparse matrix for bp
+        ctx.N = shape[0] # number of nodes
+        return torch.matmul(a, b)
+
+    @staticmethod
+    def backward(ctx, grad_output):
+        assert not torch.isnan(grad_output).any()
+
+        # grad_output : Nxd  gradient
+        # a : NxN adj(attention) matrix, b: Nxd node feature
+        a, b = ctx.saved_tensors
+        grad_values = grad_b = None
+        if ctx.needs_input_grad[1]:
+            grad_a_dense = grad_output.matmul(b.t())
+            edge_idx = a._indices()[0, :] * ctx.N + a._indices()[1, :] # flattening (x,y) --> nx + y
+            grad_values = grad_a_dense.view(-1)[edge_idx]
+        if ctx.needs_input_grad[3]:
+            grad_b = a.t().matmul(grad_output)
+        return None, grad_values, None, grad_b
+
+
+class SpecialSpmm(nn.Module):
+    def forward(self, indices, values, shape, b):
+        return SpecialSpmmFunction.apply(indices, values, shape, b)
+
+def get_att_logit(att_logit, att_type):
+    if att_logit:
+        att_logit = getattr(torch, att_logit)
+    return att_logit
diff --git a/HGCAE/layers/hyp_layers.py b/HGCAE/layers/hyp_layers.py
new file mode 100644
index 0000000000000000000000000000000000000000..c19b24150a0992f06790f510e4da5d1dcd516d50
--- /dev/null
+++ b/HGCAE/layers/hyp_layers.py
@@ -0,0 +1,232 @@
+"""
+Hyperbolic layers.
+Major codes of hyperbolic layers are from HGCN
+"""
+import math
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import torch.nn.init as init
+from torch.nn.modules.module import Module
+from torch.nn.parameter import Parameter
+
+from Ghypeddings.HGCAE.layers.att_layers import HypAggAtt, SpecialSpmm
+
+
+def get_dim_act_curv(args):
+    """
+    Helper function to get dimension and activation at every layer.
+    :param args:
+    :return:
+    """
+    if not args.act:
+        act = lambda x: x
+    else:
+        act = getattr(F, args.act)
+    acts = [act] * (args.num_layers - 1)
+
+    dims = [args.feat_dim]
+    # Check that num_layers and hidden_dim match
+    if args.num_layers > 1:
+        hidden_dim = [args.hidden_dim for _ in range(args.num_layers - 1)]
+        if args.num_layers != len(hidden_dim) + 1:
+            raise RuntimeError('Check dimension hidden:{}, num_layers:{}'.format(args.hidden_dim, args.num_layers) )
+        dims = dims + hidden_dim
+
+    dims += [args.dim]
+    acts += [act]
+    n_curvatures = args.num_layers
+    if args.c_trainable == 1: # NOTE : changed from # if args.c is None:
+        # create list of trainable curvature parameters
+        curvatures = [nn.Parameter(torch.Tensor([args.c]).to(args.device)) for _ in range(n_curvatures)]
+    else:
+        # fixed curvature
+        curvatures = [torch.tensor([args.c]) for _ in range(n_curvatures)]
+        if not args.cuda == -1:
+            curvatures = [curv.to(args.device) for curv in curvatures]
+    return dims, acts, curvatures
+
+
+
+class HNNLayer(nn.Module):
+    """
+    Hyperbolic neural networks layer.
+    """
+
+    def __init__(self, manifold, in_features, out_features, c_in, c_out, dropout, act, use_bias):
+        super(HNNLayer, self).__init__()
+        self.linear = HypLinear(manifold, in_features, out_features, c_in, dropout, use_bias)
+        self.hyp_act = HypAct(manifold, c_in, c_out, act)
+
+    def forward(self, x):
+        h = self.linear.forward(x)
+        h = self.hyp_act.forward(h)
+        return h
+
+
+class HyperbolicGraphConvolution(nn.Module):
+    """
+    Hyperbolic graph convolution layer.
+    """
+
+    def __init__(self, manifold, in_features, out_features, c_in, c_out, dropout, act, use_bias, use_att,
+            att_type='sparse_adjmask_dist', att_logit=torch.exp, beta=0., decode=False):
+        super(HyperbolicGraphConvolution, self).__init__()
+        self.linear = HypLinear(manifold, in_features, out_features, c_in, dropout, use_bias)
+        self.agg = HypAgg(manifold, c_in, use_att, out_features, dropout, att_type=att_type, att_logit=att_logit, beta=beta, decode=decode)
+        self.hyp_act = HypAct(manifold, c_in, c_out, act)
+        self.decode = decode
+
+    def forward(self, input):
+        x, adj = input
+        assert not torch.isnan(self.hyp_act.c_in).any()
+        self.hyp_act.c_in.data = torch.clamp_min(self.hyp_act.c_in,1e-12)
+        if self.hyp_act.c_out:
+            assert not torch.isnan(self.hyp_act.c_out).any()
+            self.hyp_act.c_out.data = torch.clamp_min(self.hyp_act.c_out,1e-12)
+        assert not torch.isnan(x).any()
+        h = self.linear.forward(x)
+        assert not torch.isnan(h).any()
+        h = self.agg.forward(h, adj, prev_x=x)
+        assert not torch.isnan(h).any()
+        h = self.hyp_act.forward(h)
+        assert not torch.isnan(h).any()
+        output = h, adj
+        return output
+
+
+class HypLinear(nn.Module):
+    """
+    Hyperbolic linear layer.
+    """
+
+    def __init__(self, manifold, in_features, out_features, c, dropout, use_bias):
+        super(HypLinear, self).__init__()
+        self.manifold = manifold
+        self.in_features = in_features
+        self.out_features = out_features
+        self.c = c
+        self.dropout = dropout
+        self.use_bias = use_bias
+        # self.bias = nn.Parameter(torch.Tensor(out_features))
+        self.bias = nn.Parameter(torch.Tensor(1, out_features))
+        self.weight = nn.Parameter(torch.Tensor(out_features, in_features))
+        self.reset_parameters()
+
+    def reset_parameters(self):
+        init.xavier_uniform_(self.weight, gain=math.sqrt(2))
+        init.constant_(self.bias, 0)
+
+    def forward(self, x):
+        drop_weight = F.dropout(self.weight, self.dropout, training=self.training)
+        mv = self.manifold.mobius_matvec(drop_weight, x, self.c)
+        res = self.manifold.proj(mv, self.c)
+        if self.use_bias: 
+            bias = self.bias
+            hyp_bias = self.manifold.expmap0(bias, self.c)
+            hyp_bias = self.manifold.proj(hyp_bias, self.c)
+            res = self.manifold.mobius_add(res, hyp_bias, c=self.c)
+            res = self.manifold.proj(res, self.c)
+        return res
+
+    def extra_repr(self):
+        return 'in_features={}, out_features={}, c={}'.format(
+                self.in_features, self.out_features, self.c
+        )
+
+
+class HypAgg(Module):
+    """
+    Hyperbolic aggregation layer.
+    """
+
+    def __init__(self, manifold, c, use_att, in_features, dropout, att_type='sparse_adjmask_dist', att_logit=None, beta=0, decode=False):
+        super(HypAgg, self).__init__()
+        self.manifold = manifold
+        self.c = c
+        self.use_att = use_att
+
+        self.in_features = in_features
+        self.dropout = dropout
+        if use_att:
+            self.att = HypAggAtt(in_features, manifold, dropout, act=None, att_type=att_type, att_logit=att_logit, beta=beta)
+            self.att_type = att_type
+
+            self.special_spmm = SpecialSpmm()
+        self.decode = decode
+
+    def forward(self, x, adj, prev_x=None):
+
+        if self.use_att:
+            dist = 'dist' in self.att_type
+            if dist:
+                if 'sparse' in self.att_type:
+                    if self.decode:
+                        # NOTE : AGG(prev_x)
+                        edge_e, e_rowsum = self.att(prev_x, adj, self.c) # SparseAtt
+                    else:
+                        # NOTE : AGG(x)
+                        edge_e, e_rowsum = self.att(x, adj, self.c) # SparseAtt
+                    self.edge_e = edge_e
+                    self.e_rowsum = e_rowsum
+                    ## SparseAtt
+                    x_tangent = self.manifold.logmap0(x, c=self.c)
+                    N = x.size()[0]
+                    edge = adj._indices()
+                    support_t = self.special_spmm(edge, edge_e, torch.Size([N, N]), x_tangent) 
+                    assert not torch.isnan(support_t).any()
+                    support_t = support_t.div(e_rowsum)
+                    assert not torch.isnan(support_t).any()
+                    output = self.manifold.proj(self.manifold.expmap0(support_t, c=self.c), c=self.c)
+                else:
+                    adj = self.att(x, adj, self.c) # DenseAtt
+                    x_tangent = self.manifold.logmap0(x, c=self.c)
+                    support_t = torch.spmm(adj, x_tangent)
+                    output = self.manifold.proj(self.manifold.expmap0(support_t, c=self.c), c=self.c)
+            else:
+                ## MLP attention
+                x_tangent = self.manifold.logmap0(x, c=self.c)
+                adj = self.att(x_tangent, adj)
+                support_t = torch.spmm(adj, x_tangent)
+                output = self.manifold.proj(self.manifold.expmap0(support_t, c=self.c), c=self.c)
+        else:
+            x_tangent = self.manifold.logmap0(x, c=self.c)
+            support_t = torch.spmm(adj, x_tangent)
+            output = self.manifold.proj(self.manifold.expmap0(support_t, c=self.c), c=self.c)
+
+        return output
+
+    def extra_repr(self):
+        return 'c={}, use_att={}, decode={}'.format(
+                self.c, self.use_att, self.decode
+        )
+
+
+class HypAct(Module):
+    """
+    Hyperbolic activation layer.
+    """
+
+    def __init__(self, manifold, c_in, c_out, act):
+        super(HypAct, self).__init__()
+        self.manifold = manifold
+        self.c_in = c_in
+        self.c_out = c_out
+        self.act = act
+
+    def forward(self, x):
+        if self.manifold.name == 'PoincareBall':
+            if self.c_out:
+                xt = self.manifold.activation(x, self.act, self.c_in, self.c_out)
+                return xt
+            else:
+                xt = self.manifold.logmap0(x, c=self.c_in)
+                return xt
+        else:
+            raise NotImplementedError("HypAct is only implemented for the PoincareBall manifold")
+
+    def extra_repr(self):
+        return 'Manifold={},\n c_in={},\n act={},\n c_out={}'.format(
+                self.manifold.name, self.c_in, self.act.__name__, self.c_out
+        )
diff --git a/HGCAE/layers/layers.py b/HGCAE/layers/layers.py
new file mode 100644
index 0000000000000000000000000000000000000000..d17b37d1f62390e982b94ee4e5450d8a5a0bf632
--- /dev/null
+++ b/HGCAE/layers/layers.py
@@ -0,0 +1,68 @@
+"""Euclidean layers."""
+import math
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from torch.nn.modules.module import Module
+from torch.nn.parameter import Parameter
+
+
+def get_dim_act(args):
+    """
+    Helper function to get dimension and activation at every layer.
+    :param args:
+    :return:
+    """
+    if not args.act:
+        act = lambda x: x
+    else:
+        act = getattr(F, args.act)
+    acts = [act] * (args.num_layers - 1)
+
+    dims = [args.feat_dim]
+    if args.num_layers > 1:
+        # Check that num_layers and hidden_dim match
+        hidden_dim = [int(h) for h in args.hidden_dim.split(',')]
+        if args.num_layers != len(hidden_dim) + 1:
+            raise RuntimeError('Check dimension hidden:{}, num_layers:{}'.format(args.hidden_dim, args.num_layers))
+        dims = dims + hidden_dim
+
+    dims += [args.dim]
+    acts += [act]
+    return dims, acts
+
+
+class Linear(Module):
+    """
+    Simple Linear layer with dropout.
+    """
+
+    def __init__(self, in_features, out_features, dropout, act, use_bias):
+        super(Linear, self).__init__()
+        self.dropout = dropout
+        self.linear = nn.Linear(in_features, out_features, use_bias)
+        self.act = act
+
+    def forward(self, x):
+        hidden = self.linear.forward(x)
+        hidden = F.dropout(hidden, self.dropout, training=self.training)
+        out = self.act(hidden)
+        return out
+
+'''
+InnerProductDecoder implementation from:
+https://github.com/zfjsail/gae-pytorch/blob/master/gae/model.py
+'''
+class InnerProductDecoder(nn.Module):
+    """Decoder for using inner product for prediction."""
+
+    def __init__(self, dropout=0, act=torch.sigmoid):
+        super(InnerProductDecoder, self).__init__()
+        self.dropout = dropout
+        self.act = act
+
+    def forward(self, emb_in, emb_out):
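+        # Inner product between endpoint embeddings, squashed by `act` into a link probability.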
+        cos_dist = emb_in * emb_out
+        probs = self.act(cos_dist.sum(1))
+        return probs
diff --git a/HGCAE/manifolds/__init__.py b/HGCAE/manifolds/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..ed1d71769d7412c7f0fa82c9425ee7fe449e9567
--- /dev/null
+++ b/HGCAE/manifolds/__init__.py
@@ -0,0 +1,7 @@
+'''
+Major codes of hyperbolic layers are from HGCN
+Refer to the Lorentz implementation in HGCN if needed.
+'''
+from Ghypeddings.HGCAE.manifolds.base import ManifoldParameter
+from Ghypeddings.HGCAE.manifolds.euclidean import Euclidean
+from Ghypeddings.HGCAE.manifolds.poincare import PoincareBall
diff --git a/HGCAE/manifolds/base.py b/HGCAE/manifolds/base.py
new file mode 100644
index 0000000000000000000000000000000000000000..805edd678d9e768f22a0dce3a6691bf8556ed53d
--- /dev/null
+++ b/HGCAE/manifolds/base.py
@@ -0,0 +1,84 @@
+'''
+Major codes of hyperbolic layers are from HGCN
+'''
+from torch.nn import Parameter
+
+class Manifold(object):
+    """
+    Abstract class to define operations on a manifold.
+    """
+
+    def __init__(self):
+        super().__init__()
+        self.eps = 10e-8
+
+    def sqdist(self, p1, p2, c):
+        """Squared distance between pairs of points."""
+        raise NotImplementedError
+
+    def egrad2rgrad(self, p, dp, c):
+        """Converts Euclidean Gradient to Riemannian Gradients."""
+        raise NotImplementedError
+
+    def proj(self, p, c):
+        """Projects point p on the manifold."""
+        raise NotImplementedError
+
+    def proj_tan(self, u, p, c):
+        """Projects u on the tangent space of p."""
+        raise NotImplementedError
+
+    def proj_tan0(self, u, c):
+        """Projects u on the tangent space of the origin."""
+        raise NotImplementedError
+
+    def expmap(self, u, p, c):
+        """Exponential map of u at point p."""
+        raise NotImplementedError
+
+    def logmap(self, p1, p2, c):
+        """Logarithmic map of point p1 at point p2."""
+        raise NotImplementedError
+
+    def expmap0(self, u, c):
+        """Exponential map of u at the origin."""
+        raise NotImplementedError
+
+    def logmap0(self, p, c):
+        """Logarithmic map of point p at the origin."""
+        raise NotImplementedError
+
+    def mobius_add(self, x, y, c, dim=-1):
+        """Adds points x and y."""
+        raise NotImplementedError
+
+    def mobius_matvec(self, m, x, c):
+        """Performs hyperboic martrix-vector multiplication."""
+        raise NotImplementedError
+
+    def init_weights(self, w, c, irange=1e-5):
+        """Initializes random weigths on the manifold."""
+        raise NotImplementedError
+
+    def inner(self, p, c, u, v=None):
+        """Inner product for tangent vectors at point x."""
+        raise NotImplementedError
+
+    def ptransp(self, x, y, u, c):
+        """Parallel transport of u from x to y."""
+        raise NotImplementedError
+
+
+class ManifoldParameter(Parameter):
+    """
+    Subclass of torch.nn.Parameter for Riemannian optimization.
+    """
+    def __new__(cls, data, requires_grad, manifold, c):
+        return Parameter.__new__(cls, data, requires_grad)
+
+    def __init__(self, data, requires_grad, manifold, c):
+        self.c = c
+        self.manifold = manifold
+
+    def __repr__(self):
+        return '{} Parameter containing:\n'.format(self.manifold.name) + super(Parameter, self).__repr__()
diff --git a/HGCAE/manifolds/euclidean.py b/HGCAE/manifolds/euclidean.py
new file mode 100644
index 0000000000000000000000000000000000000000..c102023b24eebc91053be85984a8a295166e8c41
--- /dev/null
+++ b/HGCAE/manifolds/euclidean.py
@@ -0,0 +1,66 @@
+'''
+Major codes of hyperbolic layers are from HGCN
+'''
+import torch
+from Ghypeddings.HGCAE.manifolds.base import Manifold
+
+
+class Euclidean(Manifold):
+    """
+    Euclidean Manifold class.
+    """
+
+    def __init__(self):
+        super(Euclidean, self).__init__()
+        self.name = 'Euclidean'
+
+    def normalize(self, p):
+        dim = p.size(-1)
+        p_norm = torch.renorm(p, 2, 0, 1.)
+        return p_norm
+
+    def sqdist(self, p1, p2, c):
+        return (p1 - p2).pow(2).sum(dim=-1)
+
+    def egrad2rgrad(self, p, dp, c):
+        return dp
+
+    def proj(self, p, c):
+        return p
+
+    def proj_tan(self, u, p, c):
+        return u
+
+    def proj_tan0(self, u, c):
+        return u
+
+    def expmap(self, u, p, c):
+        return p + u
+
+    def logmap(self, p1, p2, c):
+        return p2 - p1
+
+    def expmap0(self, u, c):
+        return u
+
+    def logmap0(self, p, c):
+        return p
+
+    def mobius_add(self, x, y, c, dim=-1):
+        return x + y
+
+    def mobius_matvec(self, m, x, c):
+        mx = x @ m.transpose(-1, -2)
+        return mx
+
+    def init_weights(self, w, c, irange=1e-5):
+        w.data.uniform_(-irange, irange)
+        return w
+
+    def inner(self, p, c, u, v=None, keepdim=False):
+        if v is None:
+            v = u
+        return (u * v).sum(dim=-1, keepdim=keepdim)
+
+    def ptransp(self, x, y, v, c):
+        return v
diff --git a/HGCAE/manifolds/poincare.py b/HGCAE/manifolds/poincare.py
new file mode 100644
index 0000000000000000000000000000000000000000..df06e38afaca4063ce8975527f286fb2397d8956
--- /dev/null
+++ b/HGCAE/manifolds/poincare.py
@@ -0,0 +1,136 @@
+'''
+Major codes of hyperbolic layers are from HGCN
+'''
+import torch
+from Ghypeddings.HGCAE.manifolds.base import Manifold
+from torch.autograd import Function
+from Ghypeddings.HGCAE.utils.math_utils import artanh, tanh
+
+
+class PoincareBall(Manifold):
+    """
+    PoincareBall Manifold class.
+
+    We use the following convention: x0^2 + x1^2 + ... + xd^2 < 1 / c
+
+    Note that 1/sqrt(c) is the Poincare ball radius.
+
+    """
+
+    def __init__(self, ):
+        super(PoincareBall, self).__init__()
+        self.name = 'PoincareBall'
+        self.min_norm = 1e-15
+        self.eps = {torch.float32: 4e-3, torch.float64: 1e-5}
+
+    def sqdist(self, p1, p2, c):
+        sqrt_c = c ** 0.5
+        dist_c = artanh(
+            sqrt_c * self.mobius_add(-p1, p2, c, dim=-1).norm(dim=-1, p=2, keepdim=False)
+        )
+        dist = dist_c * 2 / sqrt_c
+        return dist ** 2
+
+    def _lambda_x(self, x, c):
+        x_sqnorm = torch.sum(x.data.pow(2), dim=-1, keepdim=True)
+        return 2 / (1. - c * x_sqnorm).clamp_min(self.min_norm)
+
+    def egrad2rgrad(self, p, dp, c):
+        lambda_p = self._lambda_x(p, c)
+        dp /= lambda_p.pow(2)
+        return dp
+
+    def proj(self, x, c):
+        norm = torch.clamp_min(x.norm(dim=-1, keepdim=True, p=2), self.min_norm)
+        maxnorm = (1 - self.eps[x.dtype]) / (c ** 0.5)
+        cond = norm > maxnorm
+        projected = x / norm * maxnorm
+        return torch.where(cond, projected, x)
+
+    def proj_tan(self, u, p, c):
+        return u
+
+    def proj_tan0(self, u, c):
+        return u
+
+    def expmap(self, u, p, c):
+        sqrt_c = c ** 0.5
+        u_norm = u.norm(dim=-1, p=2, keepdim=True).clamp_min(self.min_norm)
+        second_term = (
+                tanh(sqrt_c / 2 * self._lambda_x(p, c) * u_norm)
+                * u
+                / (sqrt_c * u_norm)
+        )
+        gamma_1 = self.mobius_add(p, second_term, c)
+        return gamma_1
+
+    def logmap(self, p1, p2, c):
+        sub = self.mobius_add(-p1, p2, c)
+        sub_norm = sub.norm(dim=-1, p=2, keepdim=True).clamp_min(self.min_norm)
+        lam = self._lambda_x(p1, c)
+        sqrt_c = c ** 0.5
+        return 2 / sqrt_c / lam * artanh(sqrt_c * sub_norm) * sub / sub_norm
+
+    def expmap0(self, u, c):
+        sqrt_c = c ** 0.5
+        u_norm = torch.clamp_min(u.norm(dim=-1, p=2, keepdim=True), self.min_norm)
+        gamma_1 = tanh(sqrt_c * u_norm) * u / (sqrt_c * u_norm)
+        return gamma_1
+
+    def logmap0(self, p, c):
+        sqrt_c = c ** 0.5
+        p_norm = p.norm(dim=-1, p=2, keepdim=True).clamp_min(self.min_norm)
+        scale = 1. / sqrt_c * artanh(sqrt_c * p_norm) / p_norm
+        return scale * p
+
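+    # Mobius addition on the c-ball:
+    # x (+)_c y = ((1 + 2c<x,y> + c|y|^2) x + (1 - c|x|^2) y) / (1 + 2c<x,y> + c^2 |x|^2 |y|^2)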
+    def mobius_add(self, x, y, c, dim=-1):
+        x2 = x.pow(2).sum(dim=dim, keepdim=True)
+        y2 = y.pow(2).sum(dim=dim, keepdim=True)
+        xy = (x * y).sum(dim=dim, keepdim=True)
+        num = (1 + 2 * c * xy + c * y2) * x + (1 - c * x2) * y
+        denom = 1 + 2 * c * xy + c ** 2 * x2 * y2
+        return num / denom.clamp_min(self.min_norm)
+
+    def mobius_matvec(self, m, x, c):
+        sqrt_c = c ** 0.5
+        x_norm = x.norm(dim=-1, keepdim=True, p=2).clamp_min(self.min_norm)
+        mx = x @ m.transpose(-1, -2)
+        mx_norm = mx.norm(dim=-1, keepdim=True, p=2).clamp_min(self.min_norm)
+        res_c = tanh(mx_norm / x_norm * artanh(sqrt_c * x_norm)) * mx / (mx_norm * sqrt_c)
+        cond = (mx == 0).all(dim=-1, keepdim=True)
+        res_0 = torch.zeros(1, dtype=res_c.dtype, device=res_c.device)
+        res = torch.where(cond, res_0, res_c)
+        return res
+
+    def init_weights(self, w, c, irange=1e-5):
+        w.data.uniform_(-irange, irange)
+        return w
+
+    def _gyration(self, u, v, w, c, dim: int = -1):
+        u2 = u.pow(2).sum(dim=dim, keepdim=True)
+        v2 = v.pow(2).sum(dim=dim, keepdim=True)
+        uv = (u * v).sum(dim=dim, keepdim=True)
+        uw = (u * w).sum(dim=dim, keepdim=True)
+        vw = (v * w).sum(dim=dim, keepdim=True)
+        c2 = c ** 2
+        a = -c2 * uw * v2 + c * vw + 2 * c2 * uv * vw
+        b = -c2 * vw * u2 - c * uw
+        d = 1 + 2 * c * uv + c2 * u2 * v2
+        return w + 2 * (a * u + b * v) / d.clamp_min(self.min_norm)
+
+    def inner(self, x, c, u, v=None, keepdim=False, dim=-1):
+        if v is None:
+            v = u
+        lambda_x = self._lambda_x(x, c)
+        return lambda_x ** 2 * (u * v).sum(dim=dim, keepdim=keepdim)
+
+    def ptransp(self, x, y, u, c):
+        lambda_x = self._lambda_x(x, c)
+        lambda_y = self._lambda_x(y, c)
+        return self._gyration(y, -x, u, c) * lambda_x / lambda_y
+
+    def activation(self, x, act, c_in, c_out):
+        x_act = act(x)
+        x_prev = self.logmap0(x_act, c_in)
+        x_next = self.expmap0(x_prev, c_out)
+        return x_next
diff --git a/HGCAE/models/__init__.py b/HGCAE/models/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/HGCAE/models/base_models.py b/HGCAE/models/base_models.py
new file mode 100644
index 0000000000000000000000000000000000000000..7d10fd9f134e8ffe44b5c5e58975f31723a081cd
--- /dev/null
+++ b/HGCAE/models/base_models.py
@@ -0,0 +1,200 @@
+import Ghypeddings.HGCAE.models.encoders as encoders
+import torch
+import numpy as np
+import torch.nn as nn
+import torch.nn.functional as F
+from Ghypeddings.HGCAE.models.decoders import model2decoder
+from Ghypeddings.HGCAE.layers.layers import  InnerProductDecoder
+from sklearn.metrics import roc_auc_score, average_precision_score
+from Ghypeddings.HGCAE.utils.eval_utils import acc_f1
+from sklearn import cluster
+from sklearn.metrics import accuracy_score, normalized_mutual_info_score, adjusted_rand_score
+import Ghypeddings.HGCAE.manifolds as manifolds
+import Ghypeddings.HGCAE.models.encoders as encoders
+
+class BaseModel(nn.Module):
+    """
+    Base model for graph embedding tasks.
+    """
+
+    def __init__(self, args):
+        super(BaseModel, self).__init__()
+        self.manifold_name = "PoincareBall"
+        if args.c is not None:
+            self.c = torch.tensor([args.c])
+            if not args.cuda == -1:
+                self.c = self.c.to(args.device)
+        else:
+            self.c = nn.Parameter(torch.Tensor([1.]))
+        self.manifold = getattr(manifolds, self.manifold_name)()
+        self.nnodes = args.n_nodes
+        self.n_classes = args.n_classes
+        self.encoder = getattr(encoders, "HGCAE")(self.c, args)
+        self.num_layers=args.num_layers
+
+        # Embedding curvature: embeddings are treated as hyperbolic when attention is used
+        self.hyperbolic_embedding = bool(args.use_att)
+        self.decoder_type = 'InnerProductDecoder'
+        self.dc = InnerProductDecoder(dropout=0, act=torch.sigmoid)
+
+
+    def encode(self, x, adj):
+        h = self.encoder.encode(x, adj)
+        return h
+
+    def pred_link_score(self, h, idx):  # for LP,REC 
+        emb_in = h[idx[:, 0], :]
+        emb_out = h[idx[:, 1], :]
+        probs = self.dc.forward(emb_in, emb_out)
+
+        return probs
+
+    def decode(self, h, adj, idx): # REC
+        output = self.decoder.decode(h, adj)
+        return output
+
+
+    def eval_cluster(self, embeddings, data, split):
+        if self.hyperbolic_embedding:
+            emb_c = self.encoder.layers[-1].hyp_act.c_out
+            embeddings = self.manifold.logmap0(embeddings.to(emb_c.device), c=emb_c).cpu()
+
+        idx = data[f'idx_{split}']
+        n_classes = self.n_classes
+
+        embeddings_to_cluster = embeddings[idx].detach().cpu().numpy()
+        # gt_label = data['labels'][idx].cpu().numpy()
+        gt_label = data['labels']
+
+        kmeans = cluster.KMeans(n_clusters=n_classes, algorithm='auto')
+        pred_label = kmeans.fit_predict(embeddings_to_cluster)
+
+        from munkres import Munkres
+        def best_map(L1,L2):
+            # L1: ground-truth labels; L2: predicted clustering labels
+            Label1 = np.unique(L1)
+            nClass1 = len(Label1)
+            Label2 = np.unique(L2)
+            nClass2 = len(Label2)
+            nClass = np.maximum(nClass1,nClass2)
+            G = np.zeros((nClass,nClass))
+            for i in range(nClass1):
+                ind_cla1 = L1 == Label1[i]
+                ind_cla1 = ind_cla1.astype(float)
+                for j in range(nClass2):
+                    ind_cla2 = L2 == Label2[j]
+                    ind_cla2 = ind_cla2.astype(float)
+                    G[i,j] = np.sum(ind_cla2 * ind_cla1)
+            m = Munkres()
+            index = m.compute(-G.T)
+            index = np.array(index)
+            c = index[:,1]
+            newL2 = np.zeros(L2.shape)
+            for i in range(nClass2):
+                newL2[L2 == Label2[i]] = Label1[c[i]]
+            return newL2
+
+
+        def err_rate(gt_s, s):
+            c_x = best_map(gt_s, s)
+            err_x = np.sum(gt_s[:] !=c_x[:])
+            missrate = err_x.astype(float) / (gt_s.shape[0])
+            return missrate
+
+
+        acc = 1-err_rate(gt_label, pred_label)
+        # acc = accuracy_score(gt_label, pred_label)
+        nmi = normalized_mutual_info_score(gt_label, pred_label, average_method='arithmetic')
+        ari = adjusted_rand_score(gt_label, pred_label)
+    
+        metrics = { 'cluster_acc': acc, 'nmi': nmi, 'ari': ari}
+        return metrics, pred_label
+
+
+    def compute_metrics(self, embeddings, data, split, epoch=None):
+        raise NotImplementedError
+
+    def init_metric_dict(self):
+        raise NotImplementedError
+
+    def has_improved(self, m1, m2):
+        raise NotImplementedError
+
+class LPModel(BaseModel):
+    """
+    Base model for link prediction task.
+    """
+
+    def __init__(self, args):
+        super(LPModel, self).__init__(args)
+        self.nb_false_edges = args.nb_false_edges
+        self.positive_edge_sampling = True
+        if self.positive_edge_sampling:
+            self.nb_edges = min(args.nb_edges, 5000) # NOTE : cap sampled positive edges on very dense graphs
+        else:
+            self.nb_edges = args.nb_edges
+
+        if args.lambda_rec > 0:
+            self.num_dec_layers = args.num_dec_layers
+            self.lambda_rec = args.lambda_rec
+            c = self.encoder.curvatures if hasattr(self.encoder, 'curvatures') else args.c ### handle HNN
+            self.decoder = model2decoder(c, args, 'rec')
+        else:
+            self.lambda_rec = 0
+            
+        if args.lambda_lp > 0:
+            self.lambda_lp = args.lambda_lp
+        else:
+            self.lambda_lp = 0
+
+    def compute_metrics(self, embeddings, data, split, epoch=None):
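+        # During training, subsample positive and negative edges so the BCE loss
+        # is computed on at most `nb_edges` pairs of each kind.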
+        if split == 'train':
+            num_true_edges = data[f'{split}_edges'].shape[0]
+            if self.positive_edge_sampling and num_true_edges > self.nb_edges:
+                edges_true = data[f'{split}_edges'][np.random.randint(0, num_true_edges, self.nb_edges)]
+            else:
+                edges_true = data[f'{split}_edges']
+            edges_false = data[f'{split}_edges_false'][np.random.randint(0, self.nb_false_edges, self.nb_edges)]
+        else:
+            edges_true = data[f'{split}_edges']
+            edges_false = data[f'{split}_edges_false']
+
+        pos_scores = self.pred_link_score(embeddings, edges_true)
+        neg_scores = self.pred_link_score(embeddings, edges_false)
+        assert not torch.isnan(pos_scores).any()
+        assert not torch.isnan(neg_scores).any()
+        loss = F.binary_cross_entropy(pos_scores, torch.ones_like(pos_scores))
+        loss += F.binary_cross_entropy(neg_scores, torch.zeros_like(neg_scores))
+        if pos_scores.is_cuda:
+            pos_scores = pos_scores.cpu()
+            neg_scores = neg_scores.cpu()
+        labels = [1] * pos_scores.shape[0] + [0] * neg_scores.shape[0]
+        preds = list(pos_scores.data.numpy()) + list(neg_scores.data.numpy())
+        roc = roc_auc_score(labels, preds)
+        ap = average_precision_score(labels, preds)
+        metrics = {'loss': loss, 'roc': roc, 'ap': ap}
+
+        assert not torch.isnan(loss).any()
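+        # Optional reconstruction term: decode the embeddings and penalize the MSE
+        # against the input features (or the first hidden features, depending on decoder depth).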
+        if self.lambda_rec:
+            idx = data['idx_all']
+            recon = self.decode(embeddings, data['adj_train_dec'], idx) ## NOTE : adj
+            assert not torch.isnan(recon).any()
+            if self.num_dec_layers == self.num_layers:
+                target = data['features'][idx]
+            elif self.num_dec_layers == self.num_layers - 1: 
+                target = self.encoder.features[0].detach()[idx]
+            else:
+                raise RuntimeError('num_dec_layers only support 1,2')
+            loss_rec = self.lambda_rec * torch.nn.functional.mse_loss(recon[idx], target , reduction='mean')
+            assert not torch.isnan(loss_rec).any()
+            loss_lp = loss * self.lambda_lp
+            metrics.update({'loss': loss_lp + loss_rec, 'loss_rec': loss_rec, 'loss_lp': loss_lp})
+
+        return metrics
+
+    def init_metric_dict(self):
+        return {'roc': -1, 'ap': -1}
+
+    def has_improved(self, m1, m2):
+        return 0.5 * (m1['roc'] + m1['ap']) < 0.5 * (m2['roc'] + m2['ap'])
diff --git a/HGCAE/models/decoders.py b/HGCAE/models/decoders.py
new file mode 100644
index 0000000000000000000000000000000000000000..72c46942397dc97d00f1980e6569399e2792a644
--- /dev/null
+++ b/HGCAE/models/decoders.py
@@ -0,0 +1,106 @@
+"""Graph decoders."""
+import Ghypeddings.HGCAE.manifolds as manifolds
+import torch.nn as nn
+import torch.nn.functional as F
+
+
+import torch
+
+
+class Decoder(nn.Module):
+    """
+    Decoder abstract class
+    """
+
+    def __init__(self, c):
+        super(Decoder, self).__init__()
+        self.c = c
+
+    def classify(self, x, adj):
+        '''
+        output
+        - nc : probs 
+        - rec : input_feat
+        '''
+        if self.decode_adj:
+            input = (x, adj)
+            output, _ = self.classifier.forward(input)
+        else:
+            output = self.classifier.forward(x)
+        return output
+
+
+    def decode(self, x, adj):
+        '''
+        output
+        - nc : probs 
+        - rec : input_feat
+        '''
+        if self.decode_adj:
+            input = (x, adj)
+            output, _ = self.decoder.forward(input)
+        else:
+            output = self.decoder.forward(x)
+        return output
+
+
+
+import Ghypeddings.HGCAE.layers.hyp_layers as hyp_layers
+class HGCAEDecoder(Decoder):
+    """
+    Decoder for HGCAE
+    """
+
+    def __init__(self, c, args, task):
+        super(HGCAEDecoder, self).__init__(c)
+        self.manifold = getattr(manifolds, 'PoincareBall')()
+    
+        assert args.num_layers > 0
+
+        dims, acts, _ = hyp_layers.get_dim_act_curv(args)
+        dims = dims[::-1]
+        acts = acts[::-1][:-1] + [lambda x: x] # Last layer without act
+        self.curvatures = self.c[::-1]
+
+        encdec_share_curvature = False
+        if not encdec_share_curvature and args.num_layers == args.num_dec_layers: # do not share and enc-dec mirror-shape
+            num_c = len(self.curvatures)
+            self.curvatures = self.curvatures[:1] 
+            if args.c_trainable == 1:
+                self.curvatures += [nn.Parameter(torch.Tensor([args.c]).to(args.device))] * (num_c - 1)
+            else:
+                self.curvatures += [torch.tensor([args.c])] * (num_c - 1)
+                if not args.cuda == -1:
+                    self.curvatures = [curv.to(args.device) for curv in self.curvatures]
+
+
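+        # The final decoder layer gets no output curvature: with c_out=None, HypAct
+        # maps activations back to the tangent (Euclidean) space via logmap0.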
+        self.curvatures = self.curvatures[:-1] + [None]
+
+
+        hgc_layers = []
+        num_dec_layers = args.num_dec_layers
+        for i in range(num_dec_layers):
+            c_in, c_out = self.curvatures[i], self.curvatures[i + 1]
+            in_dim, out_dim = dims[i], dims[i + 1]
+            act = acts[i]
+            hgc_layers.append(
+                hyp_layers.HyperbolicGraphConvolution(
+                        self.manifold, in_dim, out_dim, c_in, c_out, args.dropout, act, args.bias, args.use_att,
+                        att_type=args.att_type, att_logit=args.att_logit, beta=args.beta, decode=True
+                )
+            )
+
+        self.decoder = nn.Sequential(*hgc_layers)
+        self.decode_adj = True
+
+    # NOTE : self.c is fixed, not trainable
+    def classify(self, x, adj):
+        h = self.manifold.logmap0(x, c=self.c)
+        return super(HGCAEDecoder, self).classify(h, adj)
+    
+    def decode(self, x, adj):
+        output = super(HGCAEDecoder, self).decode(x, adj)
+        return output
+
+model2decoder = HGCAEDecoder
+
diff --git a/HGCAE/models/encoders.py b/HGCAE/models/encoders.py
new file mode 100644
index 0000000000000000000000000000000000000000..6dfdc0f984d4018035118bd4b09b88900bcb0976
--- /dev/null
+++ b/HGCAE/models/encoders.py
@@ -0,0 +1,64 @@
+"""Graph encoders."""
+import Ghypeddings.HGCAE.manifolds as manifolds
+import Ghypeddings.HGCAE.layers.hyp_layers as hyp_layers
+import torch
+import torch.nn as nn
+
+
+class Encoder(nn.Module):
+    """
+    Encoder abstract class.
+    """
+
+    def __init__(self, c, use_cnn=None):
+        super(Encoder, self).__init__()
+        self.c = c
+
+    def encode(self, x, adj):
+        self.features = []
+        if self.encode_graph:
+            input = (x, adj)
+            xx = input
+            for i in range(len(self.layers)):
+                out = self.layers[i].forward(xx)
+                self.features.append(out[0])
+                xx = out
+            output, _ = xx
+        else:
+            output = self.layers.forward(x)
+        return output
+
+class HGCAE(Encoder):
+    """
+    Hyperbolic Graph Convolutional Auto-Encoders.
+    """
+
+    def __init__(self, c, args): #, use_cnn
+        super(HGCAE, self).__init__(c, use_cnn=True)
+        self.manifold = getattr(manifolds, "PoincareBall")()
+        assert args.num_layers > 0 
+        dims, acts, self.curvatures = hyp_layers.get_dim_act_curv(args)
+        if args.c_trainable == 1: 
+            self.curvatures.append(nn.Parameter(torch.Tensor([args.c]).to(args.device)))
+        else:
+            self.curvatures.append(torch.tensor([args.c]).to(args.device)) 
+        hgc_layers = []
+        for i in range(len(dims) - 1):
+            c_in, c_out = self.curvatures[i], self.curvatures[i + 1]
+            in_dim, out_dim = dims[i], dims[i + 1]
+            act = acts[i]
+
+            hgc_layers.append(
+                    hyp_layers.HyperbolicGraphConvolution(
+                            self.manifold, in_dim, out_dim, c_in, c_out, args.dropout, act, args.bias, args.use_att,
+                            att_type=args.att_type, att_logit=args.att_logit, beta=args.beta
+                    )
+            )
+        self.layers = nn.Sequential(*hgc_layers)
+        self.encode_graph = True
+
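+    # Lift Euclidean input features onto the Poincare ball before the first layer.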
+    def encode(self, x, adj):
+        x_hyp = self.manifold.proj(
+                self.manifold.expmap0(self.manifold.proj_tan0(x, self.curvatures[0]), c=self.curvatures[0]),
+                c=self.curvatures[0])
+        return super(HGCAE, self).encode(x_hyp, adj)
diff --git a/HGCAE/optimizers/__init__.py b/HGCAE/optimizers/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..a1198f3d759b39b51aedfe5b2d92f068151a0fe7
--- /dev/null
+++ b/HGCAE/optimizers/__init__.py
@@ -0,0 +1,2 @@
+from torch.optim import Adam
+from Ghypeddings.HGCAE.optimizers.radam import RiemannianAdam
diff --git a/HGCAE/optimizers/radam.py b/HGCAE/optimizers/radam.py
new file mode 100644
index 0000000000000000000000000000000000000000..b48cb6fe6f1a66a8b2103a49b207485a143df1f8
--- /dev/null
+++ b/HGCAE/optimizers/radam.py
@@ -0,0 +1,175 @@
+"""Riemannian adam optimizer geoopt implementation (https://github.com/geoopt/)."""
+import torch.optim
+from Ghypeddings.HGCAE.manifolds import Euclidean,ManifoldParameter
+
+_default_manifold = Euclidean()
+
+
+class OptimMixin(object):
+    def __init__(self, *args, stabilize=None, **kwargs):
+        self._stabilize = stabilize
+        super().__init__(*args, **kwargs)
+
+    def stabilize_group(self, group):
+        pass
+
+    def stabilize(self):
+        """Stabilize parameters if they are off-manifold due to numerical reasons
+        """
+        for group in self.param_groups:
+            self.stabilize_group(group)
+
+
+def copy_or_set_(dest, source):
+    """
+    A workaround to respect strides of :code:`dest` when copying :code:`source`
+    (https://github.com/geoopt/geoopt/issues/70)
+    Parameters
+    ----------
+    dest : torch.Tensor
+        Destination tensor where to store new data
+    source : torch.Tensor
+        Source data to put in the new tensor
+    Returns
+    -------
+    dest
+        torch.Tensor, modified inplace
+    """
+    if dest.stride() != source.stride():
+        return dest.copy_(source)
+    else:
+        return dest.set_(source)
+
+
+class RiemannianAdam(OptimMixin, torch.optim.Adam):
+    r"""Riemannian Adam with the same API as :class:`torch.optim.Adam`
+    Parameters
+    ----------
+    params : iterable
+        iterable of parameters to optimize or dicts defining
+        parameter groups
+    lr : float (optional)
+        learning rate (default: 1e-3)
+    betas : Tuple[float, float] (optional)
+        coefficients used for computing
+        running averages of gradient and its square (default: (0.9, 0.999))
+    eps : float (optional)
+        term added to the denominator to improve
+        numerical stability (default: 1e-8)
+    weight_decay : float (optional)
+        weight decay (L2 penalty) (default: 0)
+    amsgrad : bool (optional)
+        whether to use the AMSGrad variant of this
+        algorithm from the paper `On the Convergence of Adam and Beyond`_
+        (default: False)
+    Other Parameters
+    ----------------
+    stabilize : int
+        Stabilize parameters if they are off-manifold due to numerical
+        reasons every ``stabilize`` steps (default: ``None`` -- no stabilize)
+    .. _On the Convergence of Adam and Beyond:
+        https://openreview.net/forum?id=ryQu7f-RZ
+    """
+
+    def step(self, closure=None):
+        """Performs a single optimization step.
+        Arguments
+        ---------
+        closure : callable (optional)
+            A closure that reevaluates the model
+            and returns the loss.
+        """
+        loss = None
+        if closure is not None:
+            loss = closure()
+        with torch.no_grad():
+            for group in self.param_groups:
+                if "step" not in group:
+                    group["step"] = 0
+                betas = group["betas"]
+                weight_decay = group["weight_decay"]
+                eps = group["eps"]
+                learning_rate = group["lr"]
+                amsgrad = group["amsgrad"]
+                for point in group["params"]:
+                    grad = point.grad
+                    if grad is None:
+                        continue
+
+                    if isinstance(point, (ManifoldParameter)):
+                        manifold = point.manifold
+                        c = point.c
+                    else:
+                        manifold = _default_manifold
+                        c = None
+                    if grad.is_sparse:
+                        raise RuntimeError(
+                                "Riemannian Adam does not support sparse gradients yet (PR is welcome)"
+                        )
+
+                    state = self.state[point]
+
+                    # State initialization
+                    if len(state) == 0:
+                        state["step"] = 0
+                        # Exponential moving average of gradient values
+                        state["exp_avg"] = torch.zeros_like(point)
+                        # Exponential moving average of squared gradient values
+                        state["exp_avg_sq"] = torch.zeros_like(point)
+                        if amsgrad:
+                            # Maintains max of all exp. moving avg. of sq. grad. values
+                            state["max_exp_avg_sq"] = torch.zeros_like(point)
+                    # make local variables for easy access
+                    exp_avg = state["exp_avg"]
+                    exp_avg_sq = state["exp_avg_sq"]
+                    # actual step
+                    grad.add_(point, alpha=weight_decay)
+                    grad = manifold.egrad2rgrad(point, grad, c)
+
+                    exp_avg.mul_(betas[0]).add_(grad, alpha=1 - betas[0])
+                    exp_avg_sq.mul_(betas[1]).add_(
+                            manifold.inner(point, c, grad, keepdim=True), alpha=1 - betas[1]
+                    )
+                    if amsgrad:
+                        max_exp_avg_sq = state["max_exp_avg_sq"]
+                        # Maintains the maximum of all 2nd moment running avg. till now
+                        torch.max(max_exp_avg_sq, exp_avg_sq, out=max_exp_avg_sq)
+                        # Use the max. for normalizing running avg. of gradient
+                        denom = max_exp_avg_sq.sqrt().add_(eps)
+                    else:
+                        denom = exp_avg_sq.sqrt().add_(eps)
+                    group["step"] += 1
+                    bias_correction1 = 1 - betas[0] ** group["step"]
+                    bias_correction2 = 1 - betas[1] ** group["step"]
+                    step_size = (
+                        learning_rate * bias_correction2 ** 0.5 / bias_correction1
+                    )
+
+                    # copy the state, we need it for retraction
+                    # get the direction for ascend
+                    direction = exp_avg / denom
+                    # transport the exponential averaging to the new point
+                    new_point = manifold.proj(manifold.expmap(-step_size * direction, point, c), c)
+                    exp_avg_new = manifold.ptransp(point, new_point, exp_avg, c)
+                    # use copy only for user facing point
+                    copy_or_set_(point, new_point)
+                    exp_avg.set_(exp_avg_new)
+
+                    group["step"] += 1
+                if self._stabilize is not None and group["step"] % self._stabilize == 0:
+                    self.stabilize_group(group)
+        return loss
+
+    @torch.no_grad()
+    def stabilize_group(self, group):
+        for p in group["params"]:
+            if not isinstance(p, ManifoldParameter):
+                continue
+            state = self.state[p]
+            if not state:  # due to None grads
+                continue
+            manifold = p.manifold
+            c = p.c
+            exp_avg = state["exp_avg"]
+            copy_or_set_(p, manifold.proj(p, c))
+            exp_avg.set_(manifold.proj_tan(exp_avg, p, c))
diff --git a/HGCAE/utils/__init__.py b/HGCAE/utils/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/HGCAE/utils/data_utils.py b/HGCAE/utils/data_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..8739c8a56bc1e8bf6b7dad1e98e88ccce79a28d7
--- /dev/null
+++ b/HGCAE/utils/data_utils.py
@@ -0,0 +1,134 @@
+"""Data utils functions for pre-processing and data loading."""
+import os
+import pickle as pkl
+import sys
+
+import networkx as nx
+import numpy as np
+import scipy.sparse as sp
+import torch
+
+from scipy import sparse
+import logging
+
+import pandas as pd
+
+def process_data(args, adj , features, labels):
+    ## Load data
+    data = {'adj_train': sp.csr_matrix(adj), 'features': features, 'labels': labels}
+    adj = data['adj_train']
+
+    ## NOTE: the edge masking below can take a long time on large graphs
+
+    adj_train, train_edges, train_edges_false, val_edges, val_edges_false, test_edges, test_edges_false = mask_edges(
+                adj, args.val_prop, args.test_prop, args.seed
+        )
+
+    data['adj_train'] = adj_train
+    data['train_edges'], data['train_edges_false'] = train_edges, train_edges_false
+    if args.val_prop + args.test_prop > 0:
+        data['val_edges'], data['val_edges_false'] = val_edges, val_edges_false
+        data['test_edges'], data['test_edges_false'] = test_edges, test_edges_false
+    all_info=""
+
+    ## Adj matrix
+    adj = data['adj_train']
+    data['adj_train_enc'], data['features'] = process(
+            data['adj_train'], data['features'], args.normalize_adj, args.normalize_feats
+    )
+
+    if args.lambda_rec:
+        data['adj_train_dec'] = rowwise_normalizing(data['adj_train'])
+
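+    # Two-hop adjacency (self-loops and one-hop edges removed), symmetrically normalized.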
+    adj_2hop = get_adj_2hop(adj)
+    data['adj_train_enc_2hop'] = symmetric_laplacian_smoothing(adj_2hop)
+
+    # NOTE : Re-adjust labels
+    # Some datasets omit class `0`, so n_classes inferred elsewhere can disagree with `max(labels) + 1`
+    args.n_classes = int(data['labels'].max() + 1)
+
+    data['idx_all'] =  range(data['features'].shape[0])
+    data_info = "Dataset {} Loaded : dimensions are adj:{}, edges:{}, features:{}, labels:{}\n".format(
+            'ddos2019', data['adj_train'].shape, data['adj_train'].sum(), data['features'].shape, data['labels'].shape)
+    data['info'] = data_info
+    return data
+
+def process(adj, features, normalize_adj, normalize_feats):
+    if sp.isspmatrix(features):
+        features = np.array(features.todense())
+    if normalize_feats:
+        features = normalize(features)
+    features = torch.Tensor(features)
+    if normalize_adj:
+        adj = normalize(adj + sp.eye(adj.shape[0]))
+    return adj, features
+
+def get_adj_2hop(adj):
+    adj_self = adj + sp.eye(adj.shape[0])
+    adj_2hop = adj_self.dot(adj_self)
+    adj_2hop.data = np.clip(adj_2hop.data, 0, 1)
+    adj_2hop = adj_2hop - sp.eye(adj.shape[0]) - adj
+    return adj_2hop
+
+def normalize(mx):
+    """Row-normalize sparse matrix."""
+    rowsum = np.array(mx.sum(1))
+    r_inv = np.power(rowsum, -1).flatten()
+    r_inv[np.isinf(r_inv)] = 0.
+    r_mat_inv = sp.diags(r_inv)
+    mx = r_mat_inv.dot(mx)
+    return mx
+
+def symmetric_laplacian_smoothing(adj):
+    """Symmetrically normalize adjacency matrix."""
+    adj = adj + sp.eye(adj.shape[0])  # self-loop
+
+    adj = sp.coo_matrix(adj)
+    rowsum = np.array(adj.sum(1))
+    d_inv_sqrt = np.power(rowsum, -0.5).flatten()
+    d_inv_sqrt[np.isinf(d_inv_sqrt)] = 0.
+    d_mat_inv_sqrt = sp.diags(d_inv_sqrt)
+    return adj.dot(d_mat_inv_sqrt).transpose().dot(d_mat_inv_sqrt).tocoo()
+
+def rowwise_normalizing(adj):
+    """Row-wise normalize adjacency matrix."""
+    adj = adj + sp.eye(adj.shape[0])  # self-loop
+    adj = sp.coo_matrix(adj)
+    rowsum = np.array(adj.sum(1))
+    d_inv = np.power(rowsum, -1.0).flatten()
+    d_inv[np.isinf(d_inv)] = 0.
+    d_mat_inv = sp.diags(d_inv)
+    return adj.dot(d_mat_inv).transpose().tocoo()
+
+def sparse_mx_to_torch_sparse_tensor(sparse_mx):
+    """Convert a scipy sparse matrix to a torch sparse tensor."""
+    sparse_mx = sparse_mx.tocoo()
+    indices = torch.from_numpy(
+            np.vstack((sparse_mx.row, sparse_mx.col)).astype(np.int64)
+    )
+    values = torch.Tensor(sparse_mx.data)
+    shape = torch.Size(sparse_mx.shape)
+    return torch.sparse_coo_tensor(indices, values, shape)
+
+def mask_edges(adj, val_prop, test_prop, seed):
+    np.random.seed(seed)  # get tp edges
+    x, y = sp.triu(adj).nonzero()
+    pos_edges = np.array(list(zip(x, y)))
+    np.random.shuffle(pos_edges)
+    # get tn (negative) edges from the complement of the adjacency matrix;
+    # densifying the complement makes this step O(n^2) in time and memory
+    x, y = sp.triu(sp.csr_matrix(1. - adj.toarray())).nonzero()
+    neg_edges = np.array(list(zip(x, y)))
+    np.random.shuffle(neg_edges)
+
+    m_pos = len(pos_edges)
+    n_val = int(m_pos * val_prop)
+    n_test = int(m_pos * test_prop)
+    val_edges, test_edges, train_edges = pos_edges[:n_val], pos_edges[n_val:n_test + n_val], pos_edges[n_test + n_val:]
+    val_edges_false, test_edges_false = neg_edges[:n_val], neg_edges[n_val:n_test + n_val]
+    train_edges_false = np.concatenate([neg_edges, val_edges, test_edges], axis=0)
+    adj_train = sp.csr_matrix((np.ones(train_edges.shape[0]), (train_edges[:, 0], train_edges[:, 1])), shape=adj.shape)
+    adj_train = adj_train + adj_train.T
+    return adj_train, torch.LongTensor(train_edges), torch.LongTensor(train_edges_false), torch.LongTensor(val_edges), \
+           torch.LongTensor(val_edges_false), torch.LongTensor(test_edges), torch.LongTensor(
+            test_edges_false)  
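+
+# Worked example (added for clarity): with val_prop = test_prop = 0.3 and
+# m_pos positive (upper-triangular) edges, n_val = n_test = int(0.3 * m_pos);
+# the remaining ~40% of positive edges (plus their transposes) form
+# `adj_train`, while negative edges are sampled from the complement of the
+# adjacency matrix under the same seed.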
diff --git a/HGCAE/utils/eval_utils.py b/HGCAE/utils/eval_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..e2793a673e8d9a19d78be82733c652e568cec985
--- /dev/null
+++ b/HGCAE/utils/eval_utils.py
@@ -0,0 +1,11 @@
+from sklearn.metrics import average_precision_score, accuracy_score, f1_score
+
+def acc_f1(output, labels, average='binary'):
+    preds = output.max(1)[1].type_as(labels)
+    if preds.is_cuda:
+        preds = preds.cpu()
+        labels = labels.cpu()
+    accuracy = accuracy_score(labels,preds)
+    f1 = f1_score(labels,preds, average=average)
+    return accuracy, f1
+
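+# Hedged usage sketch (added): `output` holds (N, C) scores and predictions
+# are the row-wise argmax:
+#
+#   >>> import torch
+#   >>> out = torch.tensor([[0.9, 0.1], [0.2, 0.8], [0.6, 0.4]])
+#   >>> labels = torch.tensor([0, 1, 1])
+#   >>> acc_f1(out, labels)  # (accuracy, binary F1)
+#   (0.6666666666666666, 0.6666666666666666)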
diff --git a/HGCAE/utils/math_utils.py b/HGCAE/utils/math_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..56a0de2552fcc9ef35e0e933904f1b391d63f3ec
--- /dev/null
+++ b/HGCAE/utils/math_utils.py
@@ -0,0 +1,70 @@
+'''
+Code from HGCN (https://github.com/HazyResearch/hgcn/blob/master/utils/math_utils.py)
+'''
+import torch
+
+
+def cosh(x, clamp=15):
+    return x.clamp(-clamp, clamp).cosh()
+
+
+def sinh(x, clamp=15):
+    return x.clamp(-clamp, clamp).sinh()
+
+
+def tanh(x, clamp=15):
+    return x.clamp(-clamp, clamp).tanh()
+
+
+def arcosh(x):
+    return Arcosh.apply(x)
+
+
+def arsinh(x):
+    return Arsinh.apply(x)
+
+
+def artanh(x):
+    return Artanh.apply(x)
+
+
+class Artanh(torch.autograd.Function):
+    @staticmethod
+    def forward(ctx, x):
+        x = x.clamp(-1 + 1e-15, 1 - 1e-15)
+        ctx.save_for_backward(x)
+        z = x.double()
+        return (torch.log_(1 + z).sub_(torch.log_(1 - z))).mul_(0.5).to(x.dtype)
+
+    @staticmethod
+    def backward(ctx, grad_output):
+        input, = ctx.saved_tensors
+        return grad_output / (1 - input ** 2)
+
+
+class Arsinh(torch.autograd.Function):
+    @staticmethod
+    def forward(ctx, x):
+        ctx.save_for_backward(x)
+        z = x.double()
+        return (z + torch.sqrt_(1 + z.pow(2))).clamp_min_(1e-15).log_().to(x.dtype)
+
+    @staticmethod
+    def backward(ctx, grad_output):
+        input, = ctx.saved_tensors
+        return grad_output / (1 + input ** 2) ** 0.5
+
+
+class Arcosh(torch.autograd.Function):
+    @staticmethod
+    def forward(ctx, x):
+        x = x.clamp(min=1 + 1e-7)
+        ctx.save_for_backward(x)
+        z = x.double()
+        return (z + torch.sqrt_(z.pow(2) - 1)).clamp_min_(1e-15).log_().to(x.dtype)
+
+    @staticmethod
+    def backward(ctx, grad_output):
+        input, = ctx.saved_tensors
+        return grad_output / (input ** 2 - 1) ** 0.5
+
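+# Hedged sanity check (added): within the clamped domains these autograd-safe
+# inverses round-trip with their torch counterparts:
+#
+#   >>> import torch
+#   >>> torch.allclose(torch.tanh(artanh(torch.tensor([0.5]))), torch.tensor([0.5]))
+#   True
+#   >>> torch.allclose(arcosh(torch.cosh(torch.tensor([2.0]))), torch.tensor([2.0]))
+#   True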
diff --git a/HGCAE/utils/train_utils.py b/HGCAE/utils/train_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..3d207f71cd919d8f446147eafcf48bdd4f91e141
--- /dev/null
+++ b/HGCAE/utils/train_utils.py
@@ -0,0 +1,199 @@
+import os
+import sys
+
+import numpy as np
+import torch
+import torch.nn.functional as F
+import torch.nn.modules.loss
+import argparse
+
+def format_metrics(metrics, split):
+    """Format metric in metric dict for logging."""
+    return " ".join(
+            ["{}_{}: {:.8f}".format(split, metric_name, metric_val) for metric_name, metric_val in metrics.items()])
+
+def get_dir_name(models_dir):
+    """Gets a directory to save the model.
+
+    If the directory already exists, then append a new integer to the end of
+    it. This method is useful so that we don't overwrite existing models
+    when launching new jobs.
+
+    Args:
+        models_dir: The directory where all the models are.
+
+    Returns:
+        The name of a new directory to save the training logs and model weights.
+    """
+    if not os.path.exists(models_dir):
+        save_dir = os.path.join(models_dir, '0')
+        os.makedirs(save_dir)
+    else:
+        existing_dirs = np.array(
+                [
+                    d
+                    for d in os.listdir(models_dir)
+                    if os.path.isdir(os.path.join(models_dir, d))
+                    ]
+        ).astype(int)  # np.int was removed from NumPy; use the builtin int
+        if len(existing_dirs) > 0:
+            dir_id = str(existing_dirs.max() + 1)
+        else:
+            dir_id = "1"
+        save_dir = os.path.join(models_dir, dir_id)
+        os.makedirs(save_dir)
+    return save_dir
+
+
+def add_flags_from_config(parser, config_dict):
+    """
+    Adds a flag (and default value) to an ArgumentParser for each parameter in a config
+    """
+
+    def OrNone(default):
+        def func(x):
+            # Convert "none" to proper None object
+            if x.lower() == "none":
+                return None
+            # If default is None (and x is not None), return x without conversion as str
+            elif default is None:
+                return str(x)
+            # Otherwise, default has non-None type; convert x to that type
+            else:
+                return type(default)(x)
+
+        return func
+
+    for param in config_dict:
+        default, description = config_dict[param]
+        try:
+            if isinstance(default, dict):
+                parser = add_flags_from_config(parser, default)
+            elif isinstance(default, list):
+                if len(default) > 0:
+                    # pass a list as argument
+                    parser.add_argument(
+                            f"--{param}",
+                            action="append",
+                            type=type(default[0]),
+                            default=default,
+                            help=description
+                    )
+                else:
+                    # empty-list default: append values without a type cast
+                    parser.add_argument(f"--{param}", action="append", default=default, help=description)
+            else:
+                parser.add_argument(f"--{param}", type=OrNone(default), default=default, help=description)
+        except argparse.ArgumentError:
+            print(
+                f"Could not add flag for param {param} because it was already present."
+            )
+    return parser
+
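+# Hedged usage sketch (added): `OrNone` keeps a typed default while accepting
+# the literal string "none" on the command line:
+#
+#   >>> p = argparse.ArgumentParser()
+#   >>> p = add_flags_from_config(p, {'lr': (0.01, 'learning rate')})
+#   >>> p.parse_args(['--lr', 'none']).lr is None
+#   True
+#   >>> p.parse_args(['--lr', '0.1']).lr
+#   0.1
+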
+
+
+import subprocess
+def check_gpustats(columns=None):
+    if columns is None:  # guard: joining None would raise TypeError
+        columns = ['index', 'memory.used']
+    query = r'nvidia-smi --query-gpu=%s --format=csv,noheader' % ','.join(columns)
+    smi_output = subprocess.check_output(query, shell=True).decode().strip()
+
+    gpustats = []
+    for line in smi_output.split('\n'):
+        if not line:
+            continue
+        gpustat = line.split(',')
+        gpustats.append({k: v.strip() for k, v in zip(columns, gpustat)})
+
+    return gpustats
+
+
+def assign_gpus(num_gpu, memory_threshold=1000):    # (MiB)
+    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
+
+    columns = ['index', 'memory.used']
+    gpustats = {i['index']: i['memory.used'] for i in check_gpustats(columns)}
+
+    available_gpus = []
+    for gpu in sorted(gpustats.keys()):
+        if int(gpustats.get(gpu).split(' ')[0]) < memory_threshold:
+            available_gpus.append(gpu)
+
+    if len(available_gpus) < num_gpu:
+        raise MemoryError('{} GPUs requested, but only {} available'.format(num_gpu, len(available_gpus)))
+
+    gpus_to_assign = available_gpus[:num_gpu]
+    # os.environ['CUDA_VISIBLE_DEVICES'] = ','.join(gpus_to_assign)
+    return gpus_to_assign
+
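+# Hedged usage note (added): `assign_gpus(1)` returns the indices (as
+# strings) of up to one GPU using less than `memory_threshold` MiB according
+# to `nvidia-smi`; it assumes the binary is on PATH and raises MemoryError
+# when too few GPUs qualify.
+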
+
+
+def create_args(*args):
+    # NOTE: the positional order below must match the keyword order used by
+    # the HGCAE constructor that calls this helper.
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--dim', type=int, default=args[0])
+    parser.add_argument('--hidden_dim', type=int, default=args[1])
+    parser.add_argument('--c', type=int, default=args[2])
+    parser.add_argument('--num_layers', type=int, default=args[3])
+    parser.add_argument('--bias', type=bool, default=args[4])
+    parser.add_argument('--act', type=str, default=args[5])
+    parser.add_argument('--grad_clip', type=float, default=args[6])
+    parser.add_argument('--optimizer', type=str, default=args[7])
+    parser.add_argument('--weight_decay', type=float, default=args[8])
+    parser.add_argument('--lr', type=float, default=args[9])
+    parser.add_argument('--gamma', type=float, default=args[10])
+    parser.add_argument('--lr_reduce_freq', type=int, default=args[11])
+    parser.add_argument('--cuda', type=int, default=args[12])
+    parser.add_argument('--epochs', type=int, default=args[13])
+    parser.add_argument('--min_epochs', type=int, default=args[14])
+    parser.add_argument('--patience', type=int, default=args[15])
+    parser.add_argument('--seed', type=int, default=args[16])
+    parser.add_argument('--log_freq', type=int, default=args[17])
+    parser.add_argument('--eval_freq', type=int, default=args[18])
+    parser.add_argument('--val_prop', type=float, default=args[19])
+    parser.add_argument('--test_prop', type=float, default=args[20])
+    parser.add_argument('--double_precision', type=int, default=args[21])
+    parser.add_argument('--dropout', type=float, default=args[22])
+    parser.add_argument('--lambda_rec', type=float, default=args[23])
+    parser.add_argument('--lambda_lp', type=float, default=args[24])
+    parser.add_argument('--num_dec_layers', type=int, default=args[25])
+    parser.add_argument('--use_att', type=bool, default=args[26])
+    parser.add_argument('--att_type', type=str, default=args[27])
+    parser.add_argument('--att_logit', type=str, default=args[28])
+    parser.add_argument('--beta', type=float, default=args[29])
+    parser.add_argument('--classifier', type=str, default=args[30])
+    parser.add_argument('--clusterer', type=str, default=args[31])
+    parser.add_argument('--normalize_adj', type=bool, default=args[32])
+    parser.add_argument('--normalize_feats', type=bool, default=args[33])
+    flags, unknown = parser.parse_known_args()
+    return flags
+
+
+
+from Ghypeddings.classifiers import *
+def perform_task(args,X,y):
+    if(args.classifier and args.clusterer):
+        print('Choose either a classifier or a clusterer, not both!')
+        sys.exit(1)
+    elif(args.classifier):
+        if(args.classifier == 'svm'):
+            return SVM(X,y,args.test_prop,args.seed)
+        elif(args.classifier == 'mlp'):
+            return mlp(X,y,1,10)
+        elif(args.classifier == 'decision tree'):
+            return decision_tree(X,y,args.test_prop,args.seed)
+        elif(args.classifier == 'random forest'):
+            return random_forest(X,y,args.test_prop,args.seed)
+        elif(args.classifier == 'adaboost'):
+            return adaboost(X,y,args.test_prop,args.seed)
+        elif(args.classifier == 'knn'):
+            return KNN(X,y,args.test_prop,args.seed)
+        elif(args.classifier == 'naive bayes'):
+            return naive_bayes(X,y,args.test_prop,args.seed)
+        else:
+            raise NotImplementedError
+    elif(args.clusterer):
+        pass
+    else:
+        return 99, 99, 99, 99, 99  # sentinel metrics when neither a classifier nor a clusterer is set
\ No newline at end of file
diff --git a/HGCN/.gitignore b/HGCN/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..ba0430d26c996e7f078385407f959c96c271087c
--- /dev/null
+++ b/HGCN/.gitignore
@@ -0,0 +1 @@
+__pycache__/
\ No newline at end of file
diff --git a/HGCN/__init__.py b/HGCN/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..bfa83a015b9025ddbca2b7c1ed543c66fd3af3d9
--- /dev/null
+++ b/HGCN/__init__.py
@@ -0,0 +1,2 @@
+from __future__ import print_function
+from __future__ import division
diff --git a/HGCN/hgcn.py b/HGCN/hgcn.py
new file mode 100644
index 0000000000000000000000000000000000000000..81773945e3b9dfb04fc84d9fec7acb0b0db62b88
--- /dev/null
+++ b/HGCN/hgcn.py
@@ -0,0 +1,163 @@
+from __future__ import division
+from __future__ import print_function
+
+import logging
+import os
+import time
+
+import numpy as np
+import Ghypeddings.HGCN.optimizers as optimizers
+import torch
+from Ghypeddings.HGCN.models.base_models import NCModel
+from Ghypeddings.HGCN.utils.data_utils import process_data
+from Ghypeddings.HGCN.utils.train_utils import format_metrics
+from Ghypeddings.HGCN.utils.train_utils import create_args
+import warnings
+warnings.filterwarnings('ignore')
+
+
+class HGCN:
+    def __init__(self,
+                adj,
+                features,
+                labels,
+                dim,
+                c=None,
+                num_layers=2,
+                bias=True,
+                act='leaky_relu',
+                select_manifold='Hyperboloid',
+                grad_clip=None,
+                optimizer='RiemannianAdam',
+                weight_decay=0.01,
+                lr=.1,
+                gamma=0.5,
+                lr_reduce_freq=200,
+                cuda=0,
+                epochs=50,
+                min_epochs=50,
+                patience=None,
+                seed=42,
+                log_freq=0,
+                eval_freq=1,
+                val_prop=.3,
+                test_prop=0.3,
+                double_precision=0,
+                dropout=0.1,
+                use_att=True,
+                alpha=0.2,
+                local_agg=False,
+                normalize_adj=False,
+                normalize_feats=True
+                ):
+        
+        self.args = create_args(dim,c,num_layers,bias,act,select_manifold,grad_clip,optimizer,weight_decay,lr,gamma,lr_reduce_freq,cuda,epochs,min_epochs,patience,seed,log_freq,eval_freq,val_prop,test_prop,double_precision,dropout,use_att,alpha,local_agg,normalize_adj,normalize_feats)
+        self.args.n_nodes = adj.shape[0]
+        self.args.feat_dim = features.shape[1]
+        self.args.n_classes = len(np.unique(labels))
+        self.data = process_data(self.args,adj,features,labels)
+
+        np.random.seed(self.args.seed)
+        torch.manual_seed(self.args.seed)
+        if int(self.args.double_precision):
+            torch.set_default_dtype(torch.float64)
+        if int(self.args.cuda) >= 0:
+            torch.cuda.manual_seed(self.args.seed)
+        self.args.device = 'cuda:' + str(self.args.cuda) if int(self.args.cuda) >= 0 else 'cpu'
+        self.args.patience = self.args.epochs if not self.args.patience else int(self.args.patience)
+        if not self.args.lr_reduce_freq:
+            self.args.lr_reduce_freq = self.args.epochs
+        self.model = NCModel(self.args)
+        self.optimizer = getattr(optimizers, self.args.optimizer)(params=self.model.parameters(), lr=self.args.lr,weight_decay=self.args.weight_decay)
+        self.lr_scheduler = torch.optim.lr_scheduler.StepLR(
+            self.optimizer,
+            step_size=int(self.args.lr_reduce_freq),
+            gamma=float(self.args.gamma)
+        )
+        if self.args.cuda is not None and int(self.args.cuda) >= 0:
+            os.environ['CUDA_VISIBLE_DEVICES'] = str(self.args.cuda)
+            self.model = self.model.to(self.args.device)
+            for x, val in self.data.items():
+                if torch.is_tensor(self.data[x]):
+                    self.data[x] = self.data[x].to(self.args.device)
+        self.best_emb = None
+
+    def fit(self):
+        logging.getLogger().setLevel(logging.INFO)
+        logging.info(f'Using: {self.args.device}')
+        logging.info(str(self.model))
+        tot_params = sum([np.prod(p.size()) for p in self.model.parameters()])
+        logging.info(f"Total number of parameters: {tot_params}")
+
+        t_total = time.time()
+        counter = 0
+        best_val_metrics = self.model.init_metric_dict()
+
+        best_losses = []
+        real_losses = []
+
+        for epoch in range(self.args.epochs):
+            t = time.time()
+            self.model.train()
+            self.optimizer.zero_grad()
+            embeddings = self.model.encode(self.data['features'], self.data['adj_train_norm'])
+            train_metrics = self.model.compute_metrics(embeddings, self.data, 'train')
+            train_metrics['loss'].backward()
+            if self.args.grad_clip is not None:
+                max_norm = float(self.args.grad_clip)
+                all_params = list(self.model.parameters())
+                for param in all_params:
+                    torch.nn.utils.clip_grad_norm_(param, max_norm)
+            self.optimizer.step()
+            self.lr_scheduler.step()
+
+            real_losses.append(train_metrics['loss'].item())
+            if(len(best_losses) == 0):
+                best_losses.append(real_losses[0])
+            elif (best_losses[-1] > real_losses[-1]):
+                best_losses.append(real_losses[-1])
+            else:
+                best_losses.append(best_losses[-1])
+
+            if self.args.log_freq and (epoch + 1) % self.args.log_freq == 0:  # guard: log_freq defaults to 0
+                logging.info(" ".join(['Epoch: {:04d}'.format(epoch + 1),
+                                    'lr: {}'.format(self.lr_scheduler.get_last_lr()[0]),
+                                    format_metrics(train_metrics, 'train'),
+                                    'time: {:.4f}s'.format(time.time() - t)
+                                    ]))
+                
+            if (epoch + 1) % self.args.eval_freq == 0:
+                self.model.eval()
+                embeddings = self.model.encode(self.data['features'], self.data['adj_train_norm'])
+                val_metrics = self.model.compute_metrics(embeddings, self.data, 'val')
+                if self.args.log_freq and (epoch + 1) % self.args.log_freq == 0:
+                    logging.info(" ".join(['Epoch: {:04d}'.format(epoch + 1), format_metrics(val_metrics, 'val')]))
+                if self.model.has_improved(best_val_metrics, val_metrics):
+                    self.best_emb = embeddings
+                    best_val_metrics = val_metrics
+                    counter = 0
+                else:
+                    counter += 1
+                    if counter == self.args.patience and epoch > self.args.min_epochs:
+                        logging.info("Early stopping")
+                        break
+
+        logging.info("Training Finished!")
+        logging.info("Total time elapsed: {:.4f}s".format(time.time() - t_total))
+        return {'real':real_losses,'best':best_losses},best_val_metrics['acc'],best_val_metrics['f1'],best_val_metrics['recall'],best_val_metrics['precision'],best_val_metrics['roc_auc'],time.time() - t_total
+
+    def predict(self):
+        self.model.eval()
+        embeddings = self.model.encode(self.data['features'], self.data['adj_train_norm'])
+        test_metrics = self.model.compute_metrics(embeddings, self.data, 'test')
+        return test_metrics['loss'].item(), test_metrics['acc'], test_metrics['f1'], test_metrics['recall'], test_metrics['precision'], test_metrics['roc_auc']
+
+    def save_embeddings(self):
+        c = self.model.decoder.c
+        tb_embeddings_euc = self.model.manifold.proj_tan0(self.model.manifold.logmap0(self.best_emb, c), c)
+        for_classification_hyp = np.hstack((self.best_emb.cpu().detach().numpy(),self.data['labels'].cpu().reshape(-1,1)))
+        for_classification_euc = np.hstack((tb_embeddings_euc.cpu().detach().numpy(),self.data['labels'].cpu().reshape(-1,1)))
+        hyp_file_path = os.path.join(os.getcwd(),'hgcn_embeddings_hyp.csv')
+        euc_file_path = os.path.join(os.getcwd(),'hgcn_embeddings_euc.csv')
+        np.savetxt(hyp_file_path, for_classification_hyp, delimiter=',')
+        np.savetxt(euc_file_path, for_classification_euc, delimiter=',')
\ No newline at end of file
diff --git a/HGCN/layers/__init__.py b/HGCN/layers/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/HGCN/layers/att_layers.py b/HGCN/layers/att_layers.py
new file mode 100644
index 0000000000000000000000000000000000000000..8414d8d48dffa4dca79e38ebeacc54f480b4def1
--- /dev/null
+++ b/HGCN/layers/att_layers.py
@@ -0,0 +1,144 @@
+"""Attention layers (some modules are copied from https://github.com/Diego999/pyGAT."""
+import numpy as np
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+
+class DenseAtt(nn.Module):
+    def __init__(self, in_features, dropout):
+        super(DenseAtt, self).__init__()
+        self.dropout = dropout
+        self.linear = nn.Linear(2 * in_features, 1, bias=True)
+        self.in_features = in_features
+
+    def forward(self, x, adj):
+        n = x.size(0)
+        # n x 1 x d
+        x_left = torch.unsqueeze(x, 1)
+        x_left = x_left.expand(-1, n, -1)
+        # 1 x n x d
+        x_right = torch.unsqueeze(x, 0)
+        x_right = x_right.expand(n, -1, -1)
+
+        x_cat = torch.cat((x_left, x_right), dim=2)
+        att_adj = self.linear(x_cat).squeeze()
+        att_adj = torch.sigmoid(att_adj)  # F.sigmoid is deprecated
+        att_adj = torch.mul(adj.to_dense(), att_adj)
+        return att_adj
+
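+# Note (added): DenseAtt's pairwise expansion materializes an (n, n, 2d)
+# tensor, so dense attention costs O(n^2 * d) memory and is practical only
+# for moderately sized graphs.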
+
+class SpecialSpmmFunction(torch.autograd.Function):
+    """Special function for only sparse region backpropataion layer."""
+
+    @staticmethod
+    def forward(ctx, indices, values, shape, b):
+        assert indices.requires_grad == False
+        a = torch.sparse_coo_tensor(indices, values, shape)
+        ctx.save_for_backward(a, b)
+        ctx.N = shape[0]
+        return torch.matmul(a, b)
+
+    @staticmethod
+    def backward(ctx, grad_output):
+        a, b = ctx.saved_tensors
+        grad_values = grad_b = None
+        if ctx.needs_input_grad[1]:
+            grad_a_dense = grad_output.matmul(b.t())
+            edge_idx = a._indices()[0, :] * ctx.N + a._indices()[1, :]
+            grad_values = grad_a_dense.view(-1)[edge_idx]
+        if ctx.needs_input_grad[3]:
+            grad_b = a.t().matmul(grad_output)
+        return None, grad_values, None, grad_b
+
+
+class SpecialSpmm(nn.Module):
+    def forward(self, indices, values, shape, b):
+        return SpecialSpmmFunction.apply(indices, values, shape, b)
+
+
+class SpGraphAttentionLayer(nn.Module):
+    """
+    Sparse version GAT layer, similar to https://arxiv.org/abs/1710.10903
+    """
+
+    def __init__(self, in_features, out_features, dropout, alpha, activation):
+        super(SpGraphAttentionLayer, self).__init__()
+        self.in_features = in_features
+        self.out_features = out_features
+        self.alpha = alpha
+
+        self.W = nn.Parameter(torch.zeros(size=(in_features, out_features)))
+        nn.init.xavier_normal_(self.W.data, gain=1.414)
+
+        self.a = nn.Parameter(torch.zeros(size=(1, 2 * out_features)))
+        nn.init.xavier_normal_(self.a.data, gain=1.414)
+
+        self.dropout = nn.Dropout(dropout)
+        self.leakyrelu = nn.LeakyReLU(self.alpha)
+        self.special_spmm = SpecialSpmm()
+        self.act = activation
+
+    def forward(self, input, adj):
+        N = input.size()[0]
+        edge = adj._indices()
+
+        h = torch.mm(input, self.W)
+        # h: N x out
+        assert not torch.isnan(h).any()
+
+        # Self-attention on the nodes - Shared attention mechanism
+        edge_h = torch.cat((h[edge[0, :], :], h[edge[1, :], :]), dim=1).t()
+        # edge: 2*D x E
+
+        edge_e = torch.exp(-self.leakyrelu(self.a.mm(edge_h).squeeze()))
+        assert not torch.isnan(edge_e).any()
+        # edge_e: E
+
+        ones = torch.ones(size=(N, 1))
+        if h.is_cuda:
+            ones = ones.cuda()
+        e_rowsum = self.special_spmm(edge, edge_e, torch.Size([N, N]), ones)
+        # e_rowsum: N x 1
+
+        edge_e = self.dropout(edge_e)
+        # edge_e: E
+
+        h_prime = self.special_spmm(edge, edge_e, torch.Size([N, N]), h)
+        assert not torch.isnan(h_prime).any()
+        # h_prime: N x out
+
+        h_prime = h_prime.div(e_rowsum)
+        # h_prime: N x out
+        assert not torch.isnan(h_prime).any()
+        return self.act(h_prime)
+
+    def __repr__(self):
+        return self.__class__.__name__ + ' (' + str(self.in_features) + ' -> ' + str(self.out_features) + ')'
+
+
+class GraphAttentionLayer(nn.Module):
+    def __init__(self, input_dim, output_dim, dropout, activation, alpha, nheads, concat):
+        """Sparse version of GAT."""
+        super(GraphAttentionLayer, self).__init__()
+        self.dropout = dropout
+        self.output_dim = output_dim
+        self.attentions = [SpGraphAttentionLayer(input_dim,
+                                                 output_dim,
+                                                 dropout=dropout,
+                                                 alpha=alpha,
+                                                 activation=activation) for _ in range(nheads)]
+        self.concat = concat
+        for i, attention in enumerate(self.attentions):
+            self.add_module('attention_{}'.format(i), attention)
+
+    def forward(self, input):
+        x, adj = input
+        x = F.dropout(x, self.dropout, training=self.training)
+        if self.concat:
+            h = torch.cat([att(x, adj) for att in self.attentions], dim=1)
+        else:
+            h_cat = torch.cat([att(x, adj).view((-1, self.output_dim, 1)) for att in self.attentions], dim=2)
+            h = torch.mean(h_cat, dim=2)
+        h = F.dropout(h, self.dropout, training=self.training)
+        return (h, adj)
diff --git a/HGCN/layers/hyp_layers.py b/HGCN/layers/hyp_layers.py
new file mode 100644
index 0000000000000000000000000000000000000000..0913411c986dbd2b70f2f8e8a5ce216e816cb2be
--- /dev/null
+++ b/HGCN/layers/hyp_layers.py
@@ -0,0 +1,158 @@
+"""Hyperbolic layers."""
+import math
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import torch.nn.init as init
+from torch.nn.modules.module import Module
+
+from Ghypeddings.HGCN.layers.att_layers import DenseAtt
+
+
+def get_dim_act_curv(args):
+    """
+    Helper function to get dimension and activation at every layer.
+    :param args:
+    :return:
+    """
+    if not args.act:
+        act = lambda x: x
+    else:
+        act = getattr(F, args.act)
+    acts = [act] * (args.num_layers - 1)
+    dims = [args.feat_dim] + ([args.dim] * (args.num_layers - 1))
+    n_curvatures = args.num_layers - 1
+    if args.c is None:
+        # create list of trainable curvature parameters
+        curvatures = [nn.Parameter(torch.Tensor([1.])) for _ in range(n_curvatures)]
+    else:
+        # fixed curvature
+        curvatures = [torch.tensor([args.c]) for _ in range(n_curvatures)]
+        if not args.cuda == -1:
+            curvatures = [curv.to(args.device) for curv in curvatures]
+    return dims, acts, curvatures
+
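+# Hedged example (added): with num_layers=2, feat_dim=1433 and dim=16 this
+# returns dims=[1433, 16], a single activation, and a single curvature
+# (a trainable nn.Parameter when args.c is None, a fixed tensor otherwise).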
+
+class HyperbolicGraphConvolution(nn.Module):
+    """
+    Hyperbolic graph convolution layer.
+    """
+
+    def __init__(self, manifold, in_features, out_features, c_in, c_out, dropout, act, use_bias, use_att, local_agg):
+        super(HyperbolicGraphConvolution, self).__init__()
+        self.linear = HypLinear(manifold, in_features, out_features, c_in, dropout, use_bias)
+        self.agg = HypAgg(manifold, c_in, out_features, dropout, use_att, local_agg)
+        self.hyp_act = HypAct(manifold, c_in, c_out, act)
+
+    def forward(self, input):
+        x, adj = input
+        h = self.linear.forward(x)
+        h = self.agg.forward(h, adj)
+        h = self.hyp_act.forward(h)
+        output = h, adj
+        return output
+
+
+class HypLinear(nn.Module):
+    """
+    Hyperbolic linear layer.
+    """
+
+    def __init__(self, manifold, in_features, out_features, c, dropout, use_bias):
+        super(HypLinear, self).__init__()
+        self.manifold = manifold
+        self.in_features = in_features
+        self.out_features = out_features
+        self.c = c
+        self.dropout = dropout
+        self.use_bias = use_bias
+        self.bias = nn.Parameter(torch.Tensor(out_features))
+        self.weight = nn.Parameter(torch.Tensor(out_features, in_features))
+        self.reset_parameters()
+
+    def reset_parameters(self):
+        init.xavier_uniform_(self.weight, gain=math.sqrt(2))
+        init.constant_(self.bias, 0)
+
+    def forward(self, x):
+        drop_weight = F.dropout(self.weight, self.dropout, training=self.training)
+        mv = self.manifold.mobius_matvec(drop_weight, x, self.c)
+        res = self.manifold.proj(mv, self.c)
+        if self.use_bias:
+            bias = self.manifold.proj_tan0(self.bias.view(1, -1), self.c)
+            hyp_bias = self.manifold.expmap0(bias, self.c)
+            hyp_bias = self.manifold.proj(hyp_bias, self.c)
+            res = self.manifold.mobius_add(res, hyp_bias, c=self.c)
+            res = self.manifold.proj(res, self.c)
+        return res
+
+    def extra_repr(self):
+        return 'in_features={}, out_features={}, c={}'.format(
+            self.in_features, self.out_features, self.c
+        )
+
+
+class HypAgg(Module):
+    """
+    Hyperbolic aggregation layer.
+    """
+
+    def __init__(self, manifold, c, in_features, dropout, use_att, local_agg):
+        super(HypAgg, self).__init__()
+        self.manifold = manifold
+        self.c = c
+
+        self.in_features = in_features
+        self.dropout = dropout
+        self.local_agg = local_agg
+        self.use_att = use_att
+        if self.use_att:
+            self.att = DenseAtt(in_features, dropout)
+
+    def forward(self, x, adj):
+        x_tangent = self.manifold.logmap0(x, c=self.c)
+        if self.use_att:
+            if self.local_agg:
+                x_local_tangent = []
+                for i in range(x.size(0)):
+                    x_local_tangent.append(self.manifold.logmap(x[i], x, c=self.c))
+                x_local_tangent = torch.stack(x_local_tangent, dim=0)
+                adj_att = self.att(x_tangent, adj)
+                att_rep = adj_att.unsqueeze(-1) * x_local_tangent
+                support_t = torch.sum(att_rep, dim=1)  # reuse att_rep instead of recomputing the product
+                output = self.manifold.proj(self.manifold.expmap(x, support_t, c=self.c), c=self.c)
+                return output
+            else:
+                adj_att = self.att(x_tangent, adj)
+                support_t = torch.matmul(adj_att, x_tangent)
+        else:
+            support_t = torch.spmm(adj, x_tangent)
+        output = self.manifold.proj(self.manifold.expmap0(support_t, c=self.c), c=self.c)
+        return output
+
+    def extra_repr(self):
+        return 'c={}'.format(self.c)
+
+
+class HypAct(Module):
+    """
+    Hyperbolic activation layer.
+    """
+
+    def __init__(self, manifold, c_in, c_out, act):
+        super(HypAct, self).__init__()
+        self.manifold = manifold
+        self.c_in = c_in
+        self.c_out = c_out
+        self.act = act
+
+    def forward(self, x):
+        xt = self.act(self.manifold.logmap0(x, c=self.c_in))
+        xt = self.manifold.proj_tan0(xt, c=self.c_out)
+        return self.manifold.proj(self.manifold.expmap0(xt, c=self.c_out), c=self.c_out)
+
+    def extra_repr(self):
+        return 'c_in={}, c_out={}'.format(
+            self.c_in, self.c_out
+        )
diff --git a/HGCN/layers/layers.py b/HGCN/layers/layers.py
new file mode 100644
index 0000000000000000000000000000000000000000..c7682cfd33b7a7dcd723558c4722e93a92ff4510
--- /dev/null
+++ b/HGCN/layers/layers.py
@@ -0,0 +1,26 @@
+"""Euclidean layers."""
+import math
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from torch.nn.modules.module import Module
+from torch.nn.parameter import Parameter
+
+class Linear(Module):
+    """
+    Simple Linear layer with dropout.
+    """
+
+    def __init__(self, in_features, out_features, dropout, act, use_bias):
+        super(Linear, self).__init__()
+        self.dropout = dropout
+        self.linear = nn.Linear(in_features, out_features, use_bias)
+        self.act = act
+
+    def forward(self, x):
+        hidden = self.linear.forward(x)
+        hidden = F.dropout(hidden, self.dropout, training=self.training)
+        out = self.act(hidden)
+        return out
+    
\ No newline at end of file
diff --git a/HGCN/manifolds/__init__.py b/HGCN/manifolds/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..bd4b8d81f23de1d855c70804d1e1fb9441cdc960
--- /dev/null
+++ b/HGCN/manifolds/__init__.py
@@ -0,0 +1,4 @@
+from Ghypeddings.HGCN.manifolds.base import ManifoldParameter
+from Ghypeddings.HGCN.manifolds.hyperboloid import Hyperboloid
+from Ghypeddings.HGCN.manifolds.euclidean import Euclidean
+from Ghypeddings.HGCN.manifolds.poincare import PoincareBall
diff --git a/HGCN/manifolds/base.py b/HGCN/manifolds/base.py
new file mode 100644
index 0000000000000000000000000000000000000000..925d4a6b2a59dae47a3a8ca33a7dcdcb20e0f08e
--- /dev/null
+++ b/HGCN/manifolds/base.py
@@ -0,0 +1,88 @@
+"""Base manifold."""
+
+from torch.nn import Parameter
+
+
+class Manifold(object):
+    """
+    Abstract class to define operations on a manifold.
+    """
+
+    def __init__(self):
+        super().__init__()
+        self.eps = 10e-8
+
+    def sqdist(self, p1, p2, c):
+        """Squared distance between pairs of points."""
+        raise NotImplementedError
+
+    def egrad2rgrad(self, p, dp, c):
+        """Converts Euclidean Gradient to Riemannian Gradients."""
+        raise NotImplementedError
+
+    def proj(self, p, c):
+        """Projects point p on the manifold."""
+        raise NotImplementedError
+
+    def proj_tan(self, u, p, c):
+        """Projects u on the tangent space of p."""
+        raise NotImplementedError
+
+    def proj_tan0(self, u, c):
+        """Projects u on the tangent space of the origin."""
+        raise NotImplementedError
+
+    def expmap(self, u, p, c):
+        """Exponential map of u at point p."""
+        raise NotImplementedError
+
+    def logmap(self, p1, p2, c):
+        """Logarithmic map of point p1 at point p2."""
+        raise NotImplementedError
+
+    def expmap0(self, u, c):
+        """Exponential map of u at the origin."""
+        raise NotImplementedError
+
+    def logmap0(self, p, c):
+        """Logarithmic map of point p at the origin."""
+        raise NotImplementedError
+
+    def mobius_add(self, x, y, c, dim=-1):
+        """Adds points x and y."""
+        raise NotImplementedError
+
+    def mobius_matvec(self, m, x, c):
+        """Performs hyperboic martrix-vector multiplication."""
+        raise NotImplementedError
+
+    def init_weights(self, w, c, irange=1e-5):
+        """Initializes random weigths on the manifold."""
+        raise NotImplementedError
+
+    def inner(self, p, c, u, v=None, keepdim=False):
+        """Inner product for tangent vectors at point x."""
+        raise NotImplementedError
+
+    def ptransp(self, x, y, u, c):
+        """Parallel transport of u from x to y."""
+        raise NotImplementedError
+
+    def ptransp0(self, x, u, c):
+        """Parallel transport of u from the origin to y."""
+        raise NotImplementedError
+
+
+class ManifoldParameter(Parameter):
+    """
+    Subclass of torch.nn.Parameter for Riemannian optimization.
+    """
+    def __new__(cls, data, requires_grad, manifold, c):
+        return Parameter.__new__(cls, data, requires_grad)
+
+    def __init__(self, data, requires_grad, manifold, c):
+        self.c = c
+        self.manifold = manifold
+
+    def __repr__(self):
+        return '{} Parameter containing:\n'.format(self.manifold.name) + super(Parameter, self).__repr__()
diff --git a/HGCN/manifolds/euclidean.py b/HGCN/manifolds/euclidean.py
new file mode 100644
index 0000000000000000000000000000000000000000..4ec5e38b7ff2c01ef8fc33337d26a08dd9d3cfa9
--- /dev/null
+++ b/HGCN/manifolds/euclidean.py
@@ -0,0 +1,67 @@
+"""Euclidean manifold."""
+
+from Ghypeddings.HGCN.manifolds.base import Manifold
+
+
+class Euclidean(Manifold):
+    """
+    Euclidean Manifold class.
+    """
+
+    def __init__(self):
+        super(Euclidean, self).__init__()
+        self.name = 'Euclidean'
+
+    def normalize(self, p):
+        dim = p.size(-1)
+        p.view(-1, dim).renorm_(2, 0, 1.)
+        return p
+
+    def sqdist(self, p1, p2, c):
+        return (p1 - p2).pow(2).sum(dim=-1)
+
+    def egrad2rgrad(self, p, dp, c):
+        return dp
+
+    def proj(self, p, c):
+        return p
+
+    def proj_tan(self, u, p, c):
+        return u
+
+    def proj_tan0(self, u, c):
+        return u
+
+    def expmap(self, u, p, c):
+        return p + u
+
+    def logmap(self, p1, p2, c):
+        return p2 - p1
+
+    def expmap0(self, u, c):
+        return u
+
+    def logmap0(self, p, c):
+        return p
+
+    def mobius_add(self, x, y, c, dim=-1):
+        return x + y
+
+    def mobius_matvec(self, m, x, c):
+        mx = x @ m.transpose(-1, -2)
+        return mx
+
+    def init_weights(self, w, c, irange=1e-5):
+        w.data.uniform_(-irange, irange)
+        return w
+
+    def inner(self, p, c, u, v=None, keepdim=False):
+        if v is None:
+            v = u
+        return (u * v).sum(dim=-1, keepdim=keepdim)
+
+    def ptransp(self, x, y, v, c):
+        return v
+
+    def ptransp0(self, x, v, c):
+        return x + v
diff --git a/HGCN/manifolds/hyperboloid.py b/HGCN/manifolds/hyperboloid.py
new file mode 100644
index 0000000000000000000000000000000000000000..d0147001c24330e86264bbb009ff2a6a2c8986e0
--- /dev/null
+++ b/HGCN/manifolds/hyperboloid.py
@@ -0,0 +1,155 @@
+"""Hyperboloid manifold."""
+
+import torch
+
+from Ghypeddings.HGCN.manifolds.base import Manifold
+from Ghypeddings.HGCN.utils.math_utils import arcosh, cosh, sinh 
+
+
+class Hyperboloid(Manifold):
+    """
+    Hyperboloid manifold class.
+
+    We use the following convention: -x0^2 + x1^2 + ... + xd^2 = -K
+
+    c = 1 / K is the hyperbolic curvature. 
+    """
+
+    def __init__(self):
+        super(Hyperboloid, self).__init__()
+        self.name = 'Hyperboloid'
+        self.eps = {torch.float32: 1e-7, torch.float64: 1e-15}
+        self.min_norm = 1e-15
+        self.max_norm = 1e6
+
+    def minkowski_dot(self, x, y, keepdim=True):
+        res = torch.sum(x * y, dim=-1) - 2 * x[..., 0] * y[..., 0]
+        if keepdim:
+            res = res.view(res.shape + (1,))
+        return res
+
+    def minkowski_norm(self, u, keepdim=True):
+        dot = self.minkowski_dot(u, u, keepdim=keepdim)
+        return torch.sqrt(torch.clamp(dot, min=self.eps[u.dtype]))
+
+    def sqdist(self, x, y, c):
+        K = 1. / c
+        prod = self.minkowski_dot(x, y)
+        theta = torch.clamp(-prod / K, min=1.0 + self.eps[x.dtype])
+        sqdist = K * arcosh(theta) ** 2
+        # clamp distance to avoid nans in Fermi-Dirac decoder
+        return torch.clamp(sqdist, max=50.0)
+
+    def proj(self, x, c):
+        K = 1. / c
+        d = x.size(-1) - 1
+        y = x.narrow(-1, 1, d)
+        y_sqnorm = torch.norm(y, p=2, dim=1, keepdim=True) ** 2 
+        mask = torch.ones_like(x)
+        mask[:, 0] = 0
+        vals = torch.zeros_like(x)
+        vals[:, 0:1] = torch.sqrt(torch.clamp(K + y_sqnorm, min=self.eps[x.dtype]))
+        return vals + mask * x
+
+    def proj_tan(self, u, x, c):
+        K = 1. / c
+        d = x.size(-1) - 1
+        ux = torch.sum(x.narrow(-1, 1, d) * u.narrow(-1, 1, d), dim=1, keepdim=True)
+        mask = torch.ones_like(u)
+        mask[:, 0] = 0
+        vals = torch.zeros_like(u)
+        if x.dim() == 1:
+            x = x.unsqueeze(0)
+        vals[:, 0:1] = ux / torch.clamp(x[:, 0:1], min=self.eps[x.dtype])
+        return vals + mask * u
+
+    def proj_tan0(self, u, c):
+        narrowed = u.narrow(-1, 0, 1)
+        vals = torch.zeros_like(u)
+        vals[:, 0:1] = narrowed
+        return u - vals
+
+    def expmap(self, u, x, c):
+        K = 1. / c
+        sqrtK = K ** 0.5
+        normu = self.minkowski_norm(u)
+        normu = torch.clamp(normu, max=self.max_norm)
+        theta = normu / sqrtK
+        theta = torch.clamp(theta, min=self.min_norm)
+        result = cosh(theta) * x + sinh(theta) * u / theta
+        return self.proj(result, c)
+        
+    def logmap(self, x, y, c):
+        K = 1. / c
+        xy = torch.clamp(self.minkowski_dot(x, y) + K, max=-self.eps[x.dtype]) - K
+        u = y + xy * x * c
+        normu = self.minkowski_norm(u)
+        normu = torch.clamp(normu, min=self.min_norm)
+        dist = self.sqdist(x, y, c) ** 0.5
+        result = dist * u / normu
+        return self.proj_tan(result, x, c)
+
+    def expmap0(self, u, c):
+        K = 1. / c
+        sqrtK = K ** 0.5
+        d = u.size(-1) - 1
+        x = u.narrow(-1, 1, d).view(-1, d)
+        x_norm = torch.norm(x, p=2, dim=1, keepdim=True)
+        x_norm = torch.clamp(x_norm, min=self.min_norm)
+        theta = x_norm / sqrtK
+        res = torch.ones_like(u)
+        res[:, 0:1] = sqrtK * cosh(theta)
+        res[:, 1:] = sqrtK * sinh(theta) * x / x_norm
+        return self.proj(res, c)
+
+    def logmap0(self, x, c):
+        K = 1. / c
+        sqrtK = K ** 0.5
+        d = x.size(-1) - 1
+        y = x.narrow(-1, 1, d).view(-1, d)
+        y_norm = torch.norm(y, p=2, dim=1, keepdim=True)
+        y_norm = torch.clamp(y_norm, min=self.min_norm)
+        res = torch.zeros_like(x)
+        theta = torch.clamp(x[:, 0:1] / sqrtK, min=1.0 + self.eps[x.dtype])
+        res[:, 1:] = sqrtK * arcosh(theta) * y / y_norm
+        return res
+
+    def mobius_add(self, x, y, c):
+        u = self.logmap0(y, c)
+        v = self.ptransp0(x, u, c)
+        return self.expmap(v, x, c)
+
+    def mobius_matvec(self, m, x, c):
+        u = self.logmap0(x, c)
+        mu = u @ m.transpose(-1, -2)
+        return self.expmap0(mu, c)
+
+    def ptransp(self, x, y, u, c):
+        logxy = self.logmap(x, y, c)
+        logyx = self.logmap(y, x, c)
+        sqdist = torch.clamp(self.sqdist(x, y, c), min=self.min_norm)
+        alpha = self.minkowski_dot(logxy, u) / sqdist
+        res = u - alpha * (logxy + logyx)
+        return self.proj_tan(res, y, c)
+
+    def ptransp0(self, x, u, c):
+        K = 1. / c
+        sqrtK = K ** 0.5
+        x0 = x.narrow(-1, 0, 1)
+        d = x.size(-1) - 1
+        y = x.narrow(-1, 1, d)
+        y_norm = torch.clamp(torch.norm(y, p=2, dim=1, keepdim=True), min=self.min_norm)
+        y_normalized = y / y_norm
+        v = torch.ones_like(x)
+        v[:, 0:1] = - y_norm 
+        v[:, 1:] = (sqrtK - x0) * y_normalized
+        alpha = torch.sum(y_normalized * u[:, 1:], dim=1, keepdim=True) / sqrtK
+        res = u - alpha * v
+        return self.proj_tan(res, x, c)
+
+    def to_poincare(self, x, c):
+        K = 1. / c
+        sqrtK = K ** 0.5
+        d = x.size(-1) - 1
+        return sqrtK * x.narrow(-1, 1, d) / (x[:, 0:1] + sqrtK)
+
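+# Hedged sanity check (added): expmap0 and logmap0 are inverse maps at the
+# origin, so a tangent vector with zero time-like coordinate round-trips:
+#
+#   >>> import torch
+#   >>> man, c = Hyperboloid(), torch.tensor([1.])
+#   >>> u = torch.tensor([[0., 0.3, -0.2]])
+#   >>> torch.allclose(man.logmap0(man.expmap0(u, c), c), u, atol=1e-6)
+#   True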
diff --git a/HGCN/manifolds/poincare.py b/HGCN/manifolds/poincare.py
new file mode 100644
index 0000000000000000000000000000000000000000..601b5808980bfbb3dcff40c5354f13a1ca37e67c
--- /dev/null
+++ b/HGCN/manifolds/poincare.py
@@ -0,0 +1,145 @@
+"""Poincare ball manifold."""
+
+import torch
+
+from Ghypeddings.HGCN.manifolds.base import Manifold
+from Ghypeddings.HGCN.utils.math_utils import artanh, tanh
+
+
+class PoincareBall(Manifold):
+    """
+    PoincareBall manifold class.
+
+    We use the following convention: x0^2 + x1^2 + ... + xd^2 < 1 / c
+
+    Note that 1/sqrt(c) is the Poincare ball radius.
+
+    """
+
+    def __init__(self):
+        super(PoincareBall, self).__init__()
+        self.name = 'PoincareBall'
+        self.min_norm = 1e-15
+        self.eps = {torch.float32: 4e-3, torch.float64: 1e-5}
+
+    def sqdist(self, p1, p2, c):
+        sqrt_c = c ** 0.5
+        dist_c = artanh(
+            sqrt_c * self.mobius_add(-p1, p2, c, dim=-1).norm(dim=-1, p=2, keepdim=False)
+        )
+        dist = dist_c * 2 / sqrt_c
+        return dist ** 2
+
+    def _lambda_x(self, x, c):
+        x_sqnorm = torch.sum(x.data.pow(2), dim=-1, keepdim=True)
+        return 2 / (1. - c * x_sqnorm).clamp_min(self.min_norm)
+
+    def egrad2rgrad(self, p, dp, c):
+        lambda_p = self._lambda_x(p, c)
+        dp /= lambda_p.pow(2)
+        return dp
+
+    def proj(self, x, c):
+        norm = torch.clamp_min(x.norm(dim=-1, keepdim=True, p=2), self.min_norm)
+        maxnorm = (1 - self.eps[x.dtype]) / (c ** 0.5)
+        cond = norm > maxnorm
+        projected = x / norm * maxnorm
+        return torch.where(cond, projected, x)
+
+    def proj_tan(self, u, p, c):
+        return u
+
+    def proj_tan0(self, u, c):
+        return u
+
+    def expmap(self, u, p, c):
+        sqrt_c = c ** 0.5
+        u_norm = u.norm(dim=-1, p=2, keepdim=True).clamp_min(self.min_norm)
+        second_term = (
+                tanh(sqrt_c / 2 * self._lambda_x(p, c) * u_norm)
+                * u
+                / (sqrt_c * u_norm)
+        )
+        gamma_1 = self.mobius_add(p, second_term, c)
+        return gamma_1
+
+    def logmap(self, p1, p2, c):
+        sub = self.mobius_add(-p1, p2, c)
+        sub_norm = sub.norm(dim=-1, p=2, keepdim=True).clamp_min(self.min_norm)
+        lam = self._lambda_x(p1, c)
+        sqrt_c = c ** 0.5
+        return 2 / sqrt_c / lam * artanh(sqrt_c * sub_norm) * sub / sub_norm
+
+    def expmap0(self, u, c):
+        sqrt_c = c ** 0.5
+        u_norm = torch.clamp_min(u.norm(dim=-1, p=2, keepdim=True), self.min_norm)
+        gamma_1 = tanh(sqrt_c * u_norm) * u / (sqrt_c * u_norm)
+        return gamma_1
+
+    def logmap0(self, p, c):
+        sqrt_c = c ** 0.5
+        p_norm = p.norm(dim=-1, p=2, keepdim=True).clamp_min(self.min_norm)
+        scale = 1. / sqrt_c * artanh(sqrt_c * p_norm) / p_norm
+        return scale * p
+
+    def mobius_add(self, x, y, c, dim=-1):
+        x2 = x.pow(2).sum(dim=dim, keepdim=True)
+        y2 = y.pow(2).sum(dim=dim, keepdim=True)
+        xy = (x * y).sum(dim=dim, keepdim=True)
+        num = (1 + 2 * c * xy + c * y2) * x + (1 - c * x2) * y
+        denom = 1 + 2 * c * xy + c ** 2 * x2 * y2
+        return num / denom.clamp_min(self.min_norm)
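+
+    # Note (added): gyro-addition reduces to the identity at the origin:
+    # mobius_add(x, 0, c) == x and mobius_add(0, y, c) == y.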
+
+    def mobius_matvec(self, m, x, c):
+        sqrt_c = c ** 0.5
+        x_norm = x.norm(dim=-1, keepdim=True, p=2).clamp_min(self.min_norm)
+        mx = x @ m.transpose(-1, -2)
+        mx_norm = mx.norm(dim=-1, keepdim=True, p=2).clamp_min(self.min_norm)
+        res_c = tanh(mx_norm / x_norm * artanh(sqrt_c * x_norm)) * mx / (mx_norm * sqrt_c)
+        cond = (mx == 0).all(dim=-1, keepdim=True)  # bool mask; uint8 conditions in torch.where are deprecated
+        res_0 = torch.zeros(1, dtype=res_c.dtype, device=res_c.device)
+        res = torch.where(cond, res_0, res_c)
+        return res
+
+    def init_weights(self, w, c, irange=1e-5):
+        w.data.uniform_(-irange, irange)
+        return w
+
+    def _gyration(self, u, v, w, c, dim: int = -1):
+        u2 = u.pow(2).sum(dim=dim, keepdim=True)
+        v2 = v.pow(2).sum(dim=dim, keepdim=True)
+        uv = (u * v).sum(dim=dim, keepdim=True)
+        uw = (u * w).sum(dim=dim, keepdim=True)
+        vw = (v * w).sum(dim=dim, keepdim=True)
+        c2 = c ** 2
+        a = -c2 * uw * v2 + c * vw + 2 * c2 * uv * vw
+        b = -c2 * vw * u2 - c * uw
+        d = 1 + 2 * c * uv + c2 * u2 * v2
+        return w + 2 * (a * u + b * v) / d.clamp_min(self.min_norm)
+
+    def inner(self, x, c, u, v=None, keepdim=False):
+        if v is None:
+            v = u
+        lambda_x = self._lambda_x(x, c)
+        return lambda_x ** 2 * (u * v).sum(dim=-1, keepdim=keepdim)
+
+    def ptransp(self, x, y, u, c):
+        lambda_x = self._lambda_x(x, c)
+        lambda_y = self._lambda_x(y, c)
+        return self._gyration(y, -x, u, c) * lambda_x / lambda_y
+
+    def ptransp_(self, x, y, u, c):
+        lambda_x = self._lambda_x(x, c)
+        lambda_y = self._lambda_x(y, c)
+        return self._gyration(y, -x, u, c) * lambda_x / lambda_y
+
+    def ptransp0(self, x, u, c):
+        lambda_x = self._lambda_x(x, c)
+        return 2 * u / lambda_x.clamp_min(self.min_norm)
+
+    def to_hyperboloid(self, x, c):
+        K = 1./ c
+        sqrtK = K ** 0.5
+        sqnorm = torch.norm(x, p=2, dim=1, keepdim=True) ** 2
+        return sqrtK * torch.cat([K + sqnorm, 2 * sqrtK * x], dim=1) / (K - sqnorm)
+
diff --git a/HGCN/models/__init__.py b/HGCN/models/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/HGCN/models/base_models.py b/HGCN/models/base_models.py
new file mode 100644
index 0000000000000000000000000000000000000000..e9acf7a144744da2739339a436c9212629053479
--- /dev/null
+++ b/HGCN/models/base_models.py
@@ -0,0 +1,85 @@
+"""Base model class."""
+
+import numpy as np
+from sklearn.metrics import roc_auc_score, average_precision_score
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+import Ghypeddings.HGCN.manifolds as manifolds
+import Ghypeddings.HGCN.models.encoders as encoders
+from Ghypeddings.HGCN.models.decoders import model2decoder
+from Ghypeddings.HGCN.utils.eval_utils import acc_f1
+
+
+class BaseModel(nn.Module):
+    """
+    Base model for graph embedding tasks.
+    """
+
+    def __init__(self, args):
+        super(BaseModel, self).__init__()
+        self.manifold_name = args.select_manifold
+        if args.c is not None:
+            self.c = torch.tensor([args.c])
+            if not args.cuda == -1:
+                self.c = self.c.to(args.device)
+        else:
+            self.c = nn.Parameter(torch.Tensor([1.]))
+        self.manifold = getattr(manifolds, self.manifold_name)()
+        if self.manifold.name == 'Hyperboloid':
+            args.feat_dim = args.feat_dim + 1
+        self.nnodes = args.n_nodes
+        self.encoder = getattr(encoders, 'HGCN')(self.c, args)
+
+    def encode(self, x, adj):
+        if self.manifold.name == 'Hyperboloid':
+            o = torch.zeros_like(x)
+            x = torch.cat([o[:, 0:1], x], dim=1)
+        h = self.encoder.encode(x, adj)
+        return h
+
+    def compute_metrics(self, embeddings, data, split):
+        raise NotImplementedError
+
+    def init_metric_dict(self):
+        raise NotImplementedError
+
+    def has_improved(self, m1, m2):
+        raise NotImplementedError
+
+
+class NCModel(BaseModel):
+    """
+    Base model for node classification task.
+    """
+
+    def __init__(self, args):
+        super(NCModel, self).__init__(args)
+        self.decoder = model2decoder(self.c, args)
+        if args.n_classes > 2:
+            self.f1_average = 'micro'
+        else:
+            self.f1_average = 'binary'
+        
+        self.weights = torch.Tensor([1.] * args.n_classes)
+        if not args.cuda == -1:
+            self.weights = self.weights.to(args.device)
+
+    def decode(self, h, adj, idx):
+        output = self.decoder.decode(h, adj)
+        return F.log_softmax(output[idx], dim=1)
+
+    def compute_metrics(self, embeddings, data, split):
+        idx = data[f'idx_{split}']
+        output = self.decode(embeddings, data['adj_train_norm'], idx)
+        loss = F.nll_loss(output, data['labels'][idx], self.weights)
+        acc, f1, recall, precision, roc_auc = acc_f1(output, data['labels'][idx], average=self.f1_average)
+        metrics = {'loss': loss, 'acc': acc, 'f1': f1, 'recall': recall, 'precision': precision, 'roc_auc': roc_auc}
+        return metrics
+
+    def init_metric_dict(self):
+        return {'acc': -1, 'f1': -1, 'recall': -1, 'precision': -1, 'roc_auc': -1}  # include all keys fit() reads
+
+    def has_improved(self, m1, m2):
+        return m1["f1"] < m2["f1"]
diff --git a/HGCN/models/decoders.py b/HGCN/models/decoders.py
new file mode 100644
index 0000000000000000000000000000000000000000..f20046bcaca78d98d08a8a94da17f6881347b0d2
--- /dev/null
+++ b/HGCN/models/decoders.py
@@ -0,0 +1,52 @@
+"""Graph decoders."""
+import Ghypeddings.HGCN.manifolds as manifolds
+import torch.nn as nn
+import torch.nn.functional as F
+
+from Ghypeddings.HGCN.layers.layers import Linear
+
+
+class Decoder(nn.Module):
+    """
+    Decoder abstract class for node classification tasks.
+    """
+
+    def __init__(self, c):
+        super(Decoder, self).__init__()
+        self.c = c
+
+    def decode(self, x, adj):
+        if self.decode_adj:
+            input = (x, adj)
+            probs, _ = self.cls.forward(input)
+        else:
+            probs = self.cls.forward(x)
+        return probs
+
+
+class LinearDecoder(Decoder):
+    """
+    MLP Decoder for Hyperbolic/Euclidean node classification models.
+    """
+
+    def __init__(self, c, args):
+        super(LinearDecoder, self).__init__(c)
+        self.manifold = getattr(manifolds, args.select_manifold)()
+        self.input_dim = args.dim
+        self.output_dim = args.n_classes
+        self.bias = args.bias
+        self.cls = Linear(self.input_dim, self.output_dim, args.dropout, lambda x: x, self.bias)
+        self.decode_adj = False
+
+    def decode(self, x, adj):
+        h = self.manifold.proj_tan0(self.manifold.logmap0(x, c=self.c), c=self.c)
+        return super(LinearDecoder, self).decode(h, adj)
+
+    def extra_repr(self):
+        return 'in_features={}, out_features={}, bias={}, c={}'.format(
+                self.input_dim, self.output_dim, self.bias, self.c
+        )
+
+
+model2decoder = LinearDecoder
+
diff --git a/HGCN/models/encoders.py b/HGCN/models/encoders.py
new file mode 100644
index 0000000000000000000000000000000000000000..344b8dd35f7d76f0783daeddaa6243beb5393680
--- /dev/null
+++ b/HGCN/models/encoders.py
@@ -0,0 +1,58 @@
+"""Graph encoders."""
+
+import numpy as np
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+import Ghypeddings.HGCN.manifolds as manifolds
+import Ghypeddings.HGCN.layers.hyp_layers as hyp_layers
+import Ghypeddings.HGCN.utils.math_utils as pmath
+
+
+class Encoder(nn.Module):
+    """
+    Encoder abstract class.
+    """
+
+    def __init__(self, c):
+        super(Encoder, self).__init__()
+        self.c = c
+
+    def encode(self, x, adj):
+        if self.encode_graph:
+            input = (x, adj)
+            output, _ = self.layers.forward(input)
+        else:
+            output = self.layers.forward(x)
+        return output
+
+class HGCN(Encoder):
+    """
+    Hyperbolic-GCN.
+    """
+
+    def __init__(self, c, args):
+        super(HGCN, self).__init__(c)
+        self.manifold = getattr(manifolds, args.select_manifold)()
+        assert args.num_layers > 1
+        dims, acts, self.curvatures = hyp_layers.get_dim_act_curv(args)
+        self.curvatures.append(self.c)
+        hgc_layers = []
+        for i in range(len(dims) - 1):
+            c_in, c_out = self.curvatures[i], self.curvatures[i + 1]
+            in_dim, out_dim = dims[i], dims[i + 1]
+            act = acts[i]
+            hgc_layers.append(
+                    hyp_layers.HyperbolicGraphConvolution(
+                            self.manifold, in_dim, out_dim, c_in, c_out, args.dropout, act, args.bias, args.use_att, args.local_agg
+                    )
+            )
+        self.layers = nn.Sequential(*hgc_layers)
+        self.encode_graph = True
+
+    def encode(self, x, adj):
+        x_tan = self.manifold.proj_tan0(x, self.curvatures[0])
+        x_hyp = self.manifold.expmap0(x_tan, c=self.curvatures[0])
+        x_hyp = self.manifold.proj(x_hyp, c=self.curvatures[0])
+        return super(HGCN, self).encode(x_hyp, adj)
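+
+# Note (added): HGCN.encode first projects the features to the tangent space
+# at the origin (proj_tan0), lifts them onto the manifold (expmap0 + proj),
+# and only then applies the stacked hyperbolic graph convolutions.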
diff --git a/HGCN/optimizers/__init__.py b/HGCN/optimizers/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..411e319d0d10a157da5a9e05a85f468983dcb4be
--- /dev/null
+++ b/HGCN/optimizers/__init__.py
@@ -0,0 +1,2 @@
+from torch.optim import Adam
+from Ghypeddings.HGCN.optimizers.radam import RiemannianAdam
diff --git a/HGCN/optimizers/radam.py b/HGCN/optimizers/radam.py
new file mode 100644
index 0000000000000000000000000000000000000000..c7033935d2acb22bb55679828d15564b17896e34
--- /dev/null
+++ b/HGCN/optimizers/radam.py
@@ -0,0 +1,172 @@
+"""Riemannian adam optimizer geoopt implementation (https://github.com/geoopt/)."""
+import torch.optim
+from Ghypeddings.HGCN.manifolds import Euclidean, ManifoldParameter
+
+_default_manifold = Euclidean()
+
+class OptimMixin(object):
+    def __init__(self, *args, stabilize=None, **kwargs):
+        self._stabilize = stabilize
+        super().__init__(*args, **kwargs)
+
+    def stabilize_group(self, group):
+        pass
+
+    def stabilize(self):
+        """Stabilize parameters if they are off-manifold due to numerical reasons
+        """
+        for group in self.param_groups:
+            self.stabilize_group(group)
+
+
+def copy_or_set_(dest, source):
+    """
+    A workaround to respect strides of :code:`dest` when copying :code:`source`
+    (https://github.com/geoopt/geoopt/issues/70)
+    Parameters
+    ----------
+    dest : torch.Tensor
+        Destination tensor where to store new data
+    source : torch.Tensor
+        Source data to put in the new tensor
+    Returns
+    -------
+    dest
+        torch.Tensor, modified inplace
+    """
+    if dest.stride() != source.stride():
+        return dest.copy_(source)
+    else:
+        return dest.set_(source)
+
+
+class RiemannianAdam(OptimMixin, torch.optim.Adam):
+    r"""Riemannian Adam with the same API as :class:`torch.optim.Adam`
+    Parameters
+    ----------
+    params : iterable
+        iterable of parameters to optimize or dicts defining
+        parameter groups
+    lr : float (optional)
+        learning rate (default: 1e-3)
+    betas : Tuple[float, float] (optional)
+        coefficients used for computing
+        running averages of gradient and its square (default: (0.9, 0.999))
+    eps : float (optional)
+        term added to the denominator to improve
+        numerical stability (default: 1e-8)
+    weight_decay : float (optional)
+        weight decay (L2 penalty) (default: 0)
+    amsgrad : bool (optional)
+        whether to use the AMSGrad variant of this
+        algorithm from the paper `On the Convergence of Adam and Beyond`_
+        (default: False)
+    Other Parameters
+    ----------------
+    stabilize : int
+        Stabilize parameters if they are off-manifold due to numerical
+        reasons every ``stabilize`` steps (default: ``None`` -- no stabilize)
+    .. _On the Convergence of Adam and Beyond:
+        https://openreview.net/forum?id=ryQu7f-RZ
+    """
+
+    def step(self, closure=None):
+        """Performs a single optimization step.
+        Arguments
+        ---------
+        closure : callable (optional)
+            A closure that reevaluates the model
+            and returns the loss.
+        """
+        loss = None
+        if closure is not None:
+            loss = closure()
+        with torch.no_grad():
+            for group in self.param_groups:
+                if "step" not in group:
+                    group["step"] = 0
+                betas = group["betas"]
+                weight_decay = group["weight_decay"]
+                eps = group["eps"]
+                learning_rate = group["lr"]
+                amsgrad = group["amsgrad"]
+                for point in group["params"]:
+                    grad = point.grad
+                    if grad is None:
+                        continue
+                    if isinstance(point, (ManifoldParameter)):
+                        manifold = point.manifold
+                        c = point.c
+                    else:
+                        manifold = _default_manifold
+                        c = None
+                    if grad.is_sparse:
+                        raise RuntimeError(
+                                "Riemannian Adam does not support sparse gradients yet (PR is welcome)"
+                        )
+
+                    state = self.state[point]
+
+                    # State initialization
+                    if len(state) == 0:
+                        state["step"] = 0
+                        # Exponential moving average of gradient values
+                        state["exp_avg"] = torch.zeros_like(point)
+                        # Exponential moving average of squared gradient values
+                        state["exp_avg_sq"] = torch.zeros_like(point)
+                        if amsgrad:
+                            # Maintains max of all exp. moving avg. of sq. grad. values
+                            state["max_exp_avg_sq"] = torch.zeros_like(point)
+                    # make local variables for easy access
+                    exp_avg = state["exp_avg"]
+                    exp_avg_sq = state["exp_avg_sq"]
+                    # actual step
+                    grad.add_(point, alpha=weight_decay)
+                    grad = manifold.egrad2rgrad(point, grad, c)
+                    exp_avg.mul_(betas[0]).add_(grad, alpha=1 - betas[0])
+                    exp_avg_sq.mul_(betas[1]).add_(
+                            manifold.inner(point, c, grad, keepdim=True), alpha=1 - betas[1]
+                    )
+                    if amsgrad:
+                        max_exp_avg_sq = state["max_exp_avg_sq"]
+                        # Maintains the maximum of all 2nd moment running avg. till now
+                        torch.max(max_exp_avg_sq, exp_avg_sq, out=max_exp_avg_sq)
+                        # Use the max. for normalizing running avg. of gradient
+                        denom = max_exp_avg_sq.sqrt().add_(eps)
+                    else:
+                        denom = exp_avg_sq.sqrt().add_(eps)
+                    group["step"] += 1
+                    bias_correction1 = 1 - betas[0] ** group["step"]
+                    bias_correction2 = 1 - betas[1] ** group["step"]
+                    step_size = (
+                        learning_rate * bias_correction2 ** 0.5 / bias_correction1
+                    )
+
+                    # copy the state, we need it for retraction
+                    # get the direction for ascend
+                    direction = exp_avg / denom
+                    # transport the exponential averaging to the new point
+                    new_point = manifold.proj(manifold.expmap(-step_size * direction, point, c), c)
+                    exp_avg_new = manifold.ptransp(point, new_point, exp_avg, c)
+                    # use copy only for user facing point
+                    copy_or_set_(point, new_point)
+                    exp_avg.set_(exp_avg_new)
+
+                if self._stabilize is not None and group["step"] % self._stabilize == 0:
+                    self.stabilize_group(group)
+        return loss
+
+    @torch.no_grad()
+    def stabilize_group(self, group):
+        for p in group["params"]:
+            if not isinstance(p, ManifoldParameter):
+                continue
+            state = self.state[p]
+            if not state:  # due to None grads
+                continue
+            manifold = p.manifold
+            c = p.c
+            exp_avg = state["exp_avg"]
+            copy_or_set_(p, manifold.proj(p, c))
+            exp_avg.set_(manifold.proj_tan(exp_avg, p, c))
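+
+# Usage sketch (illustrative, hypothetical names; mirrors how the trainers wire
+# this optimizer up). ManifoldParameter instances get Riemannian updates, plain
+# tensors fall back to the Euclidean manifold:
+#
+#   params = [p for p in model.parameters() if p.requires_grad]
+#   opt = RiemannianAdam(params, lr=1e-2, stabilize=100)
+#   loss.backward(); opt.step()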
diff --git a/HGCN/utils/__init__.py b/HGCN/utils/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/HGCN/utils/data_utils.py b/HGCN/utils/data_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..b2a01103d7f5b862a001bb60af306db49eba9c3e
--- /dev/null
+++ b/HGCN/utils/data_utils.py
@@ -0,0 +1,89 @@
+"""Data utils functions for pre-processing and data loading."""
+import os
+import pickle as pkl
+import sys
+
+import networkx as nx
+import numpy as np
+import scipy.sparse as sp
+import torch
+import pandas as pd
+
+from sklearn.preprocessing import MinMaxScaler
+
+
+def process_data(args, adj,features,labels):
+    data = process_data_nc(args,adj,features,labels)
+    data['adj_train_norm'], data['features'] = process(
+            data['adj_train'], data['features'],args.normalize_adj,args.normalize_feats
+    )
+    return data
+
+
+def process(adj, features, normalize_adj, normalize_feats):
+    if sp.isspmatrix(features):
+        features = np.array(features.todense())
+    if normalize_feats:
+        features = normalize(features)
+    features = torch.Tensor(features)
+    if normalize_adj:
+        adj = normalize(adj + sp.eye(adj.shape[0]))
+    adj = sparse_mx_to_torch_sparse_tensor(adj)
+    return adj, features
+
+
+def normalize(mx):
+    """Row-normalize sparse matrix."""
+    rowsum = np.array(mx.sum(1))
+    r_inv = np.power(rowsum, -1).flatten()
+    r_inv[np.isinf(r_inv)] = 0.
+    r_mat_inv = sp.diags(r_inv)
+    mx = r_mat_inv.dot(mx)
+    return mx
+
+
+def sparse_mx_to_torch_sparse_tensor(sparse_mx):
+    """Convert a scipy sparse matrix to a torch sparse tensor."""
+    sparse_mx = sparse_mx.tocoo()
+    indices = torch.from_numpy(
+            np.vstack((sparse_mx.row, sparse_mx.col)).astype(np.int64)
+    )
+    values = torch.Tensor(sparse_mx.data)
+    shape = torch.Size(sparse_mx.shape)
+    return torch.sparse.FloatTensor(indices, values, shape)
+
+
+def augment(adj, features, normalize_feats=True):
+    deg = np.squeeze(np.sum(adj, axis=0).astype(int))
+    deg[deg > 5] = 5
+    deg_onehot = torch.tensor(np.eye(6)[deg], dtype=torch.float).squeeze()
+    const_f = torch.ones(features.size(0), 1)
+    features = torch.cat((features, deg_onehot, const_f), dim=1)
+    return features
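+
+# augment (illustrative): node degrees are capped at 5, one-hot encoded into 6
+# columns, and concatenated to the features together with a constant column, so
+# the feature dimension grows by 7.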
+
+
+def split_data(labels, val_prop, test_prop, seed):
+    np.random.seed(seed)
+    nb_nodes = labels.shape[0]
+    all_idx = np.arange(nb_nodes)
+    pos_idx = labels.nonzero()[0]
+    neg_idx = (1. - labels).nonzero()[0]
+    np.random.shuffle(pos_idx)
+    np.random.shuffle(neg_idx)
+    pos_idx = pos_idx.tolist()
+    neg_idx = neg_idx.tolist()
+    nb_pos_neg = min(len(pos_idx), len(neg_idx))
+    nb_val = round(val_prop * nb_pos_neg)
+    nb_test = round(test_prop * nb_pos_neg)
+    idx_val_pos, idx_test_pos, idx_train_pos = pos_idx[:nb_val], pos_idx[nb_val:nb_val + nb_test], pos_idx[
+                                                                                                   nb_val + nb_test:]
+    idx_val_neg, idx_test_neg, idx_train_neg = neg_idx[:nb_val], neg_idx[nb_val:nb_val + nb_test], neg_idx[
+                                                                                                   nb_val + nb_test:]
+    return idx_val_pos + idx_val_neg, idx_test_pos + idx_test_neg, idx_train_pos + idx_train_neg
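+
+# Example (illustrative): with 10 positive and 6 negative labels, val_prop=0.5
+# and test_prop=0.25 give nb_pos_neg = 6, so 3 indices per class go to val and
+# 2 per class to test; the rest of each class goes to train, keeping the splits
+# class-balanced. Note the return order is (val, test, train).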
+
+
+def process_data_nc(args,adj,features,labels):
+    idx_val, idx_test, idx_train = split_data(labels, args.val_prop, args.test_prop, seed=args.seed)
+    labels = torch.LongTensor(labels)
+    data = {'adj_train': sp.csr_matrix(adj), 'features': features, 'labels': labels, 'idx_train': idx_train, 'idx_val': idx_val, 'idx_test': idx_test}
+    return data
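+
+# Minimal end-to-end sketch (illustrative values; args as built by create_args):
+#
+#   adj = np.array([[0, 1], [1, 0]])
+#   features = np.array([[1., 2.], [3., 4.]])
+#   labels = np.array([0, 1])
+#   data = process_data(args, adj, features, labels)
+#   # data['adj_train_norm'] is a torch sparse tensor; data['features'] is dense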
diff --git a/HGCN/utils/eval_utils.py b/HGCN/utils/eval_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..840a48bf45cc08944925411885698019442f5870
--- /dev/null
+++ b/HGCN/utils/eval_utils.py
@@ -0,0 +1,14 @@
+from sklearn.metrics import accuracy_score, f1_score, recall_score, precision_score, roc_auc_score
+
+def acc_f1(output, labels, average='binary'):
+    preds = output.max(1)[1].type_as(labels)
+    if preds.is_cuda:
+        preds = preds.cpu()
+        labels = labels.cpu()
+    accuracy = accuracy_score(labels, preds)
+    recall = recall_score(labels, preds)
+    precision = precision_score(labels, preds)
+    roc_auc = roc_auc_score(labels, preds)
+    f1 = f1_score(labels, preds, average=average)
+    return accuracy, f1, recall, precision, roc_auc
+
diff --git a/HGCN/utils/math_utils.py b/HGCN/utils/math_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..9cf278ed7ce59b97f4793f5def3218f3e830d473
--- /dev/null
+++ b/HGCN/utils/math_utils.py
@@ -0,0 +1,69 @@
+"""Math utils functions."""
+
+import torch
+
+
+def cosh(x, clamp=15):
+    return x.clamp(-clamp, clamp).cosh()
+
+
+def sinh(x, clamp=15):
+    return x.clamp(-clamp, clamp).sinh()
+
+
+def tanh(x, clamp=15):
+    return x.clamp(-clamp, clamp).tanh()
+
+
+def arcosh(x):
+    return Arcosh.apply(x)
+
+
+def arsinh(x):
+    return Arsinh.apply(x)
+
+
+def artanh(x):
+    return Artanh.apply(x)
+
+
+class Artanh(torch.autograd.Function):
+    @staticmethod
+    def forward(ctx, x):
+        x = x.clamp(-1 + 1e-15, 1 - 1e-15)
+        ctx.save_for_backward(x)
+        z = x.double()
+        return (torch.log_(1 + z).sub_(torch.log_(1 - z))).mul_(0.5).to(x.dtype)
+
+    @staticmethod
+    def backward(ctx, grad_output):
+        input, = ctx.saved_tensors
+        return grad_output / (1 - input ** 2)
+
+
+class Arsinh(torch.autograd.Function):
+    @staticmethod
+    def forward(ctx, x):
+        ctx.save_for_backward(x)
+        z = x.double()
+        return (z + torch.sqrt_(1 + z.pow(2))).clamp_min_(1e-15).log_().to(x.dtype)
+
+    @staticmethod
+    def backward(ctx, grad_output):
+        input, = ctx.saved_tensors
+        return grad_output / (1 + input ** 2) ** 0.5
+
+
+class Arcosh(torch.autograd.Function):
+    @staticmethod
+    def forward(ctx, x):
+        x = x.clamp(min=1.0 + 1e-15)
+        ctx.save_for_backward(x)
+        z = x.double()
+        return (z + torch.sqrt_(z.pow(2) - 1)).clamp_min_(1e-15).log_().to(x.dtype)
+
+    @staticmethod
+    def backward(ctx, grad_output):
+        input, = ctx.saved_tensors
+        return grad_output / (input ** 2 - 1) ** 0.5
+
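+# Quick numerical sanity check (illustrative): the clamps keep the inverses finite,
+#
+#   artanh(torch.tensor([1.0]))  # input clamped to 1 - 1e-15, large but finite
+#   arcosh(torch.tensor([1.0]))  # input clamped to 1 + 1e-15, ~0 rather than nan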
diff --git a/HGCN/utils/train_utils.py b/HGCN/utils/train_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..6e4385c5c977b1ea47ee9ffb6afe1d7f013c7fcc
--- /dev/null
+++ b/HGCN/utils/train_utils.py
@@ -0,0 +1,45 @@
+import os
+
+import numpy as np
+import torch
+import torch.nn.functional as F
+import torch.nn.modules.loss
+import argparse
+
+def format_metrics(metrics, split):
+    """Format metric in metric dict for logging."""
+    return " ".join(
+            ["{}_{}: {:.4f}".format(split, metric_name, metric_val) for metric_name, metric_val in metrics.items()])
+
+def create_args(*args):
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--dim', type=int, default=args[0])
+    parser.add_argument('--c', type=int, default=args[1])
+    parser.add_argument('--num_layers', type=int, default=args[2])
+    parser.add_argument('--bias', type=bool, default=args[3])
+    parser.add_argument('--act', type=str, default=args[4])
+    parser.add_argument('--select_manifold', type=str, default=args[5])
+    parser.add_argument('--grad_clip', type=float, default=args[6])
+    parser.add_argument('--optimizer', type=str, default=args[7])
+    parser.add_argument('--weight_decay', type=float, default=args[8])
+    parser.add_argument('--lr', type=float, default=args[9])
+    parser.add_argument('--gamma', type=float, default=args[10])
+    parser.add_argument('--lr_reduce_freq', type=int, default=args[11])
+    parser.add_argument('--cuda', type=int, default=args[12])
+    parser.add_argument('--epochs', type=int, default=args[13])
+    parser.add_argument('--min_epochs', type=int, default=args[14])
+    parser.add_argument('--patience', type=int, default=args[15])
+    parser.add_argument('--seed', type=int, default=args[16])
+    parser.add_argument('--log_freq', type=int, default=args[17])
+    parser.add_argument('--eval_freq', type=int, default=args[18])
+    parser.add_argument('--val_prop', type=float, default=args[19])
+    parser.add_argument('--test_prop', type=float, default=args[20])
+    parser.add_argument('--double_precision', type=int, default=args[21])
+    parser.add_argument('--dropout', type=float, default=args[22])
+    parser.add_argument('--use_att', type=bool, default=args[23])
+    parser.add_argument('--alpha', type=float, default=args[24])
+    parser.add_argument('--local_agg', type=bool, default=args[25])
+    parser.add_argument('--normalize_adj', type=bool, default=args[26])
+    parser.add_argument('--normalize_feats', type=bool, default=args[27])
+    flags, unknown = parser.parse_known_args()
+    return flags
\ No newline at end of file
diff --git a/HGNN/.gitignore b/HGNN/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..ba0430d26c996e7f078385407f959c96c271087c
--- /dev/null
+++ b/HGNN/.gitignore
@@ -0,0 +1 @@
+__pycache__/
\ No newline at end of file
diff --git a/HGNN/__init__.py b/HGNN/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/HGNN/dataset/NodeClassificationDataset.py b/HGNN/dataset/NodeClassificationDataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..ef6a3c813c067a1564277d5509081dbe91489828
--- /dev/null
+++ b/HGNN/dataset/NodeClassificationDataset.py
@@ -0,0 +1,160 @@
+import numpy as np
+import pickle as pkl
+import networkx as nx
+import scipy.sparse as sp
+from scipy.sparse import save_npz, load_npz
+from scipy.sparse.linalg import eigsh
+import sys
+from torch.utils.data import Dataset, DataLoader
+from Ghypeddings.HGNN.utils import *
+import pandas as pd
+from sklearn.preprocessing import MinMaxScaler
+
+def parse_index_file(filename):
+    """Parse index file."""
+    index = []
+    for line in open(filename):
+        index.append(int(line.strip()))
+    return index
+
+def sample_mask(idx, l):
+    """Create mask."""
+    mask = np.zeros(l)
+    mask[idx] = 1
+    return np.array(mask, dtype=np.bool_)
+
+def preprocess_features(features):
+    """Row-normalize feature matrix and convert to tuple representation"""
+    rowsum = np.array(features.sum(1)).astype(float)
+    r_inv = np.power(rowsum, -1).flatten()
+    r_inv[np.isinf(r_inv)] = 0
+    r_mat_inv = sp.diags(r_inv)
+    features = r_mat_inv.dot(features)
+    return features
+
+class NodeClassificationDataset(Dataset):
+    """
+    Extend the Dataset class for graph datasets
+    """
+    def __init__(self, args, logger,adj,features,labels):
+        self.args = args
+        self.process_data(adj,features,labels)
+
+    def _filling_adjacency_numpy(self,data, N, source_ip_index, destination_ip_index):
+        # allocate the dense mask up front; let a MemoryError propagate instead of
+        # swallowing it and hitting a NameError below
+        adjacency = np.zeros((N, N), dtype=bool)
+        source_ips = data[:, source_ip_index]
+        destination_ips = data[:, destination_ip_index]
+        mask = ((source_ips[:, np.newaxis] == source_ips) | (source_ips[:, np.newaxis] == destination_ips) | (destination_ips[:, np.newaxis] == source_ips) | (destination_ips[:, np.newaxis] == destination_ips))
+        adjacency[mask] = True
+        adjacency = adjacency - np.eye(N)
+        return adjacency
+    
+    def compact_adjacency(self,adj):
+        max_neighbors = int(np.max(np.sum(adj, axis=1)))
+        shape = (adj.shape[0],max_neighbors)
+        c_adj = np.zeros(shape)
+        c_adj[:,:] = -1
+        indices , neighbors = np.where(adj == 1)
+
+        j=-1
+        l = indices[0]
+        for i,k in zip(indices,neighbors):
+            if i == l:
+                j+=1
+            else:
+                l=i
+                j=0
+            c_adj[i,j]=int(k)
+        return c_adj
+    
+    def compact_weight_matrix(self,c_adj):
+        return np.where(c_adj >= 0, 1, 0)
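+
+    # Worked example (illustrative): for adj = [[0,1,1],[1,0,0],[1,0,0]],
+    # compact_adjacency returns [[1, 2], [0, -1], [0, -1]] (rows padded with -1)
+    # and compact_weight_matrix maps that to [[1, 1], [1, 0], [1, 0]].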
+    
+    def one_hot_labels(self,y):
+        array  = np.zeros((len(y),2))
+        for i,j in zip(range(len(y)),y):
+            if j:
+                array[i,1]=1
+            else:
+                array[i,0]=1
+
+        return array
+    
+    def split_data(self,labels, test_prop,val_prop):
+        np.random.seed(self.args.seed)
+        pos_idx = labels[:,1].nonzero()[0]
+        neg_idx = labels[:,0].nonzero()[0]
+        np.random.shuffle(pos_idx)
+        np.random.shuffle(neg_idx)
+        pos_idx = pos_idx.tolist()
+        neg_idx = neg_idx.tolist()
+        nb_pos_neg = min(len(pos_idx), len(neg_idx))
+        nb_val = round(val_prop * nb_pos_neg)
+        nb_test = round(test_prop * nb_pos_neg)
+        idx_val_pos, idx_test_pos, idx_train_pos = pos_idx[:nb_val], pos_idx[nb_val:nb_val + nb_test], pos_idx[
+                                                                                                    nb_val + nb_test:]
+        idx_val_neg, idx_test_neg, idx_train_neg = neg_idx[:nb_val], neg_idx[nb_val:nb_val + nb_test], neg_idx[
+                                                                                                    nb_val + nb_test:]
+        return idx_val_pos + idx_val_neg, idx_test_pos + idx_test_neg, idx_train_pos + idx_train_neg
+    
+    def process_data(self, adj,features,labels):
+            
+        adj = self.compact_adjacency(adj)
+        weight = self.compact_weight_matrix(adj)
+        adj[adj == -1] = 0
+
+        labels = self.one_hot_labels(labels)
+
+        idx_val, idx_test, idx_train = self.split_data(labels, self.args.test_prop, self.args.val_prop)
+
+        train_mask = sample_mask(idx_train, labels.shape[0])
+        val_mask = sample_mask(idx_val, labels.shape[0])
+        test_mask = sample_mask(idx_test, labels.shape[0])
+
+        y_train = np.zeros(labels.shape)
+        y_val = np.zeros(labels.shape)
+        y_test = np.zeros(labels.shape)
+        y_train[train_mask, :] = labels[train_mask, :]
+        y_val[val_mask, :] = labels[val_mask, :]
+        y_test[test_mask, :] = labels[test_mask, :]
+
+        self.adj = adj
+        self.weight = weight
+
+        self.features = preprocess_features(features) if self.args.normalize_feats else features
+        assert not np.isnan(self.features).any()
+        self.y_train = y_train
+        self.y_val = y_val
+        self.y_test = y_test
+        self.train_mask = train_mask.astype(int)
+        self.val_mask = val_mask.astype(int)
+        self.test_mask = test_mask.astype(int)
+        self.args.node_num = self.features.shape[0]
+        self.args.input_dim = self.features.shape[1]
+        self.args.num_class = y_train.shape[1]
+    
+
+    def __len__(self):
+        return 1
+
+    def __getitem__(self, idx):
+        return  {
+                  'adj': self.adj,
+                  'weight': self.weight,
+                  'features': self.features,
+                  'y_train' : self.y_train,
+                  'y_val' : self.y_val,
+                  'y_test' : self.y_test,
+                  'train_mask' : self.train_mask,
+                  'val_mask' : self.val_mask,
+                  'test_mask' : self.test_mask,
+                }
diff --git a/HGNN/dataset/__init__.py b/HGNN/dataset/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/HGNN/gnn/RiemannianGNN.py b/HGNN/gnn/RiemannianGNN.py
new file mode 100644
index 0000000000000000000000000000000000000000..67eecfcc3be8a8b836fa66f5a7f784fc792cd85b
--- /dev/null
+++ b/HGNN/gnn/RiemannianGNN.py
@@ -0,0 +1,151 @@
+import torch as th
+import torch.nn as nn
+import torch.nn.functional as F
+from Ghypeddings.HGNN.utils import *
+
+class RiemannianGNN(nn.Module):
+
+	def __init__(self, args, logger, manifold):
+		super(RiemannianGNN, self).__init__()
+		self.args = args
+		self.logger = logger
+		self.manifold = manifold
+		self.set_up_params()
+		self.activation = get_activation(self.args)
+		self.dropout = nn.Dropout(self.args.dropout)
+
+	def create_params(self):
+		"""
+		create the GNN params for a specific msg type
+		"""
+		msg_weight = []
+		layer = self.args.num_layers if not self.args.tie_weight else 1
+		for _ in range(layer):
+			# weight in euclidean space
+			if self.args.select_manifold == 'poincare':
+				M = th.zeros([self.args.dim, self.args.dim], requires_grad=True)
+			elif self.args.select_manifold == 'lorentz': # one degree of freedom less
+				M = th.zeros([self.args.dim, self.args.dim - 1], requires_grad=True)
+			init_weight(M, self.args.proj_init)
+			M = nn.Parameter(M)
+			self.args.eucl_vars.append(M)
+			msg_weight.append(M)
+		return nn.ParameterList(msg_weight)
+
+	def set_up_params(self):
+		"""
+		set up the params for all message types
+		"""
+		self.type_of_msg = 1
+
+		for i in range(0, self.type_of_msg):
+			setattr(self, "msg_%d_weight" % i, self.create_params())
+
+
+	def retrieve_params(self, weight, step):
+		"""
+		Args:
+			weight: a list of weights
+			step: a certain layer
+		"""
+		if self.args.select_manifold == 'poincare':
+			layer_weight = weight[step]
+		elif self.args.select_manifold == 'lorentz': # Ensure valid tangent vectors for (1, 0, ...)
+			layer_weight = th.cat((th.zeros((self.args.dim, 1)).cuda(), weight[step]), dim=1)
+		return layer_weight
+
+	def apply_activation(self, node_repr):
+		"""
+		apply non-linearity for different manifolds
+		"""
+		if self.args.select_manifold == "poincare":
+			return self.activation(node_repr)
+		elif self.args.select_manifold == "lorentz":
+			return self.manifold.from_poincare_to_lorentz(
+				self.activation(self.manifold.from_lorentz_to_poincare(node_repr))
+			)
+
+	def split_graph_by_negative_edge(self, adj_mat, weight):
+		"""
+		Split the graph according to positive and negative edges.
+		"""
+		mask = weight > 0
+		neg_mask = weight < 0
+
+		pos_adj_mat = adj_mat * mask.long()
+		neg_adj_mat = adj_mat * neg_mask.long()
+		pos_weight = weight * mask.float()
+		neg_weight = -weight * neg_mask.float()
+		return pos_adj_mat, pos_weight, neg_adj_mat, neg_weight
+
+	def split_graph_by_type(self, adj_mat, weight):
+		"""
+		split the graph according to edge type for multi-relational datasets
+		"""
+		multi_relation_adj_mat = []
+		multi_relation_weight = []
+		for relation in range(1, self.args.edge_type):
+			mask = (weight.int() == relation)
+			multi_relation_adj_mat.append(adj_mat * mask.long())
+			multi_relation_weight.append(mask.float())
+		return multi_relation_adj_mat, multi_relation_weight
+
+	def split_input(self, adj_mat, weight):
+		"""
+		Split the adjacency matrix and weight matrix for multi-relational datasets
+		and datasets with enhanced inverse edges, e.g. Ethereum.
+		"""
+		return [adj_mat], [weight]
+
+	def aggregate_msg(self, node_repr, adj_mat, weight, layer_weight, mask):
+		"""
+		message passing for a specific message type.
+		"""
+		node_num, max_neighbor = adj_mat.size(0), adj_mat.size(1)
+		msg = th.mm(node_repr, layer_weight) * mask
+		# select out the neighbors of each node
+		neighbors = th.index_select(msg, 0, adj_mat.view(-1)) # [node_num * max_neighbor, embed_size]
+		neighbors = neighbors.view(node_num, max_neighbor, -1)
+		# weighted sum of the neighbors' representations
+		neighbors = weight.unsqueeze(2) * neighbors # [node_num, max_neighbor, embed_size]
+		combined_msg = th.sum(neighbors, dim=1)  # [node_num, embed_size]
+		return combined_msg
+
+	def get_combined_msg(self, step, node_repr, adj_mat, weight, mask):
+		"""
+		perform message passing in the tangent space of x'
+		"""
+		# use the first layer only if tying weights
+		gnn_layer = 0 if self.args.tie_weight else step
+		combined_msg = None
+		for relation in range(0, self.type_of_msg):
+			layer_weight = self.retrieve_params(getattr(self, "msg_%d_weight" % relation), gnn_layer)
+			aggregated_msg = self.aggregate_msg(node_repr,
+												adj_mat[relation],
+												weight[relation],
+												layer_weight, mask)
+			combined_msg = aggregated_msg if combined_msg is None else (combined_msg + aggregated_msg)
+		return combined_msg
+
+	def forward(self, node_repr, adj_list, weight, mask):
+		"""
+		Args:
+			node_repr: [node_num, embed_size]
+					   node_repr is in Euclidean space.
+					   If node_repr is in hyperbolic space, invoke log_map_zero first.
+			adj_list: [node_num, max_neighbor] adjacency list
+			weight:  [node_num, max_neighbor]  weights of the adjacency list
+			mask:    [node_num, 1] 1 denote real node, 0 padded node
+		Return:
+			[node_num, embed_size] in hyperbolic space
+		"""
+		# split the adjacency list and weights based on edge types
+		adj_list, weight = self.split_input(adj_list, weight)
+		# gnn layers
+		for step in range(self.args.num_layers):
+			node_repr = self.manifold.log_map_zero(node_repr) * mask if step > 0 else node_repr * mask
+			combined_msg = self.get_combined_msg(step, node_repr, adj_list, weight, mask)
+			combined_msg = self.dropout(combined_msg) * mask
+			node_repr = self.manifold.exp_map_zero(combined_msg) * mask
+			node_repr = self.apply_activation(node_repr) * mask
+		return node_repr
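+
+# Shape sketch (illustrative): with x [N, dim] in Euclidean space,
+# adj_list [N, max_neighbor], weight [N, max_neighbor] and mask [N, 1]:
+#
+#   gnn = RiemannianGNN(args, logger, manifold)
+#   out = gnn(x, adj_list, weight, mask)  # [N, dim], points on the chosen manifold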
diff --git a/HGNN/gnn/__init__.py b/HGNN/gnn/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..6c53e88b5c75fe891a396a8d629fae431dfe63d6
--- /dev/null
+++ b/HGNN/gnn/__init__.py
@@ -0,0 +1 @@
+from Ghypeddings.HGNN.gnn.RiemannianGNN import RiemannianGNN
diff --git a/HGNN/hgnn.py b/HGNN/hgnn.py
new file mode 100644
index 0000000000000000000000000000000000000000..4e10742634d05623691ec8e66de926a135ae2b9f
--- /dev/null
+++ b/HGNN/hgnn.py
@@ -0,0 +1,70 @@
+from Ghypeddings.HGNN.task import *
+from Ghypeddings.HGNN.utils import *
+from Ghypeddings.HGNN.manifold import *
+from Ghypeddings.HGNN.gnn import RiemannianGNN
+
+class HGNN:
+    def __init__(self,
+                adj,
+                features,
+                labels,
+                dim,
+                c=None,
+                num_layers=2,
+                bias=True,
+                act='leaky_relu',
+                alpha=0.2,
+                select_manifold='poincare',
+                num_centroid=100,
+                eucl_vars=[],
+                hyp_vars=[],
+                grad_clip=1.0,
+                optimizer='sgd',
+                weight_decay=0.05,
+                lr=0.01,
+                lr_scheduler='cosine',
+                lr_gamma=0.5,
+                lr_hyperbolic=0.1,
+                hyper_optimizer='ramsgrad',
+                proj_init='xavier',
+                tie_weight=True,
+                epochs=50,
+                patience=100,
+                seed=42,
+                log_freq=1,
+                eval_freq=1,
+                val_prop=0.5,
+                test_prop=0.3,
+                double_precision=0,
+                dropout=0.1,
+                normalize_adj=False,
+                normalize_feats=True):
+        
+        self.args = create_args(dim,c,num_layers,bias,act,alpha,select_manifold,num_centroid,eucl_vars,hyp_vars,grad_clip,optimizer,weight_decay,lr,lr_scheduler,lr_gamma,lr_hyperbolic,hyper_optimizer,proj_init,tie_weight,epochs,patience,seed,log_freq,eval_freq,val_prop,test_prop,double_precision,dropout,normalize_adj,normalize_feats)
+        
+        set_seed(self.args.seed)
+        self.logger = create_logger()
+        if self.args.select_manifold == 'lorentz':
+            self.args.dim += 1  # the Lorentz model needs one extra ambient dimension
+            self.manifold = LorentzManifold(self.args, self.logger)
+        elif self.args.select_manifold == 'poincare':
+            self.manifold = PoincareManifold(self.args, self.logger)
+        rgnn = RiemannianGNN(self.args, self.logger, self.manifold)
+        self.gnn = NodeClassificationTask(self.args, self.logger, rgnn, self.manifold, adj,features,labels)
+    
+    def fit(self):
+        return self.gnn.run_gnn()
+
+    def predict(self):
+        return self.gnn.evaluate(self.gnn.loader, 'test', self.gnn.model, self.gnn.loss_function)
+
+    def save_embeddings(self):
+        labels = np.argmax(th.squeeze(self.gnn.labels).numpy(), axis=1)
+        for_classification_hyp = np.hstack((self.gnn.early_stop.best_emb.cpu().detach().numpy(), labels.reshape(-1, 1)))
+        hyp_file_path = os.path.join(os.getcwd(), 'hgnn_embeddings_hyp.csv')
+        np.savetxt(hyp_file_path, for_classification_hyp, delimiter=',')
\ No newline at end of file
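+
+# Usage sketch (illustrative; adj, features, labels are numpy arrays shaped as in
+# the other wrappers):
+#
+#   model = HGNN(adj, features, labels, dim=32)
+#   losses, acc, f1, rec, prec, auc, elapsed = model.fit()
+#   model.predict()  # evaluates on the test split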
diff --git a/HGNN/hyperbolic_module/CentroidDistance.py b/HGNN/hyperbolic_module/CentroidDistance.py
new file mode 100644
index 0000000000000000000000000000000000000000..9d868cb98ea1d10da977fd7bb8b22d9f0cfb0853
--- /dev/null
+++ b/HGNN/hyperbolic_module/CentroidDistance.py
@@ -0,0 +1,54 @@
+import torch as th
+import torch.nn as nn
+import torch.nn.functional as F
+from Ghypeddings.HGNN.utils import *
+
+class CentroidDistance(nn.Module):
+	"""
+	Implement a model that calculates the pairwise distances between node representations
+	and centroids
+	"""
+	def __init__(self, args, logger, manifold):
+		super(CentroidDistance, self).__init__()
+		self.args = args
+		self.logger = logger
+		self.manifold = manifold
+
+		# centroid embedding
+		self.centroid_embedding = nn.Embedding(
+			args.num_centroid, args.dim,
+			sparse=False,
+			scale_grad_by_freq=False,
+		)
+		self.manifold.init_embed(self.centroid_embedding)
+		args.hyp_vars.append(self.centroid_embedding)
+
+	def forward(self, node_repr, mask):
+		"""
+		Args:
+			node_repr: [node_num, embed_size]
+			mask: [node_num, 1] 1 denote real node, 0 padded node
+		return:
+			graph_centroid_dist: [1, num_centroid]
+			node_centroid_dist: [1, node_num, num_centroid]
+		"""
+		node_num = node_repr.size(0)
+
+		# broadcast and reshape node_repr to [node_num * num_centroid, embed_size]
+		node_repr =  node_repr.unsqueeze(1).expand(
+												-1,
+												self.args.num_centroid,
+												-1).contiguous().view(-1, self.args.dim)
+
+		# broadcast and reshape centroid embeddings to [node_num * num_centroid, embed_size]
+		centroid_repr = self.centroid_embedding(th.arange(self.args.num_centroid).cuda())
+		centroid_repr = centroid_repr.unsqueeze(0).expand(
+												node_num,
+												-1,
+												-1).contiguous().view(-1, self.args.dim)
+		# get distance
+		node_centroid_dist = self.manifold.distance(node_repr, centroid_repr)
+		node_centroid_dist = node_centroid_dist.view(1, node_num, self.args.num_centroid) * mask
+		# average pooling over nodes
+		graph_centroid_dist = th.sum(node_centroid_dist, dim=1) / th.sum(mask)
+		return graph_centroid_dist, node_centroid_dist
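+
+# Call sketch (illustrative): with node_repr [N, dim] on the manifold and
+# mask [N, 1]:
+#
+#   graph_dist, node_dist = centroid_distance(node_repr, mask)
+#   # graph_dist: [1, num_centroid]; node_dist: [1, N, num_centroid]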
diff --git a/HGNN/hyperbolic_module/PoincareDistance.py b/HGNN/hyperbolic_module/PoincareDistance.py
new file mode 100644
index 0000000000000000000000000000000000000000..4bc423409e286c13382d0a76bd97931f1c840a54
--- /dev/null
+++ b/HGNN/hyperbolic_module/PoincareDistance.py
@@ -0,0 +1,38 @@
+import torch as th
+from torch.autograd import Function
+import torch.nn as nn
+import numpy as np
+from torch.autograd import Function, Variable
+
+class PoincareDistance(Function):
+    @staticmethod
+    def grad(x, v, sqnormx, sqnormv, sqdist, eps):
+        alpha = (1 - sqnormx)
+        beta = (1 - sqnormv)
+        z = 1 + 2 * sqdist / (alpha * beta)
+        a = ((sqnormv - 2 * th.sum(x * v, dim=-1) + 1) / th.pow(alpha, 2))\
+            .unsqueeze(-1).expand_as(x)
+        a = a * x - v / alpha.unsqueeze(-1).expand_as(v)
+        z = th.sqrt(th.pow(z, 2) - 1)
+        z = th.clamp(z * beta, min=eps).unsqueeze(-1)
+        return 4 * a / z.expand_as(x)
+
+    @staticmethod
+    def forward(ctx, u, v, eps):
+        squnorm = th.clamp(th.sum(u * u, dim=-1), 0, 1 - eps)
+        sqvnorm = th.clamp(th.sum(v * v, dim=-1), 0, 1 - eps)
+        sqdist = th.sum(th.pow(u - v, 2), dim=-1)
+        ctx.eps = eps
+        ctx.save_for_backward(u, v, squnorm, sqvnorm, sqdist)
+        x = sqdist / ((1 - squnorm) * (1 - sqvnorm)) * 2 + 1
+        # arcosh
+        z = th.sqrt(th.pow(x, 2) - 1)
+        return th.log(x + z)
+
+    @staticmethod
+    def backward(ctx, g):
+        u, v, squnorm, sqvnorm, sqdist = ctx.saved_tensors
+        g = g.unsqueeze(-1)
+        gu = PoincareDistance.grad(u, v, squnorm, sqvnorm, sqdist, ctx.eps)
+        gv = PoincareDistance.grad(v, u, sqvnorm, squnorm, sqdist, ctx.eps)
+        return g.expand_as(gu) * gu, g.expand_as(gv) * gv, None
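+
+# forward() evaluates the closed-form Poincare distance
+#   d(u, v) = arcosh(1 + 2 * ||u - v||^2 / ((1 - ||u||^2) * (1 - ||v||^2))),
+# expanding arcosh(x) as log(x + sqrt(x^2 - 1)); the clamps in forward() keep the
+# squared norms strictly below 1 so the denominator stays positive.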
diff --git a/HGNN/hyperbolic_module/__init__.py b/HGNN/hyperbolic_module/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/HGNN/manifold/LorentzManifold.py b/HGNN/manifold/LorentzManifold.py
new file mode 100644
index 0000000000000000000000000000000000000000..c6e9bbcc36dc0769a486fa887b472a07f1ab1492
--- /dev/null
+++ b/HGNN/manifold/LorentzManifold.py
@@ -0,0 +1,165 @@
+import torch as th
+import torch.nn as nn
+import numpy as np
+from torch.autograd import Function, Variable
+from Ghypeddings.HGNN.utils import *
+
+_eps = 1e-10
+
+class LorentzManifold:
+
+    def __init__(self, args, logger, eps=1e-3, norm_clip=1, max_norm=1e3):
+        self.args = args
+        self.logger = logger
+        self.eps = eps
+        self.norm_clip = norm_clip
+        self.max_norm = max_norm
+
+    @staticmethod
+    def ldot(u, v, keepdim=False):
+        """
+        Lorentzian Scalar Product
+        Args:
+            u: [batch_size, d + 1]
+            v: [batch_size, d + 1]
+        Return:
+            keepdim: False [batch_size]
+            keepdim: True  [batch_size, 1]
+        """
+        d = u.size(1) - 1
+        uv = u * v
+        uv = th.cat((-uv.narrow(1, 0, 1), uv.narrow(1, 1, d)), dim=1)
+        return th.sum(uv, dim=1, keepdim=keepdim)
+
+    def from_lorentz_to_poincare(self, x):
+        """
+        Args:
+            x: [batch_size, d + 1]
+        """
+        d = x.size(-1) - 1
+        return x.narrow(-1, 1, d) / (x.narrow(-1, 0, 1) + 1)
+
+    def from_poincare_to_lorentz(self, x):
+        """
+        Args:
+            x: [batch_size, d]
+        """
+        x_norm_square = th_dot(x, x)
+        return th.cat((1 + x_norm_square, 2 * x), dim=1) / (1 - x_norm_square + self.eps)
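+
+    # Round-trip sketch (illustrative): the hyperboloid origin x = [[1., 0.]]
+    # maps to the Poincare origin [[0.]] and back to ~[[1., 0.]], up to self.eps.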
+
+    def distance(self, u, v):
+        d = -LorentzDot.apply(u, v)
+        return Acosh.apply(d, self.eps)
+
+    def normalize(self, w):
+        """
+        Normalize vector such that it is located on the hyperboloid
+        Args:
+            w: [batch_size, d + 1]
+        """
+        d = w.size(-1) - 1
+        narrowed = w.narrow(-1, 1, d)
+        if self.max_norm:
+            narrowed = th.renorm(narrowed.view(-1, d), 2, 0, self.max_norm)
+        first = 1 + th.sum(th.pow(narrowed, 2), dim=-1, keepdim=True)
+        first = th.sqrt(first)
+        return th.cat((first, narrowed), dim=1)
+
+    def init_embed(self, embed, irange=1e-2):
+        embed.weight.data.uniform_(-irange, irange)
+        embed.weight.data.copy_(self.normalize(embed.weight.data))
+
+    def rgrad(self, p, d_p):
+        """Riemannian gradient for hyperboloid"""
+        u = d_p
+        x = p
+        u.narrow(-1, 0, 1).mul_(-1)
+        u.addcmul_(self.ldot(x, u, keepdim=True).expand_as(x), x)
+        return d_p
+
+    def exp_map_zero(self, v):
+        zeros = th.zeros_like(v)
+        zeros[:, 0] = 1
+        return self.exp_map_x(zeros, v)
+
+    def exp_map_x(self, p, d_p, d_p_normalize=True, p_normalize=True):
+        if d_p_normalize:
+            d_p = self.normalize_tan(p, d_p)
+
+        ldv = self.ldot(d_p, d_p, keepdim=True)
+        nd_p = th.sqrt(th.clamp(ldv + self.eps, _eps))
+
+        t = th.clamp(nd_p, max=self.norm_clip)
+        newp = (th.cosh(t) * p) + (th.sinh(t) * d_p / nd_p)
+
+        if p_normalize:
+            newp = self.normalize(newp)
+        return newp
+
+    def normalize_tan(self, x_all, v_all):
+        d = v_all.size(1) - 1
+        x = x_all.narrow(1, 1, d)
+        xv = th.sum(x * v_all.narrow(1, 1, d), dim=1, keepdim=True)
+        tmp = 1 + th.sum(th.pow(x_all.narrow(1, 1, d), 2), dim=1, keepdim=True)
+        tmp = th.sqrt(tmp)
+        return th.cat((xv / tmp, v_all.narrow(1, 1, d)), dim=1)
+
+    def log_map_zero(self, y, i=-1):
+        zeros = th.zeros_like(y)
+        zeros[:, 0] = 1
+        return self.log_map_x(zeros, y)
+
+    def log_map_x(self, x, y, normalize=False):
+        """Logarithmic map on the Lorentz Manifold"""
+        xy = self.ldot(x, y).unsqueeze(-1)
+        tmp = th.sqrt(th.clamp(xy * xy - 1 + self.eps, _eps))
+        v = Acosh.apply(-xy, self.eps) / tmp * th.addcmul(y, xy, x)
+        if normalize:
+            result = self.normalize_tan(x, v)
+        else:
+            result = v
+        return result
+
+    def parallel_transport(self, x, y, v):
+        """Parallel transport for hyperboloid"""
+        v_ = v
+        x_ = x
+        y_ = y
+
+        xy = self.ldot(x_, y_, keepdim=True).expand_as(x_)
+        vy = self.ldot(v_, y_, keepdim=True).expand_as(x_)
+        vnew = v_ + vy / (1 - xy) * (x_ + y_)
+        return vnew
+
+    def metric_tensor(self, x, u, v):
+        return self.ldot(u, v, keepdim=True)
+
+class LorentzDot(Function):
+    @staticmethod
+    def forward(ctx, u, v):
+        ctx.save_for_backward(u, v)
+        return LorentzManifold.ldot(u, v)
+
+    @staticmethod
+    def backward(ctx, g):
+        u, v = ctx.saved_tensors
+        g = g.unsqueeze(-1).expand_as(u).clone()
+        g.narrow(-1, 0, 1).mul_(-1)
+        return g * v, g * u
+
+class Acosh(Function):
+    @staticmethod
+    def forward(ctx, x, eps):
+        z = th.sqrt(th.clamp(x * x - 1 + eps, _eps))
+        ctx.save_for_backward(z)
+        ctx.eps = eps
+        return th.log(x + z)
+
+    @staticmethod
+    def backward(ctx, g):
+        z, = ctx.saved_tensors
+        z = th.clamp(z, min=ctx.eps)
+        z = g / z
+        return z, None
diff --git a/HGNN/manifold/PoincareManifold.py b/HGNN/manifold/PoincareManifold.py
new file mode 100644
index 0000000000000000000000000000000000000000..0a3c97c31eb609a62ffb675fcad4e865ef048fe1
--- /dev/null
+++ b/HGNN/manifold/PoincareManifold.py
@@ -0,0 +1,112 @@
+import torch as th
+import torch.nn as nn
+import numpy as np
+from torch.autograd import Function, Variable
+from Ghypeddings.HGNN.hyperbolic_module.PoincareDistance import PoincareDistance
+from Ghypeddings.HGNN.utils import *
+
+class PoincareManifold:
+
+    def __init__(self, args, logger, EPS=1e-5, PROJ_EPS=1e-5):
+        self.args = args
+        self.logger = logger
+        self.EPS = EPS
+        self.PROJ_EPS = PROJ_EPS
+        self.tanh = nn.Tanh()
+
+    def normalize(self, x):
+        return clip_by_norm(x, (1. - self.PROJ_EPS))
+
+    def init_embed(self, embed, irange=1e-2):
+        embed.weight.data.uniform_(-irange, irange)
+        embed.weight.data.copy_(self.normalize(embed.weight.data))
+
+    def mob_add(self, u, v):
+        """
+        Add two vectors in hyperbolic space
+        """
+        v = v + self.EPS
+        th_dot_u_v = 2. * th_dot(u, v)
+        th_norm_u_sq = th_dot(u, u)
+        th_norm_v_sq = th_dot(v, v)
+        denominator = 1. + th_dot_u_v + th_norm_v_sq * th_norm_u_sq
+        result = (1. + th_dot_u_v + th_norm_v_sq) / (denominator + self.EPS) * u + \
+                 (1. - th_norm_u_sq) / (denominator + self.EPS) * v
+        return self.normalize(result)
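+
+    # Sanity sketch (illustrative): mob_add(0, v) ~= v up to EPS, e.g.
+    #   u = th.zeros(1, 2); v = th.tensor([[0.3, 0.4]])
+    #   self.mob_add(u, v)  # ~[[0.3, 0.4]]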
+
+    def distance(self, u, v):
+        return PoincareDistance.apply(u, v, 1e-5)
+
+    def lambda_x(self, x):
+        """
+        A conformal factor
+        """
+        return 2. / (1 - th_dot(x, x))
+
+    def log_map_zero(self, y):
+        # log_0(y) = artanh(||y||) * y / ||y||; consistent with log_map_x at x = 0
+        # (where lambda_x = 2) and the inverse of exp_map_zero
+        diff = y + self.EPS
+        norm_diff = th_norm(diff)
+        return th_atanh(norm_diff, self.EPS) / norm_diff * diff
+
+    def log_map_x(self, x, y):
+        diff = self.mob_add(-x, y) + self.EPS
+        norm_diff = th_norm(diff)
+        lam = self.lambda_x(x)
+        return (( 2. / lam) * th_atanh(norm_diff, self.EPS) / norm_diff) * diff
+
+    def metric_tensor(self, x, u, v):
+        """
+        The metric tensor in hyperbolic space.
+        In-place operations for saving memory. (do not use this function in forward calls)
+        """
+        u_dot_v = th_dot(u, v)
+        lambda_x = self.lambda_x(x)
+        lambda_x *= lambda_x
+        lambda_x *= u_dot_v
+        return lambda_x
+
+    def exp_map_zero(self, v):
+        """
+        Exp map from tangent space of zero to hyperbolic space
+        Args:
+            v: [batch_size, *] in tangent space
+        """
+        v = v + self.EPS
+        norm_v = th_norm(v) # [batch_size, 1]
+        result = self.tanh(norm_v) / (norm_v) * v
+        return self.normalize(result)
+
+    def exp_map_x(self, x, v):
+        """
+        Exp map from tangent space of x to hyperbolic space
+        """
+        v = v + self.EPS # Perturbe v to avoid dealing with v = 0
+        norm_v = th_norm(v)
+        second_term = (self.tanh(self.lambda_x(x) * norm_v / 2) / norm_v) * v
+        return self.normalize(self.mob_add(x, second_term))
+
+    def gyr(self, u, v, w):
+        u_norm = th_dot(u, u)
+        v_norm = th_dot(v, v)
+        u_dot_w = th_dot(u, w)
+        v_dot_w = th_dot(v, w)
+        u_dot_v = th_dot(u, v)
+        A = - u_dot_w * v_norm + v_dot_w + 2 * u_dot_v * v_dot_w
+        B = - v_dot_w * u_norm - u_dot_w
+        D = 1 + 2 * u_dot_v + u_norm * v_norm
+        return w + 2 * (A * u + B * v) / (D + self.EPS)
+
+    def parallel_transport(self, src, dst, v):
+        return self.lambda_x(src) / th.clamp(self.lambda_x(dst), min=self.EPS) * self.gyr(dst, -src, v)
+
+    def rgrad(self, p, d_p):
+        """
+        Function to compute Riemannian gradient from the
+        Euclidean gradient in the Poincare ball.
+        Args:
+            p (Tensor): Current point in the ball
+            d_p (Tensor): Euclidean gradient at p
+        """
+        p_sqnorm = th.sum(p.data ** 2, dim=-1, keepdim=True)
+        d_p = d_p * ((1 - p_sqnorm) ** 2 / 4.0).expand_as(d_p)
+        return d_p
diff --git a/HGNN/manifold/__init__.py b/HGNN/manifold/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..ada909b5c9f1f0e0467f0b7b368874b627cf3751
--- /dev/null
+++ b/HGNN/manifold/__init__.py
@@ -0,0 +1,2 @@
+from Ghypeddings.HGNN.manifold.PoincareManifold import *
+from Ghypeddings.HGNN.manifold.LorentzManifold import *
diff --git a/HGNN/optimizer/__init__.py b/HGNN/optimizer/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/HGNN/optimizer/ramsgrad.py b/HGNN/optimizer/ramsgrad.py
new file mode 100644
index 0000000000000000000000000000000000000000..c51d3d7cae72d995edf555dd36e2535770e14708
--- /dev/null
+++ b/HGNN/optimizer/ramsgrad.py
@@ -0,0 +1,74 @@
+"""
+Implement a AMSGrad: https://openreview.net/pdf?id=r1eiqi09K7
+"""
+import torch as th
+from torch.optim.optimizer import Optimizer, required
+import os
+import math
+import numpy as np
+
+class RiemannianAMSGrad(Optimizer):
+    """
+    Riemannian AMS gradient descent.
+    Args:
+        params (iterable): iterable of parameters to optimize or dicts defining
+            parameter groups
+        lr (float): learning rate
+    """
+
+    def __init__(self, args, manifold,params, lr, betas=(0.9, 0.99), eps=1e-8):
+        self.args = args
+        self.manifold = manifold
+        defaults = dict(lr=lr, betas=betas, eps=eps)
+        super(RiemannianAMSGrad, self).__init__(params, defaults)
+
+    def step(self, lr=None):
+        """Performs a single optimization step.
+        Arguments:
+            lr (float, optional): learning rate for the current update.
+        """
+        loss = None
+        with th.no_grad():
+            for group in self.param_groups:
+                for p in group['params']:
+                    if p.grad is None:
+                        continue
+                    grad = p.grad.data
+                    grad = self.manifold.rgrad(p, grad)
+                    if lr is None:
+                        lr = group['lr']
+
+                    state = self.state[p]
+
+                    # State initialization
+                    if len(state) == 0:
+                        state['step'] = 0
+                        state['tau'] = th.zeros_like(p.data)
+                        # Exponential moving average of gradient values
+                        state['exp_avg'] = th.zeros_like(p.data)
+                        # Exponential moving average of squared gradient values
+                        state['exp_avg_sq'] = th.zeros_like(p.data)
+                        # Maintains max of all exp. moving avg. of sq. grad. values
+                        state['max_exp_avg_sq'] = th.zeros_like(p.data)
+
+                    exp_avg, exp_avg_sq, tau, max_exp_avg_sq = \
+                    			state['exp_avg'], state['exp_avg_sq'], state['tau'], state['max_exp_avg_sq']
+
+                    beta1, beta2 = group['betas']
+
+                    state['step'] += 1
+
+                    # Decay the first and second moment running average coefficients;
+                    # the first moment restarts from tau, the transported average
+                    exp_avg.data = beta1 * tau + (1 - beta1) * grad
+                    exp_avg_sq.mul_(beta2).add_(self.manifold.metric_tensor(p, grad, grad), alpha=1 - beta2)
+                    th.max(max_exp_avg_sq, exp_avg_sq, out=max_exp_avg_sq)
+                    # Use the max. for normalizing running avg. of gradient
+                    denom = max_exp_avg_sq.sqrt().clamp_(min=group['eps'])
+
+                    step_size = lr  # honor the per-call lr override when one is given
+
+                    p_original = p.clone()
+                    before_proj = self.manifold.exp_map_x(p, (-step_size * exp_avg).div_(denom))
+                    p.data = self.manifold.normalize(before_proj)
+                    tau.data = self.manifold.parallel_transport(p_original, p, exp_avg)
+            return loss
diff --git a/HGNN/optimizer/rsgd.py b/HGNN/optimizer/rsgd.py
new file mode 100644
index 0000000000000000000000000000000000000000..14da1fe8e2f72ae731947ea4ffab607626865c6b
--- /dev/null
+++ b/HGNN/optimizer/rsgd.py
@@ -0,0 +1,43 @@
+import torch as th
+from torch.optim.optimizer import Optimizer, required
+from Ghypeddings.HGNN.utils import *
+import os
+import math
+
+class RiemannianSGD(Optimizer):
+    """Riemannian stochastic gradient descent.
+    Args:
+        params (iterable): iterable of parameters to optimize or dicts defining
+            parameter groups
+        rgrad (Function): Function to compute the Riemannian gradient from
+            an Euclidean gradient
+        retraction (Function): Function to update the parameters via a
+            retraction of the Riemannian gradient
+        lr (float): learning rate
+    """
+
+    def __init__(self, args, params, lr):
+        defaults = dict(lr=lr)
+        self.args = args
+        super(RiemannianSGD, self).__init__(params, defaults)
+
+    def step(self, lr=None):
+        """
+        Performs a single optimization step.
+        Arguments:
+            lr (float, optional): learning rate for the current update.
+        """
+        loss = None
+
+        for group in self.param_groups:
+            for p in group['params']:
+                if p.grad is None:
+                    continue
+                d_p = p.grad.data
+                d_p = self.args.manifold.rgrad(p, d_p)
+                if lr is None:
+                    lr = group['lr']
+                p.data = self.args.manifold.normalize(
+                            self.args.manifold.exp_map_x(p, -lr * d_p)
+                         )
+        return loss
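+
+# Update sketch (illustrative): each step computes
+#   p <- normalize(exp_map_x(p, -lr * rgrad(p, grad))),
+# i.e. the Euclidean gradient is converted to a Riemannian one and the step is
+# retracted back onto the manifold.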
diff --git a/HGNN/task/BaseTask.py b/HGNN/task/BaseTask.py
new file mode 100644
index 0000000000000000000000000000000000000000..2486800e402e2e1bc8a5b44559f5c259b53b605c
--- /dev/null
+++ b/HGNN/task/BaseTask.py
@@ -0,0 +1,43 @@
+import numpy as np
+from Ghypeddings.HGNN.utils import *
+import torch as th
+import torch.nn as nn
+from torch.utils.data import Dataset, DataLoader
+import torch.optim as optim
+import torch.distributed as dist
+from torch.utils.data.distributed import DistributedSampler
+
+class BaseTask(object):
+	"""
+	A base class that supports loading datasets, early stop and reporting statistics
+	"""
+	def __init__(self, args, logger, criterion='max'):
+		"""
+		criterion: min/max
+		"""
+		self.args = args
+		self.logger = logger
+		self.early_stop = EarlyStoppingCriterion(self.args.patience, criterion)
+
+	def reset_epoch_stats(self, epoch, prefix):
+		"""
+		prefix: train/dev/test
+		"""
+		self.epoch_stats = {
+			'prefix': prefix,
+			'epoch': epoch,
+			'loss': 0,
+			'num_correct': 0,
+			'num_total': 0,
+		}
+
+	def update_epoch_stats(self, loss, score, label, is_regression=False):
+		with th.no_grad():
+			self.epoch_stats['loss'] += loss.item()
+			self.epoch_stats['num_total'] += label.size(0)
+			if not is_regression:
+				self.epoch_stats['num_correct'] += th.sum(th.eq(th.argmax(score, dim=1), label)).item()
+	
+	def report_best(self):
+		self.logger.info("best val %.6f" 
+			% (self.early_stop.best_dev_score))
diff --git a/HGNN/task/NodeClassification.py b/HGNN/task/NodeClassification.py
new file mode 100644
index 0000000000000000000000000000000000000000..fd2ed241331ad6e1971dd5fdf441a6c6034e7f21
--- /dev/null
+++ b/HGNN/task/NodeClassification.py
@@ -0,0 +1,50 @@
+import torch as th
+import torch.nn as nn
+import torch.nn.functional as F
+from Ghypeddings.HGNN.utils import * 
+from Ghypeddings.HGNN.hyperbolic_module.CentroidDistance import CentroidDistance
+
+class NodeClassification(nn.Module):
+
+	def __init__(self, args, logger, rgnn, manifold):
+		super(NodeClassification, self).__init__()
+		self.args = args
+		self.logger = logger
+		self.manifold = manifold
+		self.c = nn.Parameter(th.Tensor([1.]))
+
+		self.feature_linear = nn.Linear(self.args.input_dim,
+										self.args.dim
+							  )
+		nn_init(self.feature_linear, self.args.proj_init)
+		self.args.eucl_vars.append(self.feature_linear)			
+
+		self.distance = CentroidDistance(args, logger, manifold)
+
+		self.rgnn = rgnn
+		self.output_linear = nn.Linear(self.args.num_centroid,
+										self.args.num_class
+							  )
+		nn_init(self.output_linear, self.args.proj_init)
+		self.args.eucl_vars.append(self.output_linear)
+
+		self.log_softmax = nn.LogSoftmax(dim=1)
+		self.activation = get_activation(self.args)
+
+	def forward(self, adj, weight, features):
+		"""
+		Args:
+			adj: the neighbor ids of each node [1, node_num, max_neighbor]
+			weight: the weight of each neighbor [1, node_num, max_neighbor]
+			features: [1, node_num, input_dim]
+		"""
+		assert adj.size(0) == 1
+		adj, weight, features = adj.squeeze(0), weight.squeeze(0), features.squeeze(0)
+		node_repr = self.activation(self.feature_linear(features))
+		assert not th.isnan(node_repr).any().item()
+		mask = th.ones((self.args.node_num, 1)).cuda() # [node_num, 1]
+		node_repr = self.rgnn(node_repr, adj, weight, mask) # [node_num, embed_size]
+
+		_, node_centroid_sim = self.distance(node_repr, mask) # [1, node_num, num_centroid]
+		class_logit = self.output_linear(node_centroid_sim.squeeze())
+		return self.log_softmax(class_logit) , node_repr
\ No newline at end of file
diff --git a/HGNN/task/NodeClassificationTask.py b/HGNN/task/NodeClassificationTask.py
new file mode 100644
index 0000000000000000000000000000000000000000..cdf9fc41662155c27661e355acca7d254ef59c3e
--- /dev/null
+++ b/HGNN/task/NodeClassificationTask.py
@@ -0,0 +1,136 @@
+import torch as th
+import torch.nn as nn
+import torch.nn.functional as F
+from Ghypeddings.HGNN.utils import * 
+from torch.utils.data import DataLoader
+import torch.optim as optim
+from Ghypeddings.HGNN.task.BaseTask import BaseTask
+import numpy as np
+from Ghypeddings.HGNN.dataset.NodeClassificationDataset import NodeClassificationDataset
+from Ghypeddings.HGNN.task.NodeClassification import NodeClassification
+import time
+from sklearn.metrics import roc_auc_score,accuracy_score,f1_score,precision_score,recall_score
+
+def cross_entropy(log_prob, label, mask):
+	label, mask = label.squeeze(), mask.squeeze()
+	negative_log_prob = -th.sum(label * log_prob, dim=1)
+	return th.sum(mask * negative_log_prob, dim=0) / th.sum(mask)
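+
+# With one-hot labels, -sum(label * log_prob, dim=1) is the negative log-likelihood
+# of the true class per node; the mask restricts the average to the current split.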
+
+def get_accuracy(label, log_prob, mask):
+	lab = label.clone()
+	lab = lab.squeeze()
+	mask_copy = mask.clone().cpu().numpy()[0].astype(np.bool_)
+	pred_class = th.argmax(log_prob, dim=1).cpu().numpy()[mask_copy]
+	real_class = th.argmax(lab, dim=1).cpu().numpy()[mask_copy]
+	acc= accuracy_score(y_true=real_class,y_pred=pred_class)
+	f1= f1_score(y_true=real_class,y_pred=pred_class)
+	recall= recall_score(y_true=real_class,y_pred=pred_class)
+	precision= precision_score(y_true=real_class,y_pred=pred_class)
+	roc_auc = roc_auc_score(real_class,pred_class)	
+	return acc,f1,recall,precision,roc_auc
+
+class NodeClassificationTask(BaseTask):
+
+	def __init__(self, args, logger, rgnn, manifold,adj,features,labels):
+		super(NodeClassificationTask, self).__init__(args, logger, criterion='max')
+		self.args = args
+		self.logger = logger
+		self.manifold = manifold
+		self.hyperbolic = True
+		self.rgnn = rgnn
+		self.loader = self.process_data(adj,features,labels)
+		self.model = NodeClassification(self.args, self.logger, self.rgnn, self.manifold).cuda()
+		self.loss_function = cross_entropy
+
+	def forward(self, model, sample, loss_function):
+		scores, embeddings = model(
+			sample['adj'].cuda().long(),
+			sample['weight'].cuda().float(),
+			sample['features'].cuda().float(),
+		)
+		loss = loss_function(scores,
+							 sample['y_train'].cuda().float(),
+							 sample['train_mask'].cuda().float())
+		return scores, loss, embeddings
+
+	def run_gnn(self):
+		loader = self.loader
+		model = self.model
+		loss_function = self.loss_function
+		
+		self.args.manifold = self.manifold
+		optimizer, lr_scheduler, hyperbolic_optimizer, hyperbolic_lr_scheduler = \
+								set_up_optimizer_scheduler(self.hyperbolic, self.args, model,self.manifold)
+		self.labels = None
+		
+		best_losses = []
+		real_losses = []
+
+		early_stopping = False
+		t_total = time.time()
+		for epoch in range(self.args.epochs):
+			model.train()
+			for i, sample in enumerate(loader):
+				model.zero_grad()
+				scores, loss, embeddings = self.forward(model, sample, loss_function)
+				loss.backward()
+				if self.args.grad_clip > 0.0:
+					th.nn.utils.clip_grad_norm_(model.parameters(), self.args.grad_clip)
+				optimizer.step()
+				if self.hyperbolic and len(self.args.hyp_vars) != 0:
+					hyperbolic_optimizer.step()
+				self.labels = sample['y_train']
+				accuracy,f1,recall,precision,roc_auc = get_accuracy(
+									sample['y_train'].cuda().float(), 
+									scores, 
+									sample['train_mask'].cuda().float())
+			
+				real_losses.append(loss.item())
+				if len(best_losses) == 0:
+					best_losses.append(real_losses[0])
+				elif best_losses[-1] > real_losses[-1]:
+					best_losses.append(real_losses[-1])
+				else:
+					best_losses.append(best_losses[-1])
+
+				if (epoch + 1) % self.args.log_freq == 0:
+					self.logger.info("%s epoch %d: accuracy %.4f f1 %.4f recall %.4f precision %.4f roc_auc %.4f loss: %.4f \n" % (
+						'train', 
+						epoch, 
+						accuracy,f1,recall,precision,roc_auc,loss.item()))
+					
+				dev_loss, accuracy, f1, recall, precision, roc_auc = self.evaluate(loader, 'val', model, loss_function)
+
+				lr_scheduler.step()
+
+				if self.hyperbolic and len(self.args.hyp_vars) != 0:
+					hyperbolic_lr_scheduler.step()
+				if not self.early_stop.step(dev_loss, epoch, embeddings):
+					early_stopping = True
+					break
+			if early_stopping:
+				break
+
+		self.logger.info("Training Finished!")
+		self.logger.info("Total time elapsed: {:.4f}s".format(time.time() - t_total))
+
+		return {'real':real_losses,'best':best_losses}, accuracy,f1,recall,precision,roc_auc,time.time() - t_total
+			
+	def evaluate(self, data_loader, prefix, model, loss_function):
+		model.eval()
+		with th.no_grad():
+			for i, sample in enumerate(data_loader):
+				scores, loss, _ = self.forward(model, sample, loss_function)
+				if prefix == 'val':
+					accuracy,f1,recall,precision,roc_auc = get_accuracy(
+									sample['y_val'].cuda().float(), 
+									scores, 
+									sample['val_mask'].cuda().float())
+				elif prefix == 'test':
+					accuracy,f1,recall,precision,roc_auc = get_accuracy(
+									sample['y_test'].cuda().float(), 
+									scores, 
+									sample['test_mask'].cuda().float())
+				
+		return loss.item(), accuracy,f1,recall,precision,roc_auc
+
+	def process_data(self,adj,features,labels):
+		dataset = NodeClassificationDataset(self.args, self.logger,adj,features,labels)
+		return DataLoader(dataset, batch_size=1, shuffle=False, num_workers=0)
diff --git a/HGNN/task/__init__.py b/HGNN/task/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e4bd57382df4059527d0f60eeb0b175ad5c5d2f1
--- /dev/null
+++ b/HGNN/task/__init__.py
@@ -0,0 +1 @@
+from Ghypeddings.HGNN.task.NodeClassificationTask import *
\ No newline at end of file
diff --git a/HGNN/utils/EarlyStoppingCriterion.py b/HGNN/utils/EarlyStoppingCriterion.py
new file mode 100644
index 0000000000000000000000000000000000000000..7e57381bd8bce016e7d9d9e0f6739e725d7dc521
--- /dev/null
+++ b/HGNN/utils/EarlyStoppingCriterion.py
@@ -0,0 +1,51 @@
+class EarlyStoppingCriterion(object):
+    """
+    Arguments:
+        patience (int): the maximum number of epochs with no improvement before early stopping takes place
+        mode (str): whether the score is to be maximised ('max') or minimised ('min')
+        min_delta (float, optional): minimum change in the score to qualify as an improvement (default: 0.0)
+    """
+
+    def __init__(self, patience, mode, min_delta=0.0):
+        assert patience >= 0
+        assert mode in {'min', 'max'}
+        assert min_delta >= 0.0
+        self.patience = patience
+        self.mode = mode
+        self.min_delta = min_delta
+
+        self._count = 0
+        self.best_dev_score = None
+        self.best_epoch = None
+        self.is_improved = None
+        self.best_emb = None
+
+    def step(self, cur_dev_score, epoch, embeddings):
+        """
+        Checks if training should be continued given the current score.
+
+        Arguments:
+            cur_dev_score (float): the current development score
+            epoch (int): the current epoch
+            embeddings: the current node embeddings, kept whenever the score improves
+        Output:
+            bool: True if training should be continued
+        """
+        if self.best_dev_score is None:
+            self.best_dev_score = cur_dev_score
+            self.best_epoch = epoch
+            self.best_emb = embeddings
+            return True
+        else:
+            if self.mode == 'max':
+                self.is_improved = (cur_dev_score > self.best_dev_score + self.min_delta)
+            else:
+                self.is_improved = (cur_dev_score < self.best_dev_score - self.min_delta)
+
+            if self.is_improved:
+                self._count = 0
+                self.best_dev_score = cur_dev_score
+                self.best_epoch = epoch
+                self.best_emb = embeddings
+            else:
+                self._count += 1
+            return self._count <= self.patience
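+
+# Minimal usage sketch (illustrative only; in this package the criterion is
+# driven from the training loop in NodeClassificationTask):
+#   stopper = EarlyStoppingCriterion(patience=10, mode='min')
+#   for epoch in range(num_epochs):
+#       dev_loss, embeddings = evaluate_on_validation(model)  # hypothetical helper
+#       if not stopper.step(dev_loss, epoch, embeddings):
+#           break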
diff --git a/HGNN/utils/__init__.py b/HGNN/utils/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..b3da1a96d729c4cfe3eb9edc1a122debcdd215aa
--- /dev/null
+++ b/HGNN/utils/__init__.py
@@ -0,0 +1,3 @@
+from Ghypeddings.HGNN.utils.utils import *
+from Ghypeddings.HGNN.utils.EarlyStoppingCriterion import EarlyStoppingCriterion
+from Ghypeddings.HGNN.utils.logger import *
diff --git a/HGNN/utils/logger.py b/HGNN/utils/logger.py
new file mode 100644
index 0000000000000000000000000000000000000000..6f55e772da8da3784679cb1499dcdf6a13368759
--- /dev/null
+++ b/HGNN/utils/logger.py
@@ -0,0 +1,54 @@
+import logging
+import time
+from datetime import timedelta
+from Ghypeddings.HGNN.utils import make_dir
+
+class LogFormatter():
+
+    def __init__(self):
+        self.start_time = time.time()
+
+    def format(self, record):
+        elapsed_seconds = round(record.created - self.start_time)
+
+        prefix = "%s - %s - %s" % (
+            record.levelname,
+            time.strftime('%x %X'),
+            timedelta(seconds=elapsed_seconds)
+        )
+        message = record.getMessage()
+        message = message.replace('\n', '\n' + ' ' * (len(prefix) + 3))
+        return "%s - %s" % (prefix, message)
+
+def create_logger():
+    """
+    Create a logger.
+    """
+    #make_dir('log')
+    # create log formatter
+    log_formatter = LogFormatter()
+
+    # create file handler and set level to debug
+    # file_handler = logging.FileHandler(filepath, "a")
+    # file_handler.setLevel(logging.DEBUG)
+    # file_handler.setFormatter(log_formatter)
+
+    # create console handler and set level to info
+    console_handler = logging.StreamHandler()
+    console_handler.setLevel(logging.INFO)
+    console_handler.setFormatter(log_formatter)
+
+    # create logger and set level to debug
+    logger = logging.getLogger()
+    logger.handlers = []
+    logger.setLevel(logging.DEBUG)
+    logger.propagate = False
+    #logger.addHandler(file_handler)
+    logger.addHandler(console_handler)
+
+    # reset logger elapsed time
+    def reset_time():
+        log_formatter.start_time = time.time()
+    logger.reset_time = reset_time
+
+    return logger
diff --git a/HGNN/utils/utils.py b/HGNN/utils/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..cae6a571d4b2491d1b025926a4b47d2b2fb481e6
--- /dev/null
+++ b/HGNN/utils/utils.py
@@ -0,0 +1,284 @@
+from collections import defaultdict
+import os
+import pickle
+import json
+import torch.nn as nn
+import torch as th
+import torch.optim as optim
+import numpy as np
+import random
+from Ghypeddings.HGNN.optimizer.ramsgrad import RiemannianAMSGrad
+from Ghypeddings.HGNN.optimizer.rsgd import RiemannianSGD
+import math
+import subprocess
+import argparse
+
+def str2bool(v):
+    return v.lower() == "true"
+
+def make_dir(path):
+    if not os.path.exists(path):
+        try:
+            os.mkdir(path)
+        except OSError:
+            pass
+
+def pickle_dump(file_name, content):
+    with open(file_name, 'wb') as out_file:        
+        pickle.dump(content, out_file, pickle.HIGHEST_PROTOCOL)
+        
+def pickle_load(file_name):
+    with open(file_name, 'rb') as f:
+        return pickle.load(f)
+
+def init_weight(weight, method):
+    """
+    Initialize parameters
+    Args:
+        weight: a Parameter object
+        method: initialization method 
+    """
+    if method == 'orthogonal':
+        nn.init.orthogonal_(weight)
+    elif method == 'xavier':
+        nn.init.xavier_uniform_(weight)
+    elif method == 'kaiming':
+        nn.init.kaiming_uniform_(weight)
+    elif method == 'none':
+        pass
+    else:
+        raise Exception('Unknown init method')
+
+
+def nn_init(nn_module, method='orthogonal'):
+    """
+    Initialize a Sequential or Module object
+    Args:
+        nn_module: Sequential or Module
+        method: initialization method
+    """
+    if method == 'none':
+        return
+    for param_name, _ in nn_module.named_parameters():
+        if isinstance(nn_module, nn.Sequential):
+            # for a Sequential object, the param_name contains both id and param name
+            i, name = param_name.split('.', 1)
+            param = getattr(nn_module[int(i)], name)
+        else:
+            param = getattr(nn_module, param_name)
+        if param_name.find('weight') > -1:
+            init_weight(param, method)
+        elif param_name.find('bias') > -1:
+            nn.init.uniform_(param, -1e-4, 1e-4)
+
+class NoneScheduler:
+	def step(self):
+		pass
+
+def get_lr_scheduler(args, optimizer):
+	if args.lr_scheduler == 'exponential':
+		return optim.lr_scheduler.ExponentialLR(optimizer, gamma=args.lr_gamma)
+	elif args.lr_scheduler == 'cosine':
+		return optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=30, eta_min=0)
+	elif args.lr_scheduler == 'cycle':
+		return optim.lr_scheduler.CyclicLR(optimizer, 0, max_lr=args.lr, step_size_up=20, cycle_momentum=False)
+	elif args.lr_scheduler == 'none':
+		return NoneScheduler()
+	else:
+		raise Exception('Unknown lr scheduler: %s' % args.lr_scheduler)
+
+def get_optimizer(args, params):
+	if args.optimizer == 'sgd':
+		optimizer = optim.SGD(params, lr=args.lr, weight_decay=args.weight_decay)
+	elif args.optimizer == 'adam':
+		optimizer = optim.Adam(params, lr=args.lr, weight_decay=args.weight_decay)
+	elif args.optimizer == 'amsgrad':
+		optimizer = optim.Adam(params, lr=args.lr, amsgrad=True, weight_decay=args.weight_decay)
+	return optimizer
+
+def get_hyperbolic_optimizer(args, manifold,params):
+    if args.hyper_optimizer == 'rsgd':
+        optimizer = RiemannianSGD(
+            args,
+            params,
+            lr=args.lr_hyperbolic,
+        )
+    elif args.hyper_optimizer == 'ramsgrad':
+        optimizer = RiemannianAMSGrad(
+            args,
+            manifold,
+            params,
+            lr=args.lr_hyperbolic,
+        )
+    else:
+        raise ValueError("unsupported hyper optimizer: %s" % args.hyper_optimizer)
+    return optimizer
+
+def set_seed(seed):
+    """
+    Set the random seed
+    """
+    random.seed(seed)
+    np.random.seed(seed)
+    th.manual_seed(seed)
+    th.cuda.manual_seed(seed)
+    th.cuda.manual_seed_all(seed)
+
+def pad_sequence(data_list, maxlen, value=0):
+	return [row + [value] * (maxlen - len(row)) for row in data_list]
+
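+# Symmetric degree normalization of neighbor weights, in place:
+# w[dst][i] <- w[dst][i] / (sqrt(deg(dst)) * sqrt(deg(src))), the list-based
+# analogue of the GCN normalization D^-1/2 A D^-1/2.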
+def normalize_weight(adj_mat, weight):
+	degree = [1 / math.sqrt(sum(np.abs(w))) for w in weight]
+	for dst in range(len(adj_mat)):
+		for src_idx in range(len(adj_mat[dst])):
+			src = adj_mat[dst][src_idx]
+			weight[dst][src_idx] = degree[dst] * weight[dst][src_idx] * degree[src]
+
+def set_up_distributed_training_multi_gpu(args): 
+    #args.device_id = args.local_rank
+    args.device_id = 0
+    th.cuda.set_device(args.device_id)
+    args.distributed_rank = args.device_id
+    th.distributed.init_process_group(backend='nccl',
+                                         init_method='env://')
+
+def save_model_weights(args, model, path):
+	"""
+	save model weights out to file
+	"""
+	if args.distributed_rank == 0:
+		make_dir(path)
+		th.save(model.state_dict(), os.path.join(path, args.name))
+
+def load_model_weights(model, path):
+	"""
+	load saved weights
+	"""
+	model.load_state_dict(th.load(path))
+
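+# Numerically stable atanh: clamps the argument below 1 - EPS so the logs stay
+# finite, then applies atanh(x) = 0.5 * log((1 + x) / (1 - x)).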
+def th_atanh(x, EPS):
+	values = th.min(x, th.Tensor([1.0 - EPS]).cuda())
+	return 0.5 * (th.log(1 + values + EPS) - th.log(1 - values + EPS))
+	
+def th_norm(x, dim=1):
+	"""
+	Args
+		x: [batch size, dim]
+	Output:	
+		[batch size, 1]
+	"""
+	if(len(x.shape) == 1):
+		x = x.unsqueeze(0)
+	return th.norm(x, 2, dim, keepdim=True)
+
+def th_dot(x, y, keepdim=True):
+	tmp = x*y
+	if(len(tmp.shape) == 1):
+		tmp = tmp.unsqueeze(0) 
+	return th.sum(tmp, dim=1, keepdim=keepdim)
+
+def clip_by_norm(x, clip_norm):
+	return th.renorm(x, 2, 0, clip_norm)
+
+def get_params(params_list, vars_list):
+	"""
+	Add parameters in vars_list to param_list
+	"""
+	for i in vars_list:
+		if issubclass(i.__class__, nn.Module):
+			params_list.extend(list(i.parameters()))
+		elif issubclass(i.__class__, nn.Parameter):
+			params_list.append(i)
+		else:
+			raise TypeError("unknown parameter container: %s" % type(i))
+
+def categorize_params(args):
+	"""
+	Categorize parameters into hyperbolic ones and euclidean ones
+	"""
+	hyperbolic_params, euclidean_params = [], []
+	get_params(euclidean_params, args.eucl_vars)
+	get_params(hyperbolic_params, args.hyp_vars)
+	return hyperbolic_params, euclidean_params
+
+def get_activation(args):
+	if args.act == 'leaky_relu':
+		return nn.LeakyReLU(args.alpha)
+	elif args.act == 'rrelu':
+		return nn.RReLU()
+	elif args.act == 'relu':
+		return nn.ReLU()
+	elif args.act == 'elu':
+		return nn.ELU()
+	elif args.act == 'prelu':
+		return nn.PReLU()
+	elif args.act == 'selu':
+		return nn.SELU()
+	else:
+		raise Exception('Unknown activation')
+
+def set_up_optimizer_scheduler(hyperbolic, args, model , manifold):
+	if hyperbolic:
+		hyperbolic_params, euclidean_params = categorize_params(args)
+		#assert(len(list(model.parameters())) == len(hyperbolic_params) + len(euclidean_params))
+		optimizer = get_optimizer(args, euclidean_params)
+		lr_scheduler = get_lr_scheduler(args, optimizer)
+		if len(hyperbolic_params) > 0:
+			hyperbolic_optimizer = get_hyperbolic_optimizer(args,manifold, hyperbolic_params)
+			hyperbolic_lr_scheduler = get_lr_scheduler(args, hyperbolic_optimizer)
+		else:
+			hyperbolic_optimizer, hyperbolic_lr_scheduler = None, None
+		return optimizer, lr_scheduler, hyperbolic_optimizer, hyperbolic_lr_scheduler
+	else:
+		optimizer = get_optimizer(args, model.parameters())
+		lr_scheduler = get_lr_scheduler(args, optimizer)
+		return optimizer, lr_scheduler, None, None
+
+# reimplement clamp functions to avoid killing gradient during backpropagation
+def clamp_max(x, max_value):
+	t = th.clamp(max_value - x.detach(), max=0)
+	return x + t
+
+def clamp_min(x, min_value):
+	t = th.clamp(min_value - x.detach(), min=0)
+	return x + t
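+
+# Example: clamp_min(x, 1.0) matches th.clamp(x, min=1.0) in the forward pass,
+# but because t is computed from x.detach(), d(out)/dx == 1 everywhere, so the
+# gradient is not zeroed in the clamped region.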
+
+def one_hot_vec(length, pos):
+	vec = [0] * length
+	vec[pos] = 1
+	return vec
+
+
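+# Builds a flat argparse Namespace from positional defaults; parse_known_args()
+# keeps this working even when the host process defines its own CLI flags.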
+def create_args(*args):
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--dim', type=int, default=args[0])
+    parser.add_argument('--c', type=int, default=args[1])
+    parser.add_argument('--num_layers', type=int, default=args[2])
+    parser.add_argument('--bias', type=bool, default=args[3])
+    parser.add_argument('--act', type=str, default=args[4])
+    parser.add_argument('--alpha', type=float, default=args[5])
+    parser.add_argument('--select_manifold', type=str, default=args[6])
+    parser.add_argument('--num_centroid', type=int, default=args[7])
+    parser.add_argument('--eucl_vars', nargs='+', default=args[8])
+    parser.add_argument('--hyp_vars', nargs='+', default=args[9])
+    parser.add_argument('--grad_clip', type=float, default=args[10])
+    parser.add_argument('--optimizer', type=str, default=args[11])
+    parser.add_argument('--weight_decay', type=float, default=args[12])
+    parser.add_argument('--lr', type=float, default=args[13])
+    parser.add_argument('--lr_scheduler', type=str, default=args[14])
+    parser.add_argument('--lr_gamma', type=float, default=args[15])
+    parser.add_argument('--lr_hyperbolic', type=float, default=args[16])
+    parser.add_argument('--hyper_optimizer', type=str, default=args[17])
+    parser.add_argument('--proj_init', type=str, default=args[18])
+    parser.add_argument('--tie_weight', type=bool, default=args[19])
+    parser.add_argument('--epochs', type=int, default=args[20])
+    parser.add_argument('--patience', type=int, default=args[21])
+    parser.add_argument('--seed', type=int, default=args[22])
+    parser.add_argument('--log_freq', type=int, default=args[23])
+    parser.add_argument('--eval_freq', type=int, default=args[24])
+    parser.add_argument('--val_prop', type=float, default=args[25])
+    parser.add_argument('--test_prop', type=float, default=args[26])
+    parser.add_argument('--double_precision', type=int, default=args[27])
+    parser.add_argument('--dropout', type=float, default=args[28])
+    parser.add_argument('--normalize_adj', type=bool, default=args[29])
+    parser.add_argument('--normalize_feats', type=bool, default=args[30])
+    flags, unknown = parser.parse_known_args()
+    return flags
\ No newline at end of file
diff --git a/PVAE/__init__.py b/PVAE/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/PVAE/distributions/__init__.py b/PVAE/distributions/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..360ae4e2832bd8984779dd74f26bce9f9df9266a
--- /dev/null
+++ b/PVAE/distributions/__init__.py
@@ -0,0 +1,4 @@
+from Ghypeddings.PVAE.distributions.riemannian_normal import RiemannianNormal
+from Ghypeddings.PVAE.distributions.hyperbolic_radius import HyperbolicRadius
+from Ghypeddings.PVAE.distributions.wrapped_normal import WrappedNormal
+from Ghypeddings.PVAE.distributions.hyperspherical_uniform import HypersphericalUniform
diff --git a/PVAE/distributions/ars.py b/PVAE/distributions/ars.py
new file mode 100644
index 0000000000000000000000000000000000000000..cdd7e7253c2aaf7590f1ee4368f41de55430eaac
--- /dev/null
+++ b/PVAE/distributions/ars.py
@@ -0,0 +1,135 @@
+import torch
+
+infty = torch.tensor(float('Inf'))
+
+def diff(x):
+    return x[:, 1:] - x[:, :-1]
+
+class ARS():
+    '''
+    This class implements the Adaptive Rejection Sampling technique of Gilks and Wild '92.
+    Where possible, the naming convention has been borrowed from that paper.
+    The PDF must be log-concave.
+    Currently does not exploit the lower hull described in the paper, which is
+    fine when drawing only a small number of samples at a time.
+    '''
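+
+    # Usage sketch (mirrors HyperbolicRadius.sample in this package): build the
+    # sampler from a log-density, its gradient, and anchor points xi spanning
+    # the mode, then draw:
+    #   ars = ARS(log_prob, grad_log_prob, device, xi=xi, lb=0, ns=20)
+    #   samples = ars.sample(torch.Size([1]))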
+
+    def __init__(self, logpdf, grad_logpdf, device, xi, lb=-infty, ub=infty, use_lower=False, ns=50, **fargs):
+        '''
+        initialize the upper (and if needed lower) hulls with the specified params
+
+        Parameters
+        ==========
+        logpdf: function that computes log(f(u,...)) for a given u, where f(u) is proportional to the
+           density we want to sample from
+        grad_logpdf: computes d/du log(f(u,...))
+        xi: ordered vector of starting points at which log(f(u,...)) is defined,
+            used to initialize the hulls
+        use_lower: True means the lower squeezing will be used, which is more efficient
+                   for drawing large numbers of samples
+        lb: lower bound of the domain
+        ub: upper bound of the domain
+        ns: maximum number of points defining the hulls
+        fargs: arguments for logpdf and grad_logpdf
+        '''
+        self.device = device
+
+        self.lb = lb
+        self.ub = ub
+
+        self.logpdf = logpdf
+        self.grad_logpdf = grad_logpdf
+        self.fargs = fargs
+
+        #set limit on how many points to maintain on hull
+        self.ns = ns
+        self.xi = xi.to(self.device) # initialize x, the vector of abscissae at which the function h has been evaluated
+        self.B, self.K = self.xi.size() # hull size
+        self.h = torch.zeros(self.B, ns).to(self.device)
+        self.hprime = torch.zeros(self.B, ns).to(self.device)
+        self.x = torch.zeros(self.B, ns).to(self.device)
+        self.h[:, :self.K] = self.logpdf(self.xi, **self.fargs)
+        self.hprime[:, :self.K] = self.grad_logpdf(self.xi, **self.fargs)
+        self.x[:, :self.K] = self.xi
+        # Avoid under/overflow errors. the envelope and pdf are only
+        # proportional to the true pdf, so can choose any constant of proportionality.
+        self.offset = self.h.max(-1)[0].view(-1, 1)
+        self.h = self.h - self.offset 
+
+        # Derivative at first point in xi must be > 0
+        # Derivative at last point in xi must be < 0
+        if not (self.hprime[:, 0] > 0).all(): raise ValueError('initial anchor points must span mode of PDF (left)')
+        if not (self.hprime[:, self.K-1] < 0).all(): raise ValueError('initial anchor points must span mode of PDF (right)')
+        self.insert()
+
+
+    def sample(self, shape=torch.Size()):
+        '''
+        Draw N samples and update upper and lower hulls accordingly
+        '''
+        shape = shape if isinstance(shape, torch.Size) else torch.Size([shape])
+        samples = torch.ones(self.B, *shape).to(self.device)
+        bool_mask = (torch.ones(self.B, *shape) == 1).to(self.device)
+        count = 0
+        while bool_mask.sum() != 0:
+            count += 1
+            xt, i = self.sampleUpper(shape)
+            ht = self.logpdf(xt, **self.fargs)
+            # hprimet = self.grad_logpdf(xt, **self.fargs)
+            ht = ht - self.offset
+            ut = self.h.gather(1, i) + (xt - self.x.gather(1, i)) * self.hprime.gather(1, i)
+
+            # Accept sample?
+            u = torch.rand(shape).to(self.device)
+            accept = u < torch.exp(ht - ut)
+            reject = ~accept
+            samples[bool_mask * accept] = xt[bool_mask * accept]
+            bool_mask[bool_mask * accept] = reject[bool_mask * accept]
+            # Update hull with new function evaluations
+            # if self.K < self.ns:
+            #     nb_insert = self.ns - self.K
+            #     self.insert(nb_insert, xt[:, :nb_insert], ht[:, :nb_insert], hprimet[:, :nb_insert])
+
+        return samples.t().unsqueeze(-1)
+
+
+    def insert(self, nbnew=0, xnew=None, hnew=None, hprimenew=None):
+        '''
+        Update hulls with new point(s); if none given, just recalculate the hull from existing x, h, hprime
+        '''
+        # if xnew is not None:
+        #     self.x[:, self.K:self.K+nbnew] = xnew
+        #     self.x, idx = self.x.sort()
+        #     self.h[:, self.K:self.K+nbnew] = hnew
+        #     self.h = self.h.gather(1, idx)
+        #     self.hprime[:, self.K:self.K+nbnew] = hprimenew
+        #     self.hprime = self.hprime.gather(1, idx)
+
+        #     self.K += xnew.size(-1)
+
+        self.z = torch.zeros(self.B, self.K + 1).to(self.device)
+        self.z[:, 0] = self.lb; self.z[:, self.K] = self.ub
+        self.z[:, 1:self.K] = (diff(self.h[:, :self.K]) - diff(self.x[:, :self.K] * self.hprime[:, :self.K])) / -diff(self.hprime[:, :self.K]) 
+        idx = [0]+list(range(self.K))
+        self.u = self.h[:, idx] + self.hprime[:, idx] * (self.z-self.x[:, idx])
+
+        self.s = diff(torch.exp(self.u)) / self.hprime[:, :self.K]
+        self.s[self.hprime[:, :self.K] == 0.] = 0. # should be 0 when gradient is 0
+        self.cs = torch.cat((torch.zeros(self.B, 1).to(self.device), torch.cumsum(self.s, dim=-1)), dim=-1)
+        self.cu = self.cs[:, -1]
+
+    def sampleUpper(self, shape=torch.Size()):
+        '''
+        Return a single value randomly sampled from the upper hull and index of segment
+        '''
+
+        u = torch.rand(self.B, *shape).to(self.device)
+        i = (self.cs/self.cu.unsqueeze(-1)).unsqueeze(-1) <= u.unsqueeze(1).expand(*self.cs.shape, *shape)
+        idx = i.sum(1) - 1
+
+        xt = self.x.gather(1, idx) + (-self.h.gather(1, idx) + torch.log(self.hprime.gather(1, idx)*(self.cu.unsqueeze(-1)*u - self.cs.gather(1, idx)) + 
+        torch.exp(self.u.gather(1, idx)))) / self.hprime.gather(1, idx)
+
+        return xt, idx
diff --git a/PVAE/distributions/hyperbolic_radius.py b/PVAE/distributions/hyperbolic_radius.py
new file mode 100644
index 0000000000000000000000000000000000000000..cf559a831fb0a963da2fe8f045a36ead6313abaf
--- /dev/null
+++ b/PVAE/distributions/hyperbolic_radius.py
@@ -0,0 +1,295 @@
+import math
+import torch
+from torch.autograd import Function, grad
+import torch.distributions as dist
+from Ghypeddings.PVAE.utils import Constants, logsinh, log_sum_exp_signs, rexpand
+from numbers import Number
+from Ghypeddings.PVAE.distributions.ars import ARS
+
+
+def cdf_r(value, scale, c, dim):
+    value = value.double()
+    scale = scale.double()
+    c = c.double()
+
+    if dim == 2:
+        return 1 / torch.erf(c.sqrt() * scale / math.sqrt(2)) * .5 * \
+    (2 * torch.erf(c.sqrt() * scale / math.sqrt(2)) + torch.erf((value - c.sqrt() * scale.pow(2)) / math.sqrt(2) / scale) - \
+        torch.erf((c.sqrt() * scale.pow(2) + value) / math.sqrt(2) / scale))
+    else:
+        device = value.device
+
+        k_float = rexpand(torch.arange(dim), *value.size()).double().to(device)
+        dim = torch.tensor(dim).to(device).double()
+
+        s1 = torch.lgamma(dim) - torch.lgamma(k_float + 1) - torch.lgamma(dim - k_float) \
+            + (dim - 1 - 2 * k_float).pow(2) * c * scale.pow(2) / 2 \
+            + torch.log( \
+                torch.erf((value - (dim - 1 - 2 * k_float) * c.sqrt() * scale.pow(2)) / scale / math.sqrt(2)) \
+                + torch.erf((dim - 1 - 2 * k_float) * c.sqrt() * scale / math.sqrt(2)) \
+                )
+        s2 = torch.lgamma(dim) - torch.lgamma(k_float + 1) - torch.lgamma(dim - k_float) \
+            + (dim - 1 - 2 * k_float).pow(2) * c * scale.pow(2) / 2 \
+            + torch.log1p(torch.erf((dim - 1 - 2 * k_float) * c.sqrt() * scale / math.sqrt(2)))
+
+        signs = torch.tensor([1., -1.]).double().to(device).repeat(((int(dim)+1) // 2)*2)[:int(dim)]
+        signs = rexpand(signs, *value.size())
+
+        S1 = log_sum_exp_signs(s1, signs, dim=0)
+        S2 = log_sum_exp_signs(s2, signs, dim=0)
+
+        output = torch.exp(S1 - S2)
+        zero_value_idx = value == 0.
+        output[zero_value_idx] = 0.
+        return output.float()
+
+
+def grad_cdf_value_scale(value, scale, c, dim):
+    device = value.device
+
+    dim = torch.tensor(int(dim)).to(device).double()
+
+    signs = torch.tensor([1., -1.]).double().to(device).repeat(((int(dim)+1) // 2)*2)[:int(dim)]
+    signs = rexpand(signs, *value.size())
+    k_float = rexpand(torch.arange(dim), *value.size()).double().to(device)
+
+    log_arg1 = (dim - 1 - 2 * k_float).pow(2) * c * scale * \
+    (\
+        torch.erf((value - (dim - 1 - 2 * k_float) * c.sqrt() * scale.pow(2)) / scale / math.sqrt(2)) \
+        + torch.erf((dim - 1 - 2 * k_float) * c.sqrt() * scale / math.sqrt(2)) \
+    )
+    
+    log_arg2 = math.sqrt(2 / math.pi) * ( \
+        (dim - 1 - 2 * k_float) * c.sqrt() * torch.exp(-(dim - 1 - 2 * k_float).pow(2) * c * scale.pow(2) / 2) \
+        - ((value / scale.pow(2) + (dim - 1 - 2 * k_float) * c.sqrt()) * torch.exp(-(value - (dim - 1 - 2 * k_float) * c.sqrt() * scale.pow(2)).pow(2) / (2 * scale.pow(2)))) \
+        )
+
+    log_arg = log_arg1 + log_arg2
+    sign_log_arg = torch.sign(log_arg)
+
+    s = torch.lgamma(dim) - torch.lgamma(k_float + 1) - torch.lgamma(dim - k_float) \
+            + (dim - 1 - 2 * k_float).pow(2) * c * scale.pow(2) / 2 \
+            + torch.log(sign_log_arg * log_arg)
+
+    log_grad_sum_sigma = log_sum_exp_signs(s, signs * sign_log_arg, dim=0)
+    grad_sum_sigma = torch.sum(signs * sign_log_arg * torch.exp(s), dim=0)
+
+    s1 = torch.lgamma(dim) - torch.lgamma(k_float + 1) - torch.lgamma(dim - k_float) \
+        + (dim - 1 - 2 * k_float).pow(2) * c * scale.pow(2) / 2 \
+        + torch.log( \
+            torch.erf((value - (dim - 1 - 2 * k_float) * c.sqrt() * scale.pow(2)) / scale / math.sqrt(2)) \
+            + torch.erf((dim - 1 - 2 * k_float) * c.sqrt() * scale / math.sqrt(2)) \
+        )
+
+    S1 = log_sum_exp_signs(s1, signs, dim=0)
+    grad_log_cdf_scale = grad_sum_sigma / S1.exp()
+    log_unormalised_prob = - value.pow(2) / (2 * scale.pow(2)) + (dim - 1) * logsinh(c.sqrt() * value) - (dim - 1) / 2 * c.log()
+    
+    with torch.autograd.enable_grad():
+        scale = scale.float()
+        logZ = _log_normalizer_closed_grad.apply(scale, c, dim)
+        grad_logZ_scale = grad(logZ, scale, grad_outputs=torch.ones_like(scale))
+
+    grad_log_cdf_scale = - grad_logZ_scale[0] + 1 / scale + grad_log_cdf_scale.float()
+    cdf = cdf_r(value.double(), scale.double(), c.double(), int(dim)).float().squeeze(0)
+    grad_scale = cdf * grad_log_cdf_scale
+
+    grad_value = (log_unormalised_prob.float() - logZ).exp()
+    return grad_value, grad_scale
+
+
+class _log_normalizer_closed_grad(Function):
+    @staticmethod 
+    def forward(ctx, scale, c, dim):
+        scale = scale.double()
+        c = c.double()
+        ctx.scale = scale.clone().detach()
+        ctx.c = c.clone().detach()
+        ctx.dim = dim
+
+        device = scale.device
+        output = .5 * (Constants.logpi - Constants.log2) + scale.log() -(int(dim) - 1) * (c.log() / 2 + Constants.log2)
+        dim = torch.tensor(int(dim)).to(device).double()
+
+        k_float = rexpand(torch.arange(int(dim)), *scale.size()).double().to(device)
+        s = torch.lgamma(dim) - torch.lgamma(k_float + 1) - torch.lgamma(dim - k_float) \
+            + (dim - 1 - 2 * k_float).pow(2) * c * scale.pow(2) / 2 \
+            + torch.log1p(torch.erf((dim - 1 - 2 * k_float) * c.sqrt() * scale / math.sqrt(2)))
+        signs = torch.tensor([1., -1.]).double().to(device).repeat(((int(dim)+1) // 2)*2)[:int(dim)]
+        signs = rexpand(signs, *scale.size())
+        ctx.log_sum_term = log_sum_exp_signs(s, signs, dim=0)
+        output = output + ctx.log_sum_term
+
+        return output.float()
+
+    @staticmethod
+    def backward(ctx, grad_output):
+        grad_input = grad_output.clone()
+
+        device = grad_input.device
+        scale = ctx.scale
+        c = ctx.c
+        dim = torch.tensor(int(ctx.dim)).to(device).double()
+
+        k_float = rexpand(torch.arange(int(dim)), *scale.size()).double().to(device)
+        signs = torch.tensor([1., -1.]).double().to(device).repeat(((int(dim)+1) // 2)*2)[:int(dim)]
+        signs = rexpand(signs, *scale.size())
+
+        log_arg = (dim - 1 - 2 * k_float).pow(2) * c * scale * (1+torch.erf((dim - 1 - 2 * k_float) * c.sqrt() * scale / math.sqrt(2))) + \
+            torch.exp(-(dim - 1 - 2 * k_float).pow(2) * c * scale.pow(2) / 2) * 2 / math.sqrt(math.pi) * (dim - 1 - 2 * k_float) * c.sqrt() / math.sqrt(2)
+        log_arg_signs = torch.sign(log_arg)
+        s = torch.lgamma(dim) - torch.lgamma(k_float + 1) - torch.lgamma(dim - k_float) \
+            + (dim - 1 - 2 * k_float).pow(2) * c * scale.pow(2) / 2 \
+            + torch.log(log_arg_signs * log_arg)
+        log_grad_sum_sigma = log_sum_exp_signs(s, log_arg_signs * signs, dim=0)
+
+        grad_scale = torch.exp(log_grad_sum_sigma - ctx.log_sum_term)
+        grad_scale = 1 / ctx.scale + grad_scale
+
+        grad_scale = (grad_input * grad_scale.float()).view(-1, *grad_input.shape).sum(0)
+        return (grad_scale, None, None)
+
+
+class impl_rsample(Function):
+    @staticmethod
+    def forward(ctx, value, scale, c, dim):
+        ctx.scale = scale.clone().detach().double().requires_grad_(True)
+        ctx.value = value.clone().detach().double().requires_grad_(True)
+        ctx.c = c.clone().detach().double().requires_grad_(True)
+        ctx.dim = dim
+        return value
+
+    @staticmethod
+    def backward(ctx, grad_output):
+        grad_input = grad_output.clone()
+        grad_cdf_value, grad_cdf_scale = grad_cdf_value_scale(ctx.value, ctx.scale, ctx.c, ctx.dim)
+        assert not torch.isnan(grad_cdf_value).any()
+        assert not torch.isnan(grad_cdf_scale).any()
+        grad_value_scale = -(grad_cdf_value).pow(-1) * grad_cdf_scale.expand(grad_input.shape)
+        grad_scale = (grad_input * grad_value_scale).view(-1, *grad_cdf_scale.shape).sum(0)
+        # grad_value_c = -(grad_cdf_value).pow(-1) * grad_cdf_c.expand(grad_input.shape)
+        # grad_c = (grad_input * grad_value_c).view(-1, *grad_cdf_c.shape).sum(0)
+        return (None, grad_scale, None, None)
+
+
+class HyperbolicRadius(dist.Distribution):
+    support = dist.constraints.positive
+    has_rsample = True
+
+    def __init__(self, dim, c, scale, ars=True, validate_args=None):
+        self.dim = dim
+        self.c = c
+        self.scale = scale
+        self.device = scale.device
+        self.ars = ars
+        if isinstance(scale, Number):
+            batch_shape = torch.Size()
+        else:
+            batch_shape = self.scale.size()
+        self.log_normalizer = self._log_normalizer()
+        if torch.isnan(self.log_normalizer).any() or torch.isinf(self.log_normalizer).any():
+            raise ValueError('nan or inf in log_normalizer: {}'.format(torch.cat((self.log_normalizer, self.scale), dim=1)))
+        super(HyperbolicRadius, self).__init__(batch_shape)
+
+    def rsample(self, sample_shape=torch.Size()):
+        value = self.sample(sample_shape)
+        return impl_rsample.apply(value, self.scale, self.c, self.dim)
+
+    def sample(self, sample_shape=torch.Size()):
+        if sample_shape == torch.Size(): sample_shape = torch.Size([1])
+        with torch.no_grad():
+            mean = self.mean
+            stddev = self.stddev
+            if torch.isnan(stddev).any(): stddev[torch.isnan(stddev)] = self.scale[torch.isnan(stddev)]
+            if torch.isnan(mean).any(): mean[torch.isnan(mean)] = ((self.dim - 1) * self.scale.pow(2) * self.c.sqrt())[torch.isnan(mean)]
+            steps = torch.linspace(0.1, 3, 10).to(self.device)
+            steps = torch.cat((-steps.flip(0), steps))
+            xi = [mean + s * torch.min(stddev, .95 * mean / 3) for s in steps]
+            xi = torch.cat(xi, dim=1)
+            ars = ARS(self.log_prob, self.grad_log_prob, self.device, xi=xi, ns=20, lb=0)
+            value = ars.sample(sample_shape)
+        return value
+
+    def __while_loop(self, logM, proposal, sample_shape):
+        shape = self._extended_shape(sample_shape)
+        r, bool_mask = torch.ones(shape).to(self.device), (torch.ones(shape) == 1).to(self.device)
+        count = 0
+        while bool_mask.sum() != 0:
+            count += 1
+            r_ = proposal.sample(sample_shape).to(self.device)
+            u = torch.rand(shape).to(self.device)
+            log_ratio = self.log_prob(r_) - proposal.log_prob(r_) - logM
+            accept = log_ratio > torch.log(u)
+            reject = ~accept  # logical negation; `1 - accept` fails on bool tensors in recent PyTorch
+            r[bool_mask * accept] = r_[bool_mask * accept]
+            bool_mask[bool_mask * accept] = reject[bool_mask * accept]
+        return r
+
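+    # log_prob implements the unnormalised density p(r) proportional to
+    # exp(-r^2 / (2 * scale^2)) * sinh(sqrt(c) * r)^(dim - 1), minus the log
+    # normalizer computed in __init__.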
+    def log_prob(self, value):
+        res = - value.pow(2) / (2 * self.scale.pow(2)) + (self.dim - 1) * logsinh(self.c.sqrt() * value) \
+            - (self.dim - 1) / 2 * self.c.log() - self.log_normalizer#.expand(value.shape)
+        assert not torch.isnan(res).any()
+        return res
+
+    def grad_log_prob(self, value):
+        res = - value / self.scale.pow(2) + (self.dim - 1) * self.c.sqrt() * torch.cosh(self.c.sqrt() * value) / torch.sinh(self.c.sqrt() * value) 
+        return res
+
+    def cdf(self, value):
+        return cdf_r(value, self.scale, self.c, self.dim)
+
+    @property
+    def mean(self):
+        c = self.c.double()
+        scale = self.scale.double()
+        dim = torch.tensor(int(self.dim)).double().to(self.device)
+        signs = torch.tensor([1., -1.]).double().to(self.device).repeat(((self.dim+1) // 2)*2)[:self.dim].unsqueeze(-1).unsqueeze(-1).expand(self.dim, *self.scale.size())
+        
+        k_float = rexpand(torch.arange(self.dim), *self.scale.size()).double().to(self.device)
+        s2 = torch.lgamma(dim) - torch.lgamma(k_float + 1) - torch.lgamma(dim - k_float) \
+                + (dim - 1 - 2 * k_float).pow(2) * c * scale.pow(2) / 2 \
+                + torch.log1p(torch.erf((dim - 1 - 2 * k_float) * c.sqrt() * scale / math.sqrt(2)))
+        S2 = log_sum_exp_signs(s2, signs, dim=0)
+
+        log_arg = (dim - 1 - 2 * k_float) * c.sqrt() * scale.pow(2) * (1 + torch.erf((dim - 1 - 2 * k_float) * c.sqrt() * scale / math.sqrt(2))) + \
+                torch.exp(-(dim - 1 - 2 * k_float).pow(2) * c * scale.pow(2) / 2) * scale * math.sqrt(2 / math.pi)
+        log_arg_signs = torch.sign(log_arg)
+        s1 = torch.lgamma(dim) - torch.lgamma(k_float + 1) - torch.lgamma(dim - k_float) \
+                + (dim - 1 - 2 * k_float).pow(2) * c * scale.pow(2) / 2 \
+                + torch.log(log_arg_signs * log_arg)
+        S1 = log_sum_exp_signs(s1, signs * log_arg_signs, dim=0)
+
+        output = torch.exp(S1 - S2)
+        return output.float()
+
+    @property
+    def variance(self):
+        c = self.c.double()
+        scale = self.scale.double()
+        dim = torch.tensor(int(self.dim)).double().to(self.device)
+        signs = torch.tensor([1., -1.]).double().to(self.device).repeat(((int(dim)+1) // 2)*2)[:int(dim)].unsqueeze(-1).unsqueeze(-1).expand(int(dim), *self.scale.size())
+
+        k_float = rexpand(torch.arange(self.dim), *self.scale.size()).double().to(self.device)
+        s2 = torch.lgamma(dim) - torch.lgamma(k_float + 1) - torch.lgamma(dim - k_float) \
+                + (dim - 1 - 2 * k_float).pow(2) * c * scale.pow(2) / 2 \
+                + torch.log1p(torch.erf((dim - 1 - 2 * k_float) * c.sqrt() * scale / math.sqrt(2)))
+        S2 = log_sum_exp_signs(s2, signs, dim=0)
+
+        log_arg = (1 + (dim - 1 - 2 * k_float).pow(2) * c * scale.pow(2)) * (1 + torch.erf((dim - 1 - 2 * k_float) * c.sqrt() * scale / math.sqrt(2))) + \
+               (dim - 1 - 2 * k_float) * c.sqrt() * torch.exp(-(dim - 1 - 2 * k_float).pow(2) * c * scale.pow(2) / 2) * scale * math.sqrt(2 / math.pi)
+        log_arg_signs = torch.sign(log_arg)
+        s1 = torch.lgamma(dim) - torch.lgamma(k_float + 1) - torch.lgamma(dim - k_float) \
+                + (dim - 1 - 2 * k_float).pow(2) * c * scale.pow(2) / 2 \
+                + 2 * scale.log() \
+                + torch.log(log_arg_signs * log_arg)
+        S1 = log_sum_exp_signs(s1, signs * log_arg_signs, dim=0)
+
+        output = torch.exp(S1 - S2)
+        output = output.float() - self.mean.pow(2)
+        return output
+
+    @property
+    def stddev(self): return self.variance.sqrt()
+
+    def _log_normalizer(self): return _log_normalizer_closed_grad.apply(self.scale, self.c, self.dim)
diff --git a/PVAE/distributions/hyperspherical_uniform.py b/PVAE/distributions/hyperspherical_uniform.py
new file mode 100644
index 0000000000000000000000000000000000000000..8a31f12840af77f161816e0c3b2cc8fdaede3020
--- /dev/null
+++ b/PVAE/distributions/hyperspherical_uniform.py
@@ -0,0 +1,42 @@
+import math
+import torch
+from torch.distributions.utils import _standard_normal
+
+class HypersphericalUniform(torch.distributions.Distribution):
+    """ source: https://github.com/nicola-decao/s-vae-pytorch/blob/master/hyperspherical_vae/distributions/von_mises_fisher.py """
+
+    support = torch.distributions.constraints.real
+    has_rsample = False
+    _mean_carrier_measure = 0
+
+    @property
+    def dim(self):
+        return self._dim
+    
+    def __init__(self, dim, device='cpu', validate_args=None):
+        super(HypersphericalUniform, self).__init__(torch.Size([dim]), validate_args=validate_args)
+        self._dim = dim
+        self._device = device
+
+    def sample(self, shape=torch.Size()):
+        with torch.no_grad():
+            return self.rsample(shape)
+
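+    # A normalized standard Gaussian is rotation invariant, so dividing by its
+    # norm yields a uniform sample on the unit sphere.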
+    def rsample(self, sample_shape=torch.Size()):
+        shape = torch.Size([*sample_shape, self._dim + 1])
+        output = _standard_normal(shape, dtype=torch.float, device=self._device)
+
+        return output / output.norm(dim=-1, keepdim=True)
+
+    def entropy(self):
+        return self._log_surface_area()
+    
+    def log_prob(self, x):
+        return - torch.ones(x.shape[:-1]).to(self._device) * self._log_normalizer()
+
+    def _log_normalizer(self):
+        return self._log_surface_area().to(self._device)
+
+    def _log_surface_area(self):
+        return math.log(2) + ((self._dim + 1) / 2) * math.log(math.pi) - torch.lgamma(
+            torch.Tensor([(self._dim + 1) / 2]))
diff --git a/PVAE/distributions/riemannian_normal.py b/PVAE/distributions/riemannian_normal.py
new file mode 100644
index 0000000000000000000000000000000000000000..ea59144a3c11fa2be735c4c581de0269d84948d8
--- /dev/null
+++ b/PVAE/distributions/riemannian_normal.py
@@ -0,0 +1,49 @@
+import torch
+import torch.distributions as dist
+from torch.distributions import constraints
+from numbers import Number
+from Ghypeddings.PVAE.distributions.hyperbolic_radius import HyperbolicRadius
+from Ghypeddings.PVAE.distributions.hyperspherical_uniform import HypersphericalUniform
+
+
+class RiemannianNormal(dist.Distribution):
+    arg_constraints = {'loc': dist.constraints.interval(-1, 1), 'scale': dist.constraints.positive}
+    support = dist.constraints.interval(-1, 1)
+    has_rsample = True
+
+    @property
+    def mean(self):
+        return self.loc
+    
+    def __init__(self, loc, scale, manifold, validate_args=None):
+        assert not (torch.isnan(loc).any() or torch.isnan(scale).any())
+        self.manifold = manifold
+        self.loc = loc
+        self.manifold.assert_check_point_on_manifold(self.loc)
+        self.scale = scale.clamp(min=0.1, max=7.)
+        self.radius = HyperbolicRadius(manifold.dim, manifold.c, self.scale)
+        self.direction = HypersphericalUniform(manifold.dim - 1, device=loc.device)
+        if isinstance(loc, Number) and isinstance(scale, Number):
+            batch_shape = torch.Size()
+        else:
+            batch_shape = self.loc.size()
+        super(RiemannianNormal, self).__init__(batch_shape, validate_args=validate_args)
+
+    def sample(self, shape=torch.Size()):
+        with torch.no_grad():
+            return self.rsample(shape)
+
+    def rsample(self, sample_shape=torch.Size()):
+        shape = self._extended_shape(sample_shape)
+        alpha = self.direction.sample(torch.Size([*shape[:-1]]))
+        radius = self.radius.rsample(sample_shape)
+        # u = radius * alpha / self.manifold.lambda_x(self.loc, keepdim=True)
+        # res = self.manifold.expmap(self.loc, u)
+        res = self.manifold.expmap_polar(self.loc, alpha, radius)
+        return res
+
+    def log_prob(self, value):
+        loc = self.loc.expand(value.shape)
+        radius_sq = self.manifold.dist(loc, value, keepdim=True).pow(2)
+        res = - radius_sq / 2 / self.scale.pow(2) - self.direction._log_normalizer() - self.radius.log_normalizer
+        return res
diff --git a/PVAE/distributions/wrapped_normal.py b/PVAE/distributions/wrapped_normal.py
new file mode 100644
index 0000000000000000000000000000000000000000..29566d92498a14c25d860d8ba3450780282b70c1
--- /dev/null
+++ b/PVAE/distributions/wrapped_normal.py
@@ -0,0 +1,65 @@
+import torch
+from torch.nn import functional as F
+from torch.distributions import Normal, Independent
+from numbers import Number
+from torch.distributions.utils import _standard_normal, broadcast_all
+
+
+class WrappedNormal(torch.distributions.Distribution):
+
+    arg_constraints = {'loc': torch.distributions.constraints.real,
+                       'scale': torch.distributions.constraints.positive}
+    support = torch.distributions.constraints.real
+    has_rsample = True
+    _mean_carrier_measure = 0
+
+    @property
+    def mean(self):
+        return self.loc
+
+    @property
+    def stddev(self):
+        raise NotImplementedError
+
+    @property
+    def scale(self):
+        return F.softplus(self._scale) if self.softplus else self._scale
+
+    def __init__(self, loc, scale, manifold, validate_args=None, softplus=False):
+        self.dtype = loc.dtype
+        self.softplus = softplus
+        self.loc, self._scale = broadcast_all(loc, scale)
+        self.manifold = manifold
+        self.manifold.assert_check_point_on_manifold(self.loc)
+        self.device = loc.device
+        if isinstance(loc, Number) and isinstance(scale, Number):
+            batch_shape, event_shape = torch.Size(), torch.Size()
+        else:
+            batch_shape = self.loc.shape[:-1]
+            event_shape = torch.Size([self.manifold.dim])
+        super(WrappedNormal, self).__init__(batch_shape, event_shape, validate_args=validate_args)
+
+    def sample(self, shape=torch.Size()):
+        with torch.no_grad():
+            return self.rsample(shape)
+
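+    # Wrapped-normal sampling: draw v ~ N(0, scale^2) in the tangent space at
+    # the origin, parallel-transport it to the tangent space at loc, then push
+    # it onto the manifold with the exponential map.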
+    def rsample(self, sample_shape=torch.Size()):
+        shape = self._extended_shape(sample_shape)
+        v = self.scale * _standard_normal(shape, dtype=self.loc.dtype, device=self.loc.device)
+        self.manifold.assert_check_vector_on_tangent(self.manifold.zero, v)
+        v = v / self.manifold.lambda_x(self.manifold.zero, keepdim=True)
+        u = self.manifold.transp(self.manifold.zero, self.loc, v)
+        z = self.manifold.expmap(self.loc, u)
+        return z
+
+    def log_prob(self, x):
+        shape = x.shape
+        loc = self.loc.unsqueeze(0).expand(x.shape[0], *self.batch_shape, self.manifold.coord_dim)
+        if len(shape) < len(loc.shape): x = x.unsqueeze(1)
+        v = self.manifold.logmap(loc, x)
+        v = self.manifold.transp(loc, self.manifold.zero, v)
+        u = v * self.manifold.lambda_x(self.manifold.zero, keepdim=True)
+        norm_pdf = Normal(torch.zeros_like(self.scale), self.scale).log_prob(u).sum(-1, keepdim=True)
+        logdetexp = self.manifold.logdetexp(loc, x, keepdim=True)
+        result = norm_pdf - logdetexp
+        return result
diff --git a/PVAE/manifolds/__init__.py b/PVAE/manifolds/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..cd1d10798fac6c83bf106a00d054ff2073df7b52
--- /dev/null
+++ b/PVAE/manifolds/__init__.py
@@ -0,0 +1,4 @@
+from Ghypeddings.PVAE.manifolds.euclidean import Euclidean
+from Ghypeddings.PVAE.manifolds.poincareball import PoincareBall
+
+__all__ = ['Euclidean', 'PoincareBall']
\ No newline at end of file
diff --git a/PVAE/manifolds/euclidean.py b/PVAE/manifolds/euclidean.py
new file mode 100644
index 0000000000000000000000000000000000000000..a0b362b7818847a4928b6cc647c90f95de6847fc
--- /dev/null
+++ b/PVAE/manifolds/euclidean.py
@@ -0,0 +1,42 @@
+import torch
+from geoopt.manifolds import Euclidean as EuclideanParent
+
+
+class Euclidean(EuclideanParent):
+
+    def __init__(self, dim, c=0.):
+        super().__init__(1)
+        self.register_buffer("dim", torch.as_tensor(dim, dtype=torch.int))
+        self.register_buffer("c", torch.as_tensor(c, dtype=torch.get_default_dtype()))
+
+    @property
+    def coord_dim(self):
+        return int(self.dim)
+
+    @property
+    def device(self):
+        return self.c.device
+
+    @property
+    def zero(self):
+        return torch.zeros(1, self.dim).to(self.device)
+
+    def logdetexp(self, x, y, is_vector=False, keepdim=False):
+        result = torch.zeros(x.shape[:-1]).to(x)
+        if keepdim: result = result.unsqueeze(-1)
+        return result
+
+    def expmap0(self, u):
+        return u
+
+    def logmap0(self, u):
+        return u
+
+    def proju0(self, u):
+        return self.proju(self.zero.expand_as(u), u)
+
+    def transp0(self, x, u):
+        return self.transp(self.zero.expand_as(u), x, u)
+
+    def lambda_x(self, x, *, keepdim=False, dim=-1):
+        return torch.ones_like(x.sum(dim=dim, keepdim=keepdim))
diff --git a/PVAE/manifolds/poincareball.py b/PVAE/manifolds/poincareball.py
new file mode 100644
index 0000000000000000000000000000000000000000..924511de237cf5d038ef82d39b7be0e6cb30503a
--- /dev/null
+++ b/PVAE/manifolds/poincareball.py
@@ -0,0 +1,84 @@
+import torch
+from geoopt.manifolds import PoincareBall as PoincareBallParent
+from geoopt.manifolds.stereographic.math import _lambda_x, arsinh, tanh
+
+MIN_NORM = 1e-15
+
+
+class PoincareBall(PoincareBallParent):
+
+    def __init__(self, dim, c=1.0):
+        super().__init__(c)
+        self.register_buffer("dim", torch.as_tensor(dim, dtype=torch.int))
+
+    def proju0(self, u):
+        return self.proju(self.zero.expand_as(u), u)
+
+    @property
+    def coord_dim(self):
+        return int(self.dim)
+
+    @property
+    def device(self):
+        return self.c.device
+
+    @property
+    def zero(self):
+        return torch.zeros(1, self.dim).to(self.device)
+
+    def logdetexp(self, x, y, is_vector=False, keepdim=False):
+        d = self.norm(x, y, keepdim=keepdim) if is_vector else self.dist(x, y, keepdim=keepdim)
+        d[d == 0] = MIN_NORM
+        return (self.dim - 1) * (torch.sinh(self.c.sqrt()*d) / self.c.sqrt() / d).log()
+
+    def inner(self, x, u, v=None, *, keepdim=False, dim=-1):
+        if v is None: v = u
+        return _lambda_x(x, self.c, keepdim=keepdim, dim=dim) ** 2 * (u * v).sum(
+            dim=dim, keepdim=keepdim
+        )
+
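+    # Exponential map written in polar form: move a geodesic distance r from x
+    # along the unit direction u / ||u|| (used by RiemannianNormal.rsample).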
+    def expmap_polar(self, x, u, r, dim: int = -1):
+        sqrt_c = self.c ** 0.5
+        u_norm = u.norm(dim=dim, p=2, keepdim=True).clamp_min(MIN_NORM)
+        second_term = (
+            tanh(sqrt_c / 2 * r)
+            * u
+            / (sqrt_c * u_norm)
+        )
+        gamma_1 = self.mobius_add(x, second_term, dim=dim)
+        return gamma_1
+
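+    # Distance from x to the geodesic hyperplane through p with normal a
+    # (cf. the Poincare hyperplane distance of Ganea et al., 2018); `signed`
+    # keeps the sign of the inner product and `norm` rescales by ||a||.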
+    def normdist2plane(self, x, a, p, keepdim: bool = False, signed: bool = False, dim: int = -1, norm: bool = False):
+        c = self.c
+        sqrt_c = c ** 0.5
+        diff = self.mobius_add(-p, x, dim=dim)
+        diff_norm2 = diff.pow(2).sum(dim=dim, keepdim=keepdim).clamp_min(MIN_NORM)
+        sc_diff_a = (diff * a).sum(dim=dim, keepdim=keepdim)
+        if not signed:
+            sc_diff_a = sc_diff_a.abs()
+        a_norm = a.norm(dim=dim, keepdim=keepdim, p=2).clamp_min(MIN_NORM)
+        num = 2 * sqrt_c * sc_diff_a
+        denom = (1 - c * diff_norm2) * a_norm
+        res = arsinh(num / denom.clamp_min(MIN_NORM)) / sqrt_c
+        if norm:
+            res = res * a_norm# * self.lambda_x(a, dim=dim, keepdim=keepdim)
+        return res
+
+
+
+class PoincareBallExact(PoincareBall):
+    __doc__ = r"""
+    See Also
+    --------
+    :class:`PoincareBall`
+    Notes
+    -----
+    The retraction is implemented as an exact exponential map; this retraction is used during optimization.
+    """
+
+    retr_transp = PoincareBall.expmap_transp
+    transp_follow_retr = PoincareBall.transp_follow_expmap
+    retr = PoincareBall.expmap
+
+    def extra_repr(self):
+        return "exact"
diff --git a/PVAE/models/__init__.py b/PVAE/models/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..bdb822cc3ef52a1adbb1e24f356b3857d5479067
--- /dev/null
+++ b/PVAE/models/__init__.py
@@ -0,0 +1,2 @@
+from Ghypeddings.PVAE.models.tabular import Tabular
+__all__ = ['Tabular']
\ No newline at end of file
diff --git a/PVAE/models/architectures.py b/PVAE/models/architectures.py
new file mode 100644
index 0000000000000000000000000000000000000000..92a049661f17533ce909b5b3f3fe4f1b79c53595
--- /dev/null
+++ b/PVAE/models/architectures.py
@@ -0,0 +1,180 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from numpy import prod
+from Ghypeddings.PVAE.utils import Constants
+from Ghypeddings.PVAE.ops.manifold_layers import GeodesicLayer, MobiusLayer, LogZero, ExpZero
+from torch.nn.modules.module import Module
+
+def get_dim_act(args):
+    """
+    Helper function to get dimension and activation at every layer.
+    :param args:
+    :return:
+    """
+    if not args.act:
+        act = lambda x: x
+    else:
+        act = getattr(F, args.act)
+    acts = [act] * (args.num_layers - 1)
+    dims = [args.feat_dim] + ([args.hidden_dim] * (args.num_layers - 1))
+
+    return dims, acts
+
+
+class Encoder(nn.Module):
+    """
+    Encoder abstract class.
+    """
+
+    def __init__(self, c):
+        super(Encoder, self).__init__()
+        self.c = c
+
+    def encode(self, x, adj):
+        input = (x, adj)
+        output, _ = self.layers.forward(input)
+        return output
+
+class GraphConvolution(Module):
+    """
+    Simple GCN layer.
+    """
+
+    def __init__(self, in_features, out_features, dropout, act, use_bias):
+        super(GraphConvolution, self).__init__()
+        self.dropout = dropout
+        self.linear = nn.Linear(in_features, out_features, use_bias)
+        self.act = act
+        self.in_features = in_features
+        self.out_features = out_features
+
+    def forward(self, input):
+        x, adj = input
+        hidden = self.linear.forward(x)
+        hidden = F.dropout(hidden, self.dropout, training=self.training)
+        if adj.is_sparse:
+            support = torch.spmm(adj, hidden)
+        else:
+            support = torch.mm(adj, hidden)
+        output = self.act(support), adj
+        return output
+
+    def extra_repr(self):
+        return 'input_dim={}, output_dim={}'.format(
+                self.in_features, self.out_features
+        )
+
+class GCN(Encoder):
+    """
+    Graph Convolution Networks.
+    """
+
+    def __init__(self, c, args):
+        super(GCN, self).__init__(c)
+        assert args.num_layers > 0
+        dims, acts = get_dim_act(args)
+        gc_layers = []
+        for i in range(len(dims) - 1):
+            in_dim, out_dim = dims[i], dims[i + 1]
+            act = acts[i]
+            gc_layers.append(GraphConvolution(in_dim, out_dim, args.dropout, act, args.bias))
+        self.layers = nn.Sequential(*gc_layers)
+
+
+def extra_hidden_layer(hidden_dim, non_lin):
+    return nn.Sequential(nn.Linear(hidden_dim, hidden_dim), non_lin)
+
+class EncWrapped(nn.Module):
+    """ Usual encoder followed by an exponential map """
+    def __init__(self,c,args, manifold, data_size, non_lin, num_hidden_layers, hidden_dim, prior_iso):
+        super(EncWrapped, self).__init__()
+        self.manifold = manifold
+        self.data_size = data_size
+        self.enc = GCN(c,args)
+        self.fc21 = nn.Linear(hidden_dim, manifold.coord_dim)
+        self.fc22 = nn.Linear(hidden_dim, manifold.coord_dim if not prior_iso else 1)
+
+    def forward(self,adj,x):
+        e = self.enc.encode(x,adj)
+        mu = self.fc21(e)  # project to manifold coordinates
+        mu = self.manifold.expmap0(mu)
+        return mu, F.softplus(self.fc22(e)) + Constants.eta,  self.manifold
+
+
+class DecWrapped(nn.Module):
+    """ Usual encoder preceded by a logarithm map """
+    def __init__(self, manifold, data_size, non_lin, num_hidden_layers, hidden_dim):
+        super(DecWrapped, self).__init__()
+        self.data_size = data_size
+        self.manifold = manifold
+        modules = []
+        modules.append(nn.Sequential(nn.Linear(manifold.coord_dim, hidden_dim), non_lin))
+        modules.extend([extra_hidden_layer(hidden_dim, non_lin) for _ in range(num_hidden_layers - 1)])
+        self.dec = nn.Sequential(*modules)
+        # self.fc31 = nn.Linear(hidden_dim, prod(data_size))
+        self.fc31 = nn.Linear(hidden_dim, data_size[1])
+
+    def forward(self, z):
+        z = self.manifold.logmap0(z)
+        d = self.dec(z)
+        # mu = self.fc31(d).view(*z.size()[:-1], *self.data_size)  # reshape data
+        mu = self.fc31(d).view(*z.size()[:-1], 1, self.data_size[1]) 
+        return mu, torch.ones_like(mu)
+
+
+class DecGeo(nn.Module):
+    """ First layer is a Hypergyroplane followed by usual decoder """
+    def __init__(self, manifold, data_size, non_lin, num_hidden_layers, hidden_dim):
+        super(DecGeo, self).__init__()
+        self.data_size = data_size
+        modules = []
+        modules.append(nn.Sequential(GeodesicLayer(manifold.coord_dim, hidden_dim, manifold), non_lin))
+        modules.extend([extra_hidden_layer(hidden_dim, non_lin) for _ in range(num_hidden_layers - 1)])
+        self.dec = nn.Sequential(*modules)
+        self.fc31 = nn.Linear(hidden_dim, data_size[1])
+
+    def forward(self, z):
+        d = self.dec(z)
+        mu = self.fc31(d).view(*z.size()[:-1], 1, self.data_size[1])  # reshape to (..., 1, n_features)
+        return mu, torch.ones_like(mu)
+
+
+class EncMob(nn.Module):
+    """ Last layer is a Mobius layers """
+    def __init__(self,c,args, manifold, data_size, non_lin, num_hidden_layers, hidden_dim, prior_iso):
+        super(EncMob, self).__init__()
+        self.manifold = manifold
+        self.data_size = data_size
+        self.enc = GCN(c,args)
+        self.fc21 = MobiusLayer(hidden_dim, manifold.coord_dim, manifold)
+        self.fc22 = nn.Linear(hidden_dim, manifold.coord_dim if not prior_iso else 1)
+
+    def forward(self,adj,x):
+        e = self.enc.encode(x, adj)
+        mu = self.fc21(e)          # Mobius layer output, pushed onto the ball below
+        mu = self.manifold.expmap0(mu)
+        return mu, F.softplus(self.fc22(e)) + Constants.eta, self.manifold
+
+
+class DecMob(nn.Module):
+    """ First layer is a Mobius Matrix multiplication """
+    def __init__(self, manifold, data_size, non_lin, num_hidden_layers, hidden_dim):
+        super(DecMob, self).__init__()
+        self.data_size = data_size
+        modules = []
+        modules.append(nn.Sequential(MobiusLayer(manifold.coord_dim, hidden_dim, manifold), LogZero(manifold), non_lin))
+        modules.extend([extra_hidden_layer(hidden_dim, non_lin) for _ in range(num_hidden_layers - 1)])
+        self.dec = nn.Sequential(*modules)
+        self.fc31 = nn.Linear(hidden_dim, prod(data_size))
+
+    def forward(self, z):
+        d = self.dec(z)
+        mu = self.fc31(d).view(*z.size()[:-1], *self.data_size)  # reshape data
+        return mu, torch.ones_like(mu)
diff --git a/PVAE/models/tabular.py b/PVAE/models/tabular.py
new file mode 100644
index 0000000000000000000000000000000000000000..2c5b4d571562880727795fea74d2e8560b793624
--- /dev/null
+++ b/PVAE/models/tabular.py
@@ -0,0 +1,36 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import torch.distributions as dist
+from torch.utils.data import DataLoader
+
+import math
+from Ghypeddings.PVAE.models.vae import VAE
+
+from Ghypeddings.PVAE.distributions import RiemannianNormal, WrappedNormal
+from torch.distributions import Normal
+import Ghypeddings.PVAE.manifolds as manifolds
+from Ghypeddings.PVAE.models.architectures import EncWrapped, DecWrapped, EncMob, DecMob, DecGeo
+from Ghypeddings.PVAE.utils import get_activation
+
+class Tabular(VAE):
+    """ Derive a specific sub-class of a VAE for tabular data. """
+    def __init__(self, params):
+        c = nn.Parameter(params.c * torch.ones(1), requires_grad=False)
+        manifold = getattr(manifolds, 'PoincareBall')(params.dim, c)
+        super(Tabular, self).__init__(
+            eval(params.prior),           # prior distribution
+            eval(params.posterior),       # posterior distribution
+            dist.Normal,                  # likelihood distribution
+            eval('Enc' + params.enc)(params.c,params,manifold, params.data_size, get_activation(params), params.num_layers, params.hidden_dim, params.prior_iso),
+            eval('Dec' + params.dec)(manifold, params.data_size, get_activation(params), params.num_layers, params.hidden_dim),
+            params
+        )
+        self.manifold = manifold
+        self._pz_mu = nn.Parameter(torch.zeros(1, params.dim), requires_grad=False)
+        self._pz_logvar = nn.Parameter(torch.zeros(1, 1), requires_grad=params.learn_prior_std)
+        self.modelName = 'Tabular'
+
+    @property
+    def pz_params(self):
+        return self._pz_mu.mul(1), F.softplus(self._pz_logvar).div(math.log(2)).mul(self.prior_std), self.manifold
\ No newline at end of file
diff --git a/PVAE/models/vae.py b/PVAE/models/vae.py
new file mode 100644
index 0000000000000000000000000000000000000000..2cb79df5b30a7370118d1d77aa88f47c4b341e2f
--- /dev/null
+++ b/PVAE/models/vae.py
@@ -0,0 +1,63 @@
+# Base VAE class definition
+
+import math
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import torch.distributions as dist
+from Ghypeddings.PVAE.utils import get_mean_param
+
+class VAE(nn.Module):
+    def __init__(self, prior_dist, posterior_dist, likelihood_dist, enc, dec, params):
+        super(VAE, self).__init__()
+        self.pz = prior_dist
+        self.px_z = likelihood_dist
+        self.qz_x = posterior_dist
+        self.enc = enc
+        self.dec = dec
+        self.modelName = None
+        self.params = params
+        self.data_size = params.data_size
+        self.prior_std = params.prior_std
+
+        if self.px_z == dist.RelaxedBernoulli:
+            self.px_z.log_prob = lambda self, value: \
+                -F.binary_cross_entropy_with_logits(
+                    self.probs if value.dim() <= self.probs.dim() else self.probs.expand_as(value),
+                    value.expand(self.batch_shape) if value.dim() <= self.probs.dim() else value,
+                    reduction='none'
+                )
+
+    def generate(self, N, K):
+        self.eval()
+        with torch.no_grad():
+            mean_pz = get_mean_param(self.pz_params)
+            mean = get_mean_param(self.dec(mean_pz))
+            px_z_params = self.dec(self.pz(*self.pz_params).sample(torch.Size([N])))
+            means = get_mean_param(px_z_params)
+            samples = self.px_z(*px_z_params).sample(torch.Size([K]))
+
+        return mean, \
+            means.view(-1, *means.size()[2:]), \
+            samples.view(-1, *samples.size()[3:])
+
+    def reconstruct(self, data , edge_index):
+        self.eval()
+        with torch.no_grad():
+            qz_x = self.qz_x(*self.enc(edge_index,data))
+            px_z_params = self.dec(qz_x.rsample(torch.Size([1])).squeeze(0))
+
+        return get_mean_param(px_z_params)
+
+    def forward(self, x , edge_index, K=1):
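+        # Encode once, draw K reparameterised samples from q(z|x), and decode
+        # them into the likelihood p(x|z); the raw encoder outputs are returned
+        # too, since they double as node embeddings downstream.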
+        embeddings = self.enc(edge_index,x)
+        qz_x = self.qz_x(*embeddings)
+        zs = qz_x.rsample(torch.Size([K]))
+        px_z = self.px_z(*self.dec(zs))
+        return qz_x, px_z, zs , embeddings
+
+    @property
+    def pz_params(self):
+        return self._pz_mu.mul(1), F.softplus(self._pz_logvar).div(math.log(2)).mul(self.prior_std)
+
+    def init_last_layer_bias(self, dataset): pass
diff --git a/PVAE/objectives.py b/PVAE/objectives.py
new file mode 100644
index 0000000000000000000000000000000000000000..fd9afeabd8f589bb33659f7f7b1aae36264e4159
--- /dev/null
+++ b/PVAE/objectives.py
@@ -0,0 +1,46 @@
+import torch
+import torch.distributions as dist
+from numpy import prod
+from Ghypeddings.PVAE.utils import has_analytic_kl, log_mean_exp
+import torch.nn.functional as F
+
+def vae_objective(model, idx, x , graph, K=1, beta=1.0, components=False, analytical_kl=False, **kwargs):
+    """Computes E_{p(x)}[ELBO] """
+    qz_x, px_z, zs , embeddings = model(x, graph,K)
+    _, B, D = zs.size()
+    flat_rest = torch.Size([*px_z.batch_shape[:2], -1])
+    x = x.unsqueeze(0).unsqueeze(2)
+    lpx_z = px_z.log_prob(x.expand(px_z.batch_shape)).view(flat_rest).sum(-1)
+    pz = model.pz(*model.pz_params)
+    kld = dist.kl_divergence(qz_x, pz).unsqueeze(0).sum(-1) if \
+        has_analytic_kl(type(qz_x), model.pz) and analytical_kl else \
+        qz_x.log_prob(zs).sum(-1) - pz.log_prob(zs).sum(-1)
+    lpx_z_selected = lpx_z[:, idx]
+    kld_selected = kld[:, idx]
+    obj = -lpx_z_selected.mean(0).sum() + beta * kld_selected.mean(0).sum()
+    return (qz_x, px_z, lpx_z_selected, kld_selected, obj , embeddings) if components else obj
+
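+# For reference, the quantity minimised above is the negative beta-ELBO over
+# the selected nodes, estimated with K Monte-Carlo samples:
+#
+#   -ELBO = -E_{q(z|x)}[log p(x|z)] + beta * KL(q(z|x) || p(z))
+#
+# with the KL computed in closed form when one is registered for the
+# (posterior, prior) pair, and by a single-sample estimate otherwise.
+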
+def _iwae_objective_vec(model, x, K):
+    """Helper for IWAE estimate for log p_\theta(x) -- full vectorisation."""
+    qz_x, px_z, zs = model(x, K)
+    flat_rest = torch.Size([*px_z.batch_shape[:2], -1])
+    lpz = model.pz(*model.pz_params).log_prob(zs).sum(-1)
+    lpx_z = px_z.log_prob(x.expand(zs.size(0), *x.size())).view(flat_rest).sum(-1)
+    lqz_x = qz_x.log_prob(zs).sum(-1)
+    obj = lpz.squeeze(-1) + lpx_z.view(lpz.squeeze(-1).shape) - lqz_x.squeeze(-1)
+    return -log_mean_exp(obj).sum()
+
+
+def iwae_objective(model, x, K):
+    """Computes an importance-weighted ELBO estimate for log p_\theta(x)
+    Iterates over the batch as necessary.
+    Appropriate negation (for minimisation) happens in the helper
+    """
+    split_size = max(1, int(x.size(0) / (K * prod(x.size()) / (3e7))))  # rough heuristic: ~3e7 elements per chunk
+    if split_size >= x.size(0):
+        obj = _iwae_objective_vec(model, x, K)
+    else:
+        obj = 0
+        for bx in x.split(split_size):
+            obj = obj + _iwae_objective_vec(model, bx, K)
+    return obj
diff --git a/PVAE/ops/__init__.py b/PVAE/ops/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/PVAE/ops/manifold_layers.py b/PVAE/ops/manifold_layers.py
new file mode 100644
index 0000000000000000000000000000000000000000..643d80fa75b50dc7a9e92ff9a3a7b305afb440cd
--- /dev/null
+++ b/PVAE/ops/manifold_layers.py
@@ -0,0 +1,90 @@
+import math
+import torch
+from torch import nn
+from torch.nn.parameter import Parameter
+from torch.nn import init
+from Ghypeddings.PVAE.manifolds import PoincareBall, Euclidean
+from geoopt import ManifoldParameter
+
+
+class RiemannianLayer(nn.Module):
+    def __init__(self, in_features, out_features, manifold, over_param, weight_norm):
+        super(RiemannianLayer, self).__init__()
+        self.in_features = in_features
+        self.out_features = out_features
+        self.manifold = manifold
+
+        self._weight = Parameter(torch.Tensor(out_features, in_features))
+        self.over_param = over_param
+        self.weight_norm = weight_norm
+        if self.over_param:
+            self._bias = ManifoldParameter(torch.Tensor(out_features, in_features), manifold=manifold)
+        else:
+            self._bias = Parameter(torch.Tensor(out_features, 1))
+        self.reset_parameters()
+
+    @property
+    def weight(self):
+        return self.manifold.transp0(self.bias, self._weight) # weight \in T_0 => weight \in T_bias
+
+    @property
+    def bias(self):
+        if self.over_param:
+            return self._bias
+        else:
+            return self.manifold.expmap0(self._weight * self._bias) # reparameterisation of a point on the manifold
+
+    def reset_parameters(self):
+        init.kaiming_normal_(self._weight, a=math.sqrt(5))
+        fan_in, _ = init._calculate_fan_in_and_fan_out(self._weight)
+        bound = 4 / math.sqrt(fan_in)
+        init.uniform_(self._bias, -bound, bound)
+        if self.over_param:
+            with torch.no_grad(): self._bias.set_(self.manifold.expmap0(self._bias))
+
+
+class GeodesicLayer(RiemannianLayer):
+    def __init__(self, in_features, out_features, manifold, over_param=False, weight_norm=False):
+        super(GeodesicLayer, self).__init__(in_features, out_features, manifold, over_param, weight_norm)
+
+    def forward(self, input):
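+        # Broadcast the input against each of the out_features gyroplanes; the
+        # signed geodesic distance to every plane (normdist2plane) serves as
+        # the pre-activation, a hyperbolic analogue of a dense layer.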
+        input = input.unsqueeze(-2).expand(*input.shape[:-(len(input.shape) - 2)], self.out_features, self.in_features)
+        res = self.manifold.normdist2plane(input, self.bias, self.weight,
+                                               signed=True, norm=self.weight_norm)
+        return res
+
+
+class Linear(nn.Linear):
+    def __init__(self, in_features, out_features, **kwargs):
+        super(Linear, self).__init__(
+            in_features,
+            out_features,
+        )
+
+
+class MobiusLayer(RiemannianLayer):
+    def __init__(self, in_features, out_features, manifold, over_param=False, weight_norm=False):
+        super(MobiusLayer, self).__init__(in_features, out_features, manifold, over_param, weight_norm)
+
+    def forward(self, input):
+        res = self.manifold.mobius_matvec(self.weight, input)
+        return res
+
+
+class ExpZero(nn.Module):
+    def __init__(self, manifold):
+        super(ExpZero, self).__init__()
+        self.manifold = manifold
+
+    def forward(self, input):
+        return self.manifold.expmap0(input)
+
+
+class LogZero(nn.Module):
+    def __init__(self, manifold):
+        super(LogZero, self).__init__()
+        self.manifold = manifold
+
+    def forward(self, input):
+        return self.manifold.logmap0(input)
+
diff --git a/PVAE/pvae.py b/PVAE/pvae.py
new file mode 100644
index 0000000000000000000000000000000000000000..9ed28d8c9b87c3cf4e033868c6148718d92d22c1
--- /dev/null
+++ b/PVAE/pvae.py
@@ -0,0 +1,183 @@
+import sys
+sys.path.append(".")
+sys.path.append("..")
+import os
+import datetime
+from collections import defaultdict
+import torch
+from torch import optim
+import numpy as np
+import logging
+import time
+
+from Ghypeddings.PVAE.utils import probe_infnan , process_data , create_args , perform_task
+import Ghypeddings.PVAE.objectives as objectives
+from Ghypeddings.PVAE.models import Tabular
+
+runId = datetime.datetime.now().isoformat().replace(':','_')
+torch.backends.cudnn.benchmark = True
+
+class PVAE:
+    def __init__(self,
+                adj,
+                features,
+                labels,
+                dim,
+                hidden_dim,
+                num_layers=2,
+                c=1.0,
+                act='leaky_relu',
+                lr=0.05,
+                cuda=0,
+                epochs=100,
+                seed=42,
+                eval_freq=1,
+                val_prop=0.5,
+                test_prop=0.3,
+                dropout=0.1,
+                beta1=0.9,
+                beta2=.999,
+                K=20,
+                beta=.5,
+                analytical_kl=True,
+                posterior='WrappedNormal',
+                prior='WrappedNormal',
+                prior_iso=True,
+                prior_std=1.,
+                learn_prior_std=True,
+                enc='Mob',
+                dec='Geo',
+                bias=True,
+                alpha=0.01,
+                classifier=None,
+                clusterer=None,
+                log_freq=0,
+                normalize_adj=False,
+                normalize_feats=True
+                ): 
+
+        self.args = create_args(dim,hidden_dim,num_layers,c,act,lr,cuda,epochs,seed,eval_freq,val_prop,test_prop,dropout,beta1,beta2,K,beta,analytical_kl,posterior,prior,prior_iso,prior_std,learn_prior_std,enc,dec,bias,alpha,classifier,clusterer,log_freq,normalize_adj,normalize_feats)
+        self.args.n_classes = len(np.unique(labels))
+        self.args.feat_dim = features.shape[1]
+        self.data = process_data(self.args,adj,features,labels)
+        self.args.data_size = [adj.shape[0],self.args.feat_dim]
+        self.args.batch_size=1
+
+        if int(self.args.cuda) >= 0:
+            torch.cuda.manual_seed(self.args.seed)
+            self.args.device = 'cuda:' + str(self.args.cuda)
+        else:
+            self.args.device = 'cpu'
+
+        self.args.prior_iso = self.args.prior_iso or self.args.posterior == 'RiemannianNormal'
+
+        # Choosing and saving a random seed for reproducibility
+        if self.args.seed == 0: self.args.seed = int(torch.randint(0, 2**32 - 1, (1,)).item())
+        print('seed', self.args.seed)
+        torch.manual_seed(self.args.seed)
+        np.random.seed(self.args.seed)
+        torch.cuda.manual_seed_all(self.args.seed)
+        torch.backends.cudnn.deterministic = True
+        self.model = Tabular(self.args).to(self.args.device)
+        self.optimizer = optim.Adam(self.model.parameters(), lr=self.args.lr, amsgrad=True, betas=(self.args.beta1, self.args.beta2))
+        self.loss_function = getattr(objectives,'vae_objective')
+
+        if self.args.cuda is not None and int(self.args.cuda) >= 0 :
+            os.environ['CUDA_VISIBLE_DEVICES'] = str(self.args.cuda)
+            self.model = self.model.to(self.args.device)
+            for x, val in self.data.items():
+                if torch.is_tensor(self.data[x]):
+                    self.data[x] = self.data[x].to(self.args.device)
+
+        self.tb_embeddings = None
+
+
+    def fit(self):
+
+        tot_params = sum([np.prod(p.size()) for p in self.model.parameters()])
+        logging.info(f"Total number of parameters: {tot_params}")
+
+        t_total = time.time()
+        agg = defaultdict(list)
+        b_loss = b_recon = b_kl = b_mlik = tb_loss = sys.float_info.max
+        
+        best_losses = []
+        real_losses = []
+
+        for epoch in range(self.args.epochs):
+            self.model.train()
+            self.optimizer.zero_grad()
+
+            qz_x, px_z, lik, kl, loss , embeddings = self.loss_function(self.model,self.data['idx_train'], self.data['features'], self.data['adj_train'], K=self.args.K, beta=self.args.beta, components=True, analytical_kl=self.args.analytical_kl)
+            probe_infnan(loss, "Training loss:")
+            loss.backward()
+            self.optimizer.step()
+
+            t_loss = loss.item() / len(self.data['idx_train'])
+            t_recon = -lik.mean(0).sum().item() / len(self.data['idx_train'])
+            t_kl = kl.sum(-1).mean(0).sum().item() / len(self.data['idx_train'])
+
+            if(t_loss < b_loss):
+                b_loss = t_loss 
+                b_recon = t_recon 
+                b_kl = t_kl 
+
+
+            agg['train_loss'].append(t_loss )
+            agg['train_recon'].append(t_recon )
+            agg['train_kl'].append(t_kl )
+
+            real_losses.append(t_recon)
+            if(len(best_losses) == 0):
+                best_losses.append(real_losses[0])
+            elif (best_losses[-1] > real_losses[-1]):
+                best_losses.append(real_losses[-1])
+            else:
+                best_losses.append(best_losses[-1])
+
+            if self.args.log_freq and (epoch + 1) % self.args.log_freq == 0:
+                print('====> Epoch: {:03d} Loss: {:.2f} Recon: {:.2f} KL: {:.2f}'.format(epoch, agg['train_loss'][-1], agg['train_recon'][-1], agg['train_kl'][-1]))
+
+            if (epoch + 1) % self.args.eval_freq == 0:
+                self.model.eval()
+                with torch.no_grad():
+                    qz_x, px_z, lik, kl, loss , embeddings= self.loss_function(self.model,self.data['idx_val'], self.data['features'],self.data['adj_train'], K=self.args.K, beta=self.args.beta, components=True)
+                    tt_loss = loss.item() / len(self.data['idx_val'])
+                    if(tt_loss < tb_loss):
+                        tb_loss = tt_loss 
+                        self.tb_embeddings = embeddings[0]
+
+                    agg['test_loss'].append(tt_loss )
+                    print('====>             Test loss: {:.4f}'.format(agg['test_loss'][-1]))
+
+
+        logging.info("Optimization Finished!")
+        logging.info("Total time elapsed: {:.4f}s".format(time.time() - t_total))
+        print('====> Training: Best Loss: {:.2f} Best Recon: {:.2f} Best KL: {:.2f}'.format(b_loss,b_recon,b_kl))
+        print('====> Testing: Best Loss: {:.2f}'.format(tb_loss))
+
+        X =  self.model.manifold.logmap0(self.tb_embeddings).cpu().detach().numpy()
+        y = self.data['labels'].cpu().reshape(-1,1)
+        acc,f1,recall,precision,roc_auc=perform_task(self.args,X,y)
+        return {'real':real_losses,'best':best_losses},acc,f1,recall,precision,roc_auc,time.time() - t_total
+
+    def predict(self):
+        self.model.eval()
+        with torch.no_grad():
+            qz_x, px_z, lik, kl, loss , embeddings=self.loss_function(self.model,self.data['idx_test'], self.data['features'],self.data['adj_train'], K=self.args.K, beta=self.args.beta, components=True)
+            tt_loss = loss.item() / len(self.data['idx_test'])
+        data = self.model.manifold.logmap0(embeddings[0]).cpu().detach().numpy()
+        labels = self.data['labels'].reshape(-1,1).cpu()
+        acc,f1,recall,precision,roc_auc=perform_task(self.args,data,labels)
+        return abs(tt_loss) , acc, f1 , recall,precision,roc_auc
+
+
+    def save_embeddings(self,directory,prefix):
+        tb_embeddings_euc = self.model.manifold.logmap0(self.tb_embeddings)
+        for_classification_hyp = np.hstack((self.tb_embeddings.cpu().detach().numpy(),self.data['labels'].reshape(-1,1).cpu()))
+        for_classification_euc = np.hstack((tb_embeddings_euc.cpu().detach().numpy(),self.data['labels'].reshape(-1,1).cpu()))
+        hyp_file_path = os.path.join(directory,f'{prefix}_embeddings_hyp.csv')
+        euc_file_path = os.path.join(directory,f'{prefix}_embeddings_euc.csv')
+        np.savetxt(hyp_file_path, for_classification_hyp, delimiter=',')
+        np.savetxt(euc_file_path, for_classification_euc, delimiter=',')
diff --git a/PVAE/utils.py b/PVAE/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..87b937ad038a97db9af279bb8b8363b3916432cf
--- /dev/null
+++ b/PVAE/utils.py
@@ -0,0 +1,327 @@
+import sys
+import math
+import time
+import os
+import shutil
+import torch
+import torch.distributions as dist
+from torch.autograd import Variable, Function, grad
+from sklearn.preprocessing import MinMaxScaler
+import pandas as pd
+import numpy as np
+import argparse
+import torch.nn as nn
+import scipy.sparse as sp
+
+
+def lexpand(A, *dimensions):
+    """Expand tensor, adding new dimensions on left."""
+    return A.expand(tuple(dimensions) + A.shape)
+
+
+def rexpand(A, *dimensions):
+    """Expand tensor, adding new dimensions on right."""
+    return A.view(A.shape + (1,)*len(dimensions)).expand(A.shape + tuple(dimensions))
+
+
+def assert_no_nan(name, g):
+    if torch.isnan(g).any(): raise Exception('nans in {}'.format(name))
+
+
+def assert_no_grad_nan(name, x):
+    if x.requires_grad: x.register_hook(lambda g: assert_no_nan(name, g))
+
+
+# Classes
+class Constants(object):
+    eta = 1e-5
+    log2 = math.log(2)
+    logpi = math.log(math.pi)
+    log2pi = math.log(2 * math.pi)
+    logceilc = 88                # largest cuda v s.t. exp(v) < inf
+    logfloorc = -104             # smallest cuda v s.t. exp(v) > 0
+    invsqrt2pi = 1. / math.sqrt(2 * math.pi)
+    sqrthalfpi = math.sqrt(math.pi/2)
+
+
+def logsinh(x):
+    # torch.log(sinh(x))
+    return x + torch.log(1 - torch.exp(-2 * x)) - Constants.log2
+
+
+def logcosh(x):
+    # torch.log(cosh(x))
+    return x + torch.log(1 + torch.exp(-2 * x)) - Constants.log2
+
+
+class Arccosh(Function):
+    # https://github.com/facebookresearch/poincare-embeddings/blob/master/model.py
+    @staticmethod
+    def forward(ctx, x):
+        ctx.z = torch.sqrt(x * x - 1)
+        return torch.log(x + ctx.z)
+
+    @staticmethod
+    def backward(ctx, g):
+        z = torch.clamp(ctx.z, min=Constants.eta)
+        z = g / z
+        return z
+
+
+class Arcsinh(Function):
+    @staticmethod
+    def forward(ctx, x):
+        ctx.z = torch.sqrt(x * x + 1)
+        return torch.log(x + ctx.z)
+
+    @staticmethod
+    def backward(ctx, g):
+        z = torch.clamp(ctx.z, min=Constants.eta)
+        z = g / z
+        return z
+
+
+# https://stackoverflow.com/questions/14906764/how-to-redirect-stdout-to-both-file-and-console-with-scripting
+class Logger(object):
+    def __init__(self, filename):
+        self.terminal = sys.stdout
+        self.log = open(filename, "a")
+
+    def write(self, message):
+        self.terminal.write(message)
+        self.log.write(message)
+
+    def flush(self):
+        # this flush method is needed for python 3 compatibility.
+        # this handles the flush command by doing nothing.
+        # you might want to specify some extra behavior here.
+        pass
+
+
+class Timer:
+    def __init__(self, name):
+        self.name = name
+
+    def __enter__(self):
+        self.begin = time.time()
+        return self
+
+    def __exit__(self, *args):
+        self.end = time.time()
+        self.elapsed = self.end - self.begin
+        self.elapsedH = time.gmtime(self.elapsed)
+        print('====> [{}] Time: {:7.3f}s or {}'
+              .format(self.name,
+                      self.elapsed,
+                      time.strftime("%H:%M:%S", self.elapsedH)))
+
+
+# Functions
+def save_vars(vs, filepath):
+    """
+    Saves variables to the given filepath in a safe manner.
+    """
+    if os.path.exists(filepath):
+        shutil.copyfile(filepath, '{}.old'.format(filepath))
+    torch.save(vs, filepath)
+
+
+def save_model(model, filepath):
+    """
+    To load a saved model, simply use
+    `model.load_state_dict(torch.load('path-to-saved-model'))`.
+    """
+    save_vars(model.state_dict(), filepath)
+
+
+def log_mean_exp(value, dim=0, keepdim=False):
+    return log_sum_exp(value, dim, keepdim) - math.log(value.size(dim))
+
+
+def log_sum_exp(value, dim=0, keepdim=False):
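+    # Numerically stable log-sum-exp: shifting by the max m prevents exp()
+    # from overflowing, using logsumexp(v) = m + log(sum(exp(v - m))).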
+    m, _ = torch.max(value, dim=dim, keepdim=True)
+    value0 = value - m
+    if keepdim is False:
+        m = m.squeeze(dim)
+    return m + torch.log(torch.sum(torch.exp(value0), dim=dim, keepdim=keepdim))
+
+
+def log_sum_exp_signs(value, signs, dim=0, keepdim=False):
+    m, _ = torch.max(value, dim=dim, keepdim=True)
+    value0 = value - m
+    if keepdim is False:
+        m = m.squeeze(dim)
+    return m + torch.log(torch.sum(signs * torch.exp(value0), dim=dim, keepdim=keepdim))
+
+
+def get_mean_param(params):
+    """Return the parameter used to show reconstructions or generations.
+    For example, the mean for Normal, or probs for Bernoulli.
+    For Bernoulli, skip first parameter, as that's (scalar) temperature
+    """
+    if params[0].dim() == 0:
+        return params[1]
+    # elif len(params) == 3:
+    #     return params[1]
+    else:
+        return params[0]
+
+
+def probe_infnan(v, name, extras={}):
+    nps = torch.isnan(v)
+    s = nps.sum().item()
+    if s > 0:
+        print('>>> {} >>>'.format(name))
+        print(name, s)
+        print(v[nps])
+        for k, val in extras.items():
+            print(k, val, val.sum().item())
+        quit()
+
+
+def has_analytic_kl(type_p, type_q):
+    return (type_p, type_q) in torch.distributions.kl._KL_REGISTRY
+
+
+def split_data(labels, test_prop,val_prop):
+    nb_nodes = labels.shape[0]
+    all_idx = np.arange(nb_nodes)
+    pos_idx = labels.nonzero()[0]
+    neg_idx = (1. - labels).nonzero()[0]
+    np.random.shuffle(pos_idx)
+    np.random.shuffle(neg_idx)
+    pos_idx = pos_idx.tolist()
+    neg_idx = neg_idx.tolist()
+    nb_pos_neg = min(len(pos_idx), len(neg_idx))
+    nb_val = round(val_prop * nb_pos_neg)
+    nb_test = round(test_prop * nb_pos_neg)
+    idx_val_pos, idx_test_pos, idx_train_pos = pos_idx[:nb_val], pos_idx[nb_val:nb_val + nb_test], pos_idx[nb_val + nb_test:]
+    idx_val_neg, idx_test_neg, idx_train_neg = neg_idx[:nb_val], neg_idx[nb_val:nb_val + nb_test], neg_idx[nb_val + nb_test:]
+    return idx_val_pos + idx_val_neg, idx_test_pos + idx_test_neg, idx_train_pos + idx_train_neg
+
+def process_data(args, adj,features,labels):
+    data = process_data_nc(args,adj,features,labels)
+    data['adj_train'], data['features'] = process(
+            data['adj_train'], data['features'],args.normalize_adj,args.normalize_feats
+    )
+    return data
+
+def process_data_nc(args,adj,features,labels):
+    idx_val, idx_test, idx_train = split_data(labels, args.test_prop, args.val_prop)
+    labels = torch.LongTensor(labels)
+    data = {'adj_train': sp.csr_matrix(adj), 'features': features, 'labels': labels, 'idx_train': idx_train,  'idx_test': idx_test , 'idx_val':idx_val}
+    return data
+
+def process(adj, features, normalize_adj, normalize_feats):
+    if sp.isspmatrix(features):
+        features = np.array(features.todense())
+    if normalize_feats: 
+        features = normalize(features)
+    features = torch.Tensor(features)
+    if normalize_adj:
+        adj = normalize(adj)
+    adj = sparse_mx_to_torch_sparse_tensor(adj)
+    return adj, features
+
+
+def normalize(mx):
+    """Row-normalize sparse matrix."""
+    rowsum = np.array(mx.sum(1))
+    r_inv = np.power(rowsum, -1).flatten()
+    r_inv[np.isinf(r_inv)] = 0.
+    r_mat_inv = sp.diags(r_inv)
+    mx = r_mat_inv.dot(mx)
+    return mx
+
+
+def sparse_mx_to_torch_sparse_tensor(sparse_mx):
+    """Convert a scipy sparse matrix to a torch sparse tensor."""
+    sparse_mx = sparse_mx.tocoo()
+    indices = torch.from_numpy(
+            np.vstack((sparse_mx.row, sparse_mx.col)).astype(np.int64)
+    )
+    values = torch.Tensor(sparse_mx.data)
+    shape = torch.Size(sparse_mx.shape)
+    return torch.sparse_coo_tensor(indices, values, shape)
+
+def create_args(*args):
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--dim', type=int, default=args[0])
+    parser.add_argument('--hidden_dim', type=int, default=args[1])
+    parser.add_argument('--num_layers', type=int, default=args[2])
+    parser.add_argument('--c', type=float, default=args[3])
+    parser.add_argument('--act', type=str, default=args[4])
+    parser.add_argument('--lr', type=float, default=args[5])
+    parser.add_argument('--cuda', type=int, default=args[6])
+    parser.add_argument('--epochs', type=int, default=args[7])
+    parser.add_argument('--seed', type=int, default=args[8])
+    parser.add_argument('--eval_freq', type=int, default=args[9])
+    parser.add_argument('--val_prop', type=float, default=args[10])
+    parser.add_argument('--test_prop', type=float, default=args[11])
+    parser.add_argument('--dropout', type=float, default=args[12])
+    parser.add_argument('--beta1', type=float, default=args[13])
+    parser.add_argument('--beta2', type=float, default=args[14])
+    parser.add_argument('--K', type=int, default=args[15])
+    parser.add_argument('--beta', type=float, default=args[16])
+    parser.add_argument('--analytical_kl', type=bool, default=args[17])
+    parser.add_argument('--posterior', type=str, default=args[18])
+    parser.add_argument('--prior', type=str, default=args[19])
+    parser.add_argument('--prior_iso', type=bool, default=args[20])
+    parser.add_argument('--prior_std', type=float, default=args[21])
+    parser.add_argument('--learn_prior_std', type=bool, default=args[22])
+    parser.add_argument('--enc', type=str, default=args[23])
+    parser.add_argument('--dec', type=str, default=args[24])
+    parser.add_argument('--bias', type=bool, default=args[25])
+    parser.add_argument('--alpha', type=float, default=args[26])
+    parser.add_argument('--classifier', type=str, default=args[27])
+    parser.add_argument('--clusterer', type=str, default=args[28])
+    parser.add_argument('--log_freq', type=int, default=args[29])
+    parser.add_argument('--normalize_adj', type=bool, default=args[30])
+    parser.add_argument('--normalize_feats', type=bool, default=args[31])
+    flags, unknown = parser.parse_known_args()
+    return flags
+
+
+def get_activation(args):
+    if args.act == 'leaky_relu':
+        return nn.LeakyReLU(args.alpha)
+    elif args.act == 'rrelu':
+        return nn.RReLU()
+    elif args.act == 'relu':
+        return nn.ReLU()
+    elif args.act == 'elu':
+        return nn.ELU()
+    elif args.act == 'prelu':
+        return nn.PReLU()
+    elif args.act == 'selu':
+        return nn.SELU()
+
+
+from Ghypeddings.classifiers import *
+def perform_task(args,X,y):
+    if(args.classifier and args.clusterer):
+        print('You have to choose only one of them!')
+        sys.exit(1)
+    elif(args.classifier):
+        if(args.classifier == 'svm'):
+            return SVM(X,y,args.test_prop,args.seed)
+        elif(args.classifier == 'mlp'):
+            return mlp(X,y,1,10)
+        elif(args.classifier == 'decision tree'):
+            return decision_tree(X,y,args.test_prop,args.seed)
+        elif(args.classifier == 'random forest'):
+            return random_forest(X,y,args.test_prop,args.seed)
+        elif(args.classifier == 'adaboost'):
+            return adaboost(X,y,args.test_prop,args.seed)
+        elif(args.classifier == 'knn'):
+            return KNN(X,y,args.test_prop,args.seed)
+        elif(args.classifier == 'naive bayes'):
+            return naive_bayes(X,y,args.test_prop,args.seed)
+        else:
+            raise NotImplementedError
+    elif(args.clusterer):
+        pass
+    else:
+        return 99,99,99,99,99
\ No newline at end of file
diff --git a/Poincare/__init__.py b/Poincare/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..bfa83a015b9025ddbca2b7c1ed543c66fd3af3d9
--- /dev/null
+++ b/Poincare/__init__.py
@@ -0,0 +1,2 @@
+from __future__ import print_function
+from __future__ import division
diff --git a/Poincare/layers/__init__.py b/Poincare/layers/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/Poincare/layers/layers.py b/Poincare/layers/layers.py
new file mode 100644
index 0000000000000000000000000000000000000000..94778f8a79b92f2383dddcb7a96fc60d0fad6b70
--- /dev/null
+++ b/Poincare/layers/layers.py
@@ -0,0 +1,43 @@
+"""Euclidean layers."""
+import math
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from torch.nn.modules.module import Module
+from torch.nn.parameter import Parameter
+
+
+def get_dim_act(args):
+    """
+    Helper function to get dimension and activation at every layer.
+    :param args:
+    :return:
+    """
+    if not args.act:
+        act = lambda x: x
+    else:
+        act = getattr(F, args.act)
+    acts = [act] * (args.num_layers - 1)
+    dims = [args.feat_dim] + ([args.dim] * (args.num_layers - 1))
+    if args.task in ['lp', 'rec']:
+        dims += [args.dim]
+        acts += [act]
+    return dims, acts
+
+class Linear(Module):
+    """
+    Simple Linear layer with dropout.
+    """
+
+    def __init__(self, in_features, out_features, dropout, act, use_bias):
+        super(Linear, self).__init__()
+        self.dropout = dropout
+        self.linear = nn.Linear(in_features, out_features, use_bias)
+        self.act = act
+
+    def forward(self, x):
+        hidden = self.linear.forward(x)
+        hidden = F.dropout(hidden, self.dropout, training=self.training)
+        out = self.act(hidden)
+        return out
diff --git a/Poincare/manifolds/__init__.py b/Poincare/manifolds/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..1ac57200dff3cf341b4148b750fe1ecadb88c620
--- /dev/null
+++ b/Poincare/manifolds/__init__.py
@@ -0,0 +1,3 @@
+from Ghypeddings.Poincare.manifolds.base import ManifoldParameter
+from Ghypeddings.Poincare.manifolds.poincare import PoincareBall
+from Ghypeddings.Poincare.manifolds.euclidean import Euclidean
\ No newline at end of file
diff --git a/Poincare/manifolds/base.py b/Poincare/manifolds/base.py
new file mode 100644
index 0000000000000000000000000000000000000000..925d4a6b2a59dae47a3a8ca33a7dcdcb20e0f08e
--- /dev/null
+++ b/Poincare/manifolds/base.py
@@ -0,0 +1,88 @@
+"""Base manifold."""
+
+from torch.nn import Parameter
+
+
+class Manifold(object):
+    """
+    Abstract class to define operations on a manifold.
+    """
+
+    def __init__(self):
+        super().__init__()
+        self.eps = 10e-8
+
+    def sqdist(self, p1, p2, c):
+        """Squared distance between pairs of points."""
+        raise NotImplementedError
+
+    def egrad2rgrad(self, p, dp, c):
+        """Converts Euclidean Gradient to Riemannian Gradients."""
+        raise NotImplementedError
+
+    def proj(self, p, c):
+        """Projects point p on the manifold."""
+        raise NotImplementedError
+
+    def proj_tan(self, u, p, c):
+        """Projects u on the tangent space of p."""
+        raise NotImplementedError
+
+    def proj_tan0(self, u, c):
+        """Projects u on the tangent space of the origin."""
+        raise NotImplementedError
+
+    def expmap(self, u, p, c):
+        """Exponential map of u at point p."""
+        raise NotImplementedError
+
+    def logmap(self, p1, p2, c):
+        """Logarithmic map of point p1 at point p2."""
+        raise NotImplementedError
+
+    def expmap0(self, u, c):
+        """Exponential map of u at the origin."""
+        raise NotImplementedError
+
+    def logmap0(self, p, c):
+        """Logarithmic map of point p at the origin."""
+        raise NotImplementedError
+
+    def mobius_add(self, x, y, c, dim=-1):
+        """Adds points x and y."""
+        raise NotImplementedError
+
+    def mobius_matvec(self, m, x, c):
+        """Performs hyperboic martrix-vector multiplication."""
+        raise NotImplementedError
+
+    def init_weights(self, w, c, irange=1e-5):
+        """Initializes random weigths on the manifold."""
+        raise NotImplementedError
+
+    def inner(self, p, c, u, v=None, keepdim=False):
+        """Inner product for tangent vectors at point x."""
+        raise NotImplementedError
+
+    def ptransp(self, x, y, u, c):
+        """Parallel transport of u from x to y."""
+        raise NotImplementedError
+
+    def ptransp0(self, x, u, c):
+        """Parallel transport of u from the origin to y."""
+        raise NotImplementedError
+
+
+class ManifoldParameter(Parameter):
+    """
+    Subclass of torch.nn.Parameter for Riemannian optimization.
+    """
+    def __new__(cls, data, requires_grad, manifold, c):
+        return Parameter.__new__(cls, data, requires_grad)
+
+    def __init__(self, data, requires_grad, manifold, c):
+        self.c = c
+        self.manifold = manifold
+
+    def __repr__(self):
+        return '{} Parameter containing:\n'.format(self.manifold.name) + super(Parameter, self).__repr__()
diff --git a/Poincare/manifolds/euclidean.py b/Poincare/manifolds/euclidean.py
new file mode 100644
index 0000000000000000000000000000000000000000..177ebb2bf8a03d211732408b84d5f5d8bbec962e
--- /dev/null
+++ b/Poincare/manifolds/euclidean.py
@@ -0,0 +1,67 @@
+"""Euclidean manifold."""
+
+from Ghypeddings.Poincare.manifolds.base import Manifold
+
+
+class Euclidean(Manifold):
+    """
+    Euclidean Manifold class.
+    """
+
+    def __init__(self):
+        super(Euclidean, self).__init__()
+        self.name = 'Euclidean'
+
+    def normalize(self, p):
+        dim = p.size(-1)
+        p.view(-1, dim).renorm_(2, 0, 1.)
+        return p
+
+    def sqdist(self, p1, p2, c):
+        return (p1 - p2).pow(2).sum(dim=-1)
+
+    def egrad2rgrad(self, p, dp, c):
+        return dp
+
+    def proj(self, p, c):
+        return p
+
+    def proj_tan(self, u, p, c):
+        return u
+
+    def proj_tan0(self, u, c):
+        return u
+
+    def expmap(self, u, p, c):
+        return p + u
+
+    def logmap(self, p1, p2, c):
+        return p2 - p1
+
+    def expmap0(self, u, c):
+        return u
+
+    def logmap0(self, p, c):
+        return p
+
+    def mobius_add(self, x, y, c, dim=-1):
+        return x + y
+
+    def mobius_matvec(self, m, x, c):
+        mx = x @ m.transpose(-1, -2)
+        return mx
+
+    def init_weights(self, w, c, irange=1e-5):
+        w.data.uniform_(-irange, irange)
+        return w
+
+    def inner(self, p, c, u, v=None, keepdim=False):
+        if v is None:
+            v = u
+        return (u * v).sum(dim=-1, keepdim=keepdim)
+
+    def ptransp(self, x, y, v, c):
+        return v
+
+    def ptransp0(self, x, v, c):
+        return x + v
diff --git a/Poincare/manifolds/poincare.py b/Poincare/manifolds/poincare.py
new file mode 100644
index 0000000000000000000000000000000000000000..3f52cee6ada9b4a2db8f7ce5051907979a08c023
--- /dev/null
+++ b/Poincare/manifolds/poincare.py
@@ -0,0 +1,145 @@
+"""Poincare ball manifold."""
+
+import torch
+
+from Ghypeddings.Poincare.manifolds.base import Manifold
+from Ghypeddings.Poincare.utils.math_utils import artanh, tanh
+
+
+class PoincareBall(Manifold):
+    """
+    PoincareBall Manifold class.
+
+    We use the following convention: x0^2 + x1^2 + ... + xd^2 < 1 / c
+
+    Note that 1/sqrt(c) is the Poincare ball radius.
+
+    """
+
+    def __init__(self):
+        super(PoincareBall, self).__init__()
+        self.name = 'PoincareBall'
+        self.min_norm = 1e-15
+        self.eps = {torch.float32: 4e-3, torch.float64: 1e-5}
+
+    def sqdist(self, p1, p2, c):
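+        # Squared geodesic distance on the ball of curvature -c:
+        #   d_c(p1, p2) = (2 / sqrt(c)) * artanh(sqrt(c) * ||(-p1) \oplus_c p2||)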
+        sqrt_c = c ** 0.5
+        dist_c = artanh(
+            sqrt_c * self.mobius_add(-p1, p2, c, dim=-1).norm(dim=-1, p=2, keepdim=False)
+        )
+        dist = dist_c * 2 / sqrt_c
+        return dist ** 2
+
+    def _lambda_x(self, x, c):
+        x_sqnorm = torch.sum(x.data.pow(2), dim=-1, keepdim=True)
+        return 2 / (1. - c * x_sqnorm).clamp_min(self.min_norm)
+
+    def egrad2rgrad(self, p, dp, c):
+        lambda_p = self._lambda_x(p, c)
+        dp /= lambda_p.pow(2)
+        return dp
+
+    def proj(self, x, c):
+        norm = torch.clamp_min(x.norm(dim=-1, keepdim=True, p=2), self.min_norm)
+        maxnorm = (1 - self.eps[x.dtype]) / (c ** 0.5)
+        cond = norm > maxnorm
+        projected = x / norm * maxnorm
+        return torch.where(cond, projected, x)
+
+    def proj_tan(self, u, p, c):
+        return u
+
+    def proj_tan0(self, u, c):
+        return u
+
+    def expmap(self, u, p, c):
+        sqrt_c = c ** 0.5
+        u_norm = u.norm(dim=-1, p=2, keepdim=True).clamp_min(self.min_norm)
+        second_term = (
+                tanh(sqrt_c / 2 * self._lambda_x(p, c) * u_norm)
+                * u
+                / (sqrt_c * u_norm)
+        )
+        gamma_1 = self.mobius_add(p, second_term, c)
+        return gamma_1
+
+    def logmap(self, p1, p2, c):
+        sub = self.mobius_add(-p1, p2, c)
+        sub_norm = sub.norm(dim=-1, p=2, keepdim=True).clamp_min(self.min_norm)
+        lam = self._lambda_x(p1, c)
+        sqrt_c = c ** 0.5
+        return 2 / sqrt_c / lam * artanh(sqrt_c * sub_norm) * sub / sub_norm
+
+    def expmap0(self, u, c):
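+        # Exponential map at the origin:
+        #   exp_0^c(u) = tanh(sqrt(c) * ||u||) * u / (sqrt(c) * ||u||)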
+        sqrt_c = c ** 0.5
+        u_norm = torch.clamp_min(u.norm(dim=-1, p=2, keepdim=True), self.min_norm)
+        gamma_1 = tanh(sqrt_c * u_norm) * u / (sqrt_c * u_norm)
+        return gamma_1
+
+    def logmap0(self, p, c):
+        sqrt_c = c ** 0.5
+        p_norm = p.norm(dim=-1, p=2, keepdim=True).clamp_min(self.min_norm)
+        scale = 1. / sqrt_c * artanh(sqrt_c * p_norm) / p_norm
+        return scale * p
+
+    def mobius_add(self, x, y, c, dim=-1):
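+        # Closed form of Mobius addition:
+        #   x \oplus_c y = ((1 + 2c<x,y> + c||y||^2) x + (1 - c||x||^2) y)
+        #                  / (1 + 2c<x,y> + c^2 ||x||^2 ||y||^2)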
+        x2 = x.pow(2).sum(dim=dim, keepdim=True)
+        y2 = y.pow(2).sum(dim=dim, keepdim=True)
+        xy = (x * y).sum(dim=dim, keepdim=True)
+        num = (1 + 2 * c * xy + c * y2) * x + (1 - c * x2) * y
+        denom = 1 + 2 * c * xy + c ** 2 * x2 * y2
+        return num / denom.clamp_min(self.min_norm)
+
+    def mobius_matvec(self, m, x, c):
+        sqrt_c = c ** 0.5
+        x_norm = x.norm(dim=-1, keepdim=True, p=2).clamp_min(self.min_norm)
+        mx = x @ m.transpose(-1, -2)
+        mx_norm = mx.norm(dim=-1, keepdim=True, p=2).clamp_min(self.min_norm)
+        res_c = tanh(mx_norm / x_norm * artanh(sqrt_c * x_norm)) * mx / (mx_norm * sqrt_c)
+        cond = (mx == 0).prod(-1, keepdim=True, dtype=torch.uint8)
+        res_0 = torch.zeros(1, dtype=res_c.dtype, device=res_c.device)
+        res = torch.where(cond, res_0, res_c)
+        return res
+
+    def init_weights(self, w, c, irange=1e-5):
+        w.data.uniform_(-irange, irange)
+        return w
+
+    def _gyration(self, u, v, w, c, dim: int = -1):
+        u2 = u.pow(2).sum(dim=dim, keepdim=True)
+        v2 = v.pow(2).sum(dim=dim, keepdim=True)
+        uv = (u * v).sum(dim=dim, keepdim=True)
+        uw = (u * w).sum(dim=dim, keepdim=True)
+        vw = (v * w).sum(dim=dim, keepdim=True)
+        c2 = c ** 2
+        a = -c2 * uw * v2 + c * vw + 2 * c2 * uv * vw
+        b = -c2 * vw * u2 - c * uw
+        d = 1 + 2 * c * uv + c2 * u2 * v2
+        return w + 2 * (a * u + b * v) / d.clamp_min(self.min_norm)
+
+    def inner(self, x, c, u, v=None, keepdim=False):
+        if v is None:
+            v = u
+        lambda_x = self._lambda_x(x, c)
+        return lambda_x ** 2 * (u * v).sum(dim=-1, keepdim=keepdim)
+
+    def ptransp(self, x, y, u, c):
+        lambda_x = self._lambda_x(x, c)
+        lambda_y = self._lambda_x(y, c)
+        return self._gyration(y, -x, u, c) * lambda_x / lambda_y
+
+    def ptransp_(self, x, y, u, c):
+        lambda_x = self._lambda_x(x, c)
+        lambda_y = self._lambda_x(y, c)
+        return self._gyration(y, -x, u, c) * lambda_x / lambda_y
+
+    def ptransp0(self, x, u, c):
+        lambda_x = self._lambda_x(x, c)
+        return 2 * u / lambda_x.clamp_min(self.min_norm)
+
+    def to_hyperboloid(self, x, c):
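+        # Isometry from the Poincare ball (curvature c) to the hyperboloid
+        # model with K = 1/c:
+        #   x -> sqrt(K) * (K + ||x||^2, 2 sqrt(K) x) / (K - ||x||^2)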
+        K = 1./ c
+        sqrtK = K ** 0.5
+        sqnorm = torch.norm(x, p=2, dim=1, keepdim=True) ** 2
+        return sqrtK * torch.cat([K + sqnorm, 2 * sqrtK * x], dim=1) / (K - sqnorm)
+
diff --git a/Poincare/models/__init__.py b/Poincare/models/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/Poincare/models/base_models.py b/Poincare/models/base_models.py
new file mode 100644
index 0000000000000000000000000000000000000000..142b9371cf08248d096f0ab313dd70fa8707f768
--- /dev/null
+++ b/Poincare/models/base_models.py
@@ -0,0 +1,77 @@
+"""Base model class."""
+
+import numpy as np
+from sklearn.metrics import roc_auc_score, average_precision_score
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+import Ghypeddings.Poincare.manifolds as manifolds
+import Ghypeddings.Poincare.models.encoders as encoders
+from Ghypeddings.Poincare.models.decoders import model2decoder
+from Ghypeddings.Poincare.utils.eval_utils import acc_f1
+
+
+class BaseModel(nn.Module):
+    """
+    Base model for graph embedding tasks.
+    """
+
+    def __init__(self, args):
+        super(BaseModel, self).__init__()
+        self.manifold_name = 'PoincareBall'
+        self.c = torch.tensor([1.0])
+        if not args.cuda == -1:
+            self.c = self.c.to(args.device)
+        self.manifold = getattr(manifolds, self.manifold_name)()
+        self.nnodes = args.n_nodes
+        self.encoder = getattr(encoders, 'Shallow')(self.c, args)
+
+    def encode(self, x):
+        h = self.encoder.encode(x)
+        return h
+
+    def compute_metrics(self, embeddings, data, split):
+        raise NotImplementedError
+
+    def init_metric_dict(self):
+        raise NotImplementedError
+
+    def has_improved(self, m1, m2):
+        raise NotImplementedError
+
+
+class NCModel(BaseModel):
+    """
+    Base model for node classification task.
+    """
+
+    def __init__(self, args):
+        super(NCModel, self).__init__(args)
+        self.decoder = model2decoder(1.0, args)
+        if args.n_classes > 2:
+            self.f1_average = 'micro'
+        else:
+            self.f1_average = 'binary'
+        
+        self.weights = torch.Tensor([1.] * args.n_classes)
+        if not args.cuda == -1:
+            self.weights = self.weights.to(args.device)
+
+    def decode(self, h, idx):
+        output = self.decoder.decode(h)
+        return F.log_softmax(output[idx], dim=1)
+
+    def compute_metrics(self, embeddings, data, split):
+        idx = data[f'idx_{split}']
+        output = self.decode(embeddings, idx)
+        loss = F.nll_loss(output, data['labels'][idx], self.weights)
+        acc, f1,recall,precision,roc_auc = acc_f1(output, data['labels'][idx], average=self.f1_average)
+        metrics = {'loss': loss, 'acc': acc, 'f1': f1,'recall':recall,'precision':precision,'roc_auc':roc_auc}
+        return metrics
+
+    def init_metric_dict(self):
+        return {'acc': -1, 'f1': -1}
+
+    def has_improved(self, m1, m2):
+        return m1["f1"] < m2["f1"]
\ No newline at end of file
diff --git a/Poincare/models/decoders.py b/Poincare/models/decoders.py
new file mode 100644
index 0000000000000000000000000000000000000000..8532b62830f9b8d0a050d64b23f2dc1b84ab8bd1
--- /dev/null
+++ b/Poincare/models/decoders.py
@@ -0,0 +1,46 @@
+"""Graph decoders."""
+import Ghypeddings.Poincare.manifolds as manifolds
+import torch.nn as nn
+import torch.nn.functional as F
+from Ghypeddings.Poincare.layers.layers import  Linear
+import torch
+
+class Decoder(nn.Module):
+    """
+    Decoder abstract class for node classification tasks.
+    """
+
+    def __init__(self, c):
+        super(Decoder, self).__init__()
+        self.c = c
+
+    def decode(self, x):
+        probs = self.cls.forward(x)
+        return probs
+
+
+class LinearDecoder(Decoder):
+    """
+    MLP Decoder for Hyperbolic/Euclidean node classification models.
+    """
+
+    def __init__(self, c, args):
+        super(LinearDecoder, self).__init__(c)
+        self.manifold = getattr(manifolds, 'PoincareBall')()
+        self.input_dim = args.dim + args.feat_dim
+        self.output_dim = args.n_classes
+        self.bias = True
+        self.cls = Linear(self.input_dim, self.output_dim, args.dropout, lambda x: x, self.bias)
+
+    def decode(self, x):
+        h = self.manifold.proj_tan0(self.manifold.logmap0(x, c=self.c), c=self.c)
+        return super(LinearDecoder, self).decode(h)
+
+    def extra_repr(self):
+        return 'in_features={}, out_features={}, bias={}, c={}'.format(
+                self.input_dim, self.output_dim, self.bias, self.c
+        )
+
+
+model2decoder = LinearDecoder
+
diff --git a/Poincare/models/encoders.py b/Poincare/models/encoders.py
new file mode 100644
index 0000000000000000000000000000000000000000..42e6504898f0f6e85db56f4fd597c467890e205a
--- /dev/null
+++ b/Poincare/models/encoders.py
@@ -0,0 +1,42 @@
+"""Graph encoders."""
+
+import numpy as np
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+import Ghypeddings.Poincare.manifolds as manifolds
+
+class Encoder(nn.Module):
+    """
+    Encoder abstract class.
+    """
+
+    def __init__(self, c):
+        super(Encoder, self).__init__()
+        self.c = c
+
+    def encode(self, x):
+        pass
+
+class Shallow(Encoder):
+    """
+    Shallow Embedding method.
+    Learns embeddings or loads pretrained embeddings and uses an MLP for classification.
+    """
+
+    def __init__(self, c, args):
+        super(Shallow, self).__init__(c)
+        self.manifold = getattr(manifolds, 'PoincareBall')()
+        weights = torch.Tensor(args.n_nodes, args.dim)
+        weights = self.manifold.init_weights(weights, self.c)
+        trainable = True
+        self.lt = manifolds.ManifoldParameter(weights, trainable, self.manifold, self.c)
+        self.all_nodes = torch.LongTensor(list(range(args.n_nodes)))
+        layers = []
+        self.layers = nn.Sequential(*layers)
+
+    def encode(self, x):
+        h = self.lt[self.all_nodes, :]
+        h = torch.cat((h, x), 1)
+        return h
diff --git a/Poincare/optimizers/__init__.py b/Poincare/optimizers/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..6b0d929f33f4e20f83e7cc3ce87c9fa8fd359447
--- /dev/null
+++ b/Poincare/optimizers/__init__.py
@@ -0,0 +1,2 @@
+from torch.optim import Adam
+from Ghypeddings.Poincare.optimizers.radam import RiemannianAdam
diff --git a/Poincare/optimizers/radam.py b/Poincare/optimizers/radam.py
new file mode 100644
index 0000000000000000000000000000000000000000..f4904422f52d271dc7de85ed3069ef9972f3015b
--- /dev/null
+++ b/Poincare/optimizers/radam.py
@@ -0,0 +1,172 @@
+"""Riemannian adam optimizer geoopt implementation (https://github.com/geoopt/)."""
+import torch.optim
+from Ghypeddings.Poincare.manifolds import Euclidean, ManifoldParameter
+
+_default_manifold = Euclidean()
+
+
+class OptimMixin(object):
+    def __init__(self, *args, stabilize=None, **kwargs):
+        self._stabilize = stabilize
+        super().__init__(*args, **kwargs)
+
+    def stabilize_group(self, group):
+        pass
+
+    def stabilize(self):
+        """Stabilize parameters if they are off-manifold due to numerical reasons
+        """
+        for group in self.param_groups:
+            self.stabilize_group(group)
+
+
+def copy_or_set_(dest, source):
+    """
+    A workaround to respect strides of :code:`dest` when copying :code:`source`
+    (https://github.com/geoopt/geoopt/issues/70)
+    Parameters
+    ----------
+    dest : torch.Tensor
+        Destination tensor where to store new data
+    source : torch.Tensor
+        Source data to put in the new tensor
+    Returns
+    -------
+    dest
+        torch.Tensor, modified inplace
+    """
+    if dest.stride() != source.stride():
+        return dest.copy_(source)
+    else:
+        return dest.set_(source)
+
+
+class RiemannianAdam(OptimMixin, torch.optim.Adam):
+    r"""Riemannian Adam with the same API as :class:`torch.optim.Adam`
+    Parameters
+    ----------
+    params : iterable
+        iterable of parameters to optimize or dicts defining
+        parameter groups
+    lr : float (optional)
+        learning rate (default: 1e-3)
+    betas : Tuple[float, float] (optional)
+        coefficients used for computing
+        running averages of gradient and its square (default: (0.9, 0.999))
+    eps : float (optional)
+        term added to the denominator to improve
+        numerical stability (default: 1e-8)
+    weight_decay : float (optional)
+        weight decay (L2 penalty) (default: 0)
+    amsgrad : bool (optional)
+        whether to use the AMSGrad variant of this
+        algorithm from the paper `On the Convergence of Adam and Beyond`_
+        (default: False)
+    Other Parameters
+    ----------------
+    stabilize : int
+        Stabilize parameters if they are off-manifold due to numerical
+        reasons every ``stabilize`` steps (default: ``None`` -- no stabilize)
+    .. _On the Convergence of Adam and Beyond:
+        https://openreview.net/forum?id=ryQu7f-RZ
+    """
+
+    def step(self, closure=None):
+        """Performs a single optimization step.
+        Arguments
+        ---------
+        closure : callable (optional)
+            A closure that reevaluates the model
+            and returns the loss.
+        """
+        loss = None
+        if closure is not None:
+            loss = closure()
+        with torch.no_grad():
+            for group in self.param_groups:
+                if "step" not in group:
+                    group["step"] = 0
+                betas = group["betas"]
+                weight_decay = group["weight_decay"]
+                eps = group["eps"]
+                learning_rate = group["lr"]
+                amsgrad = group["amsgrad"]
+                for point in group["params"]:
+                    grad = point.grad
+                    if grad is None:
+                        continue
+                    if isinstance(point, (ManifoldParameter)):
+                        manifold = point.manifold
+                        c = point.c
+                    else:
+                        manifold = _default_manifold
+                        c = None
+                    if grad.is_sparse:
+                        raise RuntimeError(
+                                "Riemannian Adam does not support sparse gradients yet (PR is welcome)"
+                        )
+
+                    state = self.state[point]
+
+                    # State initialization
+                    if len(state) == 0:
+                        state["step"] = 0
+                        # Exponential moving average of gradient values
+                        state["exp_avg"] = torch.zeros_like(point)
+                        # Exponential moving average of squared gradient values
+                        state["exp_avg_sq"] = torch.zeros_like(point)
+                        if amsgrad:
+                            # Maintains max of all exp. moving avg. of sq. grad. values
+                            state["max_exp_avg_sq"] = torch.zeros_like(point)
+                    # make local variables for easy access
+                    exp_avg = state["exp_avg"]
+                    exp_avg_sq = state["exp_avg_sq"]
+                    # actual step
+                    grad.add_(point, alpha=weight_decay)
+                    grad = manifold.egrad2rgrad(point, grad, c)
+                    exp_avg.mul_(betas[0]).add_(grad, alpha=1 - betas[0])
+                    exp_avg_sq.mul_(betas[1]).add_(
+                            manifold.inner(point, c, grad, keepdim=True), alpha=1 - betas[1]
+                    )
+                    if amsgrad:
+                        max_exp_avg_sq = state["max_exp_avg_sq"]
+                        # Maintains the maximum of all 2nd moment running avg. till now
+                        torch.max(max_exp_avg_sq, exp_avg_sq, out=max_exp_avg_sq)
+                        # Use the max. for normalizing running avg. of gradient
+                        denom = max_exp_avg_sq.sqrt().add_(eps)
+                    else:
+                        denom = exp_avg_sq.sqrt().add_(eps)
+                    group["step"] += 1
+                    bias_correction1 = 1 - betas[0] ** group["step"]
+                    bias_correction2 = 1 - betas[1] ** group["step"]
+                    step_size = (
+                        learning_rate * bias_correction2 ** 0.5 / bias_correction1
+                    )
+                    # ascent direction; the update below moves along its negative
+                    direction = exp_avg / denom
+                    # transport the exponential averaging to the new point
+                    new_point = manifold.proj(manifold.expmap(-step_size * direction, point, c), c)
+                    exp_avg_new = manifold.ptransp(point, new_point, exp_avg, c)
+                    # use copy only for user facing point
+                    copy_or_set_(point, new_point)
+                    exp_avg.set_(exp_avg_new)
+
+                if self._stabilize is not None and group["step"] % self._stabilize == 0:
+                    self.stabilize_group(group)
+        return loss
+
+    @torch.no_grad()
+    def stabilize_group(self, group):
+        for p in group["params"]:
+            if not isinstance(p, ManifoldParameter):
+                continue
+            state = self.state[p]
+            if not state:  # due to None grads
+                continue
+            manifold = p.manifold
+            c = p.c
+            exp_avg = state["exp_avg"]
+            copy_or_set_(p, manifold.proj(p, c))
+            exp_avg.set_(manifold.proj_tan(exp_avg, p, c))
diff --git a/Poincare/poincare.py b/Poincare/poincare.py
new file mode 100644
index 0000000000000000000000000000000000000000..e28549492de0664fdd1aa02a717a2782e27f69f2
--- /dev/null
+++ b/Poincare/poincare.py
@@ -0,0 +1,155 @@
+from __future__ import division
+from __future__ import print_function
+
+import logging
+import os
+import time
+
+import numpy as np
+import Ghypeddings.Poincare.optimizers as optimizers
+import torch
+from Ghypeddings.Poincare.models.base_models import NCModel
+from Ghypeddings.Poincare.utils.data_utils import process_data
+from Ghypeddings.Poincare.utils.train_utils import format_metrics, create_args
+
+
+class POINCARE:
+    def __init__(self,
+                adj,
+                features,
+                labels,
+                dim,
+                grad_clip=None,
+                weight_decay=0.01,
+                lr=0.1,
+                gamma=0.5,
+                lr_reduce_freq=500,
+                cuda=0,
+                epochs=50,
+                min_epochs=50,
+                patience=None,
+                seed=42,
+                log_freq=1,
+                eval_freq=1,
+                val_prop=0.5,
+                test_prop=0.3,
+                double_precision=0,
+                dropout=0.1,
+                normalize_adj=False,
+                normalize_feats=True):
+        self.args = create_args(dim,grad_clip,weight_decay,lr,gamma,lr_reduce_freq,cuda,epochs,min_epochs,patience,seed,log_freq,eval_freq,val_prop,test_prop,double_precision,dropout,normalize_adj,normalize_feats)
+        self.args.n_nodes = adj.shape[0]
+        self.args.feat_dim = features.shape[1]
+        self.args.n_classes = len(np.unique(labels))
+        self.data = process_data(self.args,adj,features,labels)
+
+        np.random.seed(self.args.seed)
+        torch.manual_seed(self.args.seed)
+        if int(self.args.double_precision):
+            torch.set_default_dtype(torch.float64)
+        if int(self.args.cuda) >= 0:
+            torch.cuda.manual_seed(self.args.seed)
+        self.args.device = 'cuda:' + str(self.args.cuda) if int(self.args.cuda) >= 0 else 'cpu'
+        self.args.patience = self.args.epochs if not self.args.patience else int(self.args.patience)
+        if not self.args.lr_reduce_freq:
+            self.args.lr_reduce_freq = self.args.epochs
+        self.model = NCModel(self.args)
+        self.optimizer = optimizers.RiemannianAdam(params=self.model.parameters(), lr=self.args.lr,
+                                                   weight_decay=self.args.weight_decay)
+        self.lr_scheduler = torch.optim.lr_scheduler.StepLR(
+            self.optimizer,
+            step_size=int(self.args.lr_reduce_freq),
+            gamma=float(self.args.gamma)
+        )
+
+        if self.args.cuda is not None and int(self.args.cuda) >= 0:
+            os.environ['CUDA_VISIBLE_DEVICES'] = str(self.args.cuda)
+            self.model = self.model.to(self.args.device)
+            for x, val in self.data.items():
+                if torch.is_tensor(self.data[x]):
+                    self.data[x] = self.data[x].to(self.args.device)
+        self.best_emb = None
+
+
+    def fit(self):
+
+        logging.getLogger().setLevel(logging.INFO)
+        logging.info(str(self.model))
+        tot_params = sum([np.prod(p.size()) for p in self.model.parameters()])
+        logging.info(f"Total number of parameters: {tot_params}")
+
+        t_total = time.time()
+        counter = 0
+        best_val_metrics = self.model.init_metric_dict()
+
+        best_losses = []
+        real_losses = []
+
+        for epoch in range(self.args.epochs):
+            t = time.time()
+            self.model.train()
+            self.optimizer.zero_grad()
+            embeddings = self.model.encode(self.data['features'])
+            assert not torch.isnan(embeddings).any()
+            train_metrics = self.model.compute_metrics(embeddings, self.data, 'train')
+            train_metrics['loss'].backward()
+            if self.args.grad_clip is not None:
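+                # clips each parameter tensor's gradient norm individually,
+                # not the global norm across all parameters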
+                max_norm = float(self.args.grad_clip)
+                all_params = list(self.model.parameters())
+                for param in all_params:
+                    torch.nn.utils.clip_grad_norm_(param, max_norm)
+            self.optimizer.step()
+            self.lr_scheduler.step()
+
+            real_losses.append(train_metrics['loss'].item())
+            if len(best_losses) == 0:
+                best_losses.append(real_losses[0])
+            elif best_losses[-1] > real_losses[-1]:
+                best_losses.append(real_losses[-1])
+            else:
+                best_losses.append(best_losses[-1])
+
+            if (epoch + 1) % self.args.log_freq == 0:
+                logging.info(" ".join([
+                    'Epoch: {:04d}'.format(epoch + 1),
+                    'lr: {}'.format(self.lr_scheduler.get_last_lr()[0]),
+                    format_metrics(train_metrics, 'train'),
+                    'time: {:.4f}s'.format(time.time() - t)
+                ]))
+            if (epoch + 1) % self.args.eval_freq == 0:
+                self.model.eval()
+                embeddings = self.model.encode(self.data['features'])
+                val_metrics = self.model.compute_metrics(embeddings, self.data, 'val')
+
+                if (epoch + 1) % self.args.log_freq == 0:
+                    logging.info(" ".join(['Epoch: {:04d}'.format(epoch + 1), format_metrics(val_metrics, 'val')]))
+                    
+                if self.model.has_improved(best_val_metrics, val_metrics):
+                    self.best_emb = embeddings
+                    best_val_metrics = val_metrics
+                    counter = 0
+                else:
+                    counter += 1
+                    if counter == self.args.patience and epoch > self.args.min_epochs:
+                        logging.info("Early stopping")
+                        break
+
+        logging.info("Training Finished!")
+        logging.info("Total time elapsed: {:.4f}s".format(time.time() - t_total))
+
+        return (
+            {'real': real_losses, 'best': best_losses},
+            best_val_metrics['acc'],
+            best_val_metrics['f1'],
+            best_val_metrics['recall'],
+            best_val_metrics['precision'],
+            best_val_metrics['roc_auc'],
+            time.time() - t_total,
+        )
+    
+    def predict(self):
+        self.model.eval()
+        embeddings = self.model.encode(self.data['features'])
+        val_metrics = self.model.compute_metrics(embeddings, self.data, 'test')
+        return val_metrics['loss'].item(),val_metrics['acc'],val_metrics['f1'],val_metrics['recall'],val_metrics['precision'],val_metrics['roc_auc']
+
+    def save_embeddings(self):
+        tb_embeddings_euc = self.model.manifold.logmap0(self.best_emb,self.model.decoder.c)
+        for_classification_hyp = np.hstack((self.best_emb.cpu().detach().numpy(),self.data['labels'].cpu().reshape(-1,1)))
+        for_classification_euc = np.hstack((tb_embeddings_euc.cpu().detach().numpy(),self.data['labels'].cpu().reshape(-1,1)))
+        hyp_file_path = os.path.join(os.getcwd(),'poincare_embeddings_hyp.csv')
+        euc_file_path = os.path.join(os.getcwd(),'poincare_embeddings_euc.csv')
+        np.savetxt(hyp_file_path, for_classification_hyp, delimiter=',')
+        np.savetxt(euc_file_path, for_classification_euc, delimiter=',')
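+
+# Illustrative call order (hypothetical driver code):
+#   model = POINCARE(adj, features, labels, dim=8)
+#   model.fit()              # train with early stopping on the validation split
+#   model.predict()          # loss + metrics on the held-out test split
+#   model.save_embeddings()  # CSV dumps of hyperbolic and Euclidean embeddings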
\ No newline at end of file
diff --git a/Poincare/utils/__init__.py b/Poincare/utils/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/Poincare/utils/data_utils.py b/Poincare/utils/data_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..bc5c634801fe17a9231ff2f582dfcae159377ad3
--- /dev/null
+++ b/Poincare/utils/data_utils.py
@@ -0,0 +1,83 @@
+"""Data utils functions for pre-processing and data loading."""
+import os
+import pickle as pkl
+import sys
+
+import networkx as nx
+import numpy as np
+import scipy.sparse as sp
+import torch
+
+
+def process_data(args, adj,features,labels):
+    data = process_data_nc(args,adj,features,labels)
+    data['adj_train_norm'], data['features'] = process(
+            data['adj_train'], data['features'], args.normalize_adj,args.normalize_feats
+    )
+    return data
+
+def process(adj, features, normalize_adj, normalize_feats):
+    if sp.isspmatrix(features):
+        features = np.array(features.todense())
+    if normalize_feats:
+        features = normalize(features)
+    features = torch.Tensor(features)
+    if normalize_adj:
+        adj = normalize(adj + sp.eye(adj.shape[0]))
+    adj = sparse_mx_to_torch_sparse_tensor(adj)
+    return adj, features
+
+
+def normalize(mx):
+    """Row-normalize sparse matrix."""
+    rowsum = np.array(mx.sum(1))
+    r_inv = np.power(rowsum, -1).flatten()
+    r_inv[np.isinf(r_inv)] = 0.
+    r_mat_inv = sp.diags(r_inv)
+    mx = r_mat_inv.dot(mx)
+    return mx
+
+
+def sparse_mx_to_torch_sparse_tensor(sparse_mx):
+    """Convert a scipy sparse matrix to a torch sparse tensor."""
+    sparse_mx = sparse_mx.tocoo()
+    indices = torch.from_numpy(
+            np.vstack((sparse_mx.row, sparse_mx.col)).astype(np.int64)
+    )
+    values = torch.Tensor(sparse_mx.data)
+    shape = torch.Size(sparse_mx.shape)
+    return torch.sparse_coo_tensor(indices, values, shape)
+
+
+def augment(adj, features, normalize_feats=True):
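+    # Append structural features: clamp node degree at 5, one-hot encode it
+    # into 6 buckets, and add a constant column.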
+    deg = np.squeeze(np.sum(adj, axis=0).astype(int))
+    deg[deg > 5] = 5
+    deg_onehot = torch.tensor(np.eye(6)[deg], dtype=torch.float).squeeze()
+    const_f = torch.ones(features.size(0), 1)
+    features = torch.cat((features, deg_onehot, const_f), dim=1)
+    return features
+
+def split_data(labels, val_prop, test_prop, seed):
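+    # Class-balanced split: shuffle positive and negative indices separately
+    # and size the val/test sets from the minority-class count.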
+    np.random.seed(seed)
+    nb_nodes = labels.shape[0]
+    all_idx = np.arange(nb_nodes)
+    pos_idx = labels.nonzero()[0]
+    neg_idx = (1. - labels).nonzero()[0]
+    np.random.shuffle(pos_idx)
+    np.random.shuffle(neg_idx)
+    pos_idx = pos_idx.tolist()
+    neg_idx = neg_idx.tolist()
+    nb_pos_neg = min(len(pos_idx), len(neg_idx))
+    nb_val = round(val_prop * nb_pos_neg)
+    nb_test = round(test_prop * nb_pos_neg)
+    idx_val_pos, idx_test_pos, idx_train_pos = (
+        pos_idx[:nb_val], pos_idx[nb_val:nb_val + nb_test], pos_idx[nb_val + nb_test:])
+    idx_val_neg, idx_test_neg, idx_train_neg = (
+        neg_idx[:nb_val], neg_idx[nb_val:nb_val + nb_test], neg_idx[nb_val + nb_test:])
+    return idx_val_pos + idx_val_neg, idx_test_pos + idx_test_neg, idx_train_pos + idx_train_neg
+
+def process_data_nc(args,adj,features,labels):
+    idx_val, idx_test, idx_train = split_data(labels, args.val_prop, args.test_prop, seed=args.seed)
+    labels = torch.LongTensor(labels)
+    data = {'adj_train': sp.csr_matrix(adj), 'features': features, 'labels': labels, 'idx_train': idx_train, 'idx_val': idx_val, 'idx_test': idx_test}
+    return data
diff --git a/Poincare/utils/eval_utils.py b/Poincare/utils/eval_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..7494c5f3e618155257bfa1f6af2a2c91acd2f526
--- /dev/null
+++ b/Poincare/utils/eval_utils.py
@@ -0,0 +1,14 @@
+from sklearn.metrics import accuracy_score, f1_score,precision_score,recall_score,roc_auc_score
+
+def acc_f1(output, labels, average='binary'):
+    preds = output.max(1)[1].type_as(labels)
+    if preds.is_cuda:
+        preds = preds.cpu()
+        labels = labels.cpu()
+    accuracy = accuracy_score(labels,preds)
+    recall = recall_score(labels,preds)
+    precision = precision_score(labels,preds)
+    roc_auc = roc_auc_score(labels,preds)
+    f1 = f1_score(labels,preds, average=average)
+    return accuracy, f1,recall,precision,roc_auc
+
diff --git a/Poincare/utils/math_utils.py b/Poincare/utils/math_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..a2fee953984adca2f6f271db79f2b5624d9ad5bd
--- /dev/null
+++ b/Poincare/utils/math_utils.py
@@ -0,0 +1,69 @@
+"""Math utils functions."""
+
+import torch
+
+
+def cosh(x, clamp=15):
+    return x.clamp(-clamp, clamp).cosh()
+
+
+def sinh(x, clamp=15):
+    return x.clamp(-clamp, clamp).sinh()
+
+
+def tanh(x, clamp=15):
+    return x.clamp(-clamp, clamp).tanh()
+
+
+def arcosh(x):
+    return Arcosh.apply(x)
+
+
+def arsinh(x):
+    return Arsinh.apply(x)
+
+
+def artanh(x):
+    return Artanh.apply(x)
+
+
+class Artanh(torch.autograd.Function):
+    @staticmethod
+    def forward(ctx, x):
+        x = x.clamp(-1 + 1e-7, 1 - 1e-7)
+        ctx.save_for_backward(x)
+        z = x.double()
+        return (torch.log_(1 + z).sub_(torch.log_(1 - z))).mul_(0.5).to(x.dtype)
+
+    @staticmethod
+    def backward(ctx, grad_output):
+        input, = ctx.saved_tensors
+        return grad_output / (1 - input ** 2)
+
+
+class Arsinh(torch.autograd.Function):
+    @staticmethod
+    def forward(ctx, x):
+        ctx.save_for_backward(x)
+        z = x.double()
+        return (z + torch.sqrt_(1 + z.pow(2))).clamp_min_(1e-7).log_().to(x.dtype)
+
+    @staticmethod
+    def backward(ctx, grad_output):
+        input, = ctx.saved_tensors
+        return grad_output / (1 + input ** 2) ** 0.5
+
+
+class Arcosh(torch.autograd.Function):
+    @staticmethod
+    def forward(ctx, x):
+        x = x.clamp(min=1.0 + 1e-7)
+        ctx.save_for_backward(x)
+        z = x.double()
+        return (z + torch.sqrt_(z.pow(2) - 1)).clamp_min_(1e-7).log_().to(x.dtype)
+
+    @staticmethod
+    def backward(ctx, grad_output):
+        input, = ctx.saved_tensors
+        return grad_output / (input ** 2 - 1) ** 0.5
+
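+# Quick numerical sanity check (illustrative): away from the clamped
+# boundaries these functions agree with PyTorch's built-ins, e.g.
+#   x = torch.linspace(-0.9, 0.9, 5)
+#   assert torch.allclose(artanh(x), torch.atanh(x), atol=1e-6)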
diff --git a/Poincare/utils/train_utils.py b/Poincare/utils/train_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..fb43e0d3044366d8c4d1c0bded82fa2b4e477edd
--- /dev/null
+++ b/Poincare/utils/train_utils.py
@@ -0,0 +1,38 @@
+import os
+
+import numpy as np
+import torch
+import torch.nn.functional as F
+import torch.nn.modules.loss
+import argparse
+
+
+def format_metrics(metrics, split):
+    """Format metric in metric dict for logging."""
+    return " ".join(
+            ["{}_{}: {:.4f}".format(split, metric_name, metric_val) for metric_name, metric_val in metrics.items()])
+
+
+def create_args(*args):
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--dim', type=int, default=args[0])
+    parser.add_argument('--grad_clip', type=float, default=args[1])
+    parser.add_argument('--weight_decay', type=float, default=args[2])
+    parser.add_argument('--lr', type=float, default=args[3])
+    parser.add_argument('--gamma', type=float, default=args[4])
+    parser.add_argument('--lr_reduce_freq', type=int, default=args[5])
+    parser.add_argument('--cuda', type=int, default=args[6])
+    parser.add_argument('--epochs', type=int, default=args[7])
+    parser.add_argument('--min_epochs', type=int, default=args[8])
+    parser.add_argument('--patience', type=int, default=args[9])
+    parser.add_argument('--seed', type=int, default=args[10])
+    parser.add_argument('--log_freq', type=int, default=args[11])
+    parser.add_argument('--eval_freq', type=int, default=args[12])
+    parser.add_argument('--val_prop', type=float, default=args[13])
+    parser.add_argument('--test_prop', type=float, default=args[14])
+    parser.add_argument('--double_precision', type=int, default=args[15])
+    parser.add_argument('--dropout', type=float, default=args[16])
+    parser.add_argument('--normalize_adj', type=bool, default=args[17])
+    parser.add_argument('--normalize_feats', type=bool, default=args[18])
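+    # parse_known_args() tolerates unrelated CLI flags, so create_args doubles
+    # as a plain hyper-parameter namespace when called programmatically.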
+    flags, unknown = parser.parse_known_args()
+    return flags
\ No newline at end of file
diff --git a/README.md b/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..b0dec0fbdb6edb2deaadf67c24db44e9dd509930
--- /dev/null
+++ b/README.md
@@ -0,0 +1 @@
+# G-Hypeddings
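+
+Hyperbolic graph embedding models (H2HGCN, HGCAE, HGCN, HGNN, Poincaré, PVAE)
+with network-intrusion datasets (CIC-DDoS2019, AWID3, TON-IoT) and shallow
+downstream classifiers.
+
+A minimal usage sketch (illustrative only; synthetic inputs stand in for a
+real dataset):
+
+```python
+import numpy as np
+from Ghypeddings import POINCARE
+
+adj = np.eye(100, dtype=bool)           # toy adjacency matrix
+features = np.random.rand(100, 16)      # toy node features
+labels = np.random.randint(0, 2, 100)   # toy binary labels
+
+model = POINCARE(adj, features, labels, dim=8, epochs=10)
+losses, acc, f1, recall, precision, roc_auc, train_time = model.fit()
+```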
diff --git a/__init__.py b/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..ad005b655e398136449951499c1fdb39e547cd5c
--- /dev/null
+++ b/__init__.py
@@ -0,0 +1,10 @@
+from Ghypeddings.H2HGCN.h2hgcn import H2HGCN
+from Ghypeddings.HGCAE.hgcae import HGCAE
+from Ghypeddings.HGCN.hgcn import HGCN
+from Ghypeddings.HGNN.hgnn import HGNN
+from Ghypeddings.Poincare.poincare import POINCARE
+from Ghypeddings.PVAE.pvae import PVAE
+
+from Ghypeddings.datasets.datasets import CIC_DDoS2019
+from Ghypeddings.datasets.datasets import AWID3
+from Ghypeddings.datasets.datasets import TON_IoT
\ No newline at end of file
diff --git a/classifiers/__init__.py b/classifiers/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..74f7dd6b3457945bb91e5e4a35e11e1ff37cc23b
--- /dev/null
+++ b/classifiers/__init__.py
@@ -0,0 +1,7 @@
+from Ghypeddings.classifiers.svm import SVM
+from Ghypeddings.classifiers.mlp import mlp
+from Ghypeddings.classifiers.decision_tree import decision_tree
+from Ghypeddings.classifiers.random_forest import random_forest
+from Ghypeddings.classifiers.adaboost import adaboost
+from Ghypeddings.classifiers.knn import KNN
+from Ghypeddings.classifiers.naive_bayes import naive_bayes
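+
+# All helpers return the tuple (accuracy, f1, recall, precision, roc_auc); most
+# share the signature (X, y, test_split, seed, ...), while mlp also takes the
+# hidden-layer configuration. Illustrative call:
+#   from Ghypeddings.classifiers import SVM
+#   acc, f1, rec, prec, auc = SVM(X, y, test_split=0.3, seed=42)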
\ No newline at end of file
diff --git a/classifiers/adaboost.py b/classifiers/adaboost.py
new file mode 100644
index 0000000000000000000000000000000000000000..c22a107ee4a296ff621ab57ba32249d3723bd992
--- /dev/null
+++ b/classifiers/adaboost.py
@@ -0,0 +1,16 @@
+from sklearn.ensemble import AdaBoostClassifier
+from sklearn.model_selection import train_test_split
+from sklearn.metrics import accuracy_score , f1_score , recall_score , precision_score , roc_auc_score
+
+
+def adaboost(X,y,test_split,seed,n_estimators=10):
+    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_split, random_state=seed)
+    ada_boost = AdaBoostClassifier(n_estimators=n_estimators, random_state=seed)
+    ada_boost.fit(X_train, y_train)
+    y_pred = ada_boost.predict(X_test)
+    accuracy = accuracy_score(y_test, y_pred)
+    f1 = f1_score(y_test, y_pred)
+    recall = recall_score(y_test, y_pred)
+    precision = precision_score(y_test, y_pred)
+    roc_auc = roc_auc_score(y_test, y_pred)
+    return accuracy,f1,recall,precision,roc_auc
\ No newline at end of file
diff --git a/classifiers/decision_tree.py b/classifiers/decision_tree.py
new file mode 100644
index 0000000000000000000000000000000000000000..2ae89c09289a0c313a2fca52e068d93e7fb0d7cf
--- /dev/null
+++ b/classifiers/decision_tree.py
@@ -0,0 +1,15 @@
+from sklearn.tree import DecisionTreeClassifier
+from sklearn.model_selection import train_test_split
+from sklearn.metrics import accuracy_score , f1_score , recall_score , precision_score , roc_auc_score
+
+def decision_tree(X,y,test_split,seed,max_depth=4):
+    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_split, random_state=seed)
+    clf = DecisionTreeClassifier(max_depth=max_depth)
+    clf.fit(X_train, y_train)
+    y_pred = clf.predict(X_test)
+    accuracy = accuracy_score(y_test, y_pred)
+    f1 = f1_score(y_test, y_pred)
+    recall = recall_score(y_test, y_pred)
+    precision = precision_score(y_test, y_pred)
+    roc_auc = roc_auc_score(y_test, y_pred)
+    return accuracy,f1,recall,precision,roc_auc
\ No newline at end of file
diff --git a/classifiers/knn.py b/classifiers/knn.py
new file mode 100644
index 0000000000000000000000000000000000000000..98a5b0c0f3547f3e43a31049d979e1c05d24a3dd
--- /dev/null
+++ b/classifiers/knn.py
@@ -0,0 +1,15 @@
+from sklearn.model_selection import train_test_split
+from sklearn.neighbors import KNeighborsClassifier
+from sklearn.metrics import accuracy_score , f1_score , recall_score , precision_score , roc_auc_score
+
+def KNN(X,y,test_split,seed,k=20):
+    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_split, random_state=seed)
+    knn = KNeighborsClassifier(n_neighbors=k)
+    knn.fit(X_train, y_train)
+    y_pred = knn.predict(X_test)
+    accuracy = accuracy_score(y_test, y_pred)
+    f1 = f1_score(y_test, y_pred)
+    recall = recall_score(y_test, y_pred)
+    precision = precision_score(y_test, y_pred)
+    roc_auc = roc_auc_score(y_test, y_pred)
+    return accuracy,f1,recall,precision,roc_auc
\ No newline at end of file
diff --git a/classifiers/mlp.py b/classifiers/mlp.py
new file mode 100644
index 0000000000000000000000000000000000000000..44fe0d9ccc453eddd6690b0e909b45bf1aa390a4
--- /dev/null
+++ b/classifiers/mlp.py
@@ -0,0 +1,17 @@
+from sklearn.neural_network import MLPClassifier
+import time
+import numpy as np
+from sklearn.model_selection import train_test_split
+from sklearn.metrics import accuracy_score , f1_score , recall_score , precision_score , roc_auc_score
+
+def mlp(X,y,n_hidden_layers,hidden_dim,epochs=50,batch_size=64,test_split=.3,seed=42):
+    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_split, random_state=seed)
+    mlp = MLPClassifier(hidden_layer_sizes=(hidden_dim,) * n_hidden_layers, learning_rate='adaptive', batch_size=batch_size, activation='relu', solver='adam', max_iter=epochs, random_state=seed)
+    mlp.fit(X_train, y_train)
+    y_pred = mlp.predict(X_test)
+    accuracy = accuracy_score(y_test, y_pred)
+    f1 = f1_score(y_test, y_pred)
+    recall = recall_score(y_test, y_pred)
+    precision = precision_score(y_test, y_pred)
+    roc_auc = roc_auc_score(y_test, y_pred)
+    return accuracy,f1,recall,precision,roc_auc
\ No newline at end of file
diff --git a/classifiers/naive_bayes.py b/classifiers/naive_bayes.py
new file mode 100644
index 0000000000000000000000000000000000000000..3a084829077a2b70c231f202f4231275725001c1
--- /dev/null
+++ b/classifiers/naive_bayes.py
@@ -0,0 +1,15 @@
+from sklearn.naive_bayes import GaussianNB
+from sklearn.model_selection import train_test_split
+from sklearn.metrics import accuracy_score , f1_score , recall_score , precision_score , roc_auc_score
+
+def naive_bayes(X,y,test_split,seed):
+    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_split, random_state=seed)
+    clf = GaussianNB()
+    clf.fit(X_train, y_train)
+    y_pred = clf.predict(X_test)
+    accuracy = accuracy_score(y_test, y_pred)
+    f1 = f1_score(y_test, y_pred)
+    recall = recall_score(y_test, y_pred)
+    precision = precision_score(y_test, y_pred)
+    roc_auc = roc_auc_score(y_test, y_pred)
+    return accuracy,f1,recall,precision,roc_auc
\ No newline at end of file
diff --git a/classifiers/random_forest.py b/classifiers/random_forest.py
new file mode 100644
index 0000000000000000000000000000000000000000..4d1c9441385a30e9a40bb815fca1d7783d1624a3
--- /dev/null
+++ b/classifiers/random_forest.py
@@ -0,0 +1,18 @@
+from sklearn.ensemble import RandomForestClassifier
+from sklearn.model_selection import train_test_split
+from sklearn.metrics import accuracy_score , f1_score , recall_score , precision_score , roc_auc_score
+
+
+
+
+def random_forest(X,y,test_split,seed,n_estimators=10,max_depth=4,max_features='sqrt'):
+    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_split, random_state=seed)
+    clf = RandomForestClassifier(max_features=max_features,n_estimators=n_estimators, max_depth=max_depth, random_state=seed)
+    clf.fit(X_train, y_train)
+    y_pred = clf.predict(X_test)
+    accuracy = accuracy_score(y_test, y_pred)
+    f1 = f1_score(y_test, y_pred)
+    recall = recall_score(y_test, y_pred)
+    precision = precision_score(y_test, y_pred)
+    roc_auc = roc_auc_score(y_test, y_pred)
+    return accuracy,f1,recall,precision,roc_auc
\ No newline at end of file
diff --git a/classifiers/svm.py b/classifiers/svm.py
new file mode 100644
index 0000000000000000000000000000000000000000..523a92b9b32c2ba382480adab900cbb5e5e6966e
--- /dev/null
+++ b/classifiers/svm.py
@@ -0,0 +1,23 @@
+from sklearn import svm
+import sklearn.model_selection as model_selection
+from sklearn.metrics import accuracy_score, f1_score, recall_score, precision_score, roc_auc_score
+
+
+def SVM(X,y,test_split,seed,kernel='rbf',gamma=.5,C=.1,degree=3,average='binary'):
+    X_train, X_test, y_train, y_test = model_selection.train_test_split(X, y, train_size=1-test_split, test_size=test_split, random_state=seed)
+    
+    if(kernel == 'rbf'):
+        model = svm.SVC(kernel='rbf', gamma=gamma, C=C).fit(X_train, y_train)
+    elif(kernel == 'poly'):
+        model = svm.SVC(kernel='poly', degree=degree, C=C).fit(X_train, y_train)
+    else:
+        raise NotImplementedError
+    
+    y_pred = model.predict(X_test)
+    accuracy = accuracy_score(y_test, y_pred)
+    f1 = f1_score(y_test, y_pred, average=average)
+    recall = recall_score(y_test, y_pred)
+    precision = precision_score(y_test, y_pred)
+    roc_auc = roc_auc_score(y_test, y_pred)
+    return accuracy,f1,recall,precision,roc_auc
\ No newline at end of file
diff --git a/clusterers/__init__.py b/clusterers/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/datasets/__init__.py b/datasets/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/datasets/datasets.py b/datasets/datasets.py
new file mode 100644
index 0000000000000000000000000000000000000000..9e49310420190494ae7211c3e0381c656a9ade37
--- /dev/null
+++ b/datasets/datasets.py
@@ -0,0 +1,395 @@
+import os
+
+import pandas as pd
+import numpy as np
+from sklearn.preprocessing import MinMaxScaler
+import pickle
+import hashlib
+from sklearn.preprocessing import LabelEncoder
+
+class Dataset:
+    def __init__(self,directory,adj_path,features_path,labels_path):
+        self.adj_path = adj_path
+        self.features_path = features_path
+        self.labels_path = labels_path
+        self.directory = directory
+
+    def _get_files(self):
+        return [os.path.join(self.directory,file) for file in os.listdir(self.directory) if os.path.isfile(os.path.join(self.directory, file))]
+
+    def save_samples(self,adj,features,labels):
+        with open(self.adj_path,'wb') as f:
+            pickle.dump(adj,f)
+        with open(self.features_path,'wb') as f:
+            pickle.dump(features,f)
+        with open(self.labels_path,'wb') as f:
+            pickle.dump(labels,f)
+
+    def load_samples(self):
+        with open(self.adj_path,'rb') as f:
+            adj = pickle.load(f)
+        with open(self.features_path,'rb') as f:
+            features = pickle.load(f)
+        with open(self.labels_path,'rb') as f:
+            labels = pickle.load(f)
+        print('features:',features.shape)
+        return adj,features,labels
+
+class CIC_DDoS2019(Dataset):
+    def __init__(self):
+        super().__init__(
+            directory=os.path.join(os.path.dirname(os.path.abspath(__file__)),'examples','CICDDoS2019','original'),
+            features_path=os.path.join(os.path.dirname(os.path.abspath(__file__)),'examples','CICDDoS2019','features.pkl'),
+            adj_path=os.path.join(os.path.dirname(os.path.abspath(__file__)),'examples','CICDDoS2019','adjacency.pkl'),
+            labels_path= os.path.join(os.path.dirname(os.path.abspath(__file__)),'examples','CICDDoS2019','labels.pkl')
+        )
+        self.n_classes = 2
+
+    def build(self,n_nodes):
+        df = self._create_file_bc(n_nodes)
+        columns_to_exclude = ['Unnamed: 0', 'Flow ID', ' Source IP', ' Destination IP', ' Timestamp', 'SimillarHTTP']
+        df = df.dropna(subset=df.columns.difference(columns_to_exclude))
+        for column in df.columns:
+            max_value = df.loc[df[column] != np.inf, column].max()
+            min_value = df.loc[df[column] != -np.inf, column].min()
+            df.loc[df[column] == np.inf, column] = max_value
+            df.loc[df[column] == -np.inf, column] = min_value
+        data = df.to_numpy()
+        N = data.shape[0]
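+        # column 87 of the merged CSV is the ' Label' field; anything other
+        # than 'BENIGN' is treated as an attack (label 1)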
+        labels = np.where(data[:,87] == 'BENIGN', 0,1)
+        adj = self._filling_adjacency_numpy(data, N, 2, 4)
+        columns_to_exclude.append(' Label')
+        df.drop(columns_to_exclude, axis=1, inplace=True)
+        features = df.to_numpy()
+        scaler = MinMaxScaler()
+        features = scaler.fit_transform(features)
+        return adj, features, labels
+    
+    def _load_file(self,path,max_per_class,list_classes=[]):
+        df = pd.read_csv(path,low_memory=False)
+        if(len(list_classes)):
+            df = df[df[' Label'].isin(list_classes)]
+            df = df.groupby([' Label']).apply(lambda x: x.sample(max_per_class)).reset_index(drop=True)
+        return df
+        
+    def _create_file_bc(self,n_nodes):
+        file_paths = self._get_files()
+        max_per_class = int(n_nodes / (self.n_classes * len(file_paths))) +1
+        dfs = []
+        for path in file_paths:
+            class_name = os.path.splitext(os.path.basename(path))[0]
+            list_classes = ['BENIGN',class_name]
+            df = self._load_file(path,max_per_class,list_classes)
+            dfs.append(df)
+            print('finishing loading the file : {}'.format(path))
+        df = pd.concat(dfs, ignore_index=True)
+        df = df.sample(frac=1).reset_index(drop=True)
+        print(df[' Label'].value_counts())
+        return df
+
+    def _filling_adjacency_numpy(self,data, N, source_ip_index, destination_ip_index):
+        try:
+            adjacency = np.zeros((N, N), dtype=bool)
+        except MemoryError as e:
+            raise MemoryError(f"could not allocate the {N}x{N} adjacency matrix") from e
+        source_ips = data[:, source_ip_index]
+        destination_ips = data[:, destination_ip_index]
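+        # N x N broadcast comparison: two records are adjacent when they share
+        # any endpoint IP (src-src, src-dst, dst-src or dst-dst)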
+        mask = ((source_ips[:, np.newaxis] == source_ips) | (source_ips[:, np.newaxis] == destination_ips) | (destination_ips[:, np.newaxis] == source_ips) | (destination_ips[:, np.newaxis] == destination_ips))
+        adjacency[mask] = True
+        return adjacency
+
+class AWID3(Dataset):
+    def __init__(self):
+        super().__init__(
+            directory=os.path.join(os.path.dirname(os.path.abspath(__file__)),'examples','AWID3','original'),
+            features_path=os.path.join(os.path.dirname(os.path.abspath(__file__)),'examples','AWID3','features.pkl'),
+            adj_path=os.path.join(os.path.dirname(os.path.abspath(__file__)),'examples','AWID3','adjacency.pkl'),
+            labels_path= os.path.join(os.path.dirname(os.path.abspath(__file__)),'examples','AWID3','labels.pkl')
+        )
+        self.n_classes = 2
+
+    def _hex_to_int(self, hex_string):
+        hex_string = str(hex_string)
+        if '.' in hex_string:
+            return int(float(hex_string))
+        return int(hex_string, 16)
+        
+    def _hash_value(self,old_value):
+        return hash(str(old_value))%1e19
+    
+    def _encode_llc(self,old_value):
+        return len(str(old_value).split('-'))
+
+    
+    def _encode_multiple_hex(self,old_value):
+        words = str(old_value).split('-')
+        return sum([self._hex_to_int(self._month_to_string(word)) for word in words])
+    
+    def _encode_checksum_status(self,old_value):
+        words = str(old_value)
+        if '2' in words  or '02' in words:
+            return 2
+        else:
+            return 0
+        
+    def _encode_to_binary(self,old_value):
+        words = str(old_value)
+        if '1' in words or 'Jan' in words:
+            return 1
+        else:
+            return 0
+        
+    def _month_to_string(self, month):
+        months = {'Jan': '1', 'Feb': '2', 'Mar': '3', 'Apr': '4',
+                  'May': '5', 'Jun': '6', 'Jul': '7', 'Aug': '8',
+                  'Sep': '9', 'Oct': '10', 'Nov': '11', 'Dec': '12'}
+        return months.get(month, month)
+    
+    def _encode_to_avg(self,old_value):
+        if type(old_value) in [int,float]:
+            return old_value
+        if 'e-' in old_value:
+            words = float(old_value)
+            return words
+        else:
+            words = str(old_value).split('-')
+            words = [float(self._month_to_string(i)) for i in words]
+            return np.sum(words)
+    
+    def _encode_antsignal(self,old_value):
+        if type(old_value) in [int,float]:
+            return old_value
+        if 'e-' in old_value:
+            words = float(old_value)
+            return words
+        else:
+            words = str(old_value).split('-')
+            words = [-1*float(self._month_to_string(i)) for i in words if i != '']
+            return np.sum(words)
+    
+    def _encode_http_request_method(self,old_value):
+        return hash(str(old_value))%100
+    
+    def _encode_tls_protocol(self,old_value):
+        words = str(old_value)
+        if 'http2' in words:
+            return 1
+        elif 'over' in words:
+            return 2
+        else:
+            return 0
+        
+    def _encode_ip_version(self,old_value):
+        words = str(old_value)
+        if '4' in words or '04' in words or 'Apr' in words:
+            return 4
+        else:
+            return 6
+    
+    def _encode_ip_protocol(self,old_value):
+        words = str(old_value)
+        if '17' in words:
+            return 17
+        elif '6' in words:
+            return 6
+        elif '2' in words:
+            return 2
+        else:
+            return 0
+
+    def _process_data(self,df):
+
+        df.drop(['frame.number','frame.time','wlan_rsna_eapol.keydes.data','wlan_rsna_eapol.keydes.nonce','wlan.country_info.code','wlan.country_info.fnm','wlan.ssid','wlan.tag','wlan.tag.length','tcp.ack','tcp.ack_raw','tcp.seq','tcp.seq_raw','dns.id','http.date','http.file_data','http.location','http.request.line','http.request.uri.path','http.request.uri.query','http.request.uri.query.parameter','http.request.version','http.response.code.desc','http.response.line','http.response.phrase','http.response.version','http.response_for.uri','http.server','json.value.string','json.key','tls.handshake.extensions_key_share_group','tls.handshake.session_ticket_length','tls.handshake.version','tls.record.version','tls.handshake.extension.type','http.host','dns.a','dhcp.option.router','dhcp.option.dhcp_server_id','dhcp.option.broadcast_address','dhcp.ip.server','dhcp.ip.relay','wlan.bssid','wlan.da','wlan.ra','wlan.sa','wlan.ta','arp.src.hw_mac','arp.dst.hw_mac','arp.dst.proto_ipv4','arp.src.proto_ipv4','dhcp.hw.mac_addr','dhcp.id','dhcp.ip.client','frame.time_delta','radiotap.mactime','wlan_radio.timestamp','dns.flags.authoritative','smb2.msg_id','smb2.pid','smb2.fid','smb2.sesid','http.last_modified','smb2.tid','http.referer','smb.server_component','smb2.filename','smb2.previous_sesid','nbss.continuation_data','tcp.checksum','data.data','tcp.payload','udp.payload','dns.qry.name','dns.resp.name','http.request.full_uri','http.content_type','smb2.acct','smb2.domain','smb2.host'], axis=1, inplace=True)
+        
+        to_binary = ['radiotap.present.tsft','tcp.flags.syn','tcp.flags.ack','tcp.flags.fin','tcp.analysis','tcp.analysis.flags','tcp.flags.push','tcp.flags.reset','tcp.analysis.retransmission','dns.retransmit_request']
+        for b in to_binary:
+            df[b] = df[b].apply(self._encode_to_binary)
+
+        to_hex = ['radiotap.rxflags','wlan.analysis.kck','wlan.analysis.kek','wlan.rsn.ie.gtk.key','wlan.rsn.ie.igtk.key','wlan.rsn.ie.pmkid','wlan.fc.ds','arp.proto.type','nbss.type','smb2.buffer_code','smb2.protocol_id','smb2.data_offset','smb2.session_flags']
+        for h in to_hex:
+            df[h] = df[h].apply(self._encode_multiple_hex)
+            df[h] = df[h].astype(np.float64)
+        
+        to_avg = ['ip.ttl','data.len','tcp.dstport','tcp.srcport','udp.dstport','udp.srcport','tcp.option_len','udp.length','dns.count.add_rr','dns.count.answers','dns.count.auth_rr','dns.count.labels','dns.count.queries','dns.flags.checkdisable','dns.flags.opcode','dns.flags.response','dns.qry.name.len','dns.resp.ttl','dns.resp.len.1','tls.record.content_type','tcp.time_relative','udp.time_delta','udp.time_relative','tcp.analysis.rto_frame','tcp.time_delta','http.content_length','smb2.cmd','smb2.header_len']
+        for a in to_avg:
+            df[a] = df[a].apply(self._encode_to_avg)
+
+        # to_hash = ['data.data','tcp.payload','udp.payload','dns.qry.name','dns.resp.name','http.content_type','http.request.full_uri']
+        # for hh in to_hash:
+        #     df[hh] = df[hh].apply(self._hash_value)
+            
+        encoder = LabelEncoder()
+        string_to_int = ['arp','dhcp','mdns','dns','ssdp','http.connection','nbns','ldap']
+        for i in string_to_int:
+            df[i] =  encoder.fit_transform(df[i])
+
+        df['llc'] = df['llc'].apply(self._encode_llc)
+        df['tcp.checksum.status'] = df['tcp.checksum.status'].apply(self._encode_checksum_status)
+        df['dhcp.cookie'] = df['dhcp.cookie'].apply(lambda x: 0 if x == '0' else 1)
+        df['http.request.method'] = df['http.request.method'].apply(self._encode_http_request_method)
+        df['tls.app_data_proto'] = df['tls.app_data_proto'].apply(self._encode_tls_protocol)
+        df['ip.version'] = df['ip.version'].apply(self._encode_ip_version)
+        df['ip.proto'] = df['ip.proto'].apply(self._encode_ip_protocol)
+        df['radiotap.dbm_antsignal'] = df['radiotap.dbm_antsignal'].apply(self._encode_antsignal)
+        df['Label'] = df['Label'].apply(lambda x: 0 if x == 'Normal' else 1)
+
+        single_value_columns = df.columns[df.nunique() == 1]
+        df.drop(columns=single_value_columns,axis=1,inplace=True)
+        df = df.sample(frac=1)  # note: the class defines no seed attribute, so this shuffle is unseeded
+        return df
+    
+    def _filling_adjacency_numpy(self,data):
+        N = data.shape[0]
+        try:
+            adjacency = np.zeros((N, N), dtype=bool)
+        except MemoryError as e:
+            raise MemoryError(f"could not allocate the {N}x{N} adjacency matrix") from e
+        source_ips = data['ip.src'].to_numpy()
+        destination_ips = data['ip.dst'].to_numpy()
+        mask = ((source_ips[:, np.newaxis] == source_ips) | (source_ips[:, np.newaxis] == destination_ips) | (destination_ips[:, np.newaxis] == source_ips) | (destination_ips[:, np.newaxis] == destination_ips))
+        adjacency[mask] = True
+        return adjacency 
+    
+    def _load_file(self,path,max_per_class,sample=False):
+        df = pd.read_csv(path,low_memory=False)
+        if(sample):
+            real_min = df['Label'].value_counts().min()
+            df = df.groupby(['Label']).apply(lambda x: x.sample(min(max_per_class,real_min))).reset_index(drop=True)
+        return df
+        
+    def _create_file_bc(self,n_nodes):
+        file_paths = self._get_files()
+        max_per_class = int(n_nodes / (self.n_classes * len(file_paths))) +1
+        dfs = []
+        for path in file_paths:
+            df = self._load_file(path,max_per_class,sample=True)
+            dfs.append(df)
+            print('finishing loading the file : {}'.format(path))
+        df = pd.concat(dfs, ignore_index=True)
+        df = df.sample(frac=1).reset_index(drop=True)
+        print(df['Label'].value_counts())
+        return df
+    
+    def build(self,n_nodes):
+        df = self._create_file_bc(n_nodes)
+        df['ip.dst'] =df['ip.dst'].astype(str)
+        df['ip.src'] =df['ip.src'].astype(str)
+        condition_ip_dst = (df['ip.dst'] == 'nan')  # missing IPs are characteristic of certain attacks
+        df.loc[condition_ip_dst, 'ip.dst'] = '-1'
+        condition_ip_src = (df['ip.src'] == 'nan')  # missing IPs are characteristic of certain attacks
+        df.loc[condition_ip_src, 'ip.src'] = '-1'
+        df = df.fillna(0)
+        df = self._process_data(df)
+        adj = self._filling_adjacency_numpy(df)
+        df.drop(['ip.src','ip.dst'],axis=1,inplace=True)
+        labels = df['Label'].to_numpy()
+        labels = labels.astype(np.bool_)
+        df.drop(['Label'],axis=1,inplace=True)
+        features = df.to_numpy()
+        scaler = MinMaxScaler()
+        features = scaler.fit_transform(features)
+        print("features:",features.shape)
+        return adj,features,labels
+
+class TON_IoT(Dataset):
+    def __init__(self):
+        super().__init__(
+            directory=os.path.join(os.path.dirname(os.path.abspath(__file__)),'examples','TON_IOT','original'),
+            features_path=os.path.join(os.path.dirname(os.path.abspath(__file__)),'examples','TON_IOT','features.pkl'),
+            adj_path=os.path.join(os.path.dirname(os.path.abspath(__file__)),'examples','TON_IOT','adjacency.pkl'),
+            labels_path= os.path.join(os.path.dirname(os.path.abspath(__file__)),'examples','TON_IOT','labels.pkl')
+        )
+        self.n_classes = 2
+
+    def _hash_string_to_int(self,inp):
+        input_string = str(inp)
+        hash_object = hashlib.sha1(input_string.encode())
+        hashed_hex = hash_object.hexdigest()
+        hashed_int = int(hashed_hex, 16)
+        return hashed_int
+    
+    def _filling_adjacency_numpy(self,data):
+        N = data.shape[0]
+        try:
+            adjacency = np.zeros((N, N), dtype=bool)
+        except MemoryError as e:
+            raise MemoryError(f"could not allocate the {N}x{N} adjacency matrix") from e
+        source_ips = data['src_ip'].to_numpy()
+        destination_ips = data['dst_ip'].to_numpy()
+        mask = ((source_ips[:, np.newaxis] == source_ips) | (source_ips[:, np.newaxis] == destination_ips) | (destination_ips[:, np.newaxis] == source_ips) | (destination_ips[:, np.newaxis] == destination_ips))
+        adjacency[mask] = True
+        return adjacency
+    
+    def _load_file(self,path,max_per_class,sample=False):
+        df = pd.read_csv(path,low_memory=False)
+        if(sample):
+            real_min = df['label'].value_counts().min()
+            df = df.groupby(['label']).apply(lambda x: x.sample(min(max_per_class,real_min))).reset_index(drop=True)
+        return df
+    
+    def _create_file_bc(self,n_nodes):
+        file_paths = self._get_files()
+        max_per_class = int(n_nodes / (self.n_classes * len(file_paths))) +1
+        dfs = []
+        for path in file_paths:
+            df = self._load_file(path,max_per_class,sample=True)
+            dfs.append(df)
+            print('finishing loading the file : {}'.format(path))
+        df = pd.concat(dfs, ignore_index=True)
+        df = df.sample(frac=1).reset_index(drop=True)
+        print(df['label'].value_counts())
+        return df
+
+    def build(self,n_nodes):
+        df = self._create_file_bc(n_nodes)
+        # to perform multiclass classification, remove 'type' from the drop
+        # list below and sample per attack type instead
+        df = df.groupby(['label']).apply(lambda x: x.sample(int(n_nodes / 2)).reset_index(drop=True))
+        df.drop(columns=['ts','type'],inplace=True)
+        encoder = LabelEncoder()
+        string_to_int = ['proto','ssl_subject','ssl_issuer','http_referrer','service','conn_state','dns_AA','dns_RD','dns_RA','dns_rejected','ssl_version','ssl_cipher','ssl_resumed','ssl_established','http_method','http_version','http_orig_mime_types','http_resp_mime_types','weird_addl','weird_notice','http_uri','dns_query','http_user_agent','weird_name']
+        for i in string_to_int:
+            df[i] =  encoder.fit_transform(df[i])
+        # text_to_int = ['http_uri','dns_query','http_user_agent','weird_name']
+        # for j in text_to_int:
+        #     df[j] = df[j].apply(self._hash_string_to_int)
+        #     df[j] = df[j].astype(np.float64)
+        df['src_bytes'] = df['src_bytes'].apply(lambda x: 0 if x == '0.0.0.0' else x)
+        df['src_bytes'] = df['src_bytes'].astype(np.int64)
+        df['http_trans_depth'] = df['http_trans_depth'].apply(lambda x: 0 if x == '-' else x)
+        df['http_trans_depth'] = df['http_trans_depth'].astype(np.int64)
+
+        adj = self._filling_adjacency_numpy(df)
+        df.drop(['src_ip','dst_ip'],axis=1,inplace=True)
+        labels = df['label'].to_numpy()
+        labels = labels.astype(np.bool_)
+        df.drop(['label'],axis=1,inplace=True)
+        features = df.to_numpy()
+        scaler = MinMaxScaler()
+        features = scaler.fit_transform(features)
+        print("features:",features.shape)
+        return adj,features,labels
\ No newline at end of file
diff --git a/datasets/examples/AWID3/adjacency.pkl b/datasets/examples/AWID3/adjacency.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..e02e23fb6a62540606b81948cfeac0db4d458af4
Binary files /dev/null and b/datasets/examples/AWID3/adjacency.pkl differ
diff --git a/datasets/examples/AWID3/features.pkl b/datasets/examples/AWID3/features.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..08f5bbabd1c222fc8b1a0c9e8b37ffc129ecb6d3
Binary files /dev/null and b/datasets/examples/AWID3/features.pkl differ
diff --git a/datasets/examples/AWID3/labels.pkl b/datasets/examples/AWID3/labels.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..255f6cf165c48c5f367365836ce1147c197b8422
Binary files /dev/null and b/datasets/examples/AWID3/labels.pkl differ
diff --git a/datasets/examples/CICDDoS2019/adjacency.pkl b/datasets/examples/CICDDoS2019/adjacency.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..317d1c9c7e9cbc3f94a1c1b39010b02abff4d67c
Binary files /dev/null and b/datasets/examples/CICDDoS2019/adjacency.pkl differ
diff --git a/datasets/examples/CICDDoS2019/features.pkl b/datasets/examples/CICDDoS2019/features.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..6e76ec06afa8b0a85d491cfeb8aef5f0631abed3
Binary files /dev/null and b/datasets/examples/CICDDoS2019/features.pkl differ
diff --git a/datasets/examples/CICDDoS2019/labels.pkl b/datasets/examples/CICDDoS2019/labels.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..81205142e3ac95f45b0e8865eec5cd4e78645f36
Binary files /dev/null and b/datasets/examples/CICDDoS2019/labels.pkl differ
diff --git a/datasets/examples/TON_IOT/adjacency.pkl b/datasets/examples/TON_IOT/adjacency.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..629011f0607e25fa38147f523af6367cb06dcae6
Binary files /dev/null and b/datasets/examples/TON_IOT/adjacency.pkl differ
diff --git a/datasets/examples/TON_IOT/features.pkl b/datasets/examples/TON_IOT/features.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..31290456dc9659885e7f9a6b3c3abc4fef4325e5
Binary files /dev/null and b/datasets/examples/TON_IOT/features.pkl differ
diff --git a/datasets/examples/TON_IOT/labels.pkl b/datasets/examples/TON_IOT/labels.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..f7d1f85af5d254ce8d63032d3be24769fb6f5914
Binary files /dev/null and b/datasets/examples/TON_IOT/labels.pkl differ
diff --git a/datasets/utils.py b/datasets/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..cab0b352d0ee12f4fbb580e29583e2e676adfb81
--- /dev/null
+++ b/datasets/utils.py
@@ -0,0 +1,32 @@
+import os
+import pickle as pkl
+import sys
+import time
+import scipy.sparse as sp
+import networkx as nx
+import numpy as np
+from tqdm import tqdm
+
+def hyperbolicity(adj, num_samples):
+    curr_time = time.time()
+    hyps = []
+    G = nx.from_numpy_array(adj)
+    for _ in tqdm(range(num_samples)):
+        node_tuple = np.random.choice(list(G.nodes()), 4, replace=False)
+        s = []
+        try:
+            d01 = nx.shortest_path_length(G, source=node_tuple[0], target=node_tuple[1], weight=None)
+            d23 = nx.shortest_path_length(G, source=node_tuple[2], target=node_tuple[3], weight=None)
+            d02 = nx.shortest_path_length(G, source=node_tuple[0], target=node_tuple[2], weight=None)
+            d13 = nx.shortest_path_length(G, source=node_tuple[1], target=node_tuple[3], weight=None)
+            d03 = nx.shortest_path_length(G, source=node_tuple[0], target=node_tuple[3], weight=None)
+            d12 = nx.shortest_path_length(G, source=node_tuple[1], target=node_tuple[2], weight=None)
+            s.append(d01 + d23)
+            s.append(d02 + d13)
+            s.append(d03 + d12)
+            s.sort()
+            hyps.append((s[-1] - s[-2]) / 2)
+        except nx.NetworkXNoPath:
+            continue
+    print('Time for hyp: ', time.time() - curr_time)
+    return max(hyps)
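+
+# Illustrative usage (hypothetical toy graph): estimate the Gromov
+# delta-hyperbolicity by sampling node quadruples.
+#   adj = np.random.rand(50, 50) > 0.9
+#   adj = np.triu(adj, 1); adj = (adj | adj.T).astype(int)
+#   delta = hyperbolicity(adj, num_samples=2000)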
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..864bc7ec3743fdd6d8c14e512d5809354dc33aa9
Binary files /dev/null and b/requirements.txt differ