diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..c18dd8d83ceed1806b50b0aaa46beb7e335fff13 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +__pycache__/ diff --git a/H2HGCN/.gitignore b/H2HGCN/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..ba0430d26c996e7f078385407f959c96c271087c --- /dev/null +++ b/H2HGCN/.gitignore @@ -0,0 +1 @@ +__pycache__/ \ No newline at end of file diff --git a/H2HGCN/__init__.py b/H2HGCN/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/H2HGCN/h2hgcn.py b/H2HGCN/h2hgcn.py new file mode 100644 index 0000000000000000000000000000000000000000..4d92fa94092b6f9e1ba716be17264f30d4765d7e --- /dev/null +++ b/H2HGCN/h2hgcn.py @@ -0,0 +1,162 @@ +from __future__ import division +from __future__ import print_function +import logging +import os +import time +import numpy as np +import torch +from Ghypeddings.H2HGCN.models.base_models import NCModel +from Ghypeddings.H2HGCN.utils.data_utils import process_data +from Ghypeddings.H2HGCN.utils.train_utils import format_metrics, create_args +from Ghypeddings.H2HGCN.utils.pre_utils import * +import warnings +warnings.filterwarnings('ignore') + +class H2HGCN: + def __init__(self, + adj, + features, + labels, + dim, + c=None, + num_layers=2, + bias=True, + act='leaky_relu', + select_manifold='lorentz', + num_centroid=10, + lr_stie=0.009, + stie_vars=[], + stiefel_optimizer='rsgd', + eucl_vars=[], + grad_clip=None, + optimizer='Adam', + weight_decay=0.01, + lr=0.01, + lr_scheduler='step', + lr_gamma=0.5, + step_lr_gamma=0.1, + step_lr_reduce_freq=500, + proj_init='xavier', + tie_weight=True, + cuda=0, + epochs=50, + min_epochs=50, + patience=None, + seed=42, + log_freq=1, + eval_freq=1, + val_prop=0.15, + test_prop=0.15, + double_precision=0, + dropout=0.1, + normalize_adj=False, + normalize_feats=True + ): + + self.args = create_args(dim,c,num_layers,bias,act,select_manifold,num_centroid,lr_stie,stie_vars,stiefel_optimizer,eucl_vars,grad_clip,optimizer,weight_decay,lr,lr_scheduler,lr_gamma,step_lr_gamma,step_lr_reduce_freq,proj_init,tie_weight,cuda,epochs,min_epochs,patience,seed,log_freq,eval_freq,val_prop,test_prop,double_precision,dropout,normalize_adj,normalize_feats) + + self.args.n_nodes = adj.shape[0] + self.args.feat_dim = features.shape[1] + self.args.n_classes = len(np.unique(labels)) + self.data = process_data(self.args,adj,features,labels) + + if int(self.args.double_precision): + torch.set_default_dtype(torch.float64) + + self.args.device = 'cuda:' + str(self.args.cuda) if int(self.args.cuda) >= 0 else 'cpu' + self.args.patience = self.args.epochs if not self.args.patience else int(self.args.patience) + self.model = NCModel(self.args) + self.optimizer, self.lr_scheduler, self.stiefel_optimizer, self.stiefel_lr_scheduler = set_up_optimizer_scheduler(True, self.args, self.model, self.args.lr, self.args.lr_stie) + + if self.args.cuda is not None and int(self.args.cuda) >= 0 : + os.environ['CUDA_VISIBLE_DEVICES'] = str(self.args.cuda) + self.model = self.model.to(self.args.device) + for x, val in self.data.items(): + if torch.is_tensor(self.data[x]): + self.data[x] = self.data[x].to(self.args.device) + self.best_emb = None + + + def fit(self): + logging.getLogger().setLevel(logging.INFO) + logging.info(f'Using: {self.args.device}') + tot_params = sum([np.prod(p.size()) for p in self.model.parameters()]) + logging.info(f"Total number of parameters: 
{tot_params}") + + t_total = time.time() + counter = 0 + best_val_metrics = self.model.init_metric_dict() + + best_losses = [] + real_losses = [] + train_losses = [] + + for epoch in range(self.args.epochs): + t = time.time() + self.model.train() + self.optimizer.zero_grad() + self.stiefel_optimizer.zero_grad() + embeddings = self.model.encode(self.data['features'], self.data['hgnn_adj'], self.data['hgnn_weight']) + train_metrics = self.model.compute_metrics(embeddings, self.data, 'train') + train_metrics['loss'].backward() + if self.args.grad_clip is not None: + max_norm = float(self.args.grad_clip) + all_params = list(self.model.parameters()) + for param in all_params: + torch.nn.utils.clip_grad_norm_(param, max_norm) + self.optimizer.step() + self.stiefel_optimizer.step() + self.lr_scheduler.step() + self.stiefel_lr_scheduler.step() + + train_losses.append(train_metrics['loss'].item()) + if(len(best_losses) == 0): + best_losses.append(train_losses[0]) + elif (best_losses[-1] > train_losses[-1]): + best_losses.append(train_losses[-1]) + else: + best_losses.append(best_losses[-1]) + + if (epoch + 1) % self.args.log_freq == 0: + logging.info(" ".join(['Epoch: {:04d}'.format(epoch + 1), + 'lr: {:04f}, stie_lr: {:04f}'.format(self.lr_scheduler.get_lr()[0], self.stiefel_lr_scheduler.get_lr()[0]), + format_metrics(train_metrics, 'train'), + 'time: {:.4f}s'.format(time.time() - t) + ])) + + if (epoch + 1) % self.args.eval_freq == 0: + self.model.eval() + embeddings = self.model.encode(self.data['features'], self.data['hgnn_adj'], self.data['hgnn_weight']) + val_metrics = self.model.compute_metrics(embeddings, self.data, 'val') + real_losses.append(val_metrics['loss'].item()) + if (epoch + 1) % self.args.log_freq == 0: + logging.info(" ".join(['Epoch: {:04d}'.format(epoch + 1), format_metrics(val_metrics, 'val')])) + if self.model.has_improved(best_val_metrics, val_metrics): + self.best_emb = embeddings + best_val_metrics = val_metrics + counter = 0 + else: + counter += 1 + if counter == self.args.patience and epoch > self.args.min_epochs: + logging.info("Early stopping") + break + + logging.info("Training Finished!") + logging.info("Total time elapsed: {:.4f}s".format(time.time() - t_total)) + return {'val':real_losses,'best':best_losses,'train':train_losses},best_val_metrics['acc'],best_val_metrics['f1'],best_val_metrics['recall'],best_val_metrics['precision'],best_val_metrics['roc_auc'],time.time() - t_total + + def predict(self): + self.model.eval() + embeddings = self.model.encode(self.data['features'], self.data['hgnn_adj'], self.data['hgnn_weight']) + val_metrics = self.model.compute_metrics(embeddings, self.data, 'test') + logging.info(" ".join([format_metrics(val_metrics, 'test')])) + return val_metrics['loss'].item(),val_metrics['acc'],val_metrics['f1'],val_metrics['recall'],val_metrics['precision'],val_metrics['roc_auc'] + + def save_embeddings(self): + #tb_embeddings_euc = self.model.manifold.log_map_zero(self.best_emb) + for_classification_hyp = np.hstack((self.best_emb.cpu().detach().numpy(),self.data['labels'].cpu().reshape(-1,1))) + #for_classification_euc = np.hstack((tb_embeddings_euc.cpu().detach().numpy(),self.data['labels'].cpu().reshape(-1,1))) + hyp_file_path = os.path.join(os.getcwd(),'h2hgcn_embeddings_hyp.csv') + #euc_file_path = os.path.join(os.getcwd(),'h2hgcn_embeddings_euc.csv') + np.savetxt(hyp_file_path, for_classification_hyp, delimiter=',') + #np.savetxt(euc_file_path, for_classification_euc, delimiter=',') diff --git a/H2HGCN/layers/CentroidDistance.py 
b/H2HGCN/layers/CentroidDistance.py new file mode 100644 index 0000000000000000000000000000000000000000..546447492330997a479f47d34b3ad22094d45288 --- /dev/null +++ b/H2HGCN/layers/CentroidDistance.py @@ -0,0 +1,56 @@ +import torch as th +import torch.nn as nn +import torch.nn.functional as F +from Ghypeddings.H2HGCN.utils import * + +class CentroidDistance(nn.Module): + """ + Implement a model that calculates the pairwise distances between node representations + and centroids + """ + def __init__(self, args, logger, manifold): + super(CentroidDistance, self).__init__() + self.args = args + self.logger = logger + self.manifold = manifold + self.debug = False + + # centroid embedding + self.centroid_embedding = nn.Embedding( + args.num_centroid, args.dim, + sparse=False, + scale_grad_by_freq=False, + ) + nn_init(self.centroid_embedding, self.args.proj_init) + args.eucl_vars.append(self.centroid_embedding) + + def forward(self, node_repr, mask): + """ + Args: + node_repr: [node_num, dim] + mask: [node_num, 1] 1 denote real node, 0 padded node + return: + graph_centroid_dist: [1, num_centroid] + node_centroid_dist: [1, node_num, num_centroid] + """ + node_num = node_repr.size(0) + + # broadcast and reshape node_repr to [node_num * num_centroid, dim] + node_repr = node_repr.unsqueeze(1).expand( + -1, + self.args.num_centroid, + -1).contiguous().view(-1, self.args.dim) + + # broadcast and reshape centroid embeddings to [node_num * num_centroid, dim] + centroid_repr = self.manifold.exp_map_zero(self.centroid_embedding(th.arange(self.args.num_centroid).cuda().to(self.args.device))) + centroid_repr = centroid_repr.unsqueeze(0).expand( + node_num, + -1, + -1).contiguous().view(-1, self.args.dim) + # get distance + node_centroid_dist = self.manifold.distance(node_repr, centroid_repr) + node_centroid_dist = node_centroid_dist.view(1, node_num, self.args.num_centroid) + # average pooling over nodes + graph_centroid_dist = th.sum(node_centroid_dist, dim=1) / th.sum(mask) + return graph_centroid_dist, node_centroid_dist + diff --git a/H2HGCN/layers/__init__.py b/H2HGCN/layers/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..8b137891791fe96927ad78e64b0aad7bded08bdc --- /dev/null +++ b/H2HGCN/layers/__init__.py @@ -0,0 +1 @@ + diff --git a/H2HGCN/layers/layers.py b/H2HGCN/layers/layers.py new file mode 100644 index 0000000000000000000000000000000000000000..48d5c1f2799dbc0cb8dc5fbbc4b8236de4dc9abf --- /dev/null +++ b/H2HGCN/layers/layers.py @@ -0,0 +1,24 @@ +import math +import torch +import torch.nn as nn +import torch.nn.functional as F +from torch.nn.modules.module import Module +from torch.nn.parameter import Parameter + +class Linear(Module): + """ + Simple Linear layer with dropout. 
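    Illustrative usage (a sketch; assumes an `args` namespace exposing an
    `eucl_vars` list, since the constructor registers its weights there):

        >>> import argparse, torch
        >>> args = argparse.Namespace(eucl_vars=[])
        >>> layer = Linear(args, in_features=10, out_features=2,
        ...                dropout=0.1, act=torch.relu, use_bias=True)
        >>> out = layer(torch.randn(4, 10))   # -> tensor of shape [4, 2]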
+ """ + + def __init__(self, args, in_features, out_features, dropout, act, use_bias): + super(Linear, self).__init__() + self.dropout = dropout + self.linear = nn.Linear(in_features, out_features, use_bias) + self.act = act + args.eucl_vars.append(self.linear) + + def forward(self, x): + hidden = self.linear.forward(x) + hidden = F.dropout(hidden, self.dropout, training=self.training) + out = self.act(hidden) + return out \ No newline at end of file diff --git a/H2HGCN/manifolds/LorentzManifold.py b/H2HGCN/manifolds/LorentzManifold.py new file mode 100644 index 0000000000000000000000000000000000000000..1ae351ce44b283e97f1bceb23eaf2bbcb9fa791b --- /dev/null +++ b/H2HGCN/manifolds/LorentzManifold.py @@ -0,0 +1,194 @@ +"""Lorentz manifold.""" +import torch +import torch as th +import torch.nn as nn +import numpy as np +from torch.autograd import Function, Variable +import torch +from Ghypeddings.H2HGCN.utils import * +from Ghypeddings.H2HGCN.utils.pre_utils import * +from Ghypeddings.H2HGCN.manifolds import * +from Ghypeddings.H2HGCN.utils.math_utils import arcosh, cosh, sinh + +_eps = 1e-10 + +class LorentzManifold: + + def __init__(self, args, eps=1e-3, norm_clip=1, max_norm=1e3): + self.args = args + self.eps = eps + self.norm_clip = norm_clip + self.max_norm = max_norm + + def minkowski_dot(self, x, y, keepdim=True): + res = torch.sum(x * y, dim=-1) - 2 * x[..., 0] * y[..., 0] + if keepdim: + res = res.view(res.shape + (1,)) + return res + + + def sqdist(self, x, y, c): + K = 1. / c + prod = self.minkowski_dot(x, y) + eps = {torch.float32: 1e-7, torch.float64: 1e-15} + theta = torch.clamp(-prod / K, min=1.0 + eps[x.dtype]) + sqdist = K * arcosh(theta) ** 2 + return torch.clamp(sqdist, max=50.0) + + + @staticmethod + def ldot(u, v, keepdim=False): + """ + Lorentzian Scalar Product + Args: + u: [batch_size, d + 1] + v: [batch_size, d + 1] + Return: + keepdim: False [batch_size] + keepdim: True [batch_size, 1] + """ + d = u.size(1) - 1 + uv = u * v + uv = th.cat((-uv.narrow(1, 0, 1), uv.narrow(1, 1, d)), dim=1) + return th.sum(uv, dim=1, keepdim=keepdim) + + def from_lorentz_to_poincare(self, x): + """ + Args: + u: [batch_size, d + 1] + """ + d = x.size(-1) - 1 + return x.narrow(-1, 1, d) / (x.narrow(-1, 0, 1) + 1) + + def from_poincare_to_lorentz(self, x): + """ + Args: + u: [batch_size, d] + """ + x_norm_square = th_dot(x, x) + return th.cat((1 + x_norm_square, 2 * x), dim=1) / (1 - x_norm_square + self.eps) + + def distance(self, u, v): + d = -LorentzDot.apply(u, v) + dis = Acosh.apply(d, self.eps) + return dis + + def normalize(self, w): + """ + Normalize vector such that it is located on the Lorentz + Args: + w: [batch_size, d + 1] + """ + d = w.size(-1) - 1 + narrowed = w.narrow(-1, 1, d) + if self.max_norm: + narrowed = th.renorm(narrowed.view(-1, d), 2, 0, self.max_norm) + first = 1 + th.sum(th.pow(narrowed, 2), dim=-1, keepdim=True) + first = th.sqrt(first) + tmp = th.cat((first, narrowed), dim=1) + return tmp + + def init_embed(self, embed, irange=1e-2): + embed.weight.data.uniform_(-irange, irange) + embed.weight.data.copy_(self.normalize(embed.weight.data)) + + def rgrad(self, p, d_p): + """Riemannian gradient for Lorentz""" + u = d_p + x = p + u.narrow(-1, 0, 1).mul_(-1) + u.addcmul_(self.ldot(x, u, keepdim=True).expand_as(x), x) + return d_p + + def exp_map_zero(self, v): + zeros = th.zeros_like(v) + zeros[:, 0] = 1 + return self.exp_map_x(zeros, v) + + def exp_map_x(self, p, d_p, d_p_normalize=True, p_normalize=True): + if d_p_normalize: + d_p = self.normalize_tan(p, d_p) + + 
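        # Exponential map on the Lorentz model: nd_p = ||d_p||_L is the Lorentzian
        # norm of the tangent vector (clamped away from 0 by eps), t is that norm
        # clipped at norm_clip, and the new point is cosh(t) * p + sinh(t) * d_p / nd_p.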
ldv = self.ldot(d_p, d_p, keepdim=True) + nd_p = th.sqrt(th.clamp(ldv + self.eps, _eps)) + + t = th.clamp(nd_p, max=self.norm_clip) + newp = (th.cosh(t) * p) + (th.sinh(t) * d_p / nd_p) + + if p_normalize: + newp = self.normalize(newp) + return newp + + def normalize_tan(self, x_all, v_all): + d = v_all.size(1) - 1 + x = x_all.narrow(1, 1, d) + xv = th.sum(x * v_all.narrow(1, 1, d), dim=1, keepdim=True) + tmp = 1 + th.sum(th.pow(x_all.narrow(1, 1, d), 2), dim=1, keepdim=True) + tmp = th.sqrt(tmp) + return th.cat((xv / tmp, v_all.narrow(1, 1, d)), dim=1) + + def log_map_zero(self, y, i=-1): + zeros = th.zeros_like(y) + zeros[:, 0] = 1 + return self.log_map_x(zeros, y) + + def log_map_x(self, x, y, normalize=False): + """Logarithmic map on the Lorentz Manifold""" + xy = self.ldot(x, y).unsqueeze(-1) + tmp = th.sqrt(th.clamp(xy * xy - 1 + self.eps, _eps)) + v = Acosh.apply(-xy, self.eps) / ( + tmp + ) * th.addcmul(y, xy, x) + if normalize: + result = self.normalize_tan(x, v) + else: + result = v + return result + + def parallel_transport(self, x, y, v): + """Parallel transport for Lorentz""" + v_ = v + x_ = x + y_ = y + + xy = self.ldot(x_, y_, keepdim=True).expand_as(x_) + vy = self.ldot(v_, y_, keepdim=True).expand_as(x_) + vnew = v_ + vy / (1 - xy) * (x_ + y_) + return vnew + + def metric_tensor(self, x, u, v): + return self.ldot(u, v, keepdim=True) + + + +class LorentzDot(Function): + @staticmethod + def forward(ctx, u, v): + ctx.save_for_backward(u, v) + return LorentzManifold.ldot(u, v) + + @staticmethod + def backward(ctx, g): + u, v = ctx.saved_tensors + g = g.unsqueeze(-1).expand_as(u).clone() + g.narrow(-1, 0, 1).mul_(-1) + return g * v, g * u + +class Acosh(Function): + @staticmethod + def forward(ctx, x, eps): + z = th.sqrt(th.clamp(x * x - 1 + eps, _eps)) + ctx.save_for_backward(z) + ctx.eps = eps + xz = x + z + tmp = th.log(xz) + return tmp + + @staticmethod + def backward(ctx, g): + z, = ctx.saved_tensors + z = th.clamp(z, min=ctx.eps) + z = g / z + return z, None + + diff --git a/H2HGCN/manifolds/StiefelManifold.py b/H2HGCN/manifolds/StiefelManifold.py new file mode 100644 index 0000000000000000000000000000000000000000..f42b62f67b913d0bace7abe4c6352c311d4d85a3 --- /dev/null +++ b/H2HGCN/manifolds/StiefelManifold.py @@ -0,0 +1,41 @@ +import torch as th +import torch.nn as nn +import numpy as np +from torch.autograd import Function, Variable +from Ghypeddings.clusterers.utils import * + +_eps = 1e-10 + +class StiefelManifold: + + def __init__(self, args, logger, eps=1e-3, norm_clip=1, max_norm=1e3): + self.args = args + self.logger = logger + self.eps = eps + self.norm_clip = norm_clip + self.max_norm = max_norm + + def normalize(self, w): + return w + + def init_embed(self, embed, irange=1e-2): + embed.weight.data.uniform_(-irange, irange) + embed.weight.data.copy_(self.normalize(embed.weight.data)) + + def symmetric(self, A): + return 0.5 * (A + A.t()) + + def rgrad(self, A, B): + out = B - A.mm(self.symmetric(A.transpose(0,1).mm(B))) + return out + + def exp_map_x(self, A, ref): + data = A + ref + Q, R = data.qr() + # To avoid (any possible) negative values in the output matrix, we multiply the negative values by -1 + sign = (R.diag().sign() + 0.5).sign().diag() + out = Q.mm(sign) + return out + + + diff --git a/H2HGCN/manifolds/__init__.py b/H2HGCN/manifolds/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..99a7356c05028872305abad28f5ac410681319a4 --- /dev/null +++ b/H2HGCN/manifolds/__init__.py @@ -0,0 +1 @@ +from 
Ghypeddings.H2HGCN.manifolds.LorentzManifold import LorentzManifold \ No newline at end of file diff --git a/H2HGCN/models/__init__.py b/H2HGCN/models/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/H2HGCN/models/base_models.py b/H2HGCN/models/base_models.py new file mode 100644 index 0000000000000000000000000000000000000000..a0a235cfac0a099d484449fca494e894411ee20a --- /dev/null +++ b/H2HGCN/models/base_models.py @@ -0,0 +1,76 @@ +import numpy as np +from sklearn.metrics import roc_auc_score, average_precision_score +import torch +import torch.nn as nn +import torch.nn.functional as F +import Ghypeddings.H2HGCN.models.encoders as encoders +from Ghypeddings.H2HGCN.models.encoders import H2HGCN +from Ghypeddings.H2HGCN.models.decoders import model2decoder +from Ghypeddings.H2HGCN.utils.eval_utils import acc_f1 +from Ghypeddings.H2HGCN.manifolds import LorentzManifold + + +class BaseModel(nn.Module): + """ + Base model for graph embedding tasks. + """ + + def __init__(self, args): + super(BaseModel, self).__init__() + self.c = torch.Tensor([1.]).cuda().to(args.device) + args.manifold = self.manifold = LorentzManifold(args) + args.feat_dim = args.feat_dim + 1 + # add 1 for Lorentz as the degree of freedom is d - 1 with d dimensions + args.dim = args.dim + 1 + self.nnodes = args.n_nodes + self.encoder = H2HGCN(args, 1) + + def encode(self, x, hgnn_adj, hgnn_weight): + h = self.encoder.encode(x, hgnn_adj, hgnn_weight) + return h + + def compute_metrics(self, embeddings, data, split): + raise NotImplementedError + + def init_metric_dict(self): + raise NotImplementedError + + def has_improved(self, m1, m2): + raise NotImplementedError + + +class NCModel(BaseModel): + """ + Base model for node classification task. + """ + + def __init__(self, args): + super(NCModel, self).__init__(args) + self.decoder = model2decoder(self.c, args) + if args.n_classes > 2: + self.f1_average = 'micro' + else: + self.f1_average = 'binary' + + self.weights = torch.Tensor([1.] * args.n_classes) + if not args.cuda == -1: + self.weights = self.weights.to(args.device) + + def decode(self, h, adj, idx): + output = self.decoder.decode(h, adj) + return F.log_softmax(output[idx], dim=1) + + + def compute_metrics(self, embeddings, data, split): + idx = data[f'idx_{split}'] + output = self.decode(embeddings, data['adj_train_norm'], idx) + loss = F.nll_loss(output, data['labels'][idx], self.weights) + acc, f1 , recall,precision,roc_auc = acc_f1(output, data['labels'][idx], average=self.f1_average) + metrics = {'loss': loss, 'acc': acc, 'f1': f1 , 'recall':recall,'precision':precision,'roc_auc':roc_auc} + return metrics + + def init_metric_dict(self): + return {'acc': -1, 'f1': -1} + + def has_improved(self, m1, m2): + return m1["f1"] < m2["f1"] \ No newline at end of file diff --git a/H2HGCN/models/decoders.py b/H2HGCN/models/decoders.py new file mode 100644 index 0000000000000000000000000000000000000000..55d4be58badeb13f8ecaa040900ac3289fd065ac --- /dev/null +++ b/H2HGCN/models/decoders.py @@ -0,0 +1,42 @@ +"""Graph decoders.""" +import torch.nn as nn +import torch.nn.functional as F +from Ghypeddings.H2HGCN.layers.layers import Linear + +class Decoder(nn.Module): + """ + Decoder abstract class for node classification tasks. 
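    Subclasses are expected to set `self.cls` (the classification layer) and
    `self.decode_adj`; `decode` then feeds either `(x, adj)` or `x` alone through
    `self.cls`.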
+ """ + + def __init__(self, c): + super(Decoder, self).__init__() + self.c = c + + def decode(self, x, adj): + if self.decode_adj: + input = (x, adj) + probs, _ = self.cls.forward(input) + else: + probs = self.cls.forward(x) + return probs + + +class MyDecoder(Decoder): + """ + Decoder abstract class for node classification tasks. + """ + + def __init__(self, c, args): + super(MyDecoder, self).__init__(c) + self.input_dim = args.num_centroid + self.output_dim = args.n_classes + act = lambda x: x + self.cls = Linear(args, self.input_dim, self.output_dim, args.dropout, act, args.bias) + self.decode_adj = False + + def decode(self, x, adj): + h = x + return super(MyDecoder, self).decode(h, adj) + +model2decoder = MyDecoder + diff --git a/H2HGCN/models/encoders.py b/H2HGCN/models/encoders.py new file mode 100644 index 0000000000000000000000000000000000000000..e5ab9313c1506ffd7984ead39591098158f2dae1 --- /dev/null +++ b/H2HGCN/models/encoders.py @@ -0,0 +1,264 @@ +import numpy as np +import torch +import torch.nn as nn +import torch.nn.functional as F +import Ghypeddings.H2HGCN.utils.math_utils as pmath +import torch as th +from Ghypeddings.H2HGCN.utils import * +from Ghypeddings.H2HGCN.utils import pre_utils +from Ghypeddings.H2HGCN.utils.pre_utils import * +from Ghypeddings.H2HGCN.manifolds import * +from Ghypeddings.H2HGCN.layers.CentroidDistance import CentroidDistance + + +class H2HGCN(nn.Module): + + def __init__(self, args, logger): + super(H2HGCN, self).__init__() + self.debug = False + self.args = args + self.logger = logger + self.set_up_params() + self.activation = nn.SELU() + fd = args.feat_dim - 1 + self.linear = nn.Linear( + int(fd), int(args.dim), + ) + nn_init(self.linear, self.args.proj_init) + self.args.eucl_vars.append(self.linear) + + self.distance = CentroidDistance(args, logger, args.manifold) + + + def create_params(self): + """ + create the GNN params for a specific msg type + """ + msg_weight = [] + layer = self.args.num_layers if not self.args.tie_weight else 1 + for iii in range(layer): + M = th.zeros([self.args.dim-1, self.args.dim-1], requires_grad=True) + init_weight(M, 'orthogonal') + M = nn.Parameter(M) + self.args.stie_vars.append(M) + msg_weight.append(M) + return nn.ParameterList(msg_weight) + + def set_up_params(self): + """ + set up the params for all message types + """ + self.type_of_msg = 1 + + for i in range(0, self.type_of_msg): + setattr(self, "msg_%d_weight" % i, self.create_params()) + + def apply_activation(self, node_repr): + """ + apply non-linearity for different manifolds + """ + if self.args.select_manifold == "poincare": + return self.activation(node_repr) + elif self.args.select_manifold == "lorentz": + return self.args.manifold.from_poincare_to_lorentz( + self.activation(self.args.manifold.from_lorentz_to_poincare(node_repr)) + ) + + def split_graph_by_negative_edge(self, adj_mat, weight): + """ + Split the graph according to positive and negative edges. 
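        Edges with positive weight keep their weight, while edges with negative
        weight are returned as a separate adjacency whose weights have the sign
        flipped, so both parts carry non-negative weights.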
+ """ + mask = weight > 0 + neg_mask = weight < 0 + + pos_adj_mat = adj_mat * mask.long() + neg_adj_mat = adj_mat * neg_mask.long() + pos_weight = weight * mask.float() + neg_weight = -weight * neg_mask.float() + return pos_adj_mat, pos_weight, neg_adj_mat, neg_weight + + def split_graph_by_type(self, adj_mat, weight): + """ + split the graph according to edge type for multi-relational datasets + """ + multi_relation_adj_mat = [] + multi_relation_weight = [] + for relation in range(1, self.args.edge_type): + mask = (weight.int() == relation) + multi_relation_adj_mat.append(adj_mat * mask.long()) + multi_relation_weight.append(mask.float()) + return multi_relation_adj_mat, multi_relation_weight + + def split_input(self, adj_mat, weight): + return [adj_mat], [weight] + + def p2k(self, x, c): + denom = 1 + c * x.pow(2).sum(-1, keepdim=True) + return 2 * x / denom + + def k2p(self, x, c): + denom = 1 + torch.sqrt(1 - c * x.pow(2).sum(-1, keepdim=True)) + return x / denom + + def lorenz_factor(self, x, *, c=1.0, dim=-1, keepdim=False): + """ + Calculate Lorenz factors + """ + x_norm = x.pow(2).sum(dim=dim, keepdim=keepdim) + x_norm = torch.clamp(x_norm, 0, 0.9) + tmp = 1 / torch.sqrt(1 - c * x_norm) + return tmp + + def from_lorentz_to_poincare(self, x): + """ + Args: + u: [batch_size, d + 1] + """ + d = x.size(-1) - 1 + return x.narrow(-1, 1, d) / (x.narrow(-1, 0, 1) + 1) + + def h2p(self, x): + return self.from_lorentz_to_poincare(x) + + def from_poincare_to_lorentz(self, x, eps=1e-3): + """ + Args: + u: [batch_size, d] + """ + x_norm_square = x.pow(2).sum(-1, keepdim=True) + tmp = th.cat((1 + x_norm_square, 2 * x), dim=1) + tmp = tmp / (1 - x_norm_square) + return tmp + + def p2h(self, x): + return self.from_poincare_to_lorentz(x) + + def p2k(self, x, c=1.0): + denom = 1 + c * x.pow(2).sum(-1, keepdim=True) + return 2 * x / denom + + def k2p(self, x, c=1.0): + denom = 1 + torch.sqrt(1 - c * x.pow(2).sum(-1, keepdim=True)) + return x / denom + + def h2k(self, x): + tmp = x.narrow(-1, 1, x.size(-1)-1) / x.narrow(-1, 0, 1) + return tmp + + def k2h(self, x): + x_norm_square = x.pow(2).sum(-1, keepdim=True) + x_norm_square = torch.clamp(x_norm_square, max=0.9) + tmp = torch.ones((x.size(0),1)).cuda().to(self.args.device) + tmp1 = th.cat((tmp, x), dim=1) + tmp2 = 1.0 / torch.sqrt(1.0 - x_norm_square) + tmp3 = (tmp1 * tmp2) + return tmp3 + + + def hyperbolic_mean(self, y, node_num, max_neighbor, real_node_num, weight, dim=0, c=1.0, ): + ''' + y [node_num * max_neighbor, dim] + ''' + x = y[0:real_node_num*max_neighbor, :] + weight_tmp = weight.view(-1,1)[0:real_node_num*max_neighbor, :] + x = self.h2k(x) + + lamb = self.lorenz_factor(x, c=c, keepdim=True) + lamb = lamb * weight_tmp + lamb = lamb.view(real_node_num, max_neighbor, -1) + + x = x.view(real_node_num, max_neighbor, -1) + k_mean = (torch.sum(lamb * x, dim=1, keepdim=True) / (torch.sum(lamb, dim=1, keepdim=True))).squeeze() + h_mean = self.k2h(k_mean) + + virtual_mean = torch.cat((torch.tensor([[1.0]]), torch.zeros(1,y.size(-1)-1)), 1).cuda().to(self.args.device) + tmp = virtual_mean.repeat(node_num-real_node_num, 1) + + mean = torch.cat((h_mean, tmp), 0) + return mean + + def test_lor(self, A): + tmp1 = (A[:,0] * A[:,0]).view(-1) + tmp2 = A[:,1:] + tmp2 = th.diag(tmp2.mm(tmp2.transpose(0,1))) + return (tmp1 - tmp2) + + def retrieve_params(self, weight, step): + """ + Args: + weight: a list of weights + step: a certain layer + """ + layer_weight = th.cat((th.zeros((self.args.dim-1, 1)).cuda().to(self.args.device), weight[step]), dim=1) + 
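        # The (dim-1) x (dim-1) Stiefel parameter W is embedded into a dim x dim
        # block matrix [[1, 0], [0, W]]: the cat above prepends a zero column, and
        # the lines below stack the row (1, 0, ..., 0) on top, leaving the
        # time-like Lorentz coordinate untouched.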
tmp = th.zeros((1, self.args.dim)).cuda().to(self.args.device) + tmp[0,0] = 1 + layer_weight = th.cat((tmp, layer_weight), dim=0) + return layer_weight + + def aggregate_msg(self, node_repr, adj_mat, weight, layer_weight, mask): + """ + message passing for a specific message type. + """ + node_num, max_neighbor = adj_mat.shape[0], adj_mat.shape[1] + combined_msg = node_repr.clone() + + tmp = self.test_lor(node_repr) + msg = th.mm(node_repr, layer_weight) * mask + real_node_num = (mask>0).sum() + + # select out the neighbors of each node + neighbors = th.index_select(msg, 0, adj_mat.view(-1)) + combined_msg = self.hyperbolic_mean(neighbors, node_num, max_neighbor, real_node_num, weight) + return combined_msg + + def get_combined_msg(self, step, node_repr, adj_mat, weight, mask): + """ + perform message passing in the tangent space of x' + """ + gnn_layer = 0 if self.args.tie_weight else step + combined_msg = None + for relation in range(0, self.type_of_msg): + layer_weight = self.retrieve_params(getattr(self, "msg_%d_weight" % relation), gnn_layer) + aggregated_msg = self.aggregate_msg(node_repr, + adj_mat[relation], + weight[relation], + layer_weight, mask) + combined_msg = aggregated_msg if combined_msg is None else (combined_msg + aggregated_msg) + return combined_msg + + + def encode(self, node_repr, adj_list, weight): + node_repr = self.activation(self.linear(node_repr)) + adj_list, weight = self.split_input(adj_list, weight) + + mask = torch.ones((node_repr.size(0),1)).cuda().to(self.args.device) + node_repr = self.args.manifold.exp_map_zero(node_repr) + + for step in range(self.args.num_layers): + node_repr = node_repr * mask + tmp = node_repr + combined_msg = self.get_combined_msg(step, node_repr, adj_list, weight, mask) + combined_msg = (combined_msg) * mask + node_repr = combined_msg * mask + node_repr = self.apply_activation(node_repr) * mask + real_node_num = (mask>0).sum() + node_repr = self.args.manifold.normalize(node_repr) + _, node_centroid_sim = self.distance(node_repr, mask) + return node_centroid_sim.squeeze() + +class Encoder(nn.Module): + """ + Encoder abstract class. + """ + + def __init__(self, c): + super(Encoder, self).__init__() + self.c = c + + def encode(self, x, adj): + if self.encode_graph: + input = (x, adj) + output, _ = self.layers.forward(input) + else: + output = self.layers.forward(x) + return output diff --git a/H2HGCN/optimizers/__init__.py b/H2HGCN/optimizers/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..23027ab1847bcd1d4e3c46d7ae72fbfba2fc86b5 --- /dev/null +++ b/H2HGCN/optimizers/__init__.py @@ -0,0 +1 @@ +from torch.optim import Adam diff --git a/H2HGCN/optimizers/rsgd.py b/H2HGCN/optimizers/rsgd.py new file mode 100644 index 0000000000000000000000000000000000000000..968b97444edb76491fa39cdc65636cdf9fc6b432 --- /dev/null +++ b/H2HGCN/optimizers/rsgd.py @@ -0,0 +1,29 @@ +import torch as th +from torch.optim.optimizer import Optimizer, required +from Ghypeddings.H2HGCN.utils import * +import os +import math + +class RiemannianSGD(Optimizer): + """Riemannian stochastic gradient descent. + """ + def __init__(self, args, params, lr): + defaults = dict(lr=lr) + self.args = args + super(RiemannianSGD, self).__init__(params, defaults) + + def step(self, lr=None): + """ + Performs a single optimization step. 
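        Each parameter's Euclidean gradient is first converted to a Riemannian
        gradient with `manifold.rgrad`, and the update then follows the manifold
        via the exponential map: p <- exp_map_x(p, -lr * rgrad).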
+ """ + loss = None + for group in self.param_groups: + for p in group['params']: + if p.grad is None: + continue + d_p = p.grad.data + d_p = self.args.manifold.rgrad(p, d_p) + if lr is None: + lr = group['lr'] + p.data = self.args.manifold.exp_map_x(p, -lr * d_p) + return loss diff --git a/H2HGCN/utils/__init__.py b/H2HGCN/utils/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..7c3b5149888b0c78501eb36595aff1f4f4027b8c --- /dev/null +++ b/H2HGCN/utils/__init__.py @@ -0,0 +1 @@ +from Ghypeddings.H2HGCN.utils.pre_utils import * \ No newline at end of file diff --git a/H2HGCN/utils/data_utils.py b/H2HGCN/utils/data_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..f726ddff75b141d465f387292b13fa3fa25e6f2e --- /dev/null +++ b/H2HGCN/utils/data_utils.py @@ -0,0 +1,102 @@ +"""Data utils functions for pre-processing and data loading.""" +import os +import pickle as pkl +import sys +import networkx as nx +import numpy as np +import scipy.sparse as sp +import torch +from Ghypeddings.H2HGCN.utils.pre_utils import * + +def convert_hgnn_adj(adj): + hgnn_adj = [[i] for i in range(adj.shape[0])] + hgnn_weight = [[1] for i in range(adj.shape[0])] + for i in range(adj.shape[0]): + for j in range(adj.shape[1]): + if adj[i,j] == 1: + hgnn_adj[i].append(j) + hgnn_weight[i].append(1) + + max_len = max([len(i) for i in hgnn_adj]) + normalize_weight(hgnn_adj, hgnn_weight) + + hgnn_adj = pad_sequence(hgnn_adj, max_len) + hgnn_weight = pad_sequence(hgnn_weight, max_len) + hgnn_adj = np.array(hgnn_adj) + hgnn_weight = np.array(hgnn_weight) + return torch.from_numpy(hgnn_adj).cuda(), torch.from_numpy(hgnn_weight).cuda().float() + + +def process_data(args,adj,features,labels): + data = process_data_nc(args,adj,features,labels) + data['adj_train_norm'], data['features'] = process( + data['adj_train'], data['features'], args.normalize_adj, args.normalize_feats + ) + return data + + +def process(adj, features, normalize_adj, normalize_feats): + if sp.isspmatrix(features): + features = np.array(features.todense()) + if normalize_feats: + features = normalize(features) + features = torch.Tensor(features) + if normalize_adj: + adj = normalize(adj) + adj = sparse_mx_to_torch_sparse_tensor(adj) + return adj, features + +def normalize(mx): + """Row-normalize sparse matrix.""" + rowsum = np.array(mx.sum(1)) + r_inv = np.power(rowsum, -1).flatten() + r_inv[np.isinf(r_inv)] = 0. + r_mat_inv = sp.diags(r_inv) + mx = r_mat_inv.dot(mx) + return mx + +def sparse_mx_to_torch_sparse_tensor(sparse_mx): + """Convert a scipy sparse matrix to a torch sparse tensor.""" + sparse_mx = sparse_mx.tocoo() + indices = torch.from_numpy( + np.vstack((sparse_mx.row, sparse_mx.col)).astype(np.int64) + ) + values = torch.Tensor(sparse_mx.data) + shape = torch.Size(sparse_mx.shape) + return torch.sparse.FloatTensor(indices, values, shape) + +def augment(adj, features, normalize_feats=True): + deg = np.squeeze(np.sum(adj, axis=0).astype(int)) + deg[deg > 5] = 5 + deg_onehot = torch.tensor(np.eye(6)[deg], dtype=torch.float).squeeze() + const_f = torch.ones(features.size(0), 1) + features = torch.cat((features, deg_onehot, const_f), dim=1) + return features + +def split_data(labels, val_prop, test_prop, seed): + np.random.seed(seed) + nb_nodes = labels.shape[0] + all_idx = np.arange(nb_nodes) + pos_idx = labels.nonzero()[0] + neg_idx = (1. 
- labels).nonzero()[0] + np.random.shuffle(pos_idx) + np.random.shuffle(neg_idx) + pos_idx = pos_idx.tolist() + neg_idx = neg_idx.tolist() + nb_pos_neg = min(len(pos_idx), len(neg_idx)) + nb_val = round(val_prop * nb_pos_neg) + nb_test = round(test_prop * nb_pos_neg) + idx_val_pos, idx_test_pos, idx_train_pos = pos_idx[:nb_val], pos_idx[nb_val:nb_val + nb_test], pos_idx[ + nb_val + nb_test:] + idx_val_neg, idx_test_neg, idx_train_neg = neg_idx[:nb_val], neg_idx[nb_val:nb_val + nb_test], neg_idx[ + nb_val + nb_test:] + return idx_val_pos + idx_val_neg, idx_test_pos + idx_test_neg, idx_train_pos + idx_train_neg + + +def process_data_nc(args,adj,features,labels): + adj = sp.csr_matrix(adj) + hgnn_adj, hgnn_weight = convert_hgnn_adj(adj.todense()) + idx_val, idx_test, idx_train = split_data(labels, args.val_prop, args.test_prop, seed=args.seed) + labels = torch.LongTensor(labels) + data = {'adj_train': adj, 'features': features, 'labels': labels, 'idx_train': idx_train, 'idx_val': idx_val, 'idx_test': idx_test, 'hgnn_adj': hgnn_adj, 'hgnn_weight': hgnn_weight} + return data \ No newline at end of file diff --git a/H2HGCN/utils/eval_utils.py b/H2HGCN/utils/eval_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..4a17797a6916957f4b9f0962f77bfe15000ecb4e --- /dev/null +++ b/H2HGCN/utils/eval_utils.py @@ -0,0 +1,13 @@ +from sklearn.metrics import accuracy_score, f1_score, recall_score,precision_score,roc_auc_score + +def acc_f1(output, labels, average='binary'): + preds = output.max(1)[1].type_as(labels) + if preds.is_cuda: + preds = preds.cpu() + labels = labels.cpu() + accuracy = accuracy_score(labels,preds) + f1 = f1_score(labels,preds , average=average) + recall = recall_score(labels,preds) + precision = precision_score(labels,preds ) + roc_auc = roc_auc_score(labels,preds) + return accuracy, f1 , recall,precision, roc_auc \ No newline at end of file diff --git a/H2HGCN/utils/math_utils.py b/H2HGCN/utils/math_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..9cf278ed7ce59b97f4793f5def3218f3e830d473 --- /dev/null +++ b/H2HGCN/utils/math_utils.py @@ -0,0 +1,69 @@ +"""Math utils functions.""" + +import torch + + +def cosh(x, clamp=15): + return x.clamp(-clamp, clamp).cosh() + + +def sinh(x, clamp=15): + return x.clamp(-clamp, clamp).sinh() + + +def tanh(x, clamp=15): + return x.clamp(-clamp, clamp).tanh() + + +def arcosh(x): + return Arcosh.apply(x) + + +def arsinh(x): + return Arsinh.apply(x) + + +def artanh(x): + return Artanh.apply(x) + + +class Artanh(torch.autograd.Function): + @staticmethod + def forward(ctx, x): + x = x.clamp(-1 + 1e-15, 1 - 1e-15) + ctx.save_for_backward(x) + z = x.double() + return (torch.log_(1 + z).sub_(torch.log_(1 - z))).mul_(0.5).to(x.dtype) + + @staticmethod + def backward(ctx, grad_output): + input, = ctx.saved_tensors + return grad_output / (1 - input ** 2) + + +class Arsinh(torch.autograd.Function): + @staticmethod + def forward(ctx, x): + ctx.save_for_backward(x) + z = x.double() + return (z + torch.sqrt_(1 + z.pow(2))).clamp_min_(1e-15).log_().to(x.dtype) + + @staticmethod + def backward(ctx, grad_output): + input, = ctx.saved_tensors + return grad_output / (1 + input ** 2) ** 0.5 + + +class Arcosh(torch.autograd.Function): + @staticmethod + def forward(ctx, x): + x = x.clamp(min=1.0 + 1e-15) + ctx.save_for_backward(x) + z = x.double() + return (z + torch.sqrt_(z.pow(2) - 1)).clamp_min_(1e-15).log_().to(x.dtype) + + @staticmethod + def backward(ctx, grad_output): + input, = ctx.saved_tensors + return 
grad_output / (input ** 2 - 1) ** 0.5 + diff --git a/H2HGCN/utils/pre_utils.py b/H2HGCN/utils/pre_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..283e73035851b0e7bbe7a78b02c8eeb2257be7f7 --- /dev/null +++ b/H2HGCN/utils/pre_utils.py @@ -0,0 +1,167 @@ +from collections import defaultdict +import os +import pickle +import json +import torch.nn as nn +import torch as th +import torch.optim as optim +import numpy as np +import random +from Ghypeddings.H2HGCN.optimizers.rsgd import RiemannianSGD +import math +import subprocess +import random + +def set_seed(seed): + """ + Set the random seed + """ + random.seed(seed) + np.random.seed(seed) + th.manual_seed(seed) + th.cuda.manual_seed(seed) + th.cuda.manual_seed_all(seed) + +def th_dot(x, y, keepdim=True): + return th.sum(x * y, dim=1, keepdim=keepdim) + +def pad_sequence(data_list, maxlen, value=0): + return [row + [value] * (maxlen - len(row)) for row in data_list] + +def normalize_weight(adj_mat, weight): + degree = [1 / math.sqrt(sum(np.abs(w))) for w in weight] + for dst in range(len(adj_mat)): + for src_idx in range(len(adj_mat[dst])): + src = adj_mat[dst][src_idx] + weight[dst][src_idx] = degree[dst] * weight[dst][src_idx] * degree[src] + +def nn_init(nn_module, method='orthogonal'): + """ + Initialize a Sequential or Module object + Args: + nn_module: Sequential or Module + method: initialization method + """ + if method == 'none': + return + for param_name, _ in nn_module.named_parameters(): + if isinstance(nn_module, nn.Sequential): + # for a Sequential object, the param_name contains both id and param name + i, name = param_name.split('.', 1) + param = getattr(nn_module[int(i)], name) + else: + param = getattr(nn_module, param_name) + if param_name.find('weight') > -1: + init_weight(param, method) + elif param_name.find('bias') > -1: + nn.init.uniform_(param, -1e-4, 1e-4) + +def get_params(params_list, vars_list): + """ + Add parameters in vars_list to param_list + """ + for i in vars_list: + if issubclass(i.__class__, nn.Module): + params_list.extend(list(i.parameters())) + elif issubclass(i.__class__, nn.Parameter): + params_list.append(i) + else: + print("Encounter unknown objects") + exit(1) + +def categorize_params(args): + """ + Categorize parameters into hyperbolic ones and euclidean ones + """ + stiefel_params, euclidean_params = [], [] + get_params(euclidean_params, args.eucl_vars) + get_params(stiefel_params, args.stie_vars) + return stiefel_params, euclidean_params + +def get_activation(args): + if args.activation == 'leaky_relu': + return nn.LeakyReLU(args.leaky_relu) + elif args.activation == 'rrelu': + return nn.RReLU() + elif args.activation == 'relu': + return nn.ReLU() + elif args.activation == 'elu': + return nn.ELU() + elif args.activation == 'prelu': + return nn.PReLU() + elif args.activation == 'selu': + return nn.SELU() + +def init_weight(weight, method): + """ + Initialize parameters + Args: + weight: a Parameter object + method: initialization method + """ + if method == 'orthogonal': + nn.init.orthogonal_(weight) + elif method == 'xavier': + nn.init.xavier_uniform_(weight) + elif method == 'kaiming': + nn.init.kaiming_uniform_(weight) + elif method == 'none': + pass + else: + raise Exception('Unknown init method') + + +def get_stiefel_optimizer(args, params, lr_stie): + if args.stiefel_optimizer == 'rsgd': + optimizer = RiemannianSGD( + args, + params, + lr=lr_stie, + ) + elif args.stiefel_optimizer == 'ramsgrad': + optimizer = RiemannianAMSGrad( + args, + params, + lr=lr_stie, + 
) + else: + print("unsupported hyper optimizer") + exit(1) + return optimizer + +def get_lr_scheduler(args, optimizer): + if args.lr_scheduler == 'exponential': + return optim.lr_scheduler.ExponentialLR(optimizer, gamma=args.lr_gamma) + elif args.lr_scheduler == 'cosine': + return optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=30, eta_min=0) + elif args.lr_scheduler == 'cycle': + return optim.lr_scheduler.CyclicLR(optimizer, 0, max_lr=args.lr, step_size_up=20, cycle_momentum=False) + elif args.lr_scheduler == 'step': + return optim.lr_scheduler.StepLR( + optimizer, + step_size=int(args.step_lr_reduce_freq), + gamma=float(args.step_lr_gamma) + ) + elif args.lr_scheduler == 'none': + return NoneScheduler() + +def get_optimizer(args, params, lr): + if args.optimizer == 'sgd': + optimizer = optim.SGD(params, lr=lr, weight_decay=args.weight_decay) + elif args.optimizer == 'Adam': + optimizer = optim.Adam(params, lr=lr, weight_decay=args.weight_decay) + elif args.optimizer == 'amsgrad': + optimizer = optim.Adam(params, lr=lr, amsgrad=True, weight_decay=args.weight_decay) + return optimizer + +def set_up_optimizer_scheduler(hyperbolic, args, model, lr, lr_stie, pprint=True): + stiefel_params, euclidean_params = categorize_params(args) + #assert(len(list(model.parameters())) == len(stiefel_params) + len(euclidean_params)) + optimizer = get_optimizer(args, euclidean_params, lr) + lr_scheduler = get_lr_scheduler(args, optimizer) + if len(stiefel_params) > 0: + stiefel_optimizer = get_stiefel_optimizer(args, stiefel_params, lr_stie) + stiefel_lr_scheduler = get_lr_scheduler(args, stiefel_optimizer) + else: + stiefel_optimizer, stiefel_lr_scheduler = None, None + return optimizer, lr_scheduler, stiefel_optimizer, stiefel_lr_scheduler \ No newline at end of file diff --git a/H2HGCN/utils/train_utils.py b/H2HGCN/utils/train_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..71781f97d42cfae91a9caef4ada7b56ce7a4cbe1 --- /dev/null +++ b/H2HGCN/utils/train_utils.py @@ -0,0 +1,52 @@ +import os +import numpy as np +import torch +import torch.nn.functional as F +import torch.nn.modules.loss +import argparse + + +def format_metrics(metrics, split): + """Format metric in metric dict for logging.""" + return " ".join( + ["{}_{}: {:.4f}".format(split, metric_name, metric_val) for metric_name, metric_val in metrics.items()]) + + +def create_args(*args): + parser = argparse.ArgumentParser() + parser.add_argument('--dim', type=int, default=args[0]) + parser.add_argument('--c', type=int, default=args[1]) + parser.add_argument('--num_layers', type=int, default=args[2]) + parser.add_argument('--bias', type=bool, default=args[3]) + parser.add_argument('--act', type=str, default=args[4]) + parser.add_argument('--select_manifold', type=str, default=args[5]) + parser.add_argument('--num_centroid', type=int, default=args[6]) + parser.add_argument('--lr_stie', type=float, default=args[7]) + parser.add_argument('--stie_vars', nargs='+', default=args[8]) + parser.add_argument('--stiefel_optimizer', type=str, default=args[9]) + parser.add_argument('--eucl_vars', nargs='+', default=args[10]) + parser.add_argument('--grad_clip', type=float, default=args[11]) + parser.add_argument('--optimizer', type=str, default=args[12]) + parser.add_argument('--weight_decay', type=float, default=args[13]) + parser.add_argument('--lr', type=float, default=args[14]) + parser.add_argument('--lr_scheduler', type=str, default=args[15]) + parser.add_argument('--lr_gamma', type=float, default=args[16]) + 
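    # These flags mirror, in the same positional order, the keyword arguments of
    # H2HGCN.__init__; argparse is used here only to bundle the values passed in
    # *args into a single namespace of defaults.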
parser.add_argument('--step_lr_gamma', type=float, default=args[17]) + parser.add_argument('--step_lr_reduce_freq', type=int, default=args[18]) + parser.add_argument('--proj_init', type=str, default=args[19]) + parser.add_argument('--tie_weight', type=bool, default=args[20]) + parser.add_argument('--cuda', type=int, default=args[21]) + parser.add_argument('--epochs', type=int, default=args[22]) + parser.add_argument('--min_epochs', type=int, default=args[23]) + parser.add_argument('--patience', type=int, default=args[24]) + parser.add_argument('--seed', type=int, default=args[25]) + parser.add_argument('--log_freq', type=int, default=args[26]) + parser.add_argument('--eval_freq', type=int, default=args[27]) + parser.add_argument('--val_prop', type=float, default=args[28]) + parser.add_argument('--test_prop', type=float, default=args[29]) + parser.add_argument('--double_precision', type=int, default=args[30]) + parser.add_argument('--dropout', type=float, default=args[31]) + parser.add_argument('--normalize_adj', type=bool, default=args[32]) + parser.add_argument('--normalize_feats', type=bool, default=args[33]) + flags, unknown = parser.parse_known_args() + return flags \ No newline at end of file diff --git a/HGCAE/.gitignore b/HGCAE/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..bee8a64b79a99590d5303307144172cfe824fbf7 --- /dev/null +++ b/HGCAE/.gitignore @@ -0,0 +1 @@ +__pycache__ diff --git a/HGCAE/__init__.py b/HGCAE/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..bfa83a015b9025ddbca2b7c1ed543c66fd3af3d9 --- /dev/null +++ b/HGCAE/__init__.py @@ -0,0 +1,2 @@ +from __future__ import print_function +from __future__ import division diff --git a/HGCAE/hgcae.py b/HGCAE/hgcae.py new file mode 100644 index 0000000000000000000000000000000000000000..614885ff479917f83335decb0a7ceb0a90e54a14 --- /dev/null +++ b/HGCAE/hgcae.py @@ -0,0 +1,230 @@ +from Ghypeddings.HGCAE.models.base_models import LPModel +import logging +import torch +import numpy as np +import os +import time +from Ghypeddings.HGCAE.utils.train_utils import get_dir_name, format_metrics +from Ghypeddings.HGCAE.utils.data_utils import process_data +from Ghypeddings.HGCAE.utils.train_utils import create_args , get_classifier ,get_clustering_algorithm,get_anomaly_detection_algorithm +import Ghypeddings.HGCAE.optimizers as optimizers +from Ghypeddings.HGCAE.utils.data_utils import sparse_mx_to_torch_sparse_tensor + +from Ghypeddings.classifiers import calculate_metrics + +class HGCAE(object): + def __init__(self, + adj, + features, + labels, + dim, + hidden_dim, + c=None, + num_layers=2, + bias=True, + act='relu', + grad_clip=None, + optimizer='RiemannianAdam', + weight_decay=0.01, + lr=0.001, + gamma=0.5, + lr_reduce_freq=500, + cuda=0, + epochs=50, + min_epochs=50, + patience=None, + seed=42, + log_freq=1, + eval_freq=1, + val_prop=0.0002, + test_prop=0.3, + double_precision=0, + dropout=0.1, + lambda_rec=1.0, + lambda_lp=1.0, + num_dec_layers=2, + use_att= True, + att_type= 'sparse_adjmask_dist', + att_logit='tanh', + beta = 0.2, + classifier=None, + clusterer = None, + normalize_adj=False, + normalize_feats=True, + anomaly_detector=None + ): + + self.args = 
create_args(dim,hidden_dim,c,num_layers,bias,act,grad_clip,optimizer,weight_decay,lr,gamma,lr_reduce_freq,cuda,epochs,min_epochs,patience,seed,log_freq,eval_freq,val_prop,test_prop,double_precision,dropout,lambda_rec,lambda_lp,num_dec_layers,use_att,att_type,att_logit,beta,classifier,clusterer,normalize_adj,normalize_feats,anomaly_detector) + self.cls = None + + self.args.n_nodes = adj.shape[0] + self.args.feat_dim = features.shape[1] + self.args.n_classes = len(np.unique(labels)) + self.data = process_data(self.args,adj,features,labels) + + if(self.args.c == None): + self.args.c_trainable = 1 + self.args.c = 1.0 + + np.random.seed(self.args.seed) + torch.manual_seed(self.args.seed) + + if int(self.args.double_precision): + torch.set_default_dtype(torch.float64) + if int(self.args.cuda) >= 0: + torch.cuda.manual_seed(self.args.seed) + + self.args.device = 'cuda:' + str(self.args.cuda) if int(self.args.cuda) >= 0 else 'cpu' + self.args.patience = self.args.epochs if not self.args.patience else int(self.args.patience) + + if not self.args.lr_reduce_freq: + self.args.lr_reduce_freq = self.args.epochs + + self.args.nb_false_edges = len(self.data['train_edges_false']) + self.args.nb_edges = len(self.data['train_edges']) + st0 = np.random.get_state() + self.args.np_seed = st0 + np.random.set_state(self.args.np_seed) + + for x, val in self.data.items(): + if 'adj' in x: + self.data[x] = sparse_mx_to_torch_sparse_tensor(self.data[x]) + + self.model = LPModel(self.args) + + if self.args.cuda is not None and int(self.args.cuda) >= 0 : + os.environ['CUDA_VISIBLE_DEVICES'] = str(self.args.cuda) + self.model = self.model.to(self.args.device) + for x, val in self.data.items(): + if torch.is_tensor(self.data[x]): + self.data[x] = self.data[x].to(self.args.device) + + self.adj_train_enc = self.data['adj_train_enc'] + self.optimizer = getattr(optimizers, self.args.optimizer)(params=self.model.parameters(), lr=self.args.lr, + weight_decay=self.args.weight_decay) + self.lr_scheduler = torch.optim.lr_scheduler.StepLR( + self.optimizer, + step_size=int(self.args.lr_reduce_freq), + gamma=float(self.args.gamma) + ) + + self.best_emb = None + + + + def fit(self): + + logging.getLogger().setLevel(logging.INFO) + logging.info(f'Using: {self.args.device}') + logging.info(str(self.model)) + tot_params = sum([np.prod(p.size()) for p in self.model.parameters()]) + logging.info(f"Total number of parameters: {tot_params}") + + t_total = time.time() + counter = 0 + best_val_metrics = self.model.init_metric_dict() + + best_losses = [] + train_losses = [] + val_losses = [] + + for epoch in range(self.args.epochs): + t = time.time() + self.model.train() + self.optimizer.zero_grad() + embeddings = self.model.encode(self.data['features'], self.adj_train_enc) + train_metrics = self.model.compute_metrics(embeddings, self.data, 'train', epoch) + print(train_metrics) + train_metrics['loss'].backward() + if self.args.grad_clip is not None: + max_norm = float(self.args.grad_clip) + all_params = list(self.model.parameters()) + for param in all_params: + torch.nn.utils.clip_grad_norm_(param, max_norm) + self.optimizer.step() + self.lr_scheduler.step() + + train_losses.append(train_metrics['loss'].item()) + if(len(best_losses) == 0): + best_losses.append(train_losses[0]) + elif (best_losses[-1] > train_losses[-1]): + best_losses.append(train_losses[-1]) + else: + best_losses.append(best_losses[-1]) + + with torch.no_grad(): + if (epoch + 1) % self.args.log_freq == 0: + logging.info(" ".join(['Epoch: {:04d}'.format(epoch + 1), + 'lr: 
{}'.format(self.lr_scheduler.get_lr()[0]), + format_metrics(train_metrics, 'train'), + 'time: {:.4f}s'.format(time.time() - t) + ])) + + if (epoch + 1) % self.args.eval_freq == 0: + self.model.eval() + embeddings = self.model.encode(self.data['features'], self.adj_train_enc) + #val_metrics = self.model.compute_metrics(embeddings, self.data, 'val') + # val_losses.append(val_metrics['loss'].item()) + # if (epoch + 1) % self.args.log_freq == 0: + # logging.info(" ".join(['Epoch: {:04d}'.format(epoch + 1), format_metrics(val_metrics, 'val')])) + # if self.model.has_improved(best_val_metrics, val_metrics): + # self.best_emb = embeddings + # best_val_metrics = val_metrics + # counter = 0 + # else: + # counter += 1 + # if counter == self.args.patience and epoch > self.args.min_epochs: + # logging.info("Early stopping") + # break + + logging.info("Training Finished!") + logging.info("Total time elapsed: {:.4f}s".format(time.time() - t_total)) + + # train_idx = np.unique(self.data['train_edges'][:,0].cpu().detach().numpy()) + # val_idx = np.unique(self.data['val_edges'][:,0].cpu().detach().numpy()) + # idx = np.unique(np.concatenate((train_idx,val_idx))) + # X = self.model.manifold.logmap0(self.best_emb[idx],self.model.encoder.curvatures[-1]).cpu().detach().numpy() + # y = self.data['labels'].reshape(-1,1)[idx] + + # if(self.args.classifier): + # self.cls = get_classifier(self.args, X,y) + # acc,f1,recall,precision,roc_auc = calculate_metrics(self.cls,X,y) + # elif self.args.clusterer: + # y = y.reshape(-1,) + # acc,f1,recall,precision,roc_auc = get_clustering_algorithm(self.args.clusterer,X,y)[6:] + # elif self.args.anomaly_detector: + # y = y.reshape(-1,) + # acc,f1,recall,precision,roc_auc = get_anomaly_detection_algorithm(self.args.anomaly_detector,X,y)[6:] + + # return {'train':train_losses,'best':best_losses,'val':val_losses},acc,f1,recall,precision,roc_auc , time.time() - t_total + return {'train':train_losses,'best':best_losses,'val':val_losses}, time.time() - t_total + + def predict(self): + self.model.eval() + test_idx = np.unique(self.data['test_edges'][:,0].cpu().detach().numpy()) + embeddings = self.model.encode(self.data['features'], self.adj_train_enc) + val_metrics = self.model.compute_metrics(embeddings, self.data, 'test') + data = self.model.manifold.logmap0(embeddings[test_idx],self.model.encoder.curvatures[-1]).cpu().detach().numpy() + labels = self.data['labels'].reshape(-1,1)[test_idx] + if self.args.classifier: + acc,f1,recall,precision,roc_auc = calculate_metrics(self.cls,data,labels) + elif self.args.clusterer: + labels = labels.reshape(-1,) + acc,f1,recall,precision,roc_auc = get_clustering_algorithm(self.args.clusterer,data,labels)[6:] + elif self.args.anomaly_detector: + labels = labels.reshape(-1,) + acc,f1,recall,precision,roc_auc = get_anomaly_detection_algorithm(self.args.anomaly_detector,data,labels)[6:] + self.tb_embeddings = embeddings + return val_metrics['loss'].item(),acc,f1,recall,precision,roc_auc + + + def save_embeddings(self,directory): + self.model.eval() + embeddings = self.model.encode(self.data['features'], self.adj_train_enc) + tb_embeddings_euc = self.model.manifold.logmap0(embeddings,self.model.encoder.curvatures[-1]) + for_classification_hyp = np.hstack((embeddings.cpu().detach().numpy(),self.data['labels'].reshape(-1,1))) + for_classification_euc = np.hstack((tb_embeddings_euc.cpu().detach().numpy(),self.data['labels'].reshape(-1,1))) + hyp_file_path = os.path.join(directory,'hgcae_embeddings_hyp.csv') + euc_file_path = 
os.path.join(directory,'hgcae_embeddings_euc.csv') + np.savetxt(hyp_file_path, for_classification_hyp, delimiter=',') + np.savetxt(euc_file_path, for_classification_euc, delimiter=',') \ No newline at end of file diff --git a/HGCAE/layers/__init__.py b/HGCAE/layers/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/HGCAE/layers/att_layers.py b/HGCAE/layers/att_layers.py new file mode 100644 index 0000000000000000000000000000000000000000..e99964c73d3eefa9a058c73d9bb5d53fa604839d --- /dev/null +++ b/HGCAE/layers/att_layers.py @@ -0,0 +1,80 @@ +"""Attention layers (some modules are copied from https://github.com/Diego999/pyGAT.)""" +import numpy as np +import torch +import torch.nn as nn +import torch.nn.functional as F + +def HypAggAtt(in_features, manifold, dropout, act=None, att_type=None, att_logit=None, beta=0): + att_logit = get_att_logit(att_logit, att_type) + return GeometricAwareHypAggAtt(in_features, manifold, dropout, lambda x: x, att_logit=att_logit, beta=beta) + +class GeometricAwareHypAggAtt(nn.Module): + def __init__(self, in_features, manifold, dropout, act, att_logit=torch.tanh, beta=0.): + super(GeometricAwareHypAggAtt, self).__init__() + self.dropout = dropout + self.att_logit=att_logit + self.special_spmm = SpecialSpmm() + + + self.m = manifold + self.beta = nn.Parameter(torch.Tensor([1e-6])) + self.con = nn.Parameter(torch.Tensor([1e-6])) + self.act = act + self.in_features = in_features + + def forward (self, x, adj, c=1): + n = x.size(0) + edge = adj._indices() + + assert not torch.isnan(self.beta).any() + edge_h = self.beta * self.m.sqdist(x[edge[0, :], :], x[edge[1, :], :], c) + self.con + + self.edge_h = edge_h + assert not torch.isnan(edge_h).any() + edge_e = self.att_logit(edge_h) + self.edge_e = edge_e + ones = torch.ones(size=(n, 1)) + if x.is_cuda: + ones = ones.to(x.device) + e_rowsum = self.special_spmm(edge, abs(edge_e), torch.Size([n, n]), ones) + 1e-10 + + return edge_e, e_rowsum + +class SpecialSpmmFunction(torch.autograd.Function): + """Special function for only sparse region backpropataion layer.""" + # generate sparse matrix from `indicex, values, shape` and matmul with b + # Previously, `AXW` computing did not need bp to `A`. + # To trian attention of `A`, now bp through sparse matrix needed. 
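    # backward() returns gradients only for `values` (read off the dense product
    # grad_output @ b^T at the nonzero positions of the sparse matrix) and for the
    # dense input `b`; `indices` and `shape` are not trainable, so they get None.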
+ @staticmethod + def forward(ctx, indices, values, shape, b): + assert indices.requires_grad == False + a = torch.sparse_coo_tensor(indices, values, shape, device=b.device) # make sparse matrix shaped of `NxN` + ctx.save_for_backward(a, b) # save sparse matrix for bp + ctx.N = shape[0] # number of nodes + return torch.matmul(a, b) + + @staticmethod + def backward(ctx, grad_output): + assert not torch.isnan(grad_output).any() + + # grad_output : Nxd gradient + # a : NxN adj(attention) matrix, b: Nxd node feature + a, b = ctx.saved_tensors + grad_values = grad_b = None + if ctx.needs_input_grad[1]: + grad_a_dense = grad_output.matmul(b.t()) + edge_idx = a._indices()[0, :] * ctx.N + a._indices()[1, :] # flattening (x,y) --> nx + y + grad_values = grad_a_dense.view(-1)[edge_idx] + if ctx.needs_input_grad[3]: + grad_b = a.t().matmul(grad_output) + return None, grad_values, None, grad_b + + +class SpecialSpmm(nn.Module): + def forward(self, indices, values, shape, b): + return SpecialSpmmFunction.apply(indices, values, shape, b) + +def get_att_logit(att_logit, att_type): + if att_logit: + att_logit = getattr(torch, att_logit) + return att_logit diff --git a/HGCAE/layers/hyp_layers.py b/HGCAE/layers/hyp_layers.py new file mode 100644 index 0000000000000000000000000000000000000000..c19b24150a0992f06790f510e4da5d1dcd516d50 --- /dev/null +++ b/HGCAE/layers/hyp_layers.py @@ -0,0 +1,232 @@ +""" +Hyperbolic layers. +Major codes of hyperbolic layers are from HGCN +""" +import math + +import torch +import torch.nn as nn +import torch.nn.functional as F +import torch.nn.init as init +from torch.nn.modules.module import Module +from torch.nn.parameter import Parameter + +from Ghypeddings.HGCAE.layers.att_layers import HypAggAtt, SpecialSpmm + + +def get_dim_act_curv(args): + """ + Helper function to get dimension and activation at every layer. + :param args: + :return: + """ + if not args.act: + act = lambda x: x + else: + act = getattr(F, args.act) + acts = [act] * (args.num_layers - 1) + + dims = [args.feat_dim] + # Check layer_num and hdden_dim match + if args.num_layers > 1: + hidden_dim = [args.hidden_dim for _ in range(args.num_layers -1)] + if args.num_layers != len(hidden_dim) + 1: + raise RuntimeError('Check dimension hidden:{}, num_layers:{}'.format(args.hidden_dim, args.num_layers) ) + dims = dims + hidden_dim + + dims += [args.dim] + acts += [act] + n_curvatures = args.num_layers + if args.c_trainable == 1: # NOTE : changed from # if args.c is None: + # create list of trainable curvature parameters + curvatures = [nn.Parameter(torch.Tensor([args.c]).to(args.device)) for _ in range(n_curvatures)] + else: + # fixed curvature + curvatures = [torch.tensor([args.c]) for _ in range(n_curvatures)] + if not args.cuda == -1: + curvatures = [curv.to(args.device) for curv in curvatures] + return dims, acts, curvatures + + + +class HNNLayer(nn.Module): + """ + Hyperbolic neural networks layer. + """ + + def __init__(self, manifold, in_features, out_features, c_in, c_out, dropout, act, use_bias): + super(HNNLayer, self).__init__() + self.linear = HypLinear(manifold, in_features, out_features, c_in, dropout, use_bias) + self.hyp_act = HypAct(manifold, c_in, c_out, act) + + def forward(self, x): + h = self.linear.forward(x) + h = self.hyp_act.forward(h) + return h + + +class HyperbolicGraphConvolution(nn.Module): + """ + Hyperbolic graph convolution layer. 
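    The forward pass chains three sub-layers: a hyperbolic linear map (HypLinear),
    tangent-space neighbourhood aggregation (HypAgg, optionally attention-weighted),
    and a hyperbolic activation (HypAct) that can move the representation from
    curvature c_in to c_out; both curvatures are clamped to stay positive before
    each pass.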
+ """ + + def __init__(self, manifold, in_features, out_features, c_in, c_out, dropout, act, use_bias, use_att, + att_type='sparse_adjmask_dist', att_logit=torch.exp, beta=0., decode=False): + super(HyperbolicGraphConvolution, self).__init__() + self.linear = HypLinear(manifold, in_features, out_features, c_in, dropout, use_bias) + self.agg = HypAgg(manifold, c_in, use_att, out_features, dropout, att_type=att_type, att_logit=att_logit, beta=beta, decode=decode) + self.hyp_act = HypAct(manifold, c_in, c_out, act) + self.decode = decode + + def forward(self, input): + x, adj = input + assert not torch.isnan(self.hyp_act.c_in).any() + self.hyp_act.c_in.data = torch.clamp_min(self.hyp_act.c_in,1e-12) + if self.hyp_act.c_out: + assert not torch.isnan(self.hyp_act.c_out).any() + self.hyp_act.c_out.data = torch.clamp_min(self.hyp_act.c_out,1e-12) + assert not torch.isnan(x).any() + h = self.linear.forward(x) + assert not torch.isnan(h).any() + h = self.agg.forward(h, adj, prev_x=x) + assert not torch.isnan(h).any() + h = self.hyp_act.forward(h) + assert not torch.isnan(h).any() + output = h, adj + return output + + +class HypLinear(nn.Module): + """ + Hyperbolic linear layer. + """ + + def __init__(self, manifold, in_features, out_features, c, dropout, use_bias): + super(HypLinear, self).__init__() + self.manifold = manifold + self.in_features = in_features + self.out_features = out_features + self.c = c + self.dropout = dropout + self.use_bias = use_bias + # self.bias = nn.Parameter(torch.Tensor(out_features)) + self.bias = nn.Parameter(torch.Tensor(1, out_features)) + self.weight = nn.Parameter(torch.Tensor(out_features, in_features)) + self.reset_parameters() + + def reset_parameters(self): + init.xavier_uniform_(self.weight, gain=math.sqrt(2)) + init.constant_(self.bias, 0) + + def forward(self, x): + drop_weight = F.dropout(self.weight, self.dropout, training=self.training) + mv = self.manifold.mobius_matvec(drop_weight, x, self.c) + res = self.manifold.proj(mv, self.c) + if self.use_bias: + bias = self.bias + hyp_bias = self.manifold.expmap0(bias, self.c) + hyp_bias = self.manifold.proj(hyp_bias, self.c) + res = self.manifold.mobius_add(res, hyp_bias, c=self.c) + res = self.manifold.proj(res, self.c) + return res + + def extra_repr(self): + return 'in_features={}, out_features={}, c={}'.format( + self.in_features, self.out_features, self.c + ) + + +class HypAgg(Module): + """ + Hyperbolic aggregation layer. 
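+    Aggregation happens in the tangent space at the origin: node features are
+    log-mapped with logmap0, combined through the (optionally attention-weighted)
+    sparse adjacency, then mapped back with expmap0 and projected onto the ball.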
+ """ + + def __init__(self, manifold, c, use_att, in_features, dropout, att_type='sparse_adjmask_dist', att_logit=None, beta=0, decode=False): + super(HypAgg, self).__init__() + self.manifold = manifold + self.c = c + self.use_att = use_att + + self.in_features = in_features + self.dropout = dropout + if use_att: + self.att = HypAggAtt(in_features, manifold, dropout, act=None, att_type=att_type, att_logit=att_logit, beta=beta) + self.att_type = att_type + + self.special_spmm = SpecialSpmm() + self.decode = decode + + def forward(self, x, adj, prev_x=None): + + if self.use_att: + dist = 'dist' in self.att_type + if dist: + if 'sparse' in self.att_type: + if self.decode: + # NOTE : AGG(prev_x) + edge_e, e_rowsum = self.att(prev_x, adj, self.c) # SparseAtt + else: + # NOTE : AGG(x) + edge_e, e_rowsum = self.att(x, adj, self.c) # SparseAtt + self.edge_e = edge_e + self.e_rowsum = e_rowsum + ## SparseAtt + x_tangent = self.manifold.logmap0(x, c=self.c) + N = x.size()[0] + edge = adj._indices() + support_t = self.special_spmm(edge, edge_e, torch.Size([N, N]), x_tangent) + assert not torch.isnan(support_t).any() + support_t = support_t.div(e_rowsum) + assert not torch.isnan(support_t).any() + output = self.manifold.proj(self.manifold.expmap0(support_t, c=self.c), c=self.c) + else: + adj = self.att(x, adj, self.c) # DenseAtt + x_tangent = self.manifold.logmap0(x, c=self.c) + support_t = torch.spmm(adj, x_tangent) + output = self.manifold.proj(self.manifold.expmap0(support_t, c=self.c), c=self.c) + else: + ## MLP attention + x_tangent = self.manifold.logmap0(x, c=self.c) + adj = self.att(x_tangent, adj) + support_t = torch.spmm(adj, x_tangent) + output = self.manifold.proj(self.manifold.expmap0(support_t, c=self.c), c=self.c) + else: + x_tangent = self.manifold.logmap0(x, c=self.c) + support_t = torch.spmm(adj, x_tangent) + output = self.manifold.proj(self.manifold.expmap0(support_t, c=self.c), c=self.c) + + return output + + def extra_repr(self): + return 'c={}, use_att={}, decode={}'.format( + self.c, self.use_att, self.decode + ) + + +class HypAct(Module): + """ + Hyperbolic activation layer. + """ + + def __init__(self, manifold, c_in, c_out, act): + super(HypAct, self).__init__() + self.manifold = manifold + self.c_in = c_in + self.c_out = c_out + self.act = act + + def forward(self, x): + if self.manifold.name == 'PoincareBall': + if self.c_out: + xt = self.manifold.activation(x, self.act, self.c_in, self.c_out) + return xt + else: + xt = self.manifold.logmap0(x, c=self.c_in) + return xt + else: + NotImplementedError("not implemented") + + def extra_repr(self): + return 'Manifold={},\n c_in={},\n act={},\n c_out={}'.format( + self.manifold.name, self.c_in, self.act.__name__, self.c_out + ) diff --git a/HGCAE/layers/layers.py b/HGCAE/layers/layers.py new file mode 100644 index 0000000000000000000000000000000000000000..d17b37d1f62390e982b94ee4e5450d8a5a0bf632 --- /dev/null +++ b/HGCAE/layers/layers.py @@ -0,0 +1,68 @@ +"""Euclidean layers.""" +import math + +import torch +import torch.nn as nn +import torch.nn.functional as F +from torch.nn.modules.module import Module +from torch.nn.parameter import Parameter + + +def get_dim_act(args): + """ + Helper function to get dimension and activation at every layer. 
+ :param args: + :return: + """ + if not args.act: + act = lambda x: x + else: + act = getattr(F, args.act) + acts = [act] * (args.num_layers - 1) + + dims = [args.feat_dim] + if args.num_layers > 1: + # Check layer_num and hdden_dim match + hidden_dim = [int(h) for h in args.hidden_dim.split(',')] + if args.num_layers != len(hidden_dim) + 1: + raise RuntimeError('Check dimension hidden:{}, num_laysers:{}'.format(args.hidden_dim, args.num_layers) ) + dims = dims + hidden_dim + + dims += [args.dim] + acts += [act] + return dims, acts + + +class Linear(Module): + """ + Simple Linear layer with dropout. + """ + + def __init__(self, in_features, out_features, dropout, act, use_bias): + super(Linear, self).__init__() + self.dropout = dropout + self.linear = nn.Linear(in_features, out_features, use_bias) + self.act = act + + def forward(self, x): + hidden = self.linear.forward(x) + hidden = F.dropout(hidden, self.dropout, training=self.training) + out = self.act(hidden) + return out + +''' +InnerProductDecdoer implemntation from: +https://github.com/zfjsail/gae-pytorch/blob/master/gae/model.py +''' +class InnerProductDecoder(nn.Module): + """Decoder for using inner product for prediction.""" + + def __init__(self, dropout=0, act=torch.sigmoid): + super(InnerProductDecoder, self).__init__() + self.dropout = dropout + self.act = act + + def forward(self, emb_in, emb_out): + cos_dist = emb_in * emb_out + probs = self.act(cos_dist.sum(1)) + return probs diff --git a/HGCAE/manifolds/__init__.py b/HGCAE/manifolds/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..ed1d71769d7412c7f0fa82c9425ee7fe449e9567 --- /dev/null +++ b/HGCAE/manifolds/__init__.py @@ -0,0 +1,7 @@ +''' +Major codes of hyperbolic layers are from HGCN +Refer Lorentz implementation from HGCN if you need. +''' +from Ghypeddings.HGCAE.manifolds.base import ManifoldParameter +from Ghypeddings.HGCAE.manifolds.euclidean import Euclidean +from Ghypeddings.HGCAE.manifolds.poincare import PoincareBall diff --git a/HGCAE/manifolds/base.py b/HGCAE/manifolds/base.py new file mode 100644 index 0000000000000000000000000000000000000000..805edd678d9e768f22a0dce3a6691bf8556ed53d --- /dev/null +++ b/HGCAE/manifolds/base.py @@ -0,0 +1,84 @@ +''' +Major codes of hyperbolic layers are from HGCN +''' +from torch.nn import Parameter + +class Manifold(object): + """ + Abstract class to define operations on a manifold. 
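+    Every operation takes the curvature c as an explicit argument; this base
+    class only raises NotImplementedError, and the concrete geometries
+    (Euclidean, PoincareBall) override the methods they support.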
+ """ + + def __init__(self): + super().__init__() + self.eps = 10e-8 + + def sqdist(self, p1, p2, c): + """Squared distance between pairs of points.""" + raise NotImplementedError + + def egrad2rgrad(self, p, dp, c): + """Converts Euclidean Gradient to Riemannian Gradients.""" + raise NotImplementedError + + def proj(self, p, c): + """Projects point p on the manifold.""" + raise NotImplementedError + + def proj_tan(self, u, p, c): + """Projects u on the tangent space of p.""" + raise NotImplementedError + + def proj_tan0(self, u, c): + """Projects u on the tangent space of the origin.""" + raise NotImplementedError + + def expmap(self, u, p, c): + """Exponential map of u at point p.""" + raise NotImplementedError + + def logmap(self, p1, p2, c): + """Logarithmic map of point p1 at point p2.""" + raise NotImplementedError + + def expmap0(self, u, c): + """Exponential map of u at the origin.""" + raise NotImplementedError + + def logmap0(self, p, c): + """Logarithmic map of point p at the origin.""" + raise NotImplementedError + + def mobius_add(self, x, y, c, dim=-1): + """Adds points x and y.""" + raise NotImplementedError + + def mobius_matvec(self, m, x, c): + """Performs hyperboic martrix-vector multiplication.""" + raise NotImplementedError + + def init_weights(self, w, c, irange=1e-5): + """Initializes random weigths on the manifold.""" + raise NotImplementedError + + def inner(self, p, c, u, v=None): + """Inner product for tangent vectors at point x.""" + raise NotImplementedError + + def ptransp(self, x, y, u, c): + """Parallel transport of u from x to y.""" + raise NotImplementedError + + +class ManifoldParameter(Parameter): + """ + Subclass of torch.nn.Parameter for Riemannian optimization. + """ + def __new__(cls, data, requires_grad, manifold, c): + return Parameter.__new__(cls, data, requires_grad) + + def __init__(self, data, requires_grad, manifold, c): + self.c = c + self.manifold = manifold + + def __repr__(self): + return '{} Parameter containing:\n'.format(self.manifold.name) + super(Parameter, self).__repr__() diff --git a/HGCAE/manifolds/euclidean.py b/HGCAE/manifolds/euclidean.py new file mode 100644 index 0000000000000000000000000000000000000000..c102023b24eebc91053be85984a8a295166e8c41 --- /dev/null +++ b/HGCAE/manifolds/euclidean.py @@ -0,0 +1,66 @@ +''' +Major codes of hyperbolic layers are from HGCN +''' +import torch +from Ghypeddings.HGCAE.manifolds.base import Manifold + + +class Euclidean(Manifold): + """ + Euclidean Manifold class. + """ + + def __init__(self): + super(Euclidean, self).__init__() + self.name = 'Euclidean' + + def normalize(self, p): + dim = p.size(-1) + p_norm = torch.renorm(p, 2, 0, 1.) 
+ return p_norm + + def sqdist(self, p1, p2, c): + return (p1 - p2).pow(2).sum(dim=-1) + + def egrad2rgrad(self, p, dp, c): + return dp + + def proj(self, p, c): + return p + + def proj_tan(self, u, p, c): + return u + + def proj_tan0(self, u, c): + return u + + def expmap(self, u, p, c): + return p + u + + def logmap(self, p1, p2, c): + return p2 - p1 + + def expmap0(self, u, c): + return u + + def logmap0(self, p, c): + return p + + def mobius_add(self, x, y, c, dim=-1): + return x + y + + def mobius_matvec(self, m, x, c): + mx = x @ m.transpose(-1, -2) + return mx + + def init_weights(self, w, c, irange=1e-5): + w.data.uniform_(-irange, irange) + return w + + def inner(self, p, c, u, v=None, keepdim=False): + if v is None: + v = u + return (u * v).sum(dim=-1, keepdim=keepdim) + + def ptransp(self, x, y, v, c): + return v diff --git a/HGCAE/manifolds/poincare.py b/HGCAE/manifolds/poincare.py new file mode 100644 index 0000000000000000000000000000000000000000..df06e38afaca4063ce8975527f286fb2397d8956 --- /dev/null +++ b/HGCAE/manifolds/poincare.py @@ -0,0 +1,136 @@ +''' +Major codes of hyperbolic layers are from HGCN +''' +import torch +from Ghypeddings.HGCAE.manifolds.base import Manifold +from torch.autograd import Function +from Ghypeddings.HGCAE.utils.math_utils import artanh, tanh + + +class PoincareBall(Manifold): + """ + PoicareBall Manifold class. + + We use the following convention: x0^2 + x1^2 + ... + xd^2 < 1 / c + + Note that 1/sqrt(c) is the Poincare ball radius. + + """ + + def __init__(self, ): + super(PoincareBall, self).__init__() + self.name = 'PoincareBall' + self.min_norm = 1e-15 + self.eps = {torch.float32: 4e-3, torch.float64: 1e-5} + + def sqdist(self, p1, p2, c): + sqrt_c = c ** 0.5 + dist_c = artanh( + sqrt_c * self.mobius_add(-p1, p2, c, dim=-1).norm(dim=-1, p=2, keepdim=False) + ) + dist = dist_c * 2 / sqrt_c + return dist ** 2 + + def _lambda_x(self, x, c): + x_sqnorm = torch.sum(x.data.pow(2), dim=-1, keepdim=True) + return 2 / (1. - c * x_sqnorm).clamp_min(self.min_norm) + + def egrad2rgrad(self, p, dp, c): + lambda_p = self._lambda_x(p, c) + dp /= lambda_p.pow(2) + return dp + + def proj(self, x, c): + norm = torch.clamp_min(x.norm(dim=-1, keepdim=True, p=2), self.min_norm) + maxnorm = (1 - self.eps[x.dtype]) / (c ** 0.5) + cond = norm > maxnorm + projected = x / norm * maxnorm + return torch.where(cond, projected, x) + + def proj_tan(self, u, p, c): + return u + + def proj_tan0(self, u, c): + return u + + def expmap(self, u, p, c): + sqrt_c = c ** 0.5 + u_norm = u.norm(dim=-1, p=2, keepdim=True).clamp_min(self.min_norm) + second_term = ( + tanh(sqrt_c / 2 * self._lambda_x(p, c) * u_norm) + * u + / (sqrt_c * u_norm) + ) + gamma_1 = self.mobius_add(p, second_term, c) + return gamma_1 + + def logmap(self, p1, p2, c): + sub = self.mobius_add(-p1, p2, c) + sub_norm = sub.norm(dim=-1, p=2, keepdim=True).clamp_min(self.min_norm) + lam = self._lambda_x(p1, c) + sqrt_c = c ** 0.5 + return 2 / sqrt_c / lam * artanh(sqrt_c * sub_norm) * sub / sub_norm + + def expmap0(self, u, c): + sqrt_c = c ** 0.5 + u_norm = torch.clamp_min(u.norm(dim=-1, p=2, keepdim=True), self.min_norm) + gamma_1 = tanh(sqrt_c * u_norm) * u / (sqrt_c * u_norm) + return gamma_1 + + def logmap0(self, p, c): + sqrt_c = c ** 0.5 + p_norm = p.norm(dim=-1, p=2, keepdim=True).clamp_min(self.min_norm) + scale = 1. 
/ sqrt_c * artanh(sqrt_c * p_norm) / p_norm + return scale * p + + def mobius_add(self, x, y, c, dim=-1): + x2 = x.pow(2).sum(dim=dim, keepdim=True) + y2 = y.pow(2).sum(dim=dim, keepdim=True) + xy = (x * y).sum(dim=dim, keepdim=True) + num = (1 + 2 * c * xy + c * y2) * x + (1 - c * x2) * y + denom = 1 + 2 * c * xy + c ** 2 * x2 * y2 + return num / denom.clamp_min(self.min_norm) + + def mobius_matvec(self, m, x, c): + sqrt_c = c ** 0.5 + x_norm = x.norm(dim=-1, keepdim=True, p=2).clamp_min(self.min_norm) + mx = x @ m.transpose(-1, -2) + mx_norm = mx.norm(dim=-1, keepdim=True, p=2).clamp_min(self.min_norm) + res_c = tanh(mx_norm / x_norm * artanh(sqrt_c * x_norm)) * mx / (mx_norm * sqrt_c) + cond = (mx == 0).prod(-1, keepdim=True, dtype=torch.uint8) + res_0 = torch.zeros(1, dtype=res_c.dtype, device=res_c.device) + res = torch.where(cond, res_0, res_c) + return res + + def init_weights(self, w, c, irange=1e-5): + w.data.uniform_(-irange, irange) + return w + + def _gyration(self, u, v, w, c, dim: int = -1): + u2 = u.pow(2).sum(dim=dim, keepdim=True) + v2 = v.pow(2).sum(dim=dim, keepdim=True) + uv = (u * v).sum(dim=dim, keepdim=True) + uw = (u * w).sum(dim=dim, keepdim=True) + vw = (v * w).sum(dim=dim, keepdim=True) + c2 = c ** 2 + a = -c2 * uw * v2 + c * vw + 2 * c2 * uv * vw + b = -c2 * vw * u2 - c * uw + d = 1 + 2 * c * uv + c2 * u2 * v2 + return w + 2 * (a * u + b * v) / d.clamp_min(self.min_norm) + + def inner(self, x, c, u, v=None, keepdim=False, dim=-1): + if v is None: + v = u + lambda_x = self._lambda_x(x, c) + return lambda_x ** 2 * (u * v).sum(dim=dim, keepdim=keepdim) + + def ptransp(self, x, y, u, c): + lambda_x = self._lambda_x(x, c) + lambda_y = self._lambda_x(y, c) + return self._gyration(y, -x, u, c) * lambda_x / lambda_y + + def activation(self, x, act, c_in, c_out): + x_act = act(x) + x_prev = self.logmap0(x_act, c_in) + x_next = self.expmap0(x_prev, c_out) + return x_next diff --git a/HGCAE/models/__init__.py b/HGCAE/models/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/HGCAE/models/base_models.py b/HGCAE/models/base_models.py new file mode 100644 index 0000000000000000000000000000000000000000..7d10fd9f134e8ffe44b5c5e58975f31723a081cd --- /dev/null +++ b/HGCAE/models/base_models.py @@ -0,0 +1,200 @@ +import Ghypeddings.HGCAE.models.encoders as encoders +import torch +import numpy as np +import torch.nn as nn +import torch.nn.functional as F +from Ghypeddings.HGCAE.models.decoders import model2decoder +from Ghypeddings.HGCAE.layers.layers import InnerProductDecoder +from sklearn.metrics import roc_auc_score, average_precision_score +from Ghypeddings.HGCAE.utils.eval_utils import acc_f1 +from sklearn import cluster +from sklearn.metrics import accuracy_score, normalized_mutual_info_score, adjusted_rand_score +import Ghypeddings.HGCAE.manifolds as manifolds +import Ghypeddings.HGCAE.models.encoders as encoders + +class BaseModel(nn.Module): + """ + Base model for graph embedding tasks. 
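+    Wraps a PoincareBall manifold, an HGCAE encoder and an InnerProductDecoder.
+    encode() produces node embeddings, pred_link_score() scores node pairs with
+    the inner-product decoder, and eval_cluster() runs k-means on the embeddings
+    (log-mapped to the tangent space when attention is used) and aligns the
+    predicted clusters to the ground-truth labels with the Munkres algorithm.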
+ """ + + def __init__(self, args): + super(BaseModel, self).__init__() + self.manifold_name = "PoincareBall" + if args.c is not None: + self.c = torch.tensor([args.c]) + if not args.cuda == -1: + self.c = self.c.to(args.device) + else: + self.c = nn.Parameter(torch.Tensor([1.])) + self.manifold = getattr(manifolds, self.manifold_name)() + self.nnodes = args.n_nodes + self.n_classes = args.n_classes + self.encoder = getattr(encoders, "HGCAE")(self.c, args) + self.num_layers=args.num_layers + + # Embedding c + self.hyperbolic_embedding = True if args.use_att else False + self.decoder_type = 'InnerProductDecoder' + self.dc = InnerProductDecoder(dropout=0, act=torch.sigmoid) + + + def encode(self, x, adj): + h = self.encoder.encode(x, adj) + return h + + def pred_link_score(self, h, idx): # for LP,REC + emb_in = h[idx[:, 0], :] + emb_out = h[idx[:, 1], :] + probs = self.dc.forward(emb_in, emb_out) + + return probs + + def decode(self, h, adj, idx): # REC + output = self.decoder.decode(h, adj) + return output + + + def eval_cluster(self, embeddings, data, split): + if self.hyperbolic_embedding: + emb_c = self.encoder.layers[-1].hyp_act.c_out + embeddings = self.manifold.logmap0(embeddings.to(emb_c.device), c=emb_c).cpu() + + idx = data[f'idx_{split}'] + n_classes = self.n_classes + + embeddings_to_cluster = embeddings[idx].detach().cpu().numpy() + # gt_label = data['labels'][idx].cpu().numpy() + gt_label = data['labels'] + + kmeans = cluster.KMeans(n_clusters=n_classes, algorithm='auto') + kmeans.fit(embeddings_to_cluster) + pred_label = kmeans.fit_predict(embeddings_to_cluster) + + from munkres import Munkres + def best_map(L1,L2): + #L1 should be the groundtruth labels and L2 should be the clustering labels we got + Label1 = np.unique(L1) + nClass1 = len(Label1) + Label2 = np.unique(L2) + nClass2 = len(Label2) + nClass = np.maximum(nClass1,nClass2) + G = np.zeros((nClass,nClass)) + for i in range(nClass1): + ind_cla1 = L1 == Label1[i] + ind_cla1 = ind_cla1.astype(float) + for j in range(nClass2): + ind_cla2 = L2 == Label2[j] + ind_cla2 = ind_cla2.astype(float) + G[i,j] = np.sum(ind_cla2 * ind_cla1) + m = Munkres() + index = m.compute(-G.T) + index = np.array(index) + c = index[:,1] + newL2 = np.zeros(L2.shape) + for i in range(nClass2): + newL2[L2 == Label2[i]] = Label1[c[i]] + return newL2 + + + def err_rate(gt_s, s): + c_x = best_map(gt_s, s) + err_x = np.sum(gt_s[:] !=c_x[:]) + missrate = err_x.astype(float) / (gt_s.shape[0]) + return missrate + + + acc = 1-err_rate(gt_label, pred_label) + # acc = accuracy_score(gt_label, pred_label) + nmi = normalized_mutual_info_score(gt_label, pred_label, average_method='arithmetic') + ari = adjusted_rand_score(gt_label, pred_label) + + metrics = { 'cluster_acc': acc, 'nmi': nmi, 'ari': ari} + return metrics, pred_label + + + def compute_metrics(self, embeddings, data, split, epoch=None): + raise NotImplementedError + + def init_metric_dict(self): + raise NotImplementedError + + def has_improved(self, m1, m2): + raise NotImplementedError + +class LPModel(BaseModel): + """ + Base model for link prediction task. 
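+    Trained with binary cross-entropy on inner-product scores of positive and
+    sampled negative edges (at most 5000 positive edges per step when positive
+    edge sampling is enabled); reports ROC-AUC and average precision, and adds
+    an optional feature-reconstruction term weighted by lambda_rec.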
+ """ + + def __init__(self, args): + super(LPModel, self).__init__(args) + self.nb_false_edges = args.nb_false_edges + self.positive_edge_samplig = True + if self.positive_edge_samplig: + self.nb_edges = min(args.nb_edges, 5000) # NOTE : be-aware too dense edges + else: + self.nb_edges = args.nb_edges + + if args.lambda_rec > 0: + self.num_dec_layers = args.num_dec_layers + self.lambda_rec = args.lambda_rec + c = self.encoder.curvatures if hasattr(self.encoder, 'curvatures') else args.c ### handle HNN + self.decoder = model2decoder(c, args, 'rec') + else: + self.lambda_rec = 0 + + if args.lambda_lp > 0: + self.lambda_lp = args.lambda_lp + else: + self.lambda_lp = 0 + + def compute_metrics(self, embeddings, data, split, epoch=None): + if split == 'train': + num_true_edges = data[f'{split}_edges'].shape[0] + if self.positive_edge_samplig and num_true_edges > self.nb_edges: + edges_true = data[f'{split}_edges'][np.random.randint(0, num_true_edges, self.nb_edges)] + else: + edges_true = data[f'{split}_edges'] + edges_false = data[f'{split}_edges_false'][np.random.randint(0, self.nb_false_edges, self.nb_edges)] + else: + edges_true = data[f'{split}_edges'] + edges_false = data[f'{split}_edges_false'] + + pos_scores = self.pred_link_score(embeddings, edges_true) + neg_scores = self.pred_link_score(embeddings, edges_false) + assert not torch.isnan(pos_scores).any() + assert not torch.isnan(neg_scores).any() + loss = F.binary_cross_entropy(pos_scores, torch.ones_like(pos_scores)) + loss += F.binary_cross_entropy(neg_scores, torch.zeros_like(neg_scores)) + if pos_scores.is_cuda: + pos_scores = pos_scores.cpu() + neg_scores = neg_scores.cpu() + labels = [1] * pos_scores.shape[0] + [0] * neg_scores.shape[0] + preds = list(pos_scores.data.numpy()) + list(neg_scores.data.numpy()) + roc = roc_auc_score(labels, preds) + ap = average_precision_score(labels, preds) + metrics = {'loss': loss, 'roc': roc, 'ap': ap} + + assert not torch.isnan(loss).any() + if self.lambda_rec: + idx = data['idx_all'] + recon = self.decode(embeddings, data['adj_train_dec'], idx) ## NOTE : adj + assert not torch.isnan(recon).any() + if self.num_dec_layers == self.num_layers: + target = data['features'][idx] + elif self.num_dec_layers == self.num_layers - 1: + target = self.encoder.features[0].detach()[idx] + else: + raise RuntimeError('num_dec_layers only support 1,2') + loss_rec = self.lambda_rec * torch.nn.functional.mse_loss(recon[idx], target , reduction='mean') + assert not torch.isnan(loss_rec).any() + loss_lp = loss * self.lambda_lp + metrics.update({'loss': loss_lp + loss_rec, 'loss_rec': loss_rec, 'loss_lp': loss_lp}) + + return metrics + + def init_metric_dict(self): + return {'roc': -1, 'ap': -1} + + def has_improved(self, m1, m2): + return 0.5 * (m1['roc'] + m1['ap']) < 0.5 * (m2['roc'] + m2['ap']) diff --git a/HGCAE/models/decoders.py b/HGCAE/models/decoders.py new file mode 100644 index 0000000000000000000000000000000000000000..72c46942397dc97d00f1980e6569399e2792a644 --- /dev/null +++ b/HGCAE/models/decoders.py @@ -0,0 +1,106 @@ +"""Graph decoders.""" +import Ghypeddings.HGCAE.manifolds as manifolds +import torch.nn as nn +import torch.nn.functional as F + + +import torch + + +class Decoder(nn.Module): + """ + Decoder abstract class + """ + + def __init__(self, c): + super(Decoder, self).__init__() + self.c = c + + def classify(self, x, adj): + ''' + output + - nc : probs + - rec : input_feat + ''' + if self.decode_adj: + input = (x, adj) + output, _ = self.classifier.forward(input) + else: + output = 
self.classifier.forward(x) + return output + + + def decode(self, x, adj): + ''' + output + - nc : probs + - rec : input_feat + ''' + if self.decode_adj: + input = (x, adj) + output, _ = self.decoder.forward(input) + else: + output = self.decoder.forward(x) + return output + + + +import Ghypeddings.HGCAE.layers.hyp_layers as hyp_layers +class HGCAEDecoder(Decoder): + """ + Decoder for HGCAE + """ + + def __init__(self, c, args, task): + super(HGCAEDecoder, self).__init__(c) + self.manifold = getattr(manifolds, 'PoincareBall')() + + assert args.num_layers > 0 + + dims, acts, _ = hyp_layers.get_dim_act_curv(args) + dims = dims[::-1] + acts = acts[::-1][:-1] + [lambda x: x] # Last layer without act + self.curvatures = self.c[::-1] + + encdec_share_curvature = False + if not encdec_share_curvature and args.num_layers == args.num_dec_layers: # do not share and enc-dec mirror-shape + num_c = len(self.curvatures) + self.curvatures = self.curvatures[:1] + if args.c_trainable == 1: + self.curvatures += [nn.Parameter(torch.Tensor([args.c]).to(args.device))] * (num_c - 1) + else: + self.curvatures += [torch.tensor([args.c])] * (num_c - 1) + if not args.cuda == -1: + self.curvatures = [curv.to(args.device) for curv in self.curvatures] + + + self.curvatures = self.curvatures[:-1] + [None] + + + hgc_layers = [] + num_dec_layers = args.num_dec_layers + for i in range(num_dec_layers): + c_in, c_out = self.curvatures[i], self.curvatures[i + 1] + in_dim, out_dim = dims[i], dims[i + 1] + act = acts[i] + hgc_layers.append( + hyp_layers.HyperbolicGraphConvolution( + self.manifold, in_dim, out_dim, c_in, c_out, args.dropout, act, args.bias, args.use_att, + att_type=args.att_type, att_logit=args.att_logit, beta=args.beta, decode=True + ) + ) + + self.decoder = nn.Sequential(*hgc_layers) + self.decode_adj = True + + # NOTE : self.c is fixed, not trainable + def classify(self, x, adj): + h = self.manifold.logmap0(x, c=self.c) + return super(HGCAEDecoder, self).classify(h, adj) + + def decode(self, x, adj): + output = super(HGCAEDecoder, self).decode(x, adj) + return output + +model2decoder = HGCAEDecoder + diff --git a/HGCAE/models/encoders.py b/HGCAE/models/encoders.py new file mode 100644 index 0000000000000000000000000000000000000000..e4b0a7475c463f12b61cf8b07af5455b5ff0fc66 --- /dev/null +++ b/HGCAE/models/encoders.py @@ -0,0 +1,65 @@ +"""Graph encoders.""" +import Ghypeddings.HGCAE.manifolds as manifolds +import Ghypeddings.HGCAE.layers.hyp_layers as hyp_layers +import torch +import torch.nn as nn + + +class Encoder(nn.Module): + """ + Encoder abstract class. + """ + + def __init__(self, c, use_cnn=None): + super(Encoder, self).__init__() + self.c = c + + def encode(self, x, adj): + self.features = [] + if self.encode_graph: + input = (x, adj) + xx = input + for i in range(len(self.layers)): + out = self.layers[i].forward(xx) + self.features.append(out[0]) + xx = out + output , _ = xx + else: + output = self.layers.forward(x) + return output + +class HGCAE(Encoder): + """ + Hyperbolic Graph Convolutional Auto-Encoders. 
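+    Stacks HyperbolicGraphConvolution layers whose dimensions, activations and
+    curvatures come from get_dim_act_curv; encode() first lifts the Euclidean
+    input features onto the Poincare ball via expmap0 (clamping the first
+    curvature to stay positive) before running the shared Encoder loop.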
+ """ + + def __init__(self, c, args): #, use_cnn + super(HGCAE, self).__init__(c, use_cnn=True) + self.manifold = getattr(manifolds, "PoincareBall")() + assert args.num_layers > 0 + dims, acts, self.curvatures = hyp_layers.get_dim_act_curv(args) + if args.c_trainable == 1: + self.curvatures.append(nn.Parameter(torch.Tensor([args.c]).to(args.device))) + else: + self.curvatures.append(torch.tensor([args.c]).to(args.device)) + hgc_layers = [] + for i in range(len(dims) - 1): + c_in, c_out = self.curvatures[i], self.curvatures[i + 1] + in_dim, out_dim = dims[i], dims[i + 1] + act = acts[i] + + hgc_layers.append( + hyp_layers.HyperbolicGraphConvolution( + self.manifold, in_dim, out_dim, c_in, c_out, args.dropout, act, args.bias, args.use_att, + att_type=args.att_type, att_logit=args.att_logit, beta=args.beta + ) + ) + self.layers = nn.Sequential(*hgc_layers) + self.encode_graph = True + + def encode(self, x, adj): + self.curvatures[0] = torch.clamp_min(self.curvatures[0],min=1e-12) + x_hyp = self.manifold.proj( + self.manifold.expmap0(self.manifold.proj_tan0(x, self.curvatures[0]), c=self.curvatures[0]), + c=self.curvatures[0]) + return super(HGCAE, self).encode(x_hyp, adj) diff --git a/HGCAE/optimizers/__init__.py b/HGCAE/optimizers/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a1198f3d759b39b51aedfe5b2d92f068151a0fe7 --- /dev/null +++ b/HGCAE/optimizers/__init__.py @@ -0,0 +1,2 @@ +from torch.optim import Adam +from Ghypeddings.HGCAE.optimizers.radam import RiemannianAdam diff --git a/HGCAE/optimizers/radam.py b/HGCAE/optimizers/radam.py new file mode 100644 index 0000000000000000000000000000000000000000..b48cb6fe6f1a66a8b2103a49b207485a143df1f8 --- /dev/null +++ b/HGCAE/optimizers/radam.py @@ -0,0 +1,175 @@ +"""Riemannian adam optimizer geoopt implementation (https://github.com/geoopt/).""" +import torch.optim +from Ghypeddings.HGCAE.manifolds import Euclidean,ManifoldParameter + +_default_manifold = Euclidean() + + +class OptimMixin(object): + def __init__(self, *args, stabilize=None, **kwargs): + self._stabilize = stabilize + super().__init__(*args, **kwargs) + + def stabilize_group(self, group): + pass + + def stabilize(self): + """Stabilize parameters if they are off-manifold due to numerical reasons + """ + for group in self.param_groups: + self.stabilize_group(group) + + +def copy_or_set_(dest, source): + """ + A workaround to respect strides of :code:`dest` when copying :code:`source` + (https://github.com/geoopt/geoopt/issues/70) + Parameters + ---------- + dest : torch.Tensor + Destination tensor where to store new data + source : torch.Tensor + Source data to put in the new tensor + Returns + ------- + dest + torch.Tensor, modified inplace + """ + if dest.stride() != source.stride(): + return dest.copy_(source) + else: + return dest.set_(source) + + +class RiemannianAdam(OptimMixin, torch.optim.Adam): + r"""Riemannian Adam with the same API as :class:`torch.optim.Adam` + Parameters + ---------- + params : iterable + iterable of parameters to optimize or dicts defining + parameter groups + lr : float (optional) + learning rate (default: 1e-3) + betas : Tuple[float, float] (optional) + coefficients used for computing + running averages of gradient and its square (default: (0.9, 0.999)) + eps : float (optional) + term added to the denominator to improve + numerical stability (default: 1e-8) + weight_decay : float (optional) + weight decay (L2 penalty) (default: 0) + amsgrad : bool (optional) + whether to use the AMSGrad variant of this + algorithm 
from the paper `On the Convergence of Adam and Beyond`_ + (default: False) + Other Parameters + ---------------- + stabilize : int + Stabilize parameters if they are off-manifold due to numerical + reasons every ``stabilize`` steps (default: ``None`` -- no stabilize) + .. _On the Convergence of Adam and Beyond: + https://openreview.net/forum?id=ryQu7f-RZ + """ + + def step(self, closure=None): + """Performs a single optimization step. + Arguments + --------- + closure : callable (optional) + A closure that reevaluates the model + and returns the loss. + """ + loss = None + if closure is not None: + loss = closure() + with torch.no_grad(): + for group in self.param_groups: + if "step" not in group: + group["step"] = 0 + betas = group["betas"] + weight_decay = group["weight_decay"] + eps = group["eps"] + learning_rate = group["lr"] + amsgrad = group["amsgrad"] + for point in group["params"]: + grad = point.grad + if grad is None: + continue + + if isinstance(point, (ManifoldParameter)): + manifold = point.manifold + c = point.c + else: + manifold = _default_manifold + c = None + if grad.is_sparse: + raise RuntimeError( + "Riemannian Adam does not support sparse gradients yet (PR is welcome)" + ) + + state = self.state[point] + + # State initialization + if len(state) == 0: + state["step"] = 0 + # Exponential moving average of gradient values + state["exp_avg"] = torch.zeros_like(point) + # Exponential moving average of squared gradient values + state["exp_avg_sq"] = torch.zeros_like(point) + if amsgrad: + # Maintains max of all exp. moving avg. of sq. grad. values + state["max_exp_avg_sq"] = torch.zeros_like(point) + # make local variables for easy access + exp_avg = state["exp_avg"] + exp_avg_sq = state["exp_avg_sq"] + # actual step + grad.add_(weight_decay, point) + grad = manifold.egrad2rgrad(point, grad, c) + + exp_avg.mul_(betas[0]).add_(1 - betas[0], grad) + exp_avg_sq.mul_(betas[1]).add_( + 1 - betas[1], manifold.inner(point, c, grad, keepdim=True) + ) + if amsgrad: + max_exp_avg_sq = state["max_exp_avg_sq"] + # Maintains the maximum of all 2nd moment running avg. till now + torch.max(max_exp_avg_sq, exp_avg_sq, out=max_exp_avg_sq) + # Use the max. for normalizing running avg. 
of gradient + denom = max_exp_avg_sq.sqrt().add_(eps) + else: + denom = exp_avg_sq.sqrt().add_(eps) + group["step"] += 1 + bias_correction1 = 1 - betas[0] ** group["step"] + bias_correction2 = 1 - betas[1] ** group["step"] + step_size = ( + learning_rate * bias_correction2 ** 0.5 / bias_correction1 + ) + + # copy the state, we need it for retraction + # get the direction for ascend + direction = exp_avg / denom + # transport the exponential averaging to the new point + new_point = manifold.proj(manifold.expmap(-step_size * direction, point, c), c) + exp_avg_new = manifold.ptransp(point, new_point, exp_avg, c) + # use copy only for user facing point + copy_or_set_(point, new_point) + exp_avg.set_(exp_avg_new) + + group["step"] += 1 + if self._stabilize is not None and group["step"] % self._stabilize == 0: + self.stabilize_group(group) + return loss + + @torch.no_grad() + def stabilize_group(self, group): + for p in group["params"]: + if not isinstance(p, ManifoldParameter): + continue + state = self.state[p] + if not state: # due to None grads + continue + manifold = p.manifold + c = p.c + exp_avg = state["exp_avg"] + copy_or_set_(p, manifold.proj(p, c)) + exp_avg.set_(manifold.proj_tan(exp_avg, u, c)) diff --git a/HGCAE/utils/__init__.py b/HGCAE/utils/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/HGCAE/utils/data_utils.py b/HGCAE/utils/data_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..8739c8a56bc1e8bf6b7dad1e98e88ccce79a28d7 --- /dev/null +++ b/HGCAE/utils/data_utils.py @@ -0,0 +1,134 @@ +"""Data utils functions for pre-processing and data loading.""" +import os +import pickle as pkl +import sys + +import networkx as nx +import numpy as np +import scipy.sparse as sp +import torch + +from scipy import sparse +import logging + +import pandas as pd + +def process_data(args, adj , features, labels): + ## Load data + data = {'adj_train': sp.csr_matrix(adj), 'features': features, 'labels': labels} + adj = data['adj_train'] + + ## TAKES a lot of time + + adj_train, train_edges, train_edges_false, val_edges, val_edges_false, test_edges, test_edges_false = mask_edges( + adj, args.val_prop, args.test_prop, args.seed + ) + + ## TAKES a lot of time + data['adj_train'] = adj_train + data['train_edges'], data['train_edges_false'] = train_edges, train_edges_false + if args.val_prop + args.test_prop > 0: + data['val_edges'], data['val_edges_false'] = val_edges, val_edges_false + data['test_edges'], data['test_edges_false'] = test_edges, test_edges_false + all_info="" + + ## Adj matrix + adj = data['adj_train'] + data['adj_train_enc'], data['features'] = process( + data['adj_train'], data['features'], args.normalize_adj, args.normalize_feats + ) + + if args.lambda_rec: + data['adj_train_dec'] = rowwise_normalizing(data['adj_train']) + + adj_2hop = get_adj_2hop(adj) + data['adj_train_enc_2hop'] = symmetric_laplacian_smoothing(adj_2hop) + + # NOTE : Re-adjust labels + # Some data omit `0` class, thus n_classes are wrong with `max(labels)+1` + args.n_classes = int(data['labels'].max() + 1) + + data['idx_all'] = range(data['features'].shape[0]) + data_info = "Dataset {} Loaded : dimensions are adj:{}, edges:{}, features:{}, labels:{}\n".format( + 'ddos2019', data['adj_train'].shape, data['adj_train'].sum(), data['features'].shape, data['labels'].shape) + data['info'] = data_info + return data + +def process(adj, features, normalize_adj, normalize_feats): + if sp.isspmatrix(features): + 
features = np.array(features.todense()) + if normalize_feats: + features = normalize(features) + features = torch.Tensor(features) + if normalize_adj: + adj = normalize(adj + sp.eye(adj.shape[0])) + return adj, features + +def get_adj_2hop(adj): + adj_self = adj + sp.eye(adj.shape[0]) + adj_2hop = adj_self.dot(adj_self) + adj_2hop.data = np.clip(adj_2hop.data, 0, 1) + adj_2hop = adj_2hop - sp.eye(adj.shape[0]) - adj + return adj_2hop + +def normalize(mx): + """Row-normalize sparse matrix.""" + rowsum = np.array(mx.sum(1)) + r_inv = np.power(rowsum, -1).flatten() + r_inv[np.isinf(r_inv)] = 0. + r_mat_inv = sp.diags(r_inv) + mx = r_mat_inv.dot(mx) + return mx + +def symmetric_laplacian_smoothing(adj): + """Symmetrically normalize adjacency matrix.""" + adj = adj + sp.eye(adj.shape[0]) # self-loop + + adj = sp.coo_matrix(adj) + rowsum = np.array(adj.sum(1)) + d_inv_sqrt = np.power(rowsum, -0.5).flatten() + d_inv_sqrt[np.isinf(d_inv_sqrt)] = 0. + d_mat_inv_sqrt = sp.diags(d_inv_sqrt) + return adj.dot(d_mat_inv_sqrt).transpose().dot(d_mat_inv_sqrt).tocoo() + +def rowwise_normalizing(adj): + """Row-wise normalize adjacency matrix.""" + adj = adj + sp.eye(adj.shape[0]) # self-loop + adj = sp.coo_matrix(adj) + rowsum = np.array(adj.sum(1)) + d_inv = np.power(rowsum, -1.0).flatten() + d_inv[np.isinf(d_inv)] = 0. + d_mat_inv = sp.diags(d_inv) + return adj.dot(d_mat_inv).transpose().tocoo() + +def sparse_mx_to_torch_sparse_tensor(sparse_mx): + """Convert a scipy sparse matrix to a torch sparse tensor.""" + sparse_mx = sparse_mx.tocoo() + indices = torch.from_numpy( + np.vstack((sparse_mx.row, sparse_mx.col)).astype(np.int64) + ) + values = torch.Tensor(sparse_mx.data) + shape = torch.Size(sparse_mx.shape) + return torch.sparse.FloatTensor(indices, values, shape) + +def mask_edges(adj, val_prop, test_prop, seed): + np.random.seed(seed) # get tp edges + x, y = sp.triu(adj).nonzero() + pos_edges = np.array(list(zip(x, y))) + np.random.shuffle(pos_edges) + # get tn edges + x, y = sp.triu(sp.csr_matrix(1. 
- adj.toarray())).nonzero() # LONG + neg_edges = np.array(list(zip(x, y))) # EVEN LONGER + np.random.shuffle(neg_edges) # ALSO LONG + + m_pos = len(pos_edges) + n_val = int(m_pos * val_prop) + n_test = int(m_pos * test_prop) + val_edges, test_edges, train_edges = pos_edges[:n_val], pos_edges[n_val:n_test + n_val], pos_edges[n_test + n_val:] + val_edges_false, test_edges_false = neg_edges[:n_val], neg_edges[n_val:n_test + n_val] + train_edges_false = np.concatenate([neg_edges, val_edges, test_edges], axis=0) + adj_train = sp.csr_matrix((np.ones(train_edges.shape[0]), (train_edges[:, 0], train_edges[:, 1])), shape=adj.shape) + adj_train = adj_train + adj_train.T + return adj_train, torch.LongTensor(train_edges), torch.LongTensor(train_edges_false), torch.LongTensor(val_edges), \ + torch.LongTensor(val_edges_false), torch.LongTensor(test_edges), torch.LongTensor( + test_edges_false) diff --git a/HGCAE/utils/eval_utils.py b/HGCAE/utils/eval_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..e2793a673e8d9a19d78be82733c652e568cec985 --- /dev/null +++ b/HGCAE/utils/eval_utils.py @@ -0,0 +1,11 @@ +from sklearn.metrics import average_precision_score, accuracy_score, f1_score + +def acc_f1(output, labels, average='binary'): + preds = output.max(1)[1].type_as(labels) + if preds.is_cuda: + preds = preds.cpu() + labels = labels.cpu() + accuracy = accuracy_score(labels,preds) + f1 = f1_score(labels,preds, average=average) + return accuracy, f1 + diff --git a/HGCAE/utils/math_utils.py b/HGCAE/utils/math_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..56a0de2552fcc9ef35e0e933904f1b391d63f3ec --- /dev/null +++ b/HGCAE/utils/math_utils.py @@ -0,0 +1,70 @@ +''' +Code from HGCN (https://github.com/HazyResearch/hgcn/blob/master/utils/math_utils.py) +''' +import torch + + +def cosh(x, clamp=15): + return x.clamp(-clamp, clamp).cosh() + + +def sinh(x, clamp=15): + return x.clamp(-clamp, clamp).sinh() + + +def tanh(x, clamp=15): + return x.clamp(-clamp, clamp).tanh() + + +def arcosh(x): + return Arcosh.apply(x) + + +def arsinh(x): + return Arsinh.apply(x) + + +def artanh(x): + return Artanh.apply(x) + + +class Artanh(torch.autograd.Function): + @staticmethod + def forward(ctx, x): + x = x.clamp(-1 + 1e-15, 1 - 1e-15) + ctx.save_for_backward(x) + z = x.double() + return (torch.log_(1 + z).sub_(torch.log_(1 - z))).mul_(0.5).to(x.dtype) + + @staticmethod + def backward(ctx, grad_output): + input, = ctx.saved_tensors + return grad_output / (1 - input ** 2) + + +class Arsinh(torch.autograd.Function): + @staticmethod + def forward(ctx, x): + ctx.save_for_backward(x) + z = x.double() + return (z + torch.sqrt_(1 + z.pow(2))).clamp_min_(1e-15).log_().to(x.dtype) + + @staticmethod + def backward(ctx, grad_output): + input, = ctx.saved_tensors + return grad_output / (1 + input ** 2) ** 0.5 + + +class Arcosh(torch.autograd.Function): + @staticmethod + def forward(ctx, x): + x = x.clamp(min=1 + 1e-7) + ctx.save_for_backward(x) + z = x.double() + return (z + torch.sqrt_(z.pow(2) - 1)).clamp_min_(1e-15).log_().to(x.dtype) + + @staticmethod + def backward(ctx, grad_output): + input, = ctx.saved_tensors + return grad_output / (input ** 2 - 1) ** 0.5 + diff --git a/HGCAE/utils/train_utils.py b/HGCAE/utils/train_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..42026c479f9cbe09851ff95469669c1cd292b1f0 --- /dev/null +++ b/HGCAE/utils/train_utils.py @@ -0,0 +1,225 @@ +import os +import sys + +import numpy as np +import torch +import 
torch.nn.functional as F +import torch.nn.modules.loss +import argparse + +def format_metrics(metrics, split): + """Format metric in metric dict for logging.""" + return " ".join( + ["{}_{}: {:.8f}".format(split, metric_name, metric_val) for metric_name, metric_val in metrics.items()]) + +def get_dir_name(models_dir): + """Gets a directory to save the model. + + If the directory already exists, then append a new integer to the end of + it. This method is useful so that we don't overwrite existing models + when launching new jobs. + + Args: + models_dir: The directory where all the models are. + + Returns: + The name of a new directory to save the training logs and model weights. + """ + if not os.path.exists(models_dir): + save_dir = os.path.join(models_dir, '0') + os.makedirs(save_dir) + else: + existing_dirs = np.array( + [ + d + for d in os.listdir(models_dir) + if os.path.isdir(os.path.join(models_dir, d)) + ] + ).astype(np.int) + if len(existing_dirs) > 0: + dir_id = str(existing_dirs.max() + 1) + else: + dir_id = "1" + save_dir = os.path.join(models_dir, dir_id) + os.makedirs(save_dir) + return save_dir + + +def add_flags_from_config(parser, config_dict): + """ + Adds a flag (and default value) to an ArgumentParser for each parameter in a config + """ + + def OrNone(default): + def func(x): + # Convert "none" to proper None object + if x.lower() == "none": + return None + # If default is None (and x is not None), return x without conversion as str + elif default is None: + return str(x) + # Otherwise, default has non-None type; convert x to that type + else: + return type(default)(x) + + return func + + for param in config_dict: + default, description = config_dict[param] + try: + if isinstance(default, dict): + parser = add_flags_from_config(parser, default) + elif isinstance(default, list): + if len(default) > 0: + # pass a list as argument + parser.add_argument( + f"--{param}", + action="append", + type=type(default[0]), + default=default, + help=description + ) + else: + pass + parser.add_argument(f"--{param}", action="append", default=default, help=description) + else: + pass + parser.add_argument(f"--{param}", type=OrNone(default), default=default, help=description) + except argparse.ArgumentError: + print( + f"Could not add flag for param {param} because it was already present." 
+ ) + return parser + + + +import subprocess +def check_gpustats(columns=None): + query = r'nvidia-smi --query-gpu=%s --format=csv,noheader' % ','.join(columns) + smi_output = subprocess.check_output(query, shell=True).decode().strip() + + gpustats = [] + for line in smi_output.split('\n'): + if not line: + continue + gpustat = line.split(',') + gpustats.append({k: v.strip() for k, v in zip(columns, gpustat)}) + + return gpustats + + +def assign_gpus(num_gpu, memory_threshold=1000): # (MiB) + os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" + + columns = ['index', 'memory.used'] + gpustats = {i['index']: i['memory.used'] for i in check_gpustats(columns)} + + + + available_gpus = [] + for gpu in sorted(gpustats.keys()): + if int(gpustats.get(gpu).split(' ')[0]) < memory_threshold: + available_gpus.append(gpu) + + if len(available_gpus) < num_gpu: + raise MemoryError('{} GPUs requested, but only {} available'.format(num_gpu, len(available_gpus))) + + gpus_to_assign = available_gpus[:num_gpu] + # os.environ['CUDA_VISIBLE_DEVICES'] = ','.join(gpus_to_assign) + return gpus_to_assign + + + +def create_args(*args): + parser = argparse.ArgumentParser() + parser.add_argument('--dim', type=int, default=args[0]) + parser.add_argument('--hidden_dim', type=int, default=args[1]) + parser.add_argument('--c', type=int, default=args[2]) + parser.add_argument('--num_layers', type=int, default=args[3]) + parser.add_argument('--bias', type=bool, default=args[4]) + parser.add_argument('--act', type=str, default=args[5]) + parser.add_argument('--grad_clip', type=float, default=args[6]) + parser.add_argument('--optimizer', type=str, default=args[7]) + parser.add_argument('--weight_decay', type=float, default=args[8]) + parser.add_argument('--lr', type=float, default=args[9]) + parser.add_argument('--gamma', type=float, default=args[10]) + parser.add_argument('--lr_reduce_freq', type=int, default=args[11]) + parser.add_argument('--cuda', type=int, default=args[12]) + parser.add_argument('--epochs', type=int, default=args[13]) + parser.add_argument('--min_epochs', type=int, default=args[14]) + parser.add_argument('--patience', type=int, default=args[15]) + parser.add_argument('--seed', type=int, default=args[16]) + parser.add_argument('--log_freq', type=int, default=args[17]) + parser.add_argument('--eval_freq', type=int, default=args[18]) + parser.add_argument('--val_prop', type=float, default=args[19]) + parser.add_argument('--test_prop', type=float, default=args[20]) + parser.add_argument('--double_precision', type=int, default=args[21]) + parser.add_argument('--dropout', type=float, default=args[22]) + parser.add_argument('--lambda_rec', type=float, default=args[23]) + parser.add_argument('--lambda_lp', type=float, default=args[24]) + parser.add_argument('--num_dec_layers', type=int, default=args[25]) + parser.add_argument('--use_att', type=bool, default=args[26]) + parser.add_argument('--att_type', type=str, default=args[27]) + parser.add_argument('--att_logit', type=str, default=args[28]) + parser.add_argument('--beta', type=float, default=args[29]) + parser.add_argument('--classifier', type=str, default=args[30]) + parser.add_argument('--clusterer', type=str, default=args[31]) + parser.add_argument('--normalize_adj', type=bool, default=args[32]) + parser.add_argument('--normalize_feats', type=bool, default=args[33]) + parser.add_argument('--anomaly_detector', type=str, default=args[34]) + flags, unknown = parser.parse_known_args() + return flags + + + +from Ghypeddings.classifiers import * +def 
get_classifier(args,X,y): + if(args.classifier == 'svm'): + return SVM(X,y) + elif(args.classifier == 'mlp'): + return mlp(X,y,1,10,seed=args.seed) + elif(args.classifier == 'decision tree'): + return decision_tree(X,y) + elif(args.classifier == 'random forest'): + return random_forest(X,y,args.seed) + elif(args.classifier == 'adaboost'): + return adaboost(X,y,args.seed) + elif(args.classifier == 'knn'): + return KNN(X,y) + elif(args.classifier == 'naive bayes'): + return naive_bayes(X,y) + else: + raise NotImplementedError + +from Ghypeddings.clusterers import * +def get_clustering_algorithm(clusterer,X,y): + if(clusterer == 'agglomerative_clustering'): + return agglomerative_clustering(X,y) + elif(clusterer == 'dbscan'): + return dbscan(X,y) + elif(clusterer == 'fuzzy_c_mean'): + return fuzzy_c_mean(X,y) + elif(clusterer == 'gaussian_mixture'): + return gaussian_mixture(X,y) + elif(clusterer == 'kmeans'): + return kmeans(X,y) + elif(clusterer == 'mean_shift'): + return mean_shift(X,y) + else: + raise NotImplementedError + + +from Ghypeddings.anomaly_detection import * +def get_anomaly_detection_algorithm(algorithm,X,y): + if(algorithm == 'isolation_forest'): + return isolation_forest(X,y) + elif(algorithm == 'one_class_svm'): + return one_class_svm(X,y) + elif(algorithm == 'dbscan'): + return dbscan(X,y) + elif(algorithm == 'kmeans'): + return kmeans(X,y) + elif(algorithm == 'local_outlier_factor'): + return local_outlier_factor(X,y) + else: + raise NotImplementedError \ No newline at end of file diff --git a/HGCN/.gitignore b/HGCN/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..ba0430d26c996e7f078385407f959c96c271087c --- /dev/null +++ b/HGCN/.gitignore @@ -0,0 +1 @@ +__pycache__/ \ No newline at end of file diff --git a/HGCN/__init__.py b/HGCN/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..bfa83a015b9025ddbca2b7c1ed543c66fd3af3d9 --- /dev/null +++ b/HGCN/__init__.py @@ -0,0 +1,2 @@ +from __future__ import print_function +from __future__ import division diff --git a/HGCN/hgcn.py b/HGCN/hgcn.py new file mode 100644 index 0000000000000000000000000000000000000000..84c735f9a3aae0aeb53bf20e2c72fc1ad8762f53 --- /dev/null +++ b/HGCN/hgcn.py @@ -0,0 +1,165 @@ +from __future__ import division +from __future__ import print_function + +import logging +import os +import time + +import numpy as np +import Ghypeddings.HGCN.optimizers as optimizers +import torch +from Ghypeddings.HGCN.models.base_models import NCModel +from Ghypeddings.HGCN.utils.data_utils import process_data +from Ghypeddings.HGCN.utils.train_utils import format_metrics +from Ghypeddings.HGCN.utils.train_utils import create_args +import warnings +warnings.filterwarnings('ignore') + + +class HGCN: + def __init__(self, + adj, + features, + labels, + dim, + c=None, + num_layers=2, + bias=True, + act='relu', + select_manifold='Euclidean', #Euclidean , Hyperboloid + grad_clip=1.0, + optimizer='Adam', #Adam , RiemannianAdam + weight_decay=0.01, + lr=0.1, #0.009 + gamma=0.5, + lr_reduce_freq=200, + cuda=0, + epochs=50, + min_epochs=50, + patience=None, + seed=42, + log_freq=1, + eval_freq=1, + val_prop=0.15, + test_prop=0.15, + double_precision=0, + dropout=0.1, + use_att= True, + alpha=0.5, + local_agg = False, + normalize_adj=False, + normalize_feats=True + ): + + self.args = 
create_args(dim,c,num_layers,bias,act,select_manifold,grad_clip,optimizer,weight_decay,lr,gamma,lr_reduce_freq,cuda,epochs,min_epochs,patience,seed,log_freq,eval_freq,val_prop,test_prop,double_precision,dropout,use_att,alpha,local_agg,normalize_adj,normalize_feats) + self.args.n_nodes = adj.shape[0] + self.args.feat_dim = features.shape[1] + self.args.n_classes = len(np.unique(labels)) + self.data = process_data(self.args,adj,features,labels) + + np.random.seed(self.args.seed) + torch.manual_seed(self.args.seed) + if int(self.args.double_precision): + torch.set_default_dtype(torch.float64) + if int(self.args.cuda) >= 0: + torch.cuda.manual_seed(self.args.seed) + self.args.device = 'cuda:' + str(self.args.cuda) if int(self.args.cuda) >= 0 else 'cpu' + self.args.patience = self.args.epochs if not self.args.patience else int(self.args.patience) + if not self.args.lr_reduce_freq: + self.args.lr_reduce_freq = self.args.epochs + self.model = NCModel(self.args) + self.optimizer = getattr(optimizers, self.args.optimizer)(params=self.model.parameters(), lr=self.args.lr,weight_decay=self.args.weight_decay) + self.lr_scheduler = torch.optim.lr_scheduler.StepLR( + self.optimizer, + step_size=int(self.args.lr_reduce_freq), + gamma=float(self.args.gamma) + ) + if self.args.cuda is not None and int(self.args.cuda) >= 0 : + os.environ['CUDA_VISIBLE_DEVICES'] = str(self.args.cuda) + self.model = self.model.to(self.args.device) + for x, val in self.data.items(): + if torch.is_tensor(self.data[x]): + self.data[x] = self.data[x].to(self.args.device) + self.best_emb = None + + def fit(self): + logging.getLogger().setLevel(logging.INFO) + logging.info(f'Using: {self.args.device}') + logging.info(str(self.model)) + tot_params = sum([np.prod(p.size()) for p in self.model.parameters()]) + logging.info(f"Total number of parameters: {tot_params}") + + t_total = time.time() + counter = 0 + best_val_metrics = self.model.init_metric_dict() + + best_losses = [] + train_losses = [] + val_losses = [] + + for epoch in range(self.args.epochs): + t = time.time() + self.model.train() + self.optimizer.zero_grad() + embeddings = self.model.encode(self.data['features'], self.data['adj_train_norm']) + train_metrics = self.model.compute_metrics(embeddings, self.data, 'train') + train_metrics['loss'].backward() + if self.args.grad_clip is not None: + max_norm = float(self.args.grad_clip) + all_params = list(self.model.parameters()) + for param in all_params: + torch.nn.utils.clip_grad_norm_(param, max_norm) + self.optimizer.step() + self.lr_scheduler.step() + + train_losses.append(train_metrics['loss'].item()) + if(len(best_losses) == 0): + best_losses.append(train_losses[0]) + elif (best_losses[-1] > train_losses[-1]): + best_losses.append(train_losses[-1]) + else: + best_losses.append(best_losses[-1]) + + if (epoch + 1) % self.args.log_freq == 0: + logging.info(" ".join(['Epoch: {:04d}'.format(epoch + 1), + 'lr: {}'.format(self.lr_scheduler.get_lr()[0]), + format_metrics(train_metrics, 'train'), + 'time: {:.4f}s'.format(time.time() - t) + ])) + + if (epoch + 1) % self.args.eval_freq == 0: + self.model.eval() + embeddings = self.model.encode(self.data['features'], self.data['adj_train_norm']) + val_metrics = self.model.compute_metrics(embeddings, self.data, 'val') + val_losses.append(val_metrics['loss'].item()) + if (epoch + 1) % self.args.log_freq == 0: + logging.info(" ".join(['Epoch: {:04d}'.format(epoch + 1), format_metrics(val_metrics, 'val')])) + if self.model.has_improved(best_val_metrics, val_metrics): + self.best_emb = 
embeddings + best_val_metrics = val_metrics + counter = 0 + else: + counter += 1 + if counter == self.args.patience and epoch > self.args.min_epochs: + logging.info("Early stopping") + break + + logging.info("Training Finished!") + logging.info("Total time elapsed: {:.4f}s".format(time.time() - t_total)) + return {'train':train_losses,'best':best_losses,'val':val_losses},best_val_metrics['acc'],best_val_metrics['f1'],best_val_metrics['recall'],best_val_metrics['precision'],best_val_metrics['roc_auc'],time.time() - t_total + + def predict(self): + self.model.eval() + embeddings = self.model.encode(self.data['features'], self.data['adj_train_norm']) + val_metrics = self.model.compute_metrics(embeddings, self.data, 'test') + return val_metrics['loss'].item(),val_metrics['acc'],val_metrics['f1'],val_metrics['recall'],val_metrics['precision'],val_metrics['roc_auc'] + + def save_embeddings(self): + c = self.model.decoder.c + tb_embeddings_euc = self.manifold.proj_tan0(self.model.manifold.logmap0(self.best_emb,c),c) + for_classification_hyp = np.hstack((self.best_emb.cpu().detach().numpy(),self.data['labels'].cpu().reshape(-1,1))) + for_classification_euc = np.hstack((tb_embeddings_euc.cpu().detach().numpy(),self.data['labels'].cpu().reshape(-1,1))) + hyp_file_path = os.path.join(os.getcwd(),'hgcn_embeddings_hyp.csv') + euc_file_path = os.path.join(os.getcwd(),'hgcn_embeddings_euc.csv') + np.savetxt(hyp_file_path, for_classification_hyp, delimiter=',') + np.savetxt(euc_file_path, for_classification_euc, delimiter=',') \ No newline at end of file diff --git a/HGCN/layers/__init__.py b/HGCN/layers/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/HGCN/layers/att_layers.py b/HGCN/layers/att_layers.py new file mode 100644 index 0000000000000000000000000000000000000000..8414d8d48dffa4dca79e38ebeacc54f480b4def1 --- /dev/null +++ b/HGCN/layers/att_layers.py @@ -0,0 +1,144 @@ +"""Attention layers (some modules are copied from https://github.com/Diego999/pyGAT.""" +import numpy as np +import torch +import torch.nn as nn +import torch.nn.functional as F + + +class DenseAtt(nn.Module): + def __init__(self, in_features, dropout): + super(DenseAtt, self).__init__() + self.dropout = dropout + self.linear = nn.Linear(2 * in_features, 1, bias=True) + self.in_features = in_features + + def forward (self, x, adj): + n = x.size(0) + # n x 1 x d + x_left = torch.unsqueeze(x, 1) + x_left = x_left.expand(-1, n, -1) + # 1 x n x d + x_right = torch.unsqueeze(x, 0) + x_right = x_right.expand(n, -1, -1) + + x_cat = torch.cat((x_left, x_right), dim=2) + att_adj = self.linear(x_cat).squeeze() + att_adj = F.sigmoid(att_adj) + att_adj = torch.mul(adj.to_dense(), att_adj) + return att_adj + + +class SpecialSpmmFunction(torch.autograd.Function): + """Special function for only sparse region backpropataion layer.""" + + @staticmethod + def forward(ctx, indices, values, shape, b): + assert indices.requires_grad == False + a = torch.sparse_coo_tensor(indices, values, shape) + ctx.save_for_backward(a, b) + ctx.N = shape[0] + return torch.matmul(a, b) + + @staticmethod + def backward(ctx, grad_output): + a, b = ctx.saved_tensors + grad_values = grad_b = None + if ctx.needs_input_grad[1]: + grad_a_dense = grad_output.matmul(b.t()) + edge_idx = a._indices()[0, :] * ctx.N + a._indices()[1, :] + grad_values = grad_a_dense.view(-1)[edge_idx] + if ctx.needs_input_grad[3]: + grad_b = a.t().matmul(grad_output) + return None, grad_values, None, grad_b + + 
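+# Minimal usage sketch (illustrative only; the underscore-prefixed tensors
+# below are made-up examples, not part of the layer API): SpecialSpmmFunction
+# multiplies a sparse attention matrix, given as (indices, values, shape),
+# with a dense feature matrix while routing gradients back to the edge values.
+if __name__ == "__main__":
+    _idx = torch.tensor([[0, 1, 1], [1, 0, 2]])   # 3 directed edges of a 3-node graph
+    _val = torch.rand(3, requires_grad=True)      # one attention weight per edge
+    _feat = torch.rand(3, 4)                      # node features, N x d
+    _out = SpecialSpmmFunction.apply(_idx, _val, torch.Size([3, 3]), _feat)
+    _out.sum().backward()                         # gradients flow to _val only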
+class SpecialSpmm(nn.Module): + def forward(self, indices, values, shape, b): + return SpecialSpmmFunction.apply(indices, values, shape, b) + + +class SpGraphAttentionLayer(nn.Module): + """ + Sparse version GAT layer, similar to https://arxiv.org/abs/1710.10903 + """ + + def __init__(self, in_features, out_features, dropout, alpha, activation): + super(SpGraphAttentionLayer, self).__init__() + self.in_features = in_features + self.out_features = out_features + self.alpha = alpha + + self.W = nn.Parameter(torch.zeros(size=(in_features, out_features))) + nn.init.xavier_normal_(self.W.data, gain=1.414) + + self.a = nn.Parameter(torch.zeros(size=(1, 2 * out_features))) + nn.init.xavier_normal_(self.a.data, gain=1.414) + + self.dropout = nn.Dropout(dropout) + self.leakyrelu = nn.LeakyReLU(self.alpha) + self.special_spmm = SpecialSpmm() + self.act = activation + + def forward(self, input, adj): + N = input.size()[0] + edge = adj._indices() + + h = torch.mm(input, self.W) + # h: N x out + assert not torch.isnan(h).any() + + # Self-attention on the nodes - Shared attention mechanism + edge_h = torch.cat((h[edge[0, :], :], h[edge[1, :], :]), dim=1).t() + # edge: 2*D x E + + edge_e = torch.exp(-self.leakyrelu(self.a.mm(edge_h).squeeze())) + assert not torch.isnan(edge_e).any() + # edge_e: E + + ones = torch.ones(size=(N, 1)) + if h.is_cuda: + ones = ones.cuda() + e_rowsum = self.special_spmm(edge, edge_e, torch.Size([N, N]), ones) + # e_rowsum: N x 1 + + edge_e = self.dropout(edge_e) + # edge_e: E + + h_prime = self.special_spmm(edge, edge_e, torch.Size([N, N]), h) + assert not torch.isnan(h_prime).any() + # h_prime: N x out + + h_prime = h_prime.div(e_rowsum) + # h_prime: N x out + assert not torch.isnan(h_prime).any() + return self.act(h_prime) + + def __repr__(self): + return self.__class__.__name__ + ' (' + str(self.in_features) + ' -> ' + str(self.out_features) + ')' + + +class GraphAttentionLayer(nn.Module): + def __init__(self, input_dim, output_dim, dropout, activation, alpha, nheads, concat): + """Sparse version of GAT.""" + super(GraphAttentionLayer, self).__init__() + self.dropout = dropout + self.output_dim = output_dim + self.attentions = [SpGraphAttentionLayer(input_dim, + output_dim, + dropout=dropout, + alpha=alpha, + activation=activation) for _ in range(nheads)] + self.concat = concat + for i, attention in enumerate(self.attentions): + self.add_module('attention_{}'.format(i), attention) + + def forward(self, input): + x, adj = input + x = F.dropout(x, self.dropout, training=self.training) + if self.concat: + h = torch.cat([att(x, adj) for att in self.attentions], dim=1) + else: + h_cat = torch.cat([att(x, adj).view((-1, self.output_dim, 1)) for att in self.attentions], dim=2) + h = torch.mean(h_cat, dim=2) + h = F.dropout(h, self.dropout, training=self.training) + return (h, adj) diff --git a/HGCN/layers/hyp_layers.py b/HGCN/layers/hyp_layers.py new file mode 100644 index 0000000000000000000000000000000000000000..0913411c986dbd2b70f2f8e8a5ce216e816cb2be --- /dev/null +++ b/HGCN/layers/hyp_layers.py @@ -0,0 +1,158 @@ +"""Hyperbolic layers.""" +import math + +import torch +import torch.nn as nn +import torch.nn.functional as F +import torch.nn.init as init +from torch.nn.modules.module import Module + +from Ghypeddings.HGCN.layers.att_layers import DenseAtt + + +def get_dim_act_curv(args): + """ + Helper function to get dimension and activation at every layer. 
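+ Returns (dims, acts, curvatures): per-layer dimensions, activation functions and one curvature per hyperbolic layer; curvatures are trainable nn.Parameters when args.c is None and fixed tensors otherwise.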
+ :param args: + :return: + """ + if not args.act: + act = lambda x: x + else: + act = getattr(F, args.act) + acts = [act] * (args.num_layers - 1) + dims = [args.feat_dim] + ([args.dim] * (args.num_layers - 1)) + n_curvatures = args.num_layers - 1 + if args.c is None: + # create list of trainable curvature parameters + curvatures = [nn.Parameter(torch.Tensor([1.])) for _ in range(n_curvatures)] + else: + # fixed curvature + curvatures = [torch.tensor([args.c]) for _ in range(n_curvatures)] + if not args.cuda == -1: + curvatures = [curv.to(args.device) for curv in curvatures] + return dims, acts, curvatures + + +class HyperbolicGraphConvolution(nn.Module): + """ + Hyperbolic graph convolution layer. + """ + + def __init__(self, manifold, in_features, out_features, c_in, c_out, dropout, act, use_bias, use_att, local_agg): + super(HyperbolicGraphConvolution, self).__init__() + self.linear = HypLinear(manifold, in_features, out_features, c_in, dropout, use_bias) + self.agg = HypAgg(manifold, c_in, out_features, dropout, use_att, local_agg) + self.hyp_act = HypAct(manifold, c_in, c_out, act) + + def forward(self, input): + x, adj = input + h = self.linear.forward(x) + h = self.agg.forward(h, adj) + h = self.hyp_act.forward(h) + output = h, adj + return output + + +class HypLinear(nn.Module): + """ + Hyperbolic linear layer. + """ + + def __init__(self, manifold, in_features, out_features, c, dropout, use_bias): + super(HypLinear, self).__init__() + self.manifold = manifold + self.in_features = in_features + self.out_features = out_features + self.c = c + self.dropout = dropout + self.use_bias = use_bias + self.bias = nn.Parameter(torch.Tensor(out_features)) + self.weight = nn.Parameter(torch.Tensor(out_features, in_features)) + self.reset_parameters() + + def reset_parameters(self): + init.xavier_uniform_(self.weight, gain=math.sqrt(2)) + init.constant_(self.bias, 0) + + def forward(self, x): + drop_weight = F.dropout(self.weight, self.dropout, training=self.training) + mv = self.manifold.mobius_matvec(drop_weight, x, self.c) + res = self.manifold.proj(mv, self.c) + if self.use_bias: + bias = self.manifold.proj_tan0(self.bias.view(1, -1), self.c) + hyp_bias = self.manifold.expmap0(bias, self.c) + hyp_bias = self.manifold.proj(hyp_bias, self.c) + res = self.manifold.mobius_add(res, hyp_bias, c=self.c) + res = self.manifold.proj(res, self.c) + return res + + def extra_repr(self): + return 'in_features={}, out_features={}, c={}'.format( + self.in_features, self.out_features, self.c + ) + + +class HypAgg(Module): + """ + Hyperbolic aggregation layer. 
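+ Aggregation happens in the tangent space at the origin: inputs are mapped with logmap0, combined either by sparse matmul with the adjacency or by DenseAtt attention weights (optionally in the local tangent space of each point), and mapped back to the manifold with expmap0/expmap followed by proj.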
+ """ + + def __init__(self, manifold, c, in_features, dropout, use_att, local_agg): + super(HypAgg, self).__init__() + self.manifold = manifold + self.c = c + + self.in_features = in_features + self.dropout = dropout + self.local_agg = local_agg + self.use_att = use_att + if self.use_att: + self.att = DenseAtt(in_features, dropout) + + def forward(self, x, adj): + x_tangent = self.manifold.logmap0(x, c=self.c) + if self.use_att: + if self.local_agg: + x_local_tangent = [] + for i in range(x.size(0)): + x_local_tangent.append(self.manifold.logmap(x[i], x, c=self.c)) + x_local_tangent = torch.stack(x_local_tangent, dim=0) + adj_att = self.att(x_tangent, adj) + att_rep = adj_att.unsqueeze(-1) * x_local_tangent + support_t = torch.sum(adj_att.unsqueeze(-1) * x_local_tangent, dim=1) + output = self.manifold.proj(self.manifold.expmap(x, support_t, c=self.c), c=self.c) + return output + else: + adj_att = self.att(x_tangent, adj) + support_t = torch.matmul(adj_att, x_tangent) + else: + support_t = torch.spmm(adj, x_tangent) + output = self.manifold.proj(self.manifold.expmap0(support_t, c=self.c), c=self.c) + return output + + def extra_repr(self): + return 'c={}'.format(self.c) + + +class HypAct(Module): + """ + Hyperbolic activation layer. + """ + + def __init__(self, manifold, c_in, c_out, act): + super(HypAct, self).__init__() + self.manifold = manifold + self.c_in = c_in + self.c_out = c_out + self.act = act + + def forward(self, x): + xt = self.act(self.manifold.logmap0(x, c=self.c_in)) + xt = self.manifold.proj_tan0(xt, c=self.c_out) + return self.manifold.proj(self.manifold.expmap0(xt, c=self.c_out), c=self.c_out) + + def extra_repr(self): + return 'c_in={}, c_out={}'.format( + self.c_in, self.c_out + ) diff --git a/HGCN/layers/layers.py b/HGCN/layers/layers.py new file mode 100644 index 0000000000000000000000000000000000000000..c2eeb70eafdda7141eb6047a7ffbb37c14f6a910 --- /dev/null +++ b/HGCN/layers/layers.py @@ -0,0 +1,71 @@ +"""Euclidean layers.""" +import math + +import torch +import torch.nn as nn +import torch.nn.functional as F +from torch.nn.modules.module import Module +from torch.nn.parameter import Parameter + +class Linear(Module): + """ + Simple Linear layer with dropout. + """ + + def __init__(self, in_features, out_features, dropout, act, use_bias): + super(Linear, self).__init__() + self.dropout = dropout + self.linear = nn.Linear(in_features, out_features, use_bias) + self.act = act + + def forward(self, x): + hidden = self.linear.forward(x) + hidden = F.dropout(hidden, self.dropout, training=self.training) + out = self.act(hidden) + return out + + + +def get_dim_act(args): + """ + Helper function to get dimension and activation at every layer. + :param args: + :return: + """ + if not args.act: + act = lambda x: x + else: + act = getattr(F, args.act) + acts = [act] * (args.num_layers - 1) + dims = [args.feat_dim] + ([args.dim] * (args.num_layers - 1)) + return dims, acts + + +class GraphConvolution(Module): + """ + Simple GCN layer. 
+ """ + + def __init__(self, in_features, out_features, dropout, act, use_bias): + super(GraphConvolution, self).__init__() + self.dropout = dropout + self.linear = nn.Linear(in_features, out_features, use_bias) + self.act = act + self.in_features = in_features + self.out_features = out_features + + def forward(self, input): + x, adj = input + hidden = self.linear.forward(x) + hidden = F.dropout(hidden, self.dropout, training=self.training) + if adj.is_sparse: + support = torch.spmm(adj, hidden) + else: + support = torch.mm(adj, hidden) + output = self.act(support), adj + return output + + def extra_repr(self): + return 'input_dim={}, output_dim={}'.format( + self.in_features, self.out_features + ) diff --git a/HGCN/manifolds/__init__.py b/HGCN/manifolds/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..bd4b8d81f23de1d855c70804d1e1fb9441cdc960 --- /dev/null +++ b/HGCN/manifolds/__init__.py @@ -0,0 +1,4 @@ +from Ghypeddings.HGCN.manifolds.base import ManifoldParameter +from Ghypeddings.HGCN.manifolds.hyperboloid import Hyperboloid +from Ghypeddings.HGCN.manifolds.euclidean import Euclidean +from Ghypeddings.HGCN.manifolds.poincare import PoincareBall diff --git a/HGCN/manifolds/base.py b/HGCN/manifolds/base.py new file mode 100644 index 0000000000000000000000000000000000000000..925d4a6b2a59dae47a3a8ca33a7dcdcb20e0f08e --- /dev/null +++ b/HGCN/manifolds/base.py @@ -0,0 +1,88 @@ +"""Base manifold.""" + +from torch.nn import Parameter + + +class Manifold(object): + """ + Abstract class to define operations on a manifold. + """ + + def __init__(self): + super().__init__() + self.eps = 10e-8 + + def sqdist(self, p1, p2, c): + """Squared distance between pairs of points.""" + raise NotImplementedError + + def egrad2rgrad(self, p, dp, c): + """Converts Euclidean Gradient to Riemannian Gradients.""" + raise NotImplementedError + + def proj(self, p, c): + """Projects point p on the manifold.""" + raise NotImplementedError + + def proj_tan(self, u, p, c): + """Projects u on the tangent space of p.""" + raise NotImplementedError + + def proj_tan0(self, u, c): + """Projects u on the tangent space of the origin.""" + raise NotImplementedError + + def expmap(self, u, p, c): + """Exponential map of u at point p.""" + raise NotImplementedError + + def logmap(self, p1, p2, c): + """Logarithmic map of point p1 at point p2.""" + raise NotImplementedError + + def expmap0(self, u, c): + """Exponential map of u at the origin.""" + raise NotImplementedError + + def logmap0(self, p, c): + """Logarithmic map of point p at the origin.""" + raise NotImplementedError + + def mobius_add(self, x, y, c, dim=-1): + """Adds points x and y.""" + raise NotImplementedError + + def mobius_matvec(self, m, x, c): + """Performs hyperboic martrix-vector multiplication.""" + raise NotImplementedError + + def init_weights(self, w, c, irange=1e-5): + """Initializes random weigths on the manifold.""" + raise NotImplementedError + + def inner(self, p, c, u, v=None, keepdim=False): + """Inner product for tangent vectors at point x.""" + raise NotImplementedError + + def ptransp(self, x, y, u, c): + """Parallel transport of u from x to y.""" + raise NotImplementedError + + def ptransp0(self, x, u, c): + """Parallel transport of u from the origin to y.""" + raise NotImplementedError + + +class ManifoldParameter(Parameter): + """ + Subclass of torch.nn.Parameter for Riemannian optimization. 
+ """ + def __new__(cls, data, requires_grad, manifold, c): + return Parameter.__new__(cls, data, requires_grad) + + def __init__(self, data, requires_grad, manifold, c): + self.c = c + self.manifold = manifold + + def __repr__(self): + return '{} Parameter containing:\n'.format(self.manifold.name) + super(Parameter, self).__repr__() diff --git a/HGCN/manifolds/euclidean.py b/HGCN/manifolds/euclidean.py new file mode 100644 index 0000000000000000000000000000000000000000..4ec5e38b7ff2c01ef8fc33337d26a08dd9d3cfa9 --- /dev/null +++ b/HGCN/manifolds/euclidean.py @@ -0,0 +1,67 @@ +"""Euclidean manifold.""" + +from Ghypeddings.HGCN.manifolds.base import Manifold + + +class Euclidean(Manifold): + """ + Euclidean Manifold class. + """ + + def __init__(self): + super(Euclidean, self).__init__() + self.name = 'Euclidean' + + def normalize(self, p): + dim = p.size(-1) + p.view(-1, dim).renorm_(2, 0, 1.) + return p + + def sqdist(self, p1, p2, c): + return (p1 - p2).pow(2).sum(dim=-1) + + def egrad2rgrad(self, p, dp, c): + return dp + + def proj(self, p, c): + return p + + def proj_tan(self, u, p, c): + return u + + def proj_tan0(self, u, c): + return u + + def expmap(self, u, p, c): + return p + u + + def logmap(self, p1, p2, c): + return p2 - p1 + + def expmap0(self, u, c): + return u + + def logmap0(self, p, c): + return p + + def mobius_add(self, x, y, c, dim=-1): + return x + y + + def mobius_matvec(self, m, x, c): + mx = x @ m.transpose(-1, -2) + return mx + + def init_weights(self, w, c, irange=1e-5): + w.data.uniform_(-irange, irange) + return w + + def inner(self, p, c, u, v=None, keepdim=False): + if v is None: + v = u + return (u * v).sum(dim=-1, keepdim=keepdim) + + def ptransp(self, x, y, v, c): + return v + + def ptransp0(self, x, v, c): + return x + v diff --git a/HGCN/manifolds/hyperboloid.py b/HGCN/manifolds/hyperboloid.py new file mode 100644 index 0000000000000000000000000000000000000000..d0147001c24330e86264bbb009ff2a6a2c8986e0 --- /dev/null +++ b/HGCN/manifolds/hyperboloid.py @@ -0,0 +1,155 @@ +"""Hyperboloid manifold.""" + +import torch + +from Ghypeddings.HGCN.manifolds.base import Manifold +from Ghypeddings.HGCN.utils.math_utils import arcosh, cosh, sinh + + +class Hyperboloid(Manifold): + """ + Hyperboloid manifold class. + + We use the following convention: -x0^2 + x1^2 + ... + xd^2 = -K + + c = 1 / K is the hyperbolic curvature. + """ + + def __init__(self): + super(Hyperboloid, self).__init__() + self.name = 'Hyperboloid' + self.eps = {torch.float32: 1e-7, torch.float64: 1e-15} + self.min_norm = 1e-15 + self.max_norm = 1e6 + + def minkowski_dot(self, x, y, keepdim=True): + res = torch.sum(x * y, dim=-1) - 2 * x[..., 0] * y[..., 0] + if keepdim: + res = res.view(res.shape + (1,)) + return res + + def minkowski_norm(self, u, keepdim=True): + dot = self.minkowski_dot(u, u, keepdim=keepdim) + return torch.sqrt(torch.clamp(dot, min=self.eps[u.dtype])) + + def sqdist(self, x, y, c): + K = 1. / c + prod = self.minkowski_dot(x, y) + theta = torch.clamp(-prod / K, min=1.0 + self.eps[x.dtype]) + sqdist = K * arcosh(theta) ** 2 + # clamp distance to avoid nans in Fermi-Dirac decoder + return torch.clamp(sqdist, max=50.0) + + def proj(self, x, c): + K = 1. / c + d = x.size(-1) - 1 + y = x.narrow(-1, 1, d) + y_sqnorm = torch.norm(y, p=2, dim=1, keepdim=True) ** 2 + mask = torch.ones_like(x) + mask[:, 0] = 0 + vals = torch.zeros_like(x) + vals[:, 0:1] = torch.sqrt(torch.clamp(K + y_sqnorm, min=self.eps[x.dtype])) + return vals + mask * x + + def proj_tan(self, u, x, c): + K = 1. 
/ c + d = x.size(-1) - 1 + ux = torch.sum(x.narrow(-1, 1, d) * u.narrow(-1, 1, d), dim=1, keepdim=True) + mask = torch.ones_like(u) + mask[:, 0] = 0 + vals = torch.zeros_like(u) + if(len(x.size()) == 1): + x = x.unsqueeze(0) + vals[:, 0:1] = ux / torch.clamp(x[:, 0:1], min=self.eps[x.dtype]) + return vals + mask * u + + def proj_tan0(self, u, c): + narrowed = u.narrow(-1, 0, 1) + vals = torch.zeros_like(u) + vals[:, 0:1] = narrowed + return u - vals + + def expmap(self, u, x, c): + K = 1. / c + sqrtK = K ** 0.5 + normu = self.minkowski_norm(u) + normu = torch.clamp(normu, max=self.max_norm) + theta = normu / sqrtK + theta = torch.clamp(theta, min=self.min_norm) + result = cosh(theta) * x + sinh(theta) * u / theta + return self.proj(result, c) + + def logmap(self, x, y, c): + K = 1. / c + xy = torch.clamp(self.minkowski_dot(x, y) + K, max=-self.eps[x.dtype]) - K + u = y + xy * x * c + normu = self.minkowski_norm(u) + normu = torch.clamp(normu, min=self.min_norm) + dist = self.sqdist(x, y, c) ** 0.5 + result = dist * u / normu + return self.proj_tan(result, x, c) + + def expmap0(self, u, c): + K = 1. / c + sqrtK = K ** 0.5 + d = u.size(-1) - 1 + x = u.narrow(-1, 1, d).view(-1, d) + x_norm = torch.norm(x, p=2, dim=1, keepdim=True) + x_norm = torch.clamp(x_norm, min=self.min_norm) + theta = x_norm / sqrtK + res = torch.ones_like(u) + res[:, 0:1] = sqrtK * cosh(theta) + res[:, 1:] = sqrtK * sinh(theta) * x / x_norm + return self.proj(res, c) + + def logmap0(self, x, c): + K = 1. / c + sqrtK = K ** 0.5 + d = x.size(-1) - 1 + y = x.narrow(-1, 1, d).view(-1, d) + y_norm = torch.norm(y, p=2, dim=1, keepdim=True) + y_norm = torch.clamp(y_norm, min=self.min_norm) + res = torch.zeros_like(x) + theta = torch.clamp(x[:, 0:1] / sqrtK, min=1.0 + self.eps[x.dtype]) + res[:, 1:] = sqrtK * arcosh(theta) * y / y_norm + return res + + def mobius_add(self, x, y, c): + u = self.logmap0(y, c) + v = self.ptransp0(x, u, c) + return self.expmap(v, x, c) + + def mobius_matvec(self, m, x, c): + u = self.logmap0(x, c) + mu = u @ m.transpose(-1, -2) + return self.expmap0(mu, c) + + def ptransp(self, x, y, u, c): + logxy = self.logmap(x, y, c) + logyx = self.logmap(y, x, c) + sqdist = torch.clamp(self.sqdist(x, y, c), min=self.min_norm) + alpha = self.minkowski_dot(logxy, u) / sqdist + res = u - alpha * (logxy + logyx) + return self.proj_tan(res, y, c) + + def ptransp0(self, x, u, c): + K = 1. / c + sqrtK = K ** 0.5 + x0 = x.narrow(-1, 0, 1) + d = x.size(-1) - 1 + y = x.narrow(-1, 1, d) + y_norm = torch.clamp(torch.norm(y, p=2, dim=1, keepdim=True), min=self.min_norm) + y_normalized = y / y_norm + v = torch.ones_like(x) + v[:, 0:1] = - y_norm + v[:, 1:] = (sqrtK - x0) * y_normalized + alpha = torch.sum(y_normalized * u[:, 1:], dim=1, keepdim=True) / sqrtK + res = u - alpha * v + return self.proj_tan(res, x, c) + + def to_poincare(self, x, c): + K = 1. / c + sqrtK = K ** 0.5 + d = x.size(-1) - 1 + return sqrtK * x.narrow(-1, 1, d) / (x[:, 0:1] + sqrtK) + diff --git a/HGCN/manifolds/poincare.py b/HGCN/manifolds/poincare.py new file mode 100644 index 0000000000000000000000000000000000000000..601b5808980bfbb3dcff40c5354f13a1ca37e67c --- /dev/null +++ b/HGCN/manifolds/poincare.py @@ -0,0 +1,145 @@ +"""Poincare ball manifold.""" + +import torch + +from Ghypeddings.HGCN.manifolds.base import Manifold +from Ghypeddings.HGCN.utils.math_utils import artanh, tanh + + +class PoincareBall(Manifold): + """ + PoicareBall Manifold class. + + We use the following convention: x0^2 + x1^2 + ... 
+ xd^2 < 1 / c + + Note that 1/sqrt(c) is the Poincare ball radius. + + """ + + def __init__(self, ): + super(PoincareBall, self).__init__() + self.name = 'PoincareBall' + self.min_norm = 1e-15 + self.eps = {torch.float32: 4e-3, torch.float64: 1e-5} + + def sqdist(self, p1, p2, c): + sqrt_c = c ** 0.5 + dist_c = artanh( + sqrt_c * self.mobius_add(-p1, p2, c, dim=-1).norm(dim=-1, p=2, keepdim=False) + ) + dist = dist_c * 2 / sqrt_c + return dist ** 2 + + def _lambda_x(self, x, c): + x_sqnorm = torch.sum(x.data.pow(2), dim=-1, keepdim=True) + return 2 / (1. - c * x_sqnorm).clamp_min(self.min_norm) + + def egrad2rgrad(self, p, dp, c): + lambda_p = self._lambda_x(p, c) + dp /= lambda_p.pow(2) + return dp + + def proj(self, x, c): + norm = torch.clamp_min(x.norm(dim=-1, keepdim=True, p=2), self.min_norm) + maxnorm = (1 - self.eps[x.dtype]) / (c ** 0.5) + cond = norm > maxnorm + projected = x / norm * maxnorm + return torch.where(cond, projected, x) + + def proj_tan(self, u, p, c): + return u + + def proj_tan0(self, u, c): + return u + + def expmap(self, u, p, c): + sqrt_c = c ** 0.5 + u_norm = u.norm(dim=-1, p=2, keepdim=True).clamp_min(self.min_norm) + second_term = ( + tanh(sqrt_c / 2 * self._lambda_x(p, c) * u_norm) + * u + / (sqrt_c * u_norm) + ) + gamma_1 = self.mobius_add(p, second_term, c) + return gamma_1 + + def logmap(self, p1, p2, c): + sub = self.mobius_add(-p1, p2, c) + sub_norm = sub.norm(dim=-1, p=2, keepdim=True).clamp_min(self.min_norm) + lam = self._lambda_x(p1, c) + sqrt_c = c ** 0.5 + return 2 / sqrt_c / lam * artanh(sqrt_c * sub_norm) * sub / sub_norm + + def expmap0(self, u, c): + sqrt_c = c ** 0.5 + u_norm = torch.clamp_min(u.norm(dim=-1, p=2, keepdim=True), self.min_norm) + gamma_1 = tanh(sqrt_c * u_norm) * u / (sqrt_c * u_norm) + return gamma_1 + + def logmap0(self, p, c): + sqrt_c = c ** 0.5 + p_norm = p.norm(dim=-1, p=2, keepdim=True).clamp_min(self.min_norm) + scale = 1. 
/ sqrt_c * artanh(sqrt_c * p_norm) / p_norm + return scale * p + + def mobius_add(self, x, y, c, dim=-1): + x2 = x.pow(2).sum(dim=dim, keepdim=True) + y2 = y.pow(2).sum(dim=dim, keepdim=True) + xy = (x * y).sum(dim=dim, keepdim=True) + num = (1 + 2 * c * xy + c * y2) * x + (1 - c * x2) * y + denom = 1 + 2 * c * xy + c ** 2 * x2 * y2 + return num / denom.clamp_min(self.min_norm) + + def mobius_matvec(self, m, x, c): + sqrt_c = c ** 0.5 + x_norm = x.norm(dim=-1, keepdim=True, p=2).clamp_min(self.min_norm) + mx = x @ m.transpose(-1, -2) + mx_norm = mx.norm(dim=-1, keepdim=True, p=2).clamp_min(self.min_norm) + res_c = tanh(mx_norm / x_norm * artanh(sqrt_c * x_norm)) * mx / (mx_norm * sqrt_c) + cond = (mx == 0).prod(-1, keepdim=True, dtype=torch.uint8) + res_0 = torch.zeros(1, dtype=res_c.dtype, device=res_c.device) + res = torch.where(cond, res_0, res_c) + return res + + def init_weights(self, w, c, irange=1e-5): + w.data.uniform_(-irange, irange) + return w + + def _gyration(self, u, v, w, c, dim: int = -1): + u2 = u.pow(2).sum(dim=dim, keepdim=True) + v2 = v.pow(2).sum(dim=dim, keepdim=True) + uv = (u * v).sum(dim=dim, keepdim=True) + uw = (u * w).sum(dim=dim, keepdim=True) + vw = (v * w).sum(dim=dim, keepdim=True) + c2 = c ** 2 + a = -c2 * uw * v2 + c * vw + 2 * c2 * uv * vw + b = -c2 * vw * u2 - c * uw + d = 1 + 2 * c * uv + c2 * u2 * v2 + return w + 2 * (a * u + b * v) / d.clamp_min(self.min_norm) + + def inner(self, x, c, u, v=None, keepdim=False): + if v is None: + v = u + lambda_x = self._lambda_x(x, c) + return lambda_x ** 2 * (u * v).sum(dim=-1, keepdim=keepdim) + + def ptransp(self, x, y, u, c): + lambda_x = self._lambda_x(x, c) + lambda_y = self._lambda_x(y, c) + return self._gyration(y, -x, u, c) * lambda_x / lambda_y + + def ptransp_(self, x, y, u, c): + lambda_x = self._lambda_x(x, c) + lambda_y = self._lambda_x(y, c) + return self._gyration(y, -x, u, c) * lambda_x / lambda_y + + def ptransp0(self, x, u, c): + lambda_x = self._lambda_x(x, c) + return 2 * u / lambda_x.clamp_min(self.min_norm) + + def to_hyperboloid(self, x, c): + K = 1./ c + sqrtK = K ** 0.5 + sqnorm = torch.norm(x, p=2, dim=1, keepdim=True) ** 2 + return sqrtK * torch.cat([K + sqnorm, 2 * sqrtK * x], dim=1) / (K - sqnorm) + diff --git a/HGCN/models/__init__.py b/HGCN/models/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/HGCN/models/base_models.py b/HGCN/models/base_models.py new file mode 100644 index 0000000000000000000000000000000000000000..00f5628c36362d96a94bd153b0e5abfb44ad20ff --- /dev/null +++ b/HGCN/models/base_models.py @@ -0,0 +1,85 @@ +"""Base model class.""" + +import numpy as np +from sklearn.metrics import roc_auc_score, average_precision_score +import torch +import torch.nn as nn +import torch.nn.functional as F + +import Ghypeddings.HGCN.manifolds as manifolds +import Ghypeddings.HGCN.models.encoders as encoders +from Ghypeddings.HGCN.models.decoders import model2decoder +from Ghypeddings.HGCN.utils.eval_utils import acc_f1 + + +class BaseModel(nn.Module): + """ + Base model for graph embedding tasks. 
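+ Builds the manifold named by args.select_manifold, keeps the curvature c fixed or learns it when args.c is None, adds the extra 0-th coordinate required by the Hyperboloid model, and instantiates the encoder class named by args.model.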
+ """ + + def __init__(self, args): + super(BaseModel, self).__init__() + self.manifold_name = args.select_manifold + if args.c is not None: + self.c = torch.tensor([args.c]) + if not args.cuda == -1: + self.c = self.c.to(args.device) + else: + self.c = nn.Parameter(torch.Tensor([1.])) + self.manifold = getattr(manifolds, self.manifold_name)() + if self.manifold.name == 'Hyperboloid': + args.feat_dim = args.feat_dim + 1 + self.nnodes = args.n_nodes + self.encoder = getattr(encoders, args.model)(self.c, args) + + def encode(self, x, adj): + if self.manifold.name == 'Hyperboloid': + o = torch.zeros_like(x) + x = torch.cat([o[:, 0:1], x], dim=1) + h = self.encoder.encode(x, adj) + return h + + def compute_metrics(self, embeddings, data, split): + raise NotImplementedError + + def init_metric_dict(self): + raise NotImplementedError + + def has_improved(self, m1, m2): + raise NotImplementedError + + +class NCModel(BaseModel): + """ + Base model for node classification task. + """ + + def __init__(self, args): + super(NCModel, self).__init__(args) + self.decoder = model2decoder(self.c, args) + if args.n_classes > 2: + self.f1_average = 'micro' + else: + self.f1_average = 'binary' + + self.weights = torch.Tensor([1.] * args.n_classes) + if not args.cuda == -1: + self.weights = self.weights.to(args.device) + + def decode(self, h, adj, idx): + output = self.decoder.decode(h, adj) + return F.log_softmax(output[idx], dim=1) + + def compute_metrics(self, embeddings, data, split): + idx = data[f'idx_{split}'] + output = self.decode(embeddings, data['adj_train_norm'], idx) + loss = F.nll_loss(output, data['labels'][idx], self.weights) + acc, f1,recall,precision,roc_auc = acc_f1(output, data['labels'][idx], average=self.f1_average) + metrics = {'loss': loss, 'acc': acc, 'f1': f1,'recall':recall,'precision':precision,'roc_auc':roc_auc} + return metrics + + def init_metric_dict(self): + return {'acc': -1, 'f1': -1} + + def has_improved(self, m1, m2): + return m1["f1"] < m2["f1"] diff --git a/HGCN/models/decoders.py b/HGCN/models/decoders.py new file mode 100644 index 0000000000000000000000000000000000000000..f20046bcaca78d98d08a8a94da17f6881347b0d2 --- /dev/null +++ b/HGCN/models/decoders.py @@ -0,0 +1,52 @@ +"""Graph decoders.""" +import Ghypeddings.HGCN.manifolds as manifolds +import torch.nn as nn +import torch.nn.functional as F + +from Ghypeddings.HGCN.layers.layers import Linear + + +class Decoder(nn.Module): + """ + Decoder abstract class for node classification tasks. + """ + + def __init__(self, c): + super(Decoder, self).__init__() + self.c = c + + def decode(self, x, adj): + if self.decode_adj: + input = (x, adj) + probs, _ = self.cls.forward(input) + else: + probs = self.cls.forward(x) + return probs + + +class LinearDecoder(Decoder): + """ + MLP Decoder for Hyperbolic/Euclidean node classification models. 
+ """ + + def __init__(self, c, args): + super(LinearDecoder, self).__init__(c) + self.manifold = getattr(manifolds, args.select_manifold)() + self.input_dim = args.dim + self.output_dim = args.n_classes + self.bias = args.bias + self.cls = Linear(self.input_dim, self.output_dim, args.dropout, lambda x: x, self.bias) + self.decode_adj = False + + def decode(self, x, adj): + h = self.manifold.proj_tan0(self.manifold.logmap0(x, c=self.c), c=self.c) + return super(LinearDecoder, self).decode(h, adj) + + def extra_repr(self): + return 'in_features={}, out_features={}, bias={}, c={}'.format( + self.input_dim, self.output_dim, self.bias, self.c + ) + + +model2decoder = LinearDecoder + diff --git a/HGCN/models/encoders.py b/HGCN/models/encoders.py new file mode 100644 index 0000000000000000000000000000000000000000..c82c611b2bae12bbf192813e906daa001eac822f --- /dev/null +++ b/HGCN/models/encoders.py @@ -0,0 +1,99 @@ +"""Graph encoders.""" + +import numpy as np +import torch +import torch.nn as nn +import torch.nn.functional as F + +import Ghypeddings.HGCN.manifolds as manifolds +import Ghypeddings.HGCN.layers.hyp_layers as hyp_layers +import Ghypeddings.HGCN.utils.math_utils as pmath + +from Ghypeddings.HGCN.layers.layers import GraphConvolution, Linear, get_dim_act +from Ghypeddings.HGCN.layers.att_layers import GraphAttentionLayer + +class Encoder(nn.Module): + """ + Encoder abstract class. + """ + + def __init__(self, c): + super(Encoder, self).__init__() + self.c = c + + def encode(self, x, adj): + if self.encode_graph: + input = (x, adj) + output, _ = self.layers.forward(input) + else: + output = self.layers.forward(x) + return output + +class GCN(Encoder): + """ + Graph Convolution Networks. + """ + + def __init__(self, c, args): + super(GCN, self).__init__(c) + assert args.num_layers > 0 + dims, acts = get_dim_act(args) + gc_layers = [] + for i in range(len(dims) - 1): + in_dim, out_dim = dims[i], dims[i + 1] + act = acts[i] + gc_layers.append(GraphConvolution(in_dim, out_dim, args.dropout, act, args.bias)) + self.layers = nn.Sequential(*gc_layers) + self.encode_graph = True + +class GAT(Encoder): + """ + Graph Attention Networks. + """ + + def __init__(self, c, args): + super(GAT, self).__init__(c) + assert args.num_layers > 0 + dims, acts = get_dim_act(args) + gat_layers = [] + for i in range(len(dims) - 1): + in_dim, out_dim = dims[i], dims[i + 1] + act = acts[i] + assert dims[i + 1] % args.n_heads == 0 + out_dim = dims[i + 1] // args.n_heads + concat = True + gat_layers.append( + GraphAttentionLayer(in_dim, out_dim, args.dropout, act, args.alpha, args.n_heads, concat)) + self.layers = nn.Sequential(*gat_layers) + self.encode_graph = True + + +class HGCN(Encoder): + """ + Hyperbolic-GCN. 
+ """ + + def __init__(self, c, args): + super(HGCN, self).__init__(c) + self.manifold = getattr(manifolds, args.select_manifold)() + assert args.num_layers > 1 + dims, acts, self.curvatures = hyp_layers.get_dim_act_curv(args) + self.curvatures.append(self.c) + hgc_layers = [] + for i in range(len(dims) - 1): + c_in, c_out = self.curvatures[i], self.curvatures[i + 1] + in_dim, out_dim = dims[i], dims[i + 1] + act = acts[i] + hgc_layers.append( + hyp_layers.HyperbolicGraphConvolution( + self.manifold, in_dim, out_dim, c_in, c_out, args.dropout, act, args.bias, args.use_att, args.local_agg + ) + ) + self.layers = nn.Sequential(*hgc_layers) + self.encode_graph = True + + def encode(self, x, adj): + x_tan = self.manifold.proj_tan0(x, self.curvatures[0]) + x_hyp = self.manifold.expmap0(x_tan, c=self.curvatures[0]) + x_hyp = self.manifold.proj(x_hyp, c=self.curvatures[0]) + return super(HGCN, self).encode(x_hyp, adj) diff --git a/HGCN/optimizers/__init__.py b/HGCN/optimizers/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..411e319d0d10a157da5a9e05a85f468983dcb4be --- /dev/null +++ b/HGCN/optimizers/__init__.py @@ -0,0 +1,2 @@ +from torch.optim import Adam +from Ghypeddings.HGCN.optimizers.radam import RiemannianAdam diff --git a/HGCN/optimizers/radam.py b/HGCN/optimizers/radam.py new file mode 100644 index 0000000000000000000000000000000000000000..c7033935d2acb22bb55679828d15564b17896e34 --- /dev/null +++ b/HGCN/optimizers/radam.py @@ -0,0 +1,172 @@ +"""Riemannian adam optimizer geoopt implementation (https://github.com/geoopt/).""" +import torch.optim +from Ghypeddings.HGCN.manifolds import Euclidean, ManifoldParameter + +_default_manifold = Euclidean() + +class OptimMixin(object): + def __init__(self, *args, stabilize=None, **kwargs): + self._stabilize = stabilize + super().__init__(*args, **kwargs) + + def stabilize_group(self, group): + pass + + def stabilize(self): + """Stabilize parameters if they are off-manifold due to numerical reasons + """ + for group in self.param_groups: + self.stabilize_group(group) + + +def copy_or_set_(dest, source): + """ + A workaround to respect strides of :code:`dest` when copying :code:`source` + (https://github.com/geoopt/geoopt/issues/70) + Parameters + ---------- + dest : torch.Tensor + Destination tensor where to store new data + source : torch.Tensor + Source data to put in the new tensor + Returns + ------- + dest + torch.Tensor, modified inplace + """ + if dest.stride() != source.stride(): + return dest.copy_(source) + else: + return dest.set_(source) + + +class RiemannianAdam(OptimMixin, torch.optim.Adam): + r"""Riemannian Adam with the same API as :class:`torch.optim.Adam` + Parameters + ---------- + params : iterable + iterable of parameters to optimize or dicts defining + parameter groups + lr : float (optional) + learning rate (default: 1e-3) + betas : Tuple[float, float] (optional) + coefficients used for computing + running averages of gradient and its square (default: (0.9, 0.999)) + eps : float (optional) + term added to the denominator to improve + numerical stability (default: 1e-8) + weight_decay : float (optional) + weight decay (L2 penalty) (default: 0) + amsgrad : bool (optional) + whether to use the AMSGrad variant of this + algorithm from the paper `On the Convergence of Adam and Beyond`_ + (default: False) + Other Parameters + ---------------- + stabilize : int + Stabilize parameters if they are off-manifold due to numerical + reasons every ``stabilize`` steps (default: ``None`` -- no stabilize) + .. 
_On the Convergence of Adam and Beyond: + https://openreview.net/forum?id=ryQu7f-RZ + """ + + def step(self, closure=None): + """Performs a single optimization step. + Arguments + --------- + closure : callable (optional) + A closure that reevaluates the model + and returns the loss. + """ + loss = None + if closure is not None: + loss = closure() + with torch.no_grad(): + for group in self.param_groups: + if "step" not in group: + group["step"] = 0 + betas = group["betas"] + weight_decay = group["weight_decay"] + eps = group["eps"] + learning_rate = group["lr"] + amsgrad = group["amsgrad"] + for point in group["params"]: + grad = point.grad + if grad is None: + continue + if isinstance(point, (ManifoldParameter)): + manifold = point.manifold + c = point.c + else: + manifold = _default_manifold + c = None + if grad.is_sparse: + raise RuntimeError( + "Riemannian Adam does not support sparse gradients yet (PR is welcome)" + ) + + state = self.state[point] + + # State initialization + if len(state) == 0: + state["step"] = 0 + # Exponential moving average of gradient values + state["exp_avg"] = torch.zeros_like(point) + # Exponential moving average of squared gradient values + state["exp_avg_sq"] = torch.zeros_like(point) + if amsgrad: + # Maintains max of all exp. moving avg. of sq. grad. values + state["max_exp_avg_sq"] = torch.zeros_like(point) + # make local variables for easy access + exp_avg = state["exp_avg"] + exp_avg_sq = state["exp_avg_sq"] + # actual step + grad.add_(weight_decay, point) + grad = manifold.egrad2rgrad(point, grad, c) + exp_avg.mul_(betas[0]).add_(1 - betas[0], grad) + exp_avg_sq.mul_(betas[1]).add_( + 1 - betas[1], manifold.inner(point, c, grad, keepdim=True) + ) + if amsgrad: + max_exp_avg_sq = state["max_exp_avg_sq"] + # Maintains the maximum of all 2nd moment running avg. till now + torch.max(max_exp_avg_sq, exp_avg_sq, out=max_exp_avg_sq) + # Use the max. for normalizing running avg. 
of gradient + denom = max_exp_avg_sq.sqrt().add_(eps) + else: + denom = exp_avg_sq.sqrt().add_(eps) + group["step"] += 1 + bias_correction1 = 1 - betas[0] ** group["step"] + bias_correction2 = 1 - betas[1] ** group["step"] + step_size = ( + learning_rate * bias_correction2 ** 0.5 / bias_correction1 + ) + + # copy the state, we need it for retraction + # get the direction for ascend + direction = exp_avg / denom + # transport the exponential averaging to the new point + new_point = manifold.proj(manifold.expmap(-step_size * direction, point, c), c) + exp_avg_new = manifold.ptransp(point, new_point, exp_avg, c) + # use copy only for user facing point + copy_or_set_(point, new_point) + exp_avg.set_(exp_avg_new) + + group["step"] += 1 + if self._stabilize is not None and group["step"] % self._stabilize == 0: + self.stabilize_group(group) + return loss + + @torch.no_grad() + def stabilize_group(self, group): + for p in group["params"]: + if not isinstance(p, ManifoldParameter): + continue + state = self.state[p] + if not state: # due to None grads + continue + manifold = p.manifold + c = p.c + exp_avg = state["exp_avg"] + copy_or_set_(p, manifold.proj(p, c)) + exp_avg.set_(manifold.proj_tan(exp_avg, u, c)) diff --git a/HGCN/utils/__init__.py b/HGCN/utils/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/HGCN/utils/data_utils.py b/HGCN/utils/data_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..5169f98c576ae898769d8d3ffe5f4133283af93f --- /dev/null +++ b/HGCN/utils/data_utils.py @@ -0,0 +1,87 @@ +"""Data utils functions for pre-processing and data loading.""" +import os +import pickle as pkl +import sys + +import networkx as nx +import numpy as np +import scipy.sparse as sp +import torch +import pandas as pd + +from sklearn.preprocessing import MinMaxScaler + + +def process_data(args, adj,features,labels): + data = process_data_nc(args,adj,features,labels) + data['adj_train_norm'], data['features'] = process( + data['adj_train'], data['features'],args.normalize_adj,args.normalize_feats + ) + return data + + +def process(adj, features, normalize_adj, normalize_feats): + if sp.isspmatrix(features): + features = np.array(features.todense()) + if normalize_feats: + features = normalize(features) + features = torch.Tensor(features) + if normalize_adj: + adj = normalize(adj + sp.eye(adj.shape[0])) + adj = sparse_mx_to_torch_sparse_tensor(adj) + return adj, features + + +def normalize(mx): + """Row-normalize sparse matrix.""" + rowsum = np.array(mx.sum(1)) + r_inv = np.power(rowsum, -1).flatten() + r_inv[np.isinf(r_inv)] = 0. 
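+ # r_inv now holds 1/row_sum with divide-by-zero entries forced to 0; the next
+ # two lines build the diagonal matrix D^-1 and left-multiply mx so that every
+ # nonzero row sums to 1 (row normalization of the adjacency or feature matrix).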
+ r_mat_inv = sp.diags(r_inv) + mx = r_mat_inv.dot(mx) + return mx + + +def sparse_mx_to_torch_sparse_tensor(sparse_mx): + """Convert a scipy sparse matrix to a torch sparse tensor.""" + sparse_mx = sparse_mx.tocoo() + indices = torch.from_numpy( + np.vstack((sparse_mx.row, sparse_mx.col)).astype(np.int64) + ) + values = torch.Tensor(sparse_mx.data) + shape = torch.Size(sparse_mx.shape) + return torch.sparse.FloatTensor(indices, values, shape) + + +def augment(adj, features, normalize_feats=True): + deg = np.squeeze(np.sum(adj, axis=0).astype(int)) + deg[deg > 5] = 5 + deg_onehot = torch.tensor(np.eye(6)[deg], dtype=torch.float).squeeze() + const_f = torch.ones(features.size(0), 1) + features = torch.cat((features, deg_onehot, const_f), dim=1) + return features + + +def split_data(labels, val_prop, test_prop, seed): + np.random.seed(seed) + pos_idx = labels.nonzero()[0] + neg_idx = (1. - labels).nonzero()[0] + np.random.shuffle(pos_idx) + np.random.shuffle(neg_idx) + pos_idx = pos_idx.tolist() + neg_idx = neg_idx.tolist() + nb_pos_neg = min(len(pos_idx), len(neg_idx)) + nb_val = round(val_prop * nb_pos_neg) + nb_test = round(test_prop * nb_pos_neg) + idx_val_pos, idx_test_pos, idx_train_pos = pos_idx[:nb_val], pos_idx[nb_val:nb_val + nb_test], pos_idx[ + nb_val + nb_test:] + idx_val_neg, idx_test_neg, idx_train_neg = neg_idx[:nb_val], neg_idx[nb_val:nb_val + nb_test], neg_idx[ + nb_val + nb_test:] + return idx_val_pos + idx_val_neg, idx_test_pos + idx_test_neg, idx_train_pos + idx_train_neg + + +def process_data_nc(args,adj,features,labels): + idx_val, idx_test, idx_train = split_data(labels, args.val_prop, args.test_prop, seed=args.seed) + labels = torch.LongTensor(labels) + data = {'adj_train': sp.csr_matrix(adj), 'features': features, 'labels': labels, 'idx_train': idx_train, 'idx_val': idx_val, 'idx_test': idx_test} + return data \ No newline at end of file diff --git a/HGCN/utils/eval_utils.py b/HGCN/utils/eval_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..840a48bf45cc08944925411885698019442f5870 --- /dev/null +++ b/HGCN/utils/eval_utils.py @@ -0,0 +1,14 @@ +from sklearn.metrics import accuracy_score, f1_score, recall_score,precision_score,roc_auc_score + +def acc_f1(output, labels, average='binary'): + preds = output.max(1)[1].type_as(labels) + if preds.is_cuda: + preds = preds.cpu() + labels = labels.cpu() + accuracy = accuracy_score(labels, preds) + recall = recall_score(labels, preds) + precision = precision_score(labels, preds) + roc_auc = roc_auc_score(labels,preds ) + f1 = f1_score(labels, preds, average=average) + return accuracy, f1 , recall,precision,roc_auc + diff --git a/HGCN/utils/math_utils.py b/HGCN/utils/math_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..9cf278ed7ce59b97f4793f5def3218f3e830d473 --- /dev/null +++ b/HGCN/utils/math_utils.py @@ -0,0 +1,69 @@ +"""Math utils functions.""" + +import torch + + +def cosh(x, clamp=15): + return x.clamp(-clamp, clamp).cosh() + + +def sinh(x, clamp=15): + return x.clamp(-clamp, clamp).sinh() + + +def tanh(x, clamp=15): + return x.clamp(-clamp, clamp).tanh() + + +def arcosh(x): + return Arcosh.apply(x) + + +def arsinh(x): + return Arsinh.apply(x) + + +def artanh(x): + return Artanh.apply(x) + + +class Artanh(torch.autograd.Function): + @staticmethod + def forward(ctx, x): + x = x.clamp(-1 + 1e-15, 1 - 1e-15) + ctx.save_for_backward(x) + z = x.double() + return (torch.log_(1 + z).sub_(torch.log_(1 - z))).mul_(0.5).to(x.dtype) + + @staticmethod + def backward(ctx, 
grad_output): + input, = ctx.saved_tensors + return grad_output / (1 - input ** 2) + + +class Arsinh(torch.autograd.Function): + @staticmethod + def forward(ctx, x): + ctx.save_for_backward(x) + z = x.double() + return (z + torch.sqrt_(1 + z.pow(2))).clamp_min_(1e-15).log_().to(x.dtype) + + @staticmethod + def backward(ctx, grad_output): + input, = ctx.saved_tensors + return grad_output / (1 + input ** 2) ** 0.5 + + +class Arcosh(torch.autograd.Function): + @staticmethod + def forward(ctx, x): + x = x.clamp(min=1.0 + 1e-15) + ctx.save_for_backward(x) + z = x.double() + return (z + torch.sqrt_(z.pow(2) - 1)).clamp_min_(1e-15).log_().to(x.dtype) + + @staticmethod + def backward(ctx, grad_output): + input, = ctx.saved_tensors + return grad_output / (input ** 2 - 1) ** 0.5 + diff --git a/HGCN/utils/train_utils.py b/HGCN/utils/train_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..296451e4795df11d93ffae8d345da6c1aba740fe --- /dev/null +++ b/HGCN/utils/train_utils.py @@ -0,0 +1,47 @@ +import os + +import numpy as np +import torch +import torch.nn.functional as F +import torch.nn.modules.loss +import argparse + +def format_metrics(metrics, split): + """Format metric in metric dict for logging.""" + return " ".join( + ["{}_{}: {:.4f}".format(split, metric_name, metric_val) for metric_name, metric_val in metrics.items()]) + +def create_args(*args): + parser = argparse.ArgumentParser() + parser.add_argument('--dim', type=int, default=args[0]) + parser.add_argument('--c', type=int, default=args[1]) + parser.add_argument('--num_layers', type=int, default=args[2]) + parser.add_argument('--bias', type=bool, default=args[3]) + parser.add_argument('--act', type=str, default=args[4]) + parser.add_argument('--select_manifold', type=str, default=args[5]) + parser.add_argument('--grad_clip', type=float, default=args[6]) + parser.add_argument('--optimizer', type=str, default=args[7]) + parser.add_argument('--weight_decay', type=float, default=args[8]) + parser.add_argument('--lr', type=float, default=args[9]) + parser.add_argument('--gamma', type=float, default=args[10]) + parser.add_argument('--lr_reduce_freq', type=int, default=args[11]) + parser.add_argument('--cuda', type=int, default=args[12]) + parser.add_argument('--epochs', type=int, default=args[13]) + parser.add_argument('--min_epochs', type=int, default=args[14]) + parser.add_argument('--patience', type=int, default=args[15]) + parser.add_argument('--seed', type=int, default=args[16]) + parser.add_argument('--log_freq', type=int, default=args[17]) + parser.add_argument('--eval_freq', type=int, default=args[18]) + parser.add_argument('--val_prop', type=float, default=args[19]) + parser.add_argument('--test_prop', type=float, default=args[20]) + parser.add_argument('--double_precision', type=int, default=args[21]) + parser.add_argument('--dropout', type=float, default=args[22]) + parser.add_argument('--use_att', type=bool, default=args[23]) + parser.add_argument('--alpha', type=float, default=args[24]) + parser.add_argument('--local_agg', type=bool, default=args[25]) + parser.add_argument('--normalize_adj', type=bool, default=args[26]) + parser.add_argument('--normalize_feats', type=bool, default=args[27]) + parser.add_argument('--model', type=str, default='GAT') #GCN, GAT,HGCN + parser.add_argument('--n_heads', type=int, default=1) #GCN, GAT,HGCN + flags, unknown = parser.parse_known_args() + return flags \ No newline at end of file diff --git a/HGNN/.gitignore b/HGNN/.gitignore new file mode 100644 index 
0000000000000000000000000000000000000000..ba0430d26c996e7f078385407f959c96c271087c --- /dev/null +++ b/HGNN/.gitignore @@ -0,0 +1 @@ +__pycache__/ \ No newline at end of file diff --git a/HGNN/__init__.py b/HGNN/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/HGNN/dataset/NodeClassificationDataset.py b/HGNN/dataset/NodeClassificationDataset.py new file mode 100644 index 0000000000000000000000000000000000000000..ef6a3c813c067a1564277d5509081dbe91489828 --- /dev/null +++ b/HGNN/dataset/NodeClassificationDataset.py @@ -0,0 +1,160 @@ +import numpy as np +import pickle as pkl +import networkx as nx +import scipy.sparse as sp +from scipy.sparse import save_npz, load_npz +from scipy.sparse.linalg import eigsh +import sys +from torch.utils.data import Dataset, DataLoader +from Ghypeddings.HGNN.utils import * +import pandas as pd +from sklearn.preprocessing import MinMaxScaler + +def parse_index_file(filename): + """Parse index file.""" + index = [] + for line in open(filename): + index.append(int(line.strip())) + return index + +def sample_mask(idx, l): + """Create mask.""" + mask = np.zeros(l) + mask[idx] = 1 + return np.array(mask, dtype=np.bool_) + +def preprocess_features(features): + """Row-normalize feature matrix and convert to tuple representation""" + rowsum = np.array(features.sum(1)).astype(float) + r_inv = np.power(rowsum, -1).flatten() + r_inv[np.isinf(r_inv)] = 0 + r_mat_inv = sp.diags(r_inv) + features = r_mat_inv.dot(features) + return features + +class NodeClassificationDataset(Dataset): + """ + Extend the Dataset class for graph datasets + """ + def __init__(self, args, logger,adj,features,labels): + self.args = args + self.process_data(adj,features,labels) + + def _filling_adjacency_numpy(self,data, N, source_ip_index, destination_ip_index): + try: + adjacency = np.zeros((N,N), dtype=bool) + except Exception as e: + print(f"An error occurred: {e}") + + source_ips = data[:, source_ip_index] + destination_ips = data[:, destination_ip_index] + mask = ((source_ips[:, np.newaxis] == source_ips) | (source_ips[:, np.newaxis] == destination_ips) | (destination_ips[:, np.newaxis] == source_ips) | (destination_ips[:, np.newaxis] == destination_ips)) + adjacency[mask] = True + adjacency = adjacency - np.eye(N) + return adjacency + + def compact_adjacency(self,adj): + max_neighbors = int(np.max(np.sum(adj, axis=1))) + shape = (adj.shape[0],max_neighbors) + c_adj = np.zeros(shape) + c_adj[:,:] = -1 + indices , neighbors = np.where(adj == 1) + + j=-1 + l = indices[0] + for i,k in zip(indices,neighbors): + if i == l: + j+=1 + else: + l=i + j=0 + c_adj[i,j]=int(k) + return c_adj + + def compact_weight_matrix(self,c_adj): + return np.where(c_adj >= 0, 1, 0) + + def one_hot_labels(self,y): + array = np.zeros((len(y),2)) + for i,j in zip(range(len(y)),y): + if j: + array[i,1]=1 + else: + array[i,0]=1 + + return array + + def split_data(self,labels, test_prop,val_prop): + np.random.seed(self.args.seed) + #nb_nodes = labels.shape[0] + #all_idx = np.arange(nb_nodes) + # pos_idx = labels.nonzero()[0] + # neg_idx = (1. 
- labels).nonzero()[0] + pos_idx = labels[:,1].nonzero()[0] + neg_idx = labels[:,0].nonzero()[0] + np.random.shuffle(pos_idx) + np.random.shuffle(neg_idx) + pos_idx = pos_idx.tolist() + neg_idx = neg_idx.tolist() + nb_pos_neg = min(len(pos_idx), len(neg_idx)) + nb_val = round(val_prop * nb_pos_neg) + nb_test = round(test_prop * nb_pos_neg) + idx_val_pos, idx_test_pos, idx_train_pos = pos_idx[:nb_val], pos_idx[nb_val:nb_val + nb_test], pos_idx[ + nb_val + nb_test:] + idx_val_neg, idx_test_neg, idx_train_neg = neg_idx[:nb_val], neg_idx[nb_val:nb_val + nb_test], neg_idx[ + nb_val + nb_test:] + return idx_val_pos + idx_val_neg, idx_test_pos + idx_test_neg, idx_train_pos + idx_train_neg + + def process_data(self, adj,features,labels): + + adj = self.compact_adjacency(adj) + weight = self.compact_weight_matrix(adj) + adj[adj == -1] = 0 + + labels = self.one_hot_labels(labels) + + idx_test, idx_train, idx_val = self.split_data(labels,self.args.test_prop,self.args.val_prop) + + train_mask = sample_mask(idx_train, labels.shape[0]) + val_mask = sample_mask(idx_val, labels.shape[0]) + test_mask = sample_mask(idx_test, labels.shape[0]) + + y_train = np.zeros(labels.shape) + y_val = np.zeros(labels.shape) + y_test = np.zeros(labels.shape) + y_train[train_mask, :] = labels[train_mask, :] + y_val[val_mask, :] = labels[val_mask, :] + y_test[test_mask, :] = labels[test_mask, :] + + self.adj = adj + self.weight = weight + + self.features = preprocess_features(features) if self.args.normalize_feats else features + self.features = features + assert np.isnan(features).any()== False + self.y_train = y_train + self.y_val = y_val + self.y_test = y_test + self.train_mask = train_mask.astype(int) + self.val_mask = val_mask.astype(int) + self.test_mask = test_mask.astype(int) + self.args.node_num = self.features.shape[0] + self.args.input_dim = self.features.shape[1] + self.args.num_class = y_train.shape[1] + + + def __len__(self): + return 1 + + def __getitem__(self, idx): + return { + 'adj': self.adj, + 'weight': self.weight, + 'features': self.features, + 'y_train' : self.y_train, + 'y_val' : self.y_val, + 'y_test' : self.y_test, + 'train_mask' : self.train_mask, + 'val_mask' : self.val_mask, + 'test_mask' : self.test_mask, + } diff --git a/HGNN/dataset/__init__.py b/HGNN/dataset/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/HGNN/gnn/RiemannianGNN.py b/HGNN/gnn/RiemannianGNN.py new file mode 100644 index 0000000000000000000000000000000000000000..5e89f0ad24542163ba3a0f185eff0ee12f736ffb --- /dev/null +++ b/HGNN/gnn/RiemannianGNN.py @@ -0,0 +1,138 @@ +import torch as th +import torch.nn as nn +import torch.nn.functional as F +from Ghypeddings.HGNN.utils import * + +class RiemannianGNN(nn.Module): + + def __init__(self, args, logger, manifold): + super(RiemannianGNN, self).__init__() + self.args = args + self.logger = logger + self.manifold = manifold + self.set_up_params() + self.activation = get_activation(self.args) + self.dropout = nn.Dropout(self.args.dropout) + + def create_params(self): + """ + create the GNN params for a specific msg type + """ + msg_weight = [] + layer = self.args.num_layers if not self.args.tie_weight else 1 + for _ in range(layer): + # weight in euclidean space + if self.args.select_manifold == 'poincare': + M = th.zeros([self.args.dim, self.args.dim], requires_grad=True) + elif self.args.select_manifold == 'lorentz': # one degree of freedom less + M = th.zeros([self.args.dim, self.args.dim - 1], 
requires_grad=True) + init_weight(M, self.args.proj_init) + M = nn.Parameter(M) + self.args.eucl_vars.append(M) + msg_weight.append(M) + return nn.ParameterList(msg_weight) + + def set_up_params(self): + """ + set up the params for all message types + """ + self.type_of_msg = 1 + + for i in range(0, self.type_of_msg): + setattr(self, "msg_%d_weight" % i, self.create_params()) + + + def retrieve_params(self, weight, step): + """ + Args: + weight: a list of weights + step: a certain layer + """ + if self.args.select_manifold == 'poincare': + layer_weight = weight[step] + elif self.args.select_manifold == 'lorentz': # Ensure valid tangent vectors for (1, 0, ...) + layer_weight = th.cat((th.zeros((self.args.dim, 1)).cuda(), weight[step]), dim=1) + return layer_weight + + def apply_activation(self, node_repr): + """ + apply non-linearity for different manifolds + """ + if self.args.select_manifold == "poincare": + return self.activation(node_repr) + elif self.args.select_manifold == "lorentz": + return self.manifold.from_poincare_to_lorentz( + self.activation(self.manifold.from_lorentz_to_poincare(node_repr)) + ) + + def split_graph_by_negative_edge(self, adj_mat, weight): + """ + Split the graph according to positive and negative edges. + """ + mask = weight > 0 + neg_mask = weight < 0 + + pos_adj_mat = adj_mat * mask.long() + neg_adj_mat = adj_mat * neg_mask.long() + pos_weight = weight * mask.float() + neg_weight = -weight * neg_mask.float() + return pos_adj_mat, pos_weight, neg_adj_mat, neg_weight + + def split_graph_by_type(self, adj_mat, weight): + """ + split the graph according to edge type for multi-relational datasets + """ + multi_relation_adj_mat = [] + multi_relation_weight = [] + for relation in range(1, self.args.edge_type): + mask = (weight.int() == relation) + multi_relation_adj_mat.append(adj_mat * mask.long()) + multi_relation_weight.append(mask.float()) + return multi_relation_adj_mat, multi_relation_weight + + def split_input(self, adj_mat, weight): + """ + Split the adjacency matrix and weight matrix for multi-relational datasets + and datasets with enhanced inverse edges, e.g. Ethereum. + """ + return [adj_mat], [weight] + + def aggregate_msg(self, node_repr, adj_mat, weight, layer_weight, mask): + """ + message passing for a specific message type. 
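+ Messages are node_repr @ layer_weight (masked for padded nodes); each node's neighbors are gathered with index_select over the compacted adjacency of shape [node_num, max_neighbor], multiplied by the per-edge weights and summed over the neighbor dimension.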
+ """ + node_num, max_neighbor = adj_mat.size(0), adj_mat.size(1) + msg = th.mm(node_repr, layer_weight) * mask + # select out the neighbors of each node + neighbors = th.index_select(msg, 0, adj_mat.view(-1)) # [node_num * max_neighbor, embed_size] + neighbors = neighbors.view(node_num, max_neighbor, -1) + # weighted sum of the neighbors' representations + neighbors = weight.unsqueeze(2) * neighbors # [node_num, max_neighbor, embed_size] + combined_msg = th.sum(neighbors, dim=1) # [node_num, embed_size] + return combined_msg + + def get_combined_msg(self, step, node_repr, adj_mat, weight, mask): + """ + perform message passing in the tangent space of x' + """ + # use the first layer only if tying weights + gnn_layer = 0 if self.args.tie_weight else step + combined_msg = None + for relation in range(0, self.type_of_msg): + layer_weight = self.retrieve_params(getattr(self, "msg_%d_weight" % relation), gnn_layer) + aggregated_msg = self.aggregate_msg(node_repr, + adj_mat[relation], + weight[relation], + layer_weight, mask) + combined_msg = aggregated_msg if combined_msg is None else (combined_msg + aggregated_msg) + return combined_msg + + def forward(self, node_repr, adj_list, weight, mask): + adj_list, weight = self.split_input(adj_list, weight) + for step in range(self.args.num_layers): + node_repr = self.manifold.log_map_zero(node_repr) * mask if step > 0 else node_repr * mask + combined_msg = self.get_combined_msg(step, node_repr, adj_list, weight, mask) + combined_msg = self.dropout(combined_msg) * mask + node_repr = self.manifold.exp_map_zero(combined_msg) * mask + node_repr = self.apply_activation(node_repr) * mask + return node_repr diff --git a/HGNN/gnn/__init__.py b/HGNN/gnn/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..6c53e88b5c75fe891a396a8d629fae431dfe63d6 --- /dev/null +++ b/HGNN/gnn/__init__.py @@ -0,0 +1 @@ +from Ghypeddings.HGNN.gnn.RiemannianGNN import RiemannianGNN diff --git a/HGNN/hgnn.py b/HGNN/hgnn.py new file mode 100644 index 0000000000000000000000000000000000000000..d8702dec1502ad91bef985a6c3d4045929c17ff5 --- /dev/null +++ b/HGNN/hgnn.py @@ -0,0 +1,70 @@ +from Ghypeddings.HGNN.task import * +from Ghypeddings.HGNN.utils import * +from Ghypeddings.HGNN.manifold import * +from Ghypeddings.HGNN.gnn import RiemannianGNN + +class HGNN: + def __init__(self, + adj, + features, + labels, + dim, + c=None, + num_layers=2, + bias=True, + act='leaky_relu', + alpha=0.2, + select_manifold='poincare', + num_centroid=100, + eucl_vars=[], + hyp_vars=[], + grad_clip=1.0, + optimizer='sgd', + weight_decay=0.01, + lr=0.01, + lr_scheduler='cosine', + lr_gamma=0.5, + lr_hyperbolic=0.01, + hyper_optimizer='ramsgrad', + proj_init='xavier', + tie_weight=True, + epochs=50, + patience=100, + seed=42, + log_freq=1, + eval_freq=1, + val_prop=0.15, + test_prop=0.15, + double_precision=0, + dropout=0.01, + normalize_adj=False, + normalize_feats=True): + + self.args = create_args(dim,c,num_layers,bias,act,alpha,select_manifold,num_centroid,eucl_vars,hyp_vars,grad_clip,optimizer,weight_decay,lr,lr_scheduler,lr_gamma,lr_hyperbolic,hyper_optimizer,proj_init,tie_weight,epochs,patience,seed,log_freq,eval_freq,val_prop,test_prop,double_precision,dropout,normalize_adj,normalize_feats) + + set_seed(self.args.seed) + self.logger = create_logger() + if self.args.select_manifold == 'lorentz': + self.args.dim += 1 + if self.args.select_manifold == 'lorentz': + self.manifold= LorentzManifold(self.args, self.logger) + elif self.args.select_manifold == 'poincare': + 
self.manifold= PoincareManifold(self.args,self.logger) + rgnn = RiemannianGNN(self.args, self.logger, self.manifold) + self.gnn = NodeClassificationTask(self.args, self.logger, rgnn, self.manifold, adj,features,labels) + + def fit(self): + return self.gnn.run_gnn() + + def predict(self): + return self.gnn.evaluate(self.gnn.loader, 'test', self.gnn.model, self.gnn.loss_function) + + def save_embeddings(self): + labels = np.argmax(th.squeeze(self.gnn.labels).numpy(),axis=1) + #tb_embeddings_euc = self.gnn.manifold.log_map_zero(self.gnn.early_stop.best_emb) + for_classification_hyp = np.hstack((self.gnn.early_stop.best_emb.cpu().detach().numpy(),labels.reshape(-1,1))) + #for_classification_euc = np.hstack((tb_embeddings_euc.cpu().detach().numpy(),labels.reshape(-1,1))) + hyp_file_path = os.path.join(os.getcwd(),'hgnn_embeddings_hyp.csv') + #euc_file_path = os.path.join(os.getcwd(),'hgnn_embeddings_euc.csv') + np.savetxt(hyp_file_path, for_classification_hyp, delimiter=',') + #np.savetxt(euc_file_path, for_classification_euc, delimiter=',') \ No newline at end of file diff --git a/HGNN/hyperbolic_module/CentroidDistance.py b/HGNN/hyperbolic_module/CentroidDistance.py new file mode 100644 index 0000000000000000000000000000000000000000..9d868cb98ea1d10da977fd7bb8b22d9f0cfb0853 --- /dev/null +++ b/HGNN/hyperbolic_module/CentroidDistance.py @@ -0,0 +1,54 @@ +import torch as th +import torch.nn as nn +import torch.nn.functional as F +from Ghypeddings.HGNN.utils import * + +class CentroidDistance(nn.Module): + """ + Implement a model that calculates the pairwise distances between node representations + and centroids + """ + def __init__(self, args, logger, manifold): + super(CentroidDistance, self).__init__() + self.args = args + self.logger = logger + self.manifold = manifold + + # centroid embedding + self.centroid_embedding = nn.Embedding( + args.num_centroid, args.dim, + sparse=False, + scale_grad_by_freq=False, + ) + self.manifold.init_embed(self.centroid_embedding) + args.hyp_vars.append(self.centroid_embedding) + + def forward(self, node_repr, mask): + """ + Args: + node_repr: [node_num, embed_size] + mask: [node_num, 1] 1 denote real node, 0 padded node + return: + graph_centroid_dist: [1, num_centroid] + node_centroid_dist: [1, node_num, num_centroid] + """ + node_num = node_repr.size(0) + + # broadcast and reshape node_repr to [node_num * num_centroid, embed_size] + node_repr = node_repr.unsqueeze(1).expand( + -1, + self.args.num_centroid, + -1).contiguous().view(-1, self.args.dim) + + # broadcast and reshape centroid embeddings to [node_num * num_centroid, embed_size] + centroid_repr = self.centroid_embedding(th.arange(self.args.num_centroid).cuda()) + centroid_repr = centroid_repr.unsqueeze(0).expand( + node_num, + -1, + -1).contiguous().view(-1, self.args.dim) + # get distance + node_centroid_dist = self.manifold.distance(node_repr, centroid_repr) + node_centroid_dist = node_centroid_dist.view(1, node_num, self.args.num_centroid) * mask + # average pooling over nodes + graph_centroid_dist = th.sum(node_centroid_dist, dim=1) / th.sum(mask) + return graph_centroid_dist, node_centroid_dist diff --git a/HGNN/hyperbolic_module/PoincareDistance.py b/HGNN/hyperbolic_module/PoincareDistance.py new file mode 100644 index 0000000000000000000000000000000000000000..4bc423409e286c13382d0a76bd97931f1c840a54 --- /dev/null +++ b/HGNN/hyperbolic_module/PoincareDistance.py @@ -0,0 +1,38 @@ +import torch as th +from torch.autograd import Function +import torch.nn as nn +import numpy as np +from 
torch.autograd import Function, Variable + +class PoincareDistance(Function): + @staticmethod + def grad(x, v, sqnormx, sqnormv, sqdist, eps): + alpha = (1 - sqnormx) + beta = (1 - sqnormv) + z = 1 + 2 * sqdist / (alpha * beta) + a = ((sqnormv - 2 * th.sum(x * v, dim=-1) + 1) / th.pow(alpha, 2))\ + .unsqueeze(-1).expand_as(x) + a = a * x - v / alpha.unsqueeze(-1).expand_as(v) + z = th.sqrt(th.pow(z, 2) - 1) + z = th.clamp(z * beta, min=eps).unsqueeze(-1) + return 4 * a / z.expand_as(x) + + @staticmethod + def forward(ctx, u, v, eps): + squnorm = th.clamp(th.sum(u * u, dim=-1), 0, 1 - eps) + sqvnorm = th.clamp(th.sum(v * v, dim=-1), 0, 1 - eps) + sqdist = th.sum(th.pow(u - v, 2), dim=-1) + ctx.eps = eps + ctx.save_for_backward(u, v, squnorm, sqvnorm, sqdist) + x = sqdist / ((1 - squnorm) * (1 - sqvnorm)) * 2 + 1 + # arcosh + z = th.sqrt(th.pow(x, 2) - 1) + return th.log(x + z) + + @staticmethod + def backward(ctx, g): + u, v, squnorm, sqvnorm, sqdist = ctx.saved_tensors + g = g.unsqueeze(-1) + gu = PoincareDistance.grad(u, v, squnorm, sqvnorm, sqdist, ctx.eps) + gv = PoincareDistance.grad(v, u, sqvnorm, squnorm, sqdist, ctx.eps) + return g.expand_as(gu) * gu, g.expand_as(gv) * gv, None diff --git a/HGNN/hyperbolic_module/__init__.py b/HGNN/hyperbolic_module/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/HGNN/manifold/LorentzManifold.py b/HGNN/manifold/LorentzManifold.py new file mode 100644 index 0000000000000000000000000000000000000000..c6e9bbcc36dc0769a486fa887b472a07f1ab1492 --- /dev/null +++ b/HGNN/manifold/LorentzManifold.py @@ -0,0 +1,165 @@ +import torch as th +import torch.nn as nn +import numpy as np +from torch.autograd import Function, Variable +from Ghypeddings.HGNN.utils import * + +_eps = 1e-10 + +class LorentzManifold: + + def __init__(self, args, logger, eps=1e-3, norm_clip=1, max_norm=1e3): + self.args = args + self.logger = logger + self.eps = eps + self.norm_clip = norm_clip + self.max_norm = max_norm + + @staticmethod + def ldot(u, v, keepdim=False): + """ + Lorentzian Scalar Product + Args: + u: [batch_size, d + 1] + v: [batch_size, d + 1] + Return: + keepdim: False [batch_size] + keepdim: True [batch_size, 1] + """ + d = u.size(1) - 1 + uv = u * v + uv = th.cat((-uv.narrow(1, 0, 1), uv.narrow(1, 1, d)), dim=1) + return th.sum(uv, dim=1, keepdim=keepdim) + + def from_lorentz_to_poincare(self, x): + """ + Args: + u: [batch_size, d + 1] + """ + d = x.size(-1) - 1 + return x.narrow(-1, 1, d) / (x.narrow(-1, 0, 1) + 1) + + def from_poincare_to_lorentz(self, x): + """ + Args: + u: [batch_size, d] + """ + x_norm_square = th_dot(x, x) + return th.cat((1 + x_norm_square, 2 * x), dim=1) / (1 - x_norm_square + self.eps) + + def distance(self, u, v): + d = -LorentzDot.apply(u, v) + return Acosh.apply(d, self.eps) + + def normalize(self, w): + """ + Normalize vector such that it is located on the hyperboloid + Args: + w: [batch_size, d + 1] + """ + d = w.size(-1) - 1 + narrowed = w.narrow(-1, 1, d) + if self.max_norm: + narrowed = th.renorm(narrowed.view(-1, d), 2, 0, self.max_norm) + first = 1 + th.sum(th.pow(narrowed, 2), dim=-1, keepdim=True) + first = th.sqrt(first) + return th.cat((first, narrowed), dim=1) + + def init_embed(self, embed, irange=1e-2): + embed.weight.data.uniform_(-irange, irange) + embed.weight.data.copy_(self.normalize(embed.weight.data)) + + def rgrad(self, p, d_p): + """Riemannian gradient for hyperboloid""" + u = d_p + x = p + u.narrow(-1, 0, 1).mul_(-1) + 
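+        # in effect: flipping the sign of the time component applies the inverse
+        # Minkowski metric to the Euclidean gradient, and the addcmul below
+        # projects the result onto the tangent space at p, i.e. u <- u + <x, u>_L * x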
u.addcmul_(self.ldot(x, u, keepdim=True).expand_as(x), x) + return d_p + + def exp_map_zero(self, v): + zeros = th.zeros_like(v) + zeros[:, 0] = 1 + return self.exp_map_x(zeros, v) + + def exp_map_x(self, p, d_p, d_p_normalize=True, p_normalize=True): + if d_p_normalize: + d_p = self.normalize_tan(p, d_p) + + ldv = self.ldot(d_p, d_p, keepdim=True) + nd_p = th.sqrt(th.clamp(ldv + self.eps, _eps)) + + t = th.clamp(nd_p, max=self.norm_clip) + newp = (th.cosh(t) * p) + (th.sinh(t) * d_p / nd_p) + + if p_normalize: + newp = self.normalize(newp) + return newp + + def normalize_tan(self, x_all, v_all): + d = v_all.size(1) - 1 + x = x_all.narrow(1, 1, d) + xv = th.sum(x * v_all.narrow(1, 1, d), dim=1, keepdim=True) + tmp = 1 + th.sum(th.pow(x_all.narrow(1, 1, d), 2), dim=1, keepdim=True) + tmp = th.sqrt(tmp) + return th.cat((xv / tmp, v_all.narrow(1, 1, d)), dim=1) + + def log_map_zero(self, y, i=-1): + zeros = th.zeros_like(y) + zeros[:, 0] = 1 + return self.log_map_x(zeros, y) + + def log_map_x(self, x, y, normalize=False): + """Logarithmic map on the Lorentz Manifold""" + xy = self.ldot(x, y).unsqueeze(-1) + tmp = th.sqrt(th.clamp(xy * xy - 1 + self.eps, _eps)) + v = Acosh.apply(-xy, self.eps) / ( + tmp + ) * th.addcmul(y, xy, x) + if normalize: + result = self.normalize_tan(x, v) + else: + result = v + return result + + def parallel_transport(self, x, y, v): + """Parallel transport for hyperboloid""" + v_ = v + x_ = x + y_ = y + + xy = self.ldot(x_, y_, keepdim=True).expand_as(x_) + vy = self.ldot(v_, y_, keepdim=True).expand_as(x_) + vnew = v_ + vy / (1 - xy) * (x_ + y_) + return vnew + + def metric_tensor(self, x, u, v): + return self.ldot(u, v, keepdim=True) + +class LorentzDot(Function): + @staticmethod + def forward(ctx, u, v): + ctx.save_for_backward(u, v) + return LorentzManifold.ldot(u, v) + + @staticmethod + def backward(ctx, g): + u, v = ctx.saved_tensors + g = g.unsqueeze(-1).expand_as(u).clone() + g.narrow(-1, 0, 1).mul_(-1) + return g * v, g * u + +class Acosh(Function): + @staticmethod + def forward(ctx, x, eps): + z = th.sqrt(th.clamp(x * x - 1 + eps, _eps)) + ctx.save_for_backward(z) + ctx.eps = eps + return th.log(x + z) + + @staticmethod + def backward(ctx, g): + z, = ctx.saved_tensors + z = th.clamp(z, min=ctx.eps) + z = g / z + return z, None diff --git a/HGNN/manifold/PoincareManifold.py b/HGNN/manifold/PoincareManifold.py new file mode 100644 index 0000000000000000000000000000000000000000..0a3c97c31eb609a62ffb675fcad4e865ef048fe1 --- /dev/null +++ b/HGNN/manifold/PoincareManifold.py @@ -0,0 +1,112 @@ +import torch as th +import torch.nn as nn +import numpy as np +from torch.autograd import Function, Variable +from Ghypeddings.HGNN.hyperbolic_module.PoincareDistance import PoincareDistance +from Ghypeddings.HGNN.utils import * + +class PoincareManifold: + + def __init__(self, args, logger, EPS=1e-5, PROJ_EPS=1e-5): + self.args = args + self.logger = logger + self.EPS = EPS + self.PROJ_EPS = PROJ_EPS + self.tanh = nn.Tanh() + + def normalize(self, x): + return clip_by_norm(x, (1. - self.PROJ_EPS)) + + def init_embed(self, embed, irange=1e-2): + embed.weight.data.uniform_(-irange, irange) + embed.weight.data.copy_(self.normalize(embed.weight.data)) + + def mob_add(self, u, v): + """ + Add two vectors in hyperbolic space + """ + v = v + self.EPS + th_dot_u_v = 2. * th_dot(u, v) + th_norm_u_sq = th_dot(u, u) + th_norm_v_sq = th_dot(v, v) + denominator = 1. + th_dot_u_v + th_norm_v_sq * th_norm_u_sq + result = (1. 
+ th_dot_u_v + th_norm_v_sq) / (denominator + self.EPS) * u + \ + (1. - th_norm_u_sq) / (denominator + self.EPS) * v + return self.normalize(result) + + def distance(self, u, v): + return PoincareDistance.apply(u, v, 1e-5) + + def lambda_x(self, x): + """ + A conformal factor + """ + return 2. / (1 - th_dot(x, x)) + + def log_map_zero(self, y): + diff = y + self.EPS + norm_diff = th_norm(diff) + return 1. / th_atanh(norm_diff, self.EPS) / norm_diff * diff + + def log_map_x(self, x, y): + diff = self.mob_add(-x, y) + self.EPS + norm_diff = th_norm(diff) + lam = self.lambda_x(x) + return (( 2. / lam) * th_atanh(norm_diff, self.EPS) / norm_diff) * diff + + def metric_tensor(self, x, u, v): + """ + The metric tensor in hyperbolic space. + In-place operations for saving memory. (do not use this function in forward calls) + """ + u_dot_v = th_dot(u, v) + lambda_x = self.lambda_x(x) + lambda_x *= lambda_x + lambda_x *= u_dot_v + return lambda_x + + def exp_map_zero(self, v): + """ + Exp map from tangent space of zero to hyperbolic space + Args: + v: [batch_size, *] in tangent space + """ + v = v + self.EPS + norm_v = th_norm(v) # [batch_size, 1] + result = self.tanh(norm_v) / (norm_v) * v + return self.normalize(result) + + def exp_map_x(self, x, v): + """ + Exp map from tangent space of x to hyperbolic space + """ + v = v + self.EPS # Perturbe v to avoid dealing with v = 0 + norm_v = th_norm(v) + second_term = (self.tanh(self.lambda_x(x) * norm_v / 2) / norm_v) * v + return self.normalize(self.mob_add(x, second_term)) + + def gyr(self, u, v, w): + u_norm = th_dot(u, u) + v_norm = th_dot(v, v) + u_dot_w = th_dot(u, w) + v_dot_w = th_dot(v, w) + u_dot_v = th_dot(u, v) + A = - u_dot_w * v_norm + v_dot_w + 2 * u_dot_v * v_dot_w + B = - v_dot_w * u_norm - u_dot_w + D = 1 + 2 * u_dot_v + u_norm * v_norm + return w + 2 * (A * u + B * v) / (D + self.EPS) + + def parallel_transport(self, src, dst, v): + return self.lambda_x(src) / th.clamp(self.lambda_x(dst), min=self.EPS) * self.gyr(dst, -src, v) + + def rgrad(self, p, d_p): + """ + Function to compute Riemannian gradient from the + Euclidean gradient in the Poincare ball. + Args: + p (Tensor): Current point in the ball + d_p (Tensor): Euclidean gradient at p + """ + p_sqnorm = th.sum(p.data ** 2, dim=-1, keepdim=True) + d_p = d_p * ((1 - p_sqnorm) ** 2 / 4.0).expand_as(d_p) + return d_p diff --git a/HGNN/manifold/__init__.py b/HGNN/manifold/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..ada909b5c9f1f0e0467f0b7b368874b627cf3751 --- /dev/null +++ b/HGNN/manifold/__init__.py @@ -0,0 +1,2 @@ +from Ghypeddings.HGNN.manifold.PoincareManifold import * +from Ghypeddings.HGNN.manifold.LorentzManifold import * diff --git a/HGNN/optimizer/__init__.py b/HGNN/optimizer/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/HGNN/optimizer/ramsgrad.py b/HGNN/optimizer/ramsgrad.py new file mode 100644 index 0000000000000000000000000000000000000000..c51d3d7cae72d995edf555dd36e2535770e14708 --- /dev/null +++ b/HGNN/optimizer/ramsgrad.py @@ -0,0 +1,74 @@ +""" +Implement a AMSGrad: https://openreview.net/pdf?id=r1eiqi09K7 +""" +import torch as th +from torch.optim.optimizer import Optimizer, required +import os +import math +import numpy as np + +class RiemannianAMSGrad(Optimizer): + """ + Riemannian AMS gradient descent. 
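+    Compared with Euclidean AMSGrad, the gradient is first converted to a
+    Riemannian gradient, the parameter update is applied through the manifold's
+    exponential map, and the first-moment estimate is carried to the new iterate
+    by parallel transport (see step() below).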
+ Args: + params (iterable): iterable of parameters to optimize or dicts defining + parameter groups + lr (float): learning rate + """ + + def __init__(self, args, manifold,params, lr, betas=(0.9, 0.99), eps=1e-8): + self.args = args + self.manifold = manifold + defaults = dict(lr=lr, betas=betas, eps=eps) + super(RiemannianAMSGrad, self).__init__(params, defaults) + + def step(self, lr=None): + """Performs a single optimization step. + Arguments: + lr (float, optional): learning rate for the current update. + """ + loss = None + with th.no_grad(): + for group in self.param_groups: + for p in group['params']: + if p.grad is None: + continue + grad = p.grad.data + grad = self.manifold.rgrad(p, grad) + if lr is None: + lr = group['lr'] + + state = self.state[p] + + # State initialization + if len(state) == 0: + state['step'] = 0 + state['tau'] = th.zeros_like(p.data) + # Exponential moving average of gradient values + state['exp_avg'] = th.zeros_like(p.data) + # Exponential moving average of squared gradient values + state['exp_avg_sq'] = th.zeros_like(p.data) + # Maintains max of all exp. moving avg. of sq. grad. values + state['max_exp_avg_sq'] = th.zeros_like(p.data) + + exp_avg, exp_avg_sq, tau, max_exp_avg_sq = \ + state['exp_avg'], state['exp_avg_sq'], state['tau'], state['max_exp_avg_sq'] + + beta1, beta2 = group['betas'] + + state['step'] += 1 + + # Decay the first and second moment running average coefficient + exp_avg.data = beta1 * tau + (1 - beta1) * grad + exp_avg_sq.mul_(beta2).add_(1 - beta2, self.manifold.metric_tensor(p, grad, grad)) + th.max(max_exp_avg_sq, exp_avg_sq, out=max_exp_avg_sq) + # Use the max. for normalizing running avg. of gradient + denom = max_exp_avg_sq.sqrt().clamp_(min=group['eps']) + + step_size = group['lr'] + + p_original = p.clone() + before_proj = self.manifold.exp_map_x(p, (-step_size * exp_avg).div_(denom)) + p.data = self.manifold.normalize(before_proj) + tau.data = self.manifold.parallel_transport(p_original, p, exp_avg) + return loss diff --git a/HGNN/optimizer/rsgd.py b/HGNN/optimizer/rsgd.py new file mode 100644 index 0000000000000000000000000000000000000000..14da1fe8e2f72ae731947ea4ffab607626865c6b --- /dev/null +++ b/HGNN/optimizer/rsgd.py @@ -0,0 +1,43 @@ +import torch as th +from torch.optim.optimizer import Optimizer, required +from Ghypeddings.HGNN.utils import * +import os +import math + +class RiemannianSGD(Optimizer): + """Riemannian stochastic gradient descent. + Args: + params (iterable): iterable of parameters to optimize or dicts defining + parameter groups + rgrad (Function): Function to compute the Riemannian gradient from + an Euclidean gradient + retraction (Function): Function to update the parameters via a + retraction of the Riemannian gradient + lr (float): learning rate + """ + + def __init__(self, args, params, lr): + defaults = dict(lr=lr) + self.args = args + super(RiemannianSGD, self).__init__(params, defaults) + + def step(self, lr=None): + """ + Performs a single optimization step. + Arguments: + lr (float, optional): learning rate for the current update. 
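+        Roughly, each parameter is updated as p <- exp_p(-lr * rgrad(p, grad))
+        and then re-normalized back onto the manifold.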
+ """ + loss = None + + for group in self.param_groups: + for p in group['params']: + if p.grad is None: + continue + d_p = p.grad.data + d_p = self.args.manifold.rgrad(p, d_p) + if lr is None: + lr = group['lr'] + p.data = self.args.manifold.normalize( + self.args.manifold.exp_map_x(p, -lr * d_p) + ) + return loss diff --git a/HGNN/task/BaseTask.py b/HGNN/task/BaseTask.py new file mode 100644 index 0000000000000000000000000000000000000000..2486800e402e2e1bc8a5b44559f5c259b53b605c --- /dev/null +++ b/HGNN/task/BaseTask.py @@ -0,0 +1,43 @@ +import numpy as np +from Ghypeddings.HGNN.utils import * +import torch as th +import torch.nn as nn +from torch.utils.data import Dataset, DataLoader +import torch.optim as optim +import torch.distributed as dist +from torch.utils.data.distributed import DistributedSampler + +class BaseTask(object): + """ + A base class that supports loading datasets, early stop and reporting statistics + """ + def __init__(self, args, logger, criterion='max'): + """ + criterion: min/max + """ + self.args = args + self.logger = logger + self.early_stop = EarlyStoppingCriterion(self.args.patience, criterion) + + def reset_epoch_stats(self, epoch, prefix): + """ + prefix: train/dev/test + """ + self.epoch_stats = { + 'prefix': prefix, + 'epoch': epoch, + 'loss': 0, + 'num_correct': 0, + 'num_total': 0, + } + + def update_epoch_stats(self, loss, score, label, is_regression=False): + with th.no_grad(): + self.epoch_stats['loss'] += loss.item() + self.epoch_stats['num_total'] += label.size(0) + if not is_regression: + self.epoch_stats['num_correct'] += th.sum(th.eq(th.argmax(score, dim=1), label)).item() + + def report_best(self): + self.logger.info("best val %.6f" + % (self.early_stop.best_dev_score)) diff --git a/HGNN/task/NodeClassification.py b/HGNN/task/NodeClassification.py new file mode 100644 index 0000000000000000000000000000000000000000..9ef815069ee3d3c815c7dd217749dc310c9e65cf --- /dev/null +++ b/HGNN/task/NodeClassification.py @@ -0,0 +1,44 @@ +import torch as th +import torch.nn as nn +import torch.nn.functional as F +from Ghypeddings.HGNN.utils import * +from Ghypeddings.HGNN.hyperbolic_module.CentroidDistance import CentroidDistance + +class NodeClassification(nn.Module): + + def __init__(self, args, logger, rgnn, manifold): + super(NodeClassification, self).__init__() + self.args = args + self.logger = logger + self.manifold = manifold + self.c = nn.Parameter(th.Tensor([1.])) + + self.feature_linear = nn.Linear(self.args.input_dim, + self.args.dim + ) + nn_init(self.feature_linear, self.args.proj_init) + self.args.eucl_vars.append(self.feature_linear) + + self.distance = CentroidDistance(args, logger, manifold) + + self.rgnn = rgnn + self.output_linear = nn.Linear(self.args.num_centroid, + self.args.num_class + ) + nn_init(self.output_linear, self.args.proj_init) + self.args.eucl_vars.append(self.output_linear) + + self.log_softmax = nn.LogSoftmax(dim=1) + self.activation = get_activation(self.args) + + def forward(self, adj, weight, features): + + adj, weight, features = adj.squeeze(0), weight.squeeze(0), features.squeeze(0) + node_repr = self.activation(self.feature_linear(features)) + assert th.isnan(node_repr).any().item() == False + mask = th.ones((self.args.node_num, 1)).cuda() # [node_num, 1] + node_repr = self.rgnn(node_repr, adj, weight, mask) # [node_num, embed_size] + + _, node_centroid_sim = self.distance(node_repr, mask) # [1, node_num, num_centroid] + class_logit = self.output_linear(node_centroid_sim.squeeze()) + return 
self.log_softmax(class_logit) , node_repr \ No newline at end of file diff --git a/HGNN/task/NodeClassificationTask.py b/HGNN/task/NodeClassificationTask.py new file mode 100644 index 0000000000000000000000000000000000000000..7eeca46314e5c6d39b453a60bd1c02fbf021e685 --- /dev/null +++ b/HGNN/task/NodeClassificationTask.py @@ -0,0 +1,137 @@ +import torch as th +import torch.nn as nn +import torch.nn.functional as F +from Ghypeddings.HGNN.utils import * +from torch.utils.data import DataLoader +import torch.optim as optim +from Ghypeddings.HGNN.task.BaseTask import BaseTask +import numpy as np +from Ghypeddings.HGNN.dataset.NodeClassificationDataset import NodeClassificationDataset +from Ghypeddings.HGNN.task.NodeClassification import NodeClassification +import time +from sklearn.metrics import roc_auc_score,accuracy_score,f1_score,precision_score,recall_score + +def cross_entropy(log_prob, label, mask): + label, mask = label.squeeze(), mask.squeeze() + negative_log_prob = -th.sum(label * log_prob, dim=1) + return th.sum(mask * negative_log_prob, dim=0) / th.sum(mask) + +def get_accuracy(label, log_prob, mask): + lab = label.clone() + lab = lab.squeeze() + mask_copy = mask.clone().cpu().numpy()[0].astype(np.bool_) + pred_class = th.argmax(log_prob, dim=1).cpu().numpy()[mask_copy] + real_class = th.argmax(lab, dim=1).cpu().numpy()[mask_copy] + acc= accuracy_score(y_true=real_class,y_pred=pred_class) + f1= f1_score(y_true=real_class,y_pred=pred_class) + recall= recall_score(y_true=real_class,y_pred=pred_class) + precision= precision_score(y_true=real_class,y_pred=pred_class) + print(np.sum(real_class) , np.sum(pred_class)) + roc_auc = roc_auc_score(real_class,pred_class) + return acc,f1,recall,precision,roc_auc + +class NodeClassificationTask(BaseTask): + + def __init__(self, args, logger, rgnn, manifold,adj,features,labels): + super(NodeClassificationTask, self).__init__(args, logger, criterion='max') + self.args = args + self.logger = logger + self.manifold = manifold + self.hyperbolic = True + self.rgnn = rgnn + self.loader = self.process_data(adj,features,labels) + self.model = NodeClassification(self.args, self.logger, self.rgnn, self.manifold).cuda() + self.loss_function = cross_entropy + + def forward(self, model, sample, loss_function): + scores , embeddings = model( + sample['adj'].cuda().long(), + sample['weight'].cuda().float(), + sample['features'].cuda().float(), + ) + loss = loss_function(scores, + sample['y_train'].cuda().float(), + sample['train_mask'].cuda().float()) + return scores, loss , embeddings + + def run_gnn(self): + loader = self.loader + model = self.model + loss_function = self.loss_function + + self.args.manifold = self.manifold + optimizer, lr_scheduler, hyperbolic_optimizer, hyperbolic_lr_scheduler = \ + set_up_optimizer_scheduler(self.hyperbolic, self.args, model,self.manifold) + self.labels = None + + best_losses = [] + train_losses = [] + val_losses = [] + + t_total = time.time() + for epoch in range(self.args.epochs): + model.train() + for i, sample in enumerate(loader): + model.zero_grad() + scores, loss , embeddings = self.forward(model, sample, loss_function) + loss.backward() + if self.args.grad_clip > 0.0: + th.nn.utils.clip_grad_norm_(model.parameters(), self.args.grad_clip) + optimizer.step() + if self.hyperbolic and len(self.args.hyp_vars) != 0: + hyperbolic_optimizer.step() + self.labels = sample['y_train'] + accuracy,f1,recall,precision,roc_auc = get_accuracy( + sample['y_train'].cuda().float(), + scores, + sample['train_mask'].cuda().float()) + + 
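+            # keep the raw per-epoch training loss and, alongside it, the running
+            # minimum so the returned 'best' curve is monotonically non-increasing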
train_losses.append(loss.item()) + if(len(best_losses) == 0): + best_losses.append(train_losses[0]) + elif (best_losses[-1] > train_losses[-1]): + best_losses.append(train_losses[-1]) + else: + best_losses.append(best_losses[-1]) + + if (epoch + 1) % self.args.log_freq == 0: + self.logger.info("%s epoch %d: accuracy %.4f f1 %.4f recall %.4f precision %.4f roc_auc %.4f loss: %.4f \n" % ( + 'train', + epoch, + accuracy,f1,recall,precision,roc_auc,loss.item())) + + dev_loss, accuracy ,f1,recall,precision,roc_auc = self.evaluate(loader, 'val', model, loss_function) + val_losses.append(dev_loss) + lr_scheduler.step() + + if self.hyperbolic and len(self.args.hyp_vars) != 0: + hyperbolic_lr_scheduler.step() + if not self.early_stop.step(dev_loss, epoch , embeddings): + break + + self.logger.info("Training Finished!") + self.logger.info("Total time elapsed: {:.4f}s".format(time.time() - t_total)) + + return {'train':train_losses,'best':best_losses,'val':val_losses}, accuracy,f1,recall,precision,roc_auc,time.time() - t_total + + def evaluate(self, data_loader, prefix, model, loss_function): + model.eval() + with th.no_grad(): + for i, sample in enumerate(data_loader): + scores, loss , _ = self.forward(model, sample, loss_function) + if prefix == 'val': + accuracy,f1,recall,precision,roc_auc = get_accuracy( + sample['y_val'].cuda().float(), + scores, + sample['val_mask'].cuda().float()) + elif prefix == 'test': + accuracy,f1,recall,precision,roc_auc = get_accuracy( + sample['y_test'].cuda().float(), + scores, + sample['test_mask'].cuda().float()) + + return loss.item(), accuracy,f1,recall,precision,roc_auc + + def process_data(self,adj,features,labels): + dataset = NodeClassificationDataset(self.args, self.logger,adj,features,labels) + return DataLoader(dataset, batch_size=1, shuffle=False, num_workers=0) diff --git a/HGNN/task/__init__.py b/HGNN/task/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e4bd57382df4059527d0f60eeb0b175ad5c5d2f1 --- /dev/null +++ b/HGNN/task/__init__.py @@ -0,0 +1 @@ +from Ghypeddings.HGNN.task.NodeClassificationTask import * \ No newline at end of file diff --git a/HGNN/utils/EarlyStoppingCriterion.py b/HGNN/utils/EarlyStoppingCriterion.py new file mode 100644 index 0000000000000000000000000000000000000000..7e57381bd8bce016e7d9d9e0f6739e725d7dc521 --- /dev/null +++ b/HGNN/utils/EarlyStoppingCriterion.py @@ -0,0 +1,51 @@ +class EarlyStoppingCriterion(object): + """ + Arguments: + patience (int): The maximum number of epochs with no improvement before early stopping should take place + mode (str, can only be 'max' or 'min'): To take the maximum or minimum of the score for optimization + min_delta (float, optional): Minimum change in the score to qualify as an improvement (default: 0.0) + """ + + def __init__(self, patience, mode, min_delta=0.0): + assert patience >= 0 + assert mode in {'min', 'max'} + assert min_delta >= 0.0 + self.patience = patience + self.mode = mode + self.min_delta = min_delta + + self._count = 0 + self.best_dev_score = None + self.best_epoch = None + self.is_improved = None + self.best_emb = None + + def step(self, cur_dev_score, epoch , embeddings): + """ + Checks if training should be continued given the current score. 
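+        Also records the best score seen so far and the embeddings at that
+        epoch; training is stopped once no improvement has been observed for
+        more than `patience` consecutive calls.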
+ + Arguments: + cur_dev_score (float): the current development score + cur_test_score (float): the current test score + Output: + bool: if training should be continued + """ + if self.best_dev_score is None: + self.best_dev_score = cur_dev_score + self.best_epoch = epoch + self.best_emb = embeddings + return True + else: + if self.mode == 'max': + self.is_improved = (cur_dev_score > self.best_dev_score + self.min_delta) + else: + self.is_improved = (cur_dev_score < self.best_dev_score - self.min_delta) + + if self.is_improved: + self._count = 0 + self.best_dev_score = cur_dev_score + self.best_epoch = epoch + self.best_emb = embeddings + else: + self._count += 1 + return self._count <= self.patience diff --git a/HGNN/utils/__init__.py b/HGNN/utils/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..b3da1a96d729c4cfe3eb9edc1a122debcdd215aa --- /dev/null +++ b/HGNN/utils/__init__.py @@ -0,0 +1,3 @@ +from Ghypeddings.HGNN.utils.utils import * +from Ghypeddings.HGNN.utils.EarlyStoppingCriterion import EarlyStoppingCriterion +from Ghypeddings.HGNN.utils.logger import * diff --git a/HGNN/utils/logger.py b/HGNN/utils/logger.py new file mode 100644 index 0000000000000000000000000000000000000000..6f55e772da8da3784679cb1499dcdf6a13368759 --- /dev/null +++ b/HGNN/utils/logger.py @@ -0,0 +1,54 @@ +import logging +import time +from datetime import timedelta +from Ghypeddings.HGNN.utils import make_dir + +class LogFormatter(): + + def __init__(self): + self.start_time = time.time() + + def format(self, record): + elapsed_seconds = round(record.created - self.start_time) + + prefix = "%s - %s - %s" % ( + record.levelname, + time.strftime('%x %X'), + timedelta(seconds=elapsed_seconds) + ) + message = record.getMessage() + message = message.replace('\n', '\n' + ' ' * (len(prefix) + 3)) + return "%s - %s" % (prefix, message) + +def create_logger(): + """ + Create a logger. 
+ """ + #make_dir('log') + # create log formatter + log_formatter = LogFormatter() + + # create file handler and set level to debug + # file_handler = logging.FileHandler(filepath, "a") + # file_handler.setLevel(logging.DEBUG) + # file_handler.setFormatter(log_formatter) + + # create console handler and set level to info + console_handler = logging.StreamHandler() + console_handler.setLevel(logging.INFO) + console_handler.setFormatter(log_formatter) + + # create logger and set level to debug + logger = logging.getLogger() + logger.handlers = [] + logger.setLevel(logging.DEBUG) + logger.propagate = False + #logger.addHandler(file_handler) + logger.addHandler(console_handler) + + # reset logger elapsed time + def reset_time(): + log_formatter.start_time = time.time() + logger.reset_time = reset_time + + return logger diff --git a/HGNN/utils/utils.py b/HGNN/utils/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..cae6a571d4b2491d1b025926a4b47d2b2fb481e6 --- /dev/null +++ b/HGNN/utils/utils.py @@ -0,0 +1,284 @@ +from collections import defaultdict +import os +import pickle +import json +import torch.nn as nn +import torch as th +import torch.optim as optim +import numpy as np +import random +from Ghypeddings.HGNN.optimizer.ramsgrad import RiemannianAMSGrad +from Ghypeddings.HGNN.optimizer.rsgd import RiemannianSGD +import math +import subprocess +import argparse + +def str2bool(v): + return v.lower() == "true" + +def make_dir(path): + if not os.path.exists(path): + try: + os.mkdir(path) + except: + pass + +def pickle_dump(file_name, content): + with open(file_name, 'wb') as out_file: + pickle.dump(content, out_file, pickle.HIGHEST_PROTOCOL) + +def pickle_load(file_name): + with open(file_name, 'rb') as f: + return pickle.load(f) + +def init_weight(weight, method): + """ + Initialize parameters + Args: + weight: a Parameter object + method: initialization method + """ + if method == 'orthogonal': + nn.init.orthogonal_(weight) + elif method == 'xavier': + nn.init.xavier_uniform_(weight) + elif method == 'kaiming': + nn.init.kaiming_uniform_(weight) + elif method == 'none': + pass + else: + raise Exception('Unknown init method') + + +def nn_init(nn_module, method='orthogonal'): + """ + Initialize a Sequential or Module object + Args: + nn_module: Sequential or Module + method: initialization method + """ + if method == 'none': + return + for param_name, _ in nn_module.named_parameters(): + if isinstance(nn_module, nn.Sequential): + # for a Sequential object, the param_name contains both id and param name + i, name = param_name.split('.', 1) + param = getattr(nn_module[int(i)], name) + else: + param = getattr(nn_module, param_name) + if param_name.find('weight') > -1: + init_weight(param, method) + elif param_name.find('bias') > -1: + nn.init.uniform_(param, -1e-4, 1e-4) + +class NoneScheduler: + def step(self): + pass + +def get_lr_scheduler(args, optimizer): + if args.lr_scheduler == 'exponential': + return optim.lr_scheduler.ExponentialLR(optimizer, gamma=args.lr_gamma) + elif args.lr_scheduler == 'cosine': + return optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=30, eta_min=0) + elif args.lr_scheduler == 'cycle': + return optim.lr_scheduler.CyclicLR(optimizer, 0, max_lr=args.lr, step_size_up=20, cycle_momentum=False) + elif args.lr_scheduler == 'none': + return NoneScheduler() + +def get_optimizer(args, params): + if args.optimizer == 'sgd': + optimizer = optim.SGD(params, lr=args.lr, weight_decay=args.weight_decay) + elif args.optimizer == 'adam': + optimizer 
= optim.Adam(params, lr=args.lr, weight_decay=args.weight_decay) + elif args.optimizer == 'amsgrad': + optimizer = optim.Adam(params, lr=args.lr, amsgrad=True, weight_decay=args.weight_decay) + return optimizer + +def get_hyperbolic_optimizer(args, manifold,params): + if args.hyper_optimizer == 'rsgd': + optimizer = RiemannianSGD( + args, + params, + lr=args.lr_hyperbolic, + ) + elif args.hyper_optimizer == 'ramsgrad': + optimizer = RiemannianAMSGrad( + args, + manifold, + params, + lr=args.lr_hyperbolic, + ) + else: + print("unsupported hyper optimizer") + exit(1) + return optimizer + +def set_seed(seed): + """ + Set the random seed + """ + random.seed(seed) + np.random.seed(seed) + th.manual_seed(seed) + th.cuda.manual_seed(seed) + th.cuda.manual_seed_all(seed) + +def pad_sequence(data_list, maxlen, value=0): + return [row + [value] * (maxlen - len(row)) for row in data_list] + +def normalize_weight(adj_mat, weight): + degree = [1 / math.sqrt(sum(np.abs(w))) for w in weight] + for dst in range(len(adj_mat)): + for src_idx in range(len(adj_mat[dst])): + src = adj_mat[dst][src_idx] + weight[dst][src_idx] = degree[dst] * weight[dst][src_idx] * degree[src] + +def set_up_distributed_training_multi_gpu(args): + #args.device_id = args.local_rank + args.device_id = 0 + th.cuda.set_device(args.device_id) + args.distributed_rank = args.device_id + th.distributed.init_process_group(backend='nccl', + init_method='env://') + +def save_model_weights(args, model, path): + """ + save model weights out to file + """ + if args.distributed_rank == 0: + make_dir(path) + th.save(model.state_dict(), os.path.join(path, args.name)) + +def load_model_weights(model, path): + """ + load saved weights + """ + model.load_state_dict(th.load(path)) + +def th_atanh(x, EPS): + values = th.min(x, th.Tensor([1.0 - EPS]).cuda()) + return 0.5 * (th.log(1 + values + EPS) - th.log(1 - values + EPS)) + +def th_norm(x, dim=1): + """ + Args + x: [batch size, dim] + Output: + [batch size, 1] + """ + if(len(x.shape) == 1): + x = x.unsqueeze(0) + return th.norm(x, 2, dim, keepdim=True) + +def th_dot(x, y, keepdim=True): + tmp = x*y + if(len(tmp.shape) == 1): + tmp = tmp.unsqueeze(0) + return th.sum(tmp, dim=1, keepdim=keepdim) + +def clip_by_norm(x, clip_norm): + return th.renorm(x, 2, 0, clip_norm) + +def get_params(params_list, vars_list): + """ + Add parameters in vars_list to param_list + """ + for i in vars_list: + if issubclass(i.__class__, nn.Module): + params_list.extend(list(i.parameters())) + elif issubclass(i.__class__, nn.Parameter): + params_list.append(i) + else: + print("Encounter unknown objects") + exit(1) + +def categorize_params(args): + """ + Categorize parameters into hyperbolic ones and euclidean ones + """ + hyperbolic_params, euclidean_params = [], [] + get_params(euclidean_params, args.eucl_vars) + get_params(hyperbolic_params, args.hyp_vars) + return hyperbolic_params, euclidean_params + +def get_activation(args): + if args.act == 'leaky_relu': + return nn.LeakyReLU(args.alpha) + elif args.act == 'rrelu': + return nn.RReLU() + elif args.act == 'relu': + return nn.ReLU() + elif args.act == 'elu': + return nn.ELU() + elif args.act == 'prelu': + return nn.PReLU() + elif args.act == 'selu': + return nn.SELU() + +def set_up_optimizer_scheduler(hyperbolic, args, model , manifold): + if hyperbolic: + hyperbolic_params, euclidean_params = categorize_params(args) + #assert(len(list(model.parameters())) == len(hyperbolic_params) + len(euclidean_params)) + optimizer = get_optimizer(args, euclidean_params) + 
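+        # Euclidean parameters use a standard optimizer/scheduler; the hyperbolic
+        # parameters (if any) get their own Riemannian optimizer right below, so
+        # that their updates stay on the manifold.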
lr_scheduler = get_lr_scheduler(args, optimizer) + if len(hyperbolic_params) > 0: + hyperbolic_optimizer = get_hyperbolic_optimizer(args,manifold, hyperbolic_params) + hyperbolic_lr_scheduler = get_lr_scheduler(args, hyperbolic_optimizer) + else: + hyperbolic_optimizer, hyperbolic_lr_scheduler = None, None + return optimizer, lr_scheduler, hyperbolic_optimizer, hyperbolic_lr_scheduler + else: + optimizer = get_optimizer(args, model.parameters()) + lr_scheduler = get_lr_scheduler(args, optimizer) + return optimizer, lr_scheduler, None, None + +# reimplement clamp functions to avoid killing gradient during backpropagation +def clamp_max(x, max_value): + t = th.clamp(max_value - x.detach(), max=0) + return x + t + +def clamp_min(x, min_value): + t = th.clamp(min_value - x.detach(), min=0) + return x + t + +def one_hot_vec(length, pos): + vec = [0] * length + vec[pos] = 1 + return vec + + +def create_args(*args): + parser = argparse.ArgumentParser() + parser.add_argument('--dim', type=int, default=args[0]) + parser.add_argument('--c', type=int, default=args[1]) + parser.add_argument('--num_layers', type=int, default=args[2]) + parser.add_argument('--bias', type=bool, default=args[3]) + parser.add_argument('--act', type=str, default=args[4]) + parser.add_argument('--alpha', type=float, default=args[5]) + parser.add_argument('--select_manifold', type=str, default=args[6]) + parser.add_argument('--num_centroid', type=int, default=args[7]) + parser.add_argument('--eucl_vars', nargs='+', default=args[8]) + parser.add_argument('--hyp_vars', nargs='+', default=args[9]) + parser.add_argument('--grad_clip', type=float, default=args[10]) + parser.add_argument('--optimizer', type=str, default=args[11]) + parser.add_argument('--weight_decay', type=float, default=args[12]) + parser.add_argument('--lr', type=float, default=args[13]) + parser.add_argument('--lr_scheduler', type=str, default=args[14]) + parser.add_argument('--lr_gamma', type=float, default=args[15]) + parser.add_argument('--lr_hyperbolic', type=float, default=args[16]) + parser.add_argument('--hyper_optimizer', type=str, default=args[17]) + parser.add_argument('--proj_init', type=str, default=args[18]) + parser.add_argument('--tie_weight', type=bool, default=args[19]) + parser.add_argument('--epochs', type=int, default=args[20]) + parser.add_argument('--patience', type=int, default=args[21]) + parser.add_argument('--seed', type=int, default=args[22]) + parser.add_argument('--log_freq', type=int, default=args[23]) + parser.add_argument('--eval_freq', type=int, default=args[24]) + parser.add_argument('--val_prop', type=float, default=args[25]) + parser.add_argument('--test_prop', type=float, default=args[26]) + parser.add_argument('--double_precision', type=int, default=args[27]) + parser.add_argument('--dropout', type=float, default=args[28]) + parser.add_argument('--normalize_adj', type=bool, default=args[29]) + parser.add_argument('--normalize_feats', type=bool, default=args[30]) + flags, unknown = parser.parse_known_args() + return flags \ No newline at end of file diff --git a/PVAE/__init__.py b/PVAE/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/PVAE/distributions/__init__.py b/PVAE/distributions/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..360ae4e2832bd8984779dd74f26bce9f9df9266a --- /dev/null +++ b/PVAE/distributions/__init__.py @@ -0,0 +1,4 @@ +from Ghypeddings.PVAE.distributions.riemannian_normal import 
RiemannianNormal +from Ghypeddings.PVAE.distributions.hyperbolic_radius import HyperbolicRadius +from Ghypeddings.PVAE.distributions.wrapped_normal import WrappedNormal +from Ghypeddings.PVAE.distributions.hyperspherical_uniform import HypersphericalUniform diff --git a/PVAE/distributions/ars.py b/PVAE/distributions/ars.py new file mode 100644 index 0000000000000000000000000000000000000000..cdd7e7253c2aaf7590f1ee4368f41de55430eaac --- /dev/null +++ b/PVAE/distributions/ars.py @@ -0,0 +1,135 @@ +import torch + +infty = torch.tensor(float('Inf')) + +def diff(x): + return x[:, 1:] - x[:, :-1] + +class ARS(): + ''' + This class implements the Adaptive Rejection Sampling technique of Gilks and Wild '92. + Where possible, naming convention has been borrowed from this paper. + The PDF must be log-concave. + Currently does not exploit lower hull described in paper- which is fine for drawing + only small amount of samples at a time. + ''' + + def __init__(self, logpdf, grad_logpdf, device, xi, lb=-infty, ub=infty, use_lower=False, ns=50, **fargs): + ''' + initialize the upper (and if needed lower) hulls with the specified params + + Parameters + ========== + f: function that computes log(f(u,...)), for given u, where f(u) is proportional to the + density we want to sample from + fprima: d/du log(f(u,...)) + xi: ordered vector of starting points in wich log(f(u,...) is defined + to initialize the hulls + use_lower: True means the lower sqeezing will be used; which is more efficient + for drawing large numbers of samples + + + lb: lower bound of the domain + ub: upper bound of the domain + ns: maximum number of points defining the hulls + fargs: arguments for f and fprima + ''' + self.device = device + + self.lb = lb + self.ub = ub + + self.logpdf = logpdf + self.grad_logpdf = grad_logpdf + self.fargs = fargs + + #set limit on how many points to maintain on hull + self.ns = ns + self.xi = xi.to(self.device) # initialize x, the vector of absicassae at which the function h has been evaluated + self.B, self.K = self.xi.size() # hull size + self.h = torch.zeros(self.B, ns).to(self.device) + self.hprime = torch.zeros(self.B, ns).to(self.device) + self.x = torch.zeros(self.B, ns).to(self.device) + self.h[:, :self.K] = self.logpdf(self.xi, **self.fargs) + self.hprime[:, :self.K] = self.grad_logpdf(self.xi, **self.fargs) + self.x[:, :self.K] = self.xi + # Avoid under/overflow errors. the envelope and pdf are only + # proportional to the true pdf, so can choose any constant of proportionality. + self.offset = self.h.max(-1)[0].view(-1, 1) + self.h = self.h - self.offset + + # Derivative at first point in xi must be > 0 + # Derivative at last point in xi must be < 0 + if not (self.hprime[:, 0] > 0).all(): raise IOError('initial anchor points must span mode of PDF (left)') + if not (self.hprime[:, self.K-1] < 0).all(): raise IOError('initial anchor points must span mode of PDF (right)') + self.insert() + + + def sample(self, shape=torch.Size()): + ''' + Draw N samples and update upper and lower hulls accordingly + ''' + shape = shape if isinstance(shape, torch.Size) else torch.Size([shape]) + samples = torch.ones(self.B, *shape).to(self.device) + bool_mask = (torch.ones(self.B, *shape) == 1).to(self.device) + count = 0 + while bool_mask.sum() != 0: + count += 1 + xt, i = self.sampleUpper(shape) + ht = self.logpdf(xt, **self.fargs) + # hprimet = self.grad_logpdf(xt, **self.fargs) + ht = ht - self.offset + ut = self.h.gather(1, i) + (xt - self.x.gather(1, i)) * self.hprime.gather(1, i) + + # Accept sample? 
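+            # a point xt drawn from the piecewise-exponential upper hull is
+            # accepted when a uniform draw u satisfies u < exp(ht - ut), i.e.
+            # with probability f(xt) / envelope(xt); rejected entries stay
+            # masked and are re-drawn on the next loop iteration.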
+ u = torch.rand(shape).to(self.device) + accept = u < torch.exp(ht - ut) + reject = ~accept + samples[bool_mask * accept] = xt[bool_mask * accept] + bool_mask[bool_mask * accept] = reject[bool_mask * accept] + # Update hull with new function evaluations + # if self.K < self.ns: + # nb_insert = self.ns - self.K + # self.insert(nb_insert, xt[:, :nb_insert], ht[:, :nb_insert], hprimet[:, :nb_insert]) + + return samples.t().unsqueeze(-1) + + + def insert(self, nbnew=0, xnew=None, hnew=None, hprimenew=None): + ''' + Update hulls with new point(s) if none given, just recalculate hull from existing x,h,hprime + # ''' + # if xnew is not None: + # self.x[:, self.K:self.K+nbnew] = xnew + # self.x, idx = self.x.sort() + # self.h[:, self.K:self.K+nbnew] = hnew + # self.h = self.h.gather(1, idx) + # self.hprime[:, self.K:self.K+nbnew] = hprimenew + # self.hprime = self.hprime.gather(1, idx) + + # self.K += xnew.size(-1) + + self.z = torch.zeros(self.B, self.K + 1).to(self.device) + self.z[:, 0] = self.lb; self.z[:, self.K] = self.ub + self.z[:, 1:self.K] = (diff(self.h[:, :self.K]) - diff(self.x[:, :self.K] * self.hprime[:, :self.K])) / -diff(self.hprime[:, :self.K]) + idx = [0]+list(range(self.K)) + self.u = self.h[:, idx] + self.hprime[:, idx] * (self.z-self.x[:, idx]) + + self.s = diff(torch.exp(self.u)) / self.hprime[:, :self.K] + self.s[self.hprime[:, :self.K] == 0.] = 0. # should be 0 when gradient is 0 + self.cs = torch.cat((torch.zeros(self.B, 1).to(self.device), torch.cumsum(self.s, dim=-1)), dim=-1) + self.cu = self.cs[:, -1] + + def sampleUpper(self, shape=torch.Size()): + ''' + Return a single value randomly sampled from the upper hull and index of segment + ''' + + u = torch.rand(self.B, *shape).to(self.device) + i = (self.cs/self.cu.unsqueeze(-1)).unsqueeze(-1) <= u.unsqueeze(1).expand(*self.cs.shape, *shape) + idx = i.sum(1) - 1 + + xt = self.x.gather(1, idx) + (-self.h.gather(1, idx) + torch.log(self.hprime.gather(1, idx)*(self.cu.unsqueeze(-1)*u - self.cs.gather(1, idx)) + + torch.exp(self.u.gather(1, idx)))) / self.hprime.gather(1, idx) + + return xt, idx diff --git a/PVAE/distributions/hyperbolic_radius.py b/PVAE/distributions/hyperbolic_radius.py new file mode 100644 index 0000000000000000000000000000000000000000..cf559a831fb0a963da2fe8f045a36ead6313abaf --- /dev/null +++ b/PVAE/distributions/hyperbolic_radius.py @@ -0,0 +1,295 @@ +import math +import torch +from torch.autograd import Function, grad +import torch.distributions as dist +from Ghypeddings.PVAE.utils import Constants, logsinh, log_sum_exp_signs, rexpand +from numbers import Number +from Ghypeddings.PVAE.distributions.ars import ARS + + +def cdf_r(value, scale, c, dim): + value = value.double() + scale = scale.double() + c = c.double() + + if dim == 2: + return 1 / torch.erf(c.sqrt() * scale / math.sqrt(2)) * .5 * \ + (2 * torch.erf(c.sqrt() * scale / math.sqrt(2)) + torch.erf((value - c.sqrt() * scale.pow(2)) / math.sqrt(2) / scale) - \ + torch.erf((c.sqrt() * scale.pow(2) + value) / math.sqrt(2) / scale)) + else: + device = value.device + + k_float = rexpand(torch.arange(dim), *value.size()).double().to(device) + dim = torch.tensor(dim).to(device).double() + + s1 = torch.lgamma(dim) - torch.lgamma(k_float + 1) - torch.lgamma(dim - k_float) \ + + (dim - 1 - 2 * k_float).pow(2) * c * scale.pow(2) / 2 \ + + torch.log( \ + torch.erf((value - (dim - 1 - 2 * k_float) * c.sqrt() * scale.pow(2)) / scale / math.sqrt(2)) \ + + torch.erf((dim - 1 - 2 * k_float) * c.sqrt() * scale / math.sqrt(2)) \ + ) + s2 = torch.lgamma(dim) - 
torch.lgamma(k_float + 1) - torch.lgamma(dim - k_float) \ + + (dim - 1 - 2 * k_float).pow(2) * c * scale.pow(2) / 2 \ + + torch.log1p(torch.erf((dim - 1 - 2 * k_float) * c.sqrt() * scale / math.sqrt(2))) + + signs = torch.tensor([1., -1.]).double().to(device).repeat(((int(dim)+1) // 2)*2)[:int(dim)] + signs = rexpand(signs, *value.size()) + + S1 = log_sum_exp_signs(s1, signs, dim=0) + S2 = log_sum_exp_signs(s2, signs, dim=0) + + output = torch.exp(S1 - S2) + zero_value_idx = value == 0. + output[zero_value_idx] = 0. + return output.float() + + +def grad_cdf_value_scale(value, scale, c, dim): + device = value.device + + dim = torch.tensor(int(dim)).to(device).double() + + signs = torch.tensor([1., -1.]).double().to(device).repeat(((int(dim)+1) // 2)*2)[:int(dim)] + signs = rexpand(signs, *value.size()) + k_float = rexpand(torch.arange(dim), *value.size()).double().to(device) + + log_arg1 = (dim - 1 - 2 * k_float).pow(2) * c * scale * \ + (\ + torch.erf((value - (dim - 1 - 2 * k_float) * c.sqrt() * scale.pow(2)) / scale / math.sqrt(2)) \ + + torch.erf((dim - 1 - 2 * k_float) * c.sqrt() * scale / math.sqrt(2)) \ + ) + + log_arg2 = math.sqrt(2 / math.pi) * ( \ + (dim - 1 - 2 * k_float) * c.sqrt() * torch.exp(-(dim - 1 - 2 * k_float).pow(2) * c * scale.pow(2) / 2) \ + - ((value / scale.pow(2) + (dim - 1 - 2 * k_float) * c.sqrt()) * torch.exp(-(value - (dim - 1 - 2 * k_float) * c.sqrt() * scale.pow(2)).pow(2) / (2 * scale.pow(2)))) \ + ) + + log_arg = log_arg1 + log_arg2 + sign_log_arg = torch.sign(log_arg) + + s = torch.lgamma(dim) - torch.lgamma(k_float + 1) - torch.lgamma(dim - k_float) \ + + (dim - 1 - 2 * k_float).pow(2) * c * scale.pow(2) / 2 \ + + torch.log(sign_log_arg * log_arg) + + log_grad_sum_sigma = log_sum_exp_signs(s, signs * sign_log_arg, dim=0) + grad_sum_sigma = torch.sum(signs * sign_log_arg * torch.exp(s), dim=0) + + s1 = torch.lgamma(dim) - torch.lgamma(k_float + 1) - torch.lgamma(dim - k_float) \ + + (dim - 1 - 2 * k_float).pow(2) * c * scale.pow(2) / 2 \ + + torch.log( \ + torch.erf((value - (dim - 1 - 2 * k_float) * c.sqrt() * scale.pow(2)) / scale / math.sqrt(2)) \ + + torch.erf((dim - 1 - 2 * k_float) * c.sqrt() * scale / math.sqrt(2)) \ + ) + + S1 = log_sum_exp_signs(s1, signs, dim=0) + grad_log_cdf_scale = grad_sum_sigma / S1.exp() + log_unormalised_prob = - value.pow(2) / (2 * scale.pow(2)) + (dim - 1) * logsinh(c.sqrt() * value) - (dim - 1) / 2 * c.log() + + with torch.autograd.enable_grad(): + scale = scale.float() + logZ = _log_normalizer_closed_grad.apply(scale, c, dim) + grad_logZ_scale = grad(logZ, scale, grad_outputs=torch.ones_like(scale)) + + grad_log_cdf_scale = - grad_logZ_scale[0] + 1 / scale + grad_log_cdf_scale.float() + cdf = cdf_r(value.double(), scale.double(), c.double(), int(dim)).float().squeeze(0) + grad_scale = cdf * grad_log_cdf_scale + + grad_value = (log_unormalised_prob.float() - logZ).exp() + return grad_value, grad_scale + + +class _log_normalizer_closed_grad(Function): + @staticmethod + def forward(ctx, scale, c, dim): + scale = scale.double() + c = c.double() + ctx.scale = scale.clone().detach() + ctx.c = c.clone().detach() + ctx.dim = dim + + device = scale.device + output = .5 * (Constants.logpi - Constants.log2) + scale.log() -(int(dim) - 1) * (c.log() / 2 + Constants.log2) + dim = torch.tensor(int(dim)).to(device).double() + + k_float = rexpand(torch.arange(int(dim)), *scale.size()).double().to(device) + s = torch.lgamma(dim) - torch.lgamma(k_float + 1) - torch.lgamma(dim - k_float) \ + + (dim - 1 - 2 * k_float).pow(2) * c * 
scale.pow(2) / 2 \ + + torch.log1p(torch.erf((dim - 1 - 2 * k_float) * c.sqrt() * scale / math.sqrt(2))) + signs = torch.tensor([1., -1.]).double().to(device).repeat(((int(dim)+1) // 2)*2)[:int(dim)] + signs = rexpand(signs, *scale.size()) + ctx.log_sum_term = log_sum_exp_signs(s, signs, dim=0) + output = output + ctx.log_sum_term + + return output.float() + + @staticmethod + def backward(ctx, grad_output): + grad_input = grad_output.clone() + + device = grad_input.device + scale = ctx.scale + c = ctx.c + dim = torch.tensor(int(ctx.dim)).to(device).double() + + k_float = rexpand(torch.arange(int(dim)), *scale.size()).double().to(device) + signs = torch.tensor([1., -1.]).double().to(device).repeat(((int(dim)+1) // 2)*2)[:int(dim)] + signs = rexpand(signs, *scale.size()) + + log_arg = (dim - 1 - 2 * k_float).pow(2) * c * scale * (1+torch.erf((dim - 1 - 2 * k_float) * c.sqrt() * scale / math.sqrt(2))) + \ + torch.exp(-(dim - 1 - 2 * k_float).pow(2) * c * scale.pow(2) / 2) * 2 / math.sqrt(math.pi) * (dim - 1 - 2 * k_float) * c.sqrt() / math.sqrt(2) + log_arg_signs = torch.sign(log_arg) + s = torch.lgamma(dim) - torch.lgamma(k_float + 1) - torch.lgamma(dim - k_float) \ + + (dim - 1 - 2 * k_float).pow(2) * c * scale.pow(2) / 2 \ + + torch.log(log_arg_signs * log_arg) + log_grad_sum_sigma = log_sum_exp_signs(s, log_arg_signs * signs, dim=0) + + grad_scale = torch.exp(log_grad_sum_sigma - ctx.log_sum_term) + grad_scale = 1 / ctx.scale + grad_scale + + grad_scale = (grad_input * grad_scale.float()).view(-1, *grad_input.shape).sum(0) + return (grad_scale, None, None) + + +class impl_rsample(Function): + @staticmethod + def forward(ctx, value, scale, c, dim): + ctx.scale = scale.clone().detach().double().requires_grad_(True) + ctx.value = value.clone().detach().double().requires_grad_(True) + ctx.c = c.clone().detach().double().requires_grad_(True) + ctx.dim = dim + return value + + @staticmethod + def backward(ctx, grad_output): + grad_input = grad_output.clone() + grad_cdf_value, grad_cdf_scale = grad_cdf_value_scale(ctx.value, ctx.scale, ctx.c, ctx.dim) + assert not torch.isnan(grad_cdf_value).any() + assert not torch.isnan(grad_cdf_scale).any() + grad_value_scale = -(grad_cdf_value).pow(-1) * grad_cdf_scale.expand(grad_input.shape) + grad_scale = (grad_input * grad_value_scale).view(-1, *grad_cdf_scale.shape).sum(0) + # grad_value_c = -(grad_cdf_value).pow(-1) * grad_cdf_c.expand(grad_input.shape) + # grad_c = (grad_input * grad_value_c).view(-1, *grad_cdf_c.shape).sum(0) + return (None, grad_scale, None, None) + + +class HyperbolicRadius(dist.Distribution): + support = dist.constraints.positive + has_rsample = True + + def __init__(self, dim, c, scale, ars=True, validate_args=None): + self.dim = dim + self.c = c + self.scale = scale + self.device = scale.device + self.ars = ars + if isinstance(scale, Number): + batch_shape = torch.Size() + else: + batch_shape = self.scale.size() + self.log_normalizer = self._log_normalizer() + if torch.isnan(self.log_normalizer).any() or torch.isinf(self.log_normalizer).any(): + print('nan or inf in log_normalizer', torch.cat((self.log_normalizer, self.scale), dim=1)) + raise + super(HyperbolicRadius, self).__init__(batch_shape) + + def rsample(self, sample_shape=torch.Size()): + value = self.sample(sample_shape) + return impl_rsample.apply(value, self.scale, self.c, self.dim) + + def sample(self, sample_shape=torch.Size()): + if sample_shape == torch.Size(): sample_shape=torch.Size([1]) + with torch.no_grad(): + mean = self.mean + stddev = self.stddev + if 
torch.isnan(stddev).any(): stddev[torch.isnan(stddev)] = self.scale[torch.isnan(stddev)] + if torch.isnan(mean).any(): mean[torch.isnan(mean)] = ((self.dim - 1) * self.scale.pow(2) * self.c.sqrt())[torch.isnan(mean)] + steps = torch.linspace(0.1, 3, 10).to(self.device) + steps = torch.cat((-steps.flip(0), steps)) + xi = [mean + s * torch.min(stddev, .95 * mean / 3) for s in steps] + xi = torch.cat(xi, dim=1) + ars = ARS(self.log_prob, self.grad_log_prob, self.device, xi=xi, ns=20, lb=0) + value = ars.sample(sample_shape) + return value + + def __while_loop(self, logM, proposal, sample_shape): + shape = self._extended_shape(sample_shape) + r, bool_mask = torch.ones(shape).to(self.device), (torch.ones(shape) == 1).to(self.device) + count = 0 + while bool_mask.sum() != 0: + count += 1 + r_ = proposal.sample(sample_shape).to(self.device) + u = torch.rand(shape).to(self.device) + log_ratio = self.log_prob(r_) - proposal.log_prob(r_) - logM + accept = log_ratio > torch.log(u) + reject = 1 - accept + r[bool_mask * accept] = r_[bool_mask * accept] + bool_mask[bool_mask * accept] = reject[bool_mask * accept] + return r + + def log_prob(self, value): + res = - value.pow(2) / (2 * self.scale.pow(2)) + (self.dim - 1) * logsinh(self.c.sqrt() * value) \ + - (self.dim - 1) / 2 * self.c.log() - self.log_normalizer#.expand(value.shape) + assert not torch.isnan(res).any() + return res + + def grad_log_prob(self, value): + res = - value / self.scale.pow(2) + (self.dim - 1) * self.c.sqrt() * torch.cosh(self.c.sqrt() * value) / torch.sinh(self.c.sqrt() * value) + return res + + def cdf(self, value): + return cdf_r(value, self.scale, self.c, self.dim) + + @property + def mean(self): + c = self.c.double() + scale = self.scale.double() + dim = torch.tensor(int(self.dim)).double().to(self.device) + signs = torch.tensor([1., -1.]).double().to(self.device).repeat(((self.dim+1) // 2)*2)[:self.dim].unsqueeze(-1).unsqueeze(-1).expand(self.dim, *self.scale.size()) + + k_float = rexpand(torch.arange(self.dim), *self.scale.size()).double().to(self.device) + s2 = torch.lgamma(dim) - torch.lgamma(k_float + 1) - torch.lgamma(dim - k_float) \ + + (dim - 1 - 2 * k_float).pow(2) * c * scale.pow(2) / 2 \ + + torch.log1p(torch.erf((dim - 1 - 2 * k_float) * c.sqrt() * scale / math.sqrt(2))) + S2 = log_sum_exp_signs(s2, signs, dim=0) + + log_arg = (dim - 1 - 2 * k_float) * c.sqrt() * scale.pow(2) * (1 + torch.erf((dim - 1 - 2 * k_float) * c.sqrt() * scale / math.sqrt(2))) + \ + torch.exp(-(dim - 1 - 2 * k_float).pow(2) * c * scale.pow(2) / 2) * scale * math.sqrt(2 / math.pi) + log_arg_signs = torch.sign(log_arg) + s1 = torch.lgamma(dim) - torch.lgamma(k_float + 1) - torch.lgamma(dim - k_float) \ + + (dim - 1 - 2 * k_float).pow(2) * c * scale.pow(2) / 2 \ + + torch.log(log_arg_signs * log_arg) + S1 = log_sum_exp_signs(s1, signs * log_arg_signs, dim=0) + + output = torch.exp(S1 - S2) + return output.float() + + @property + def variance(self): + c = self.c.double() + scale = self.scale.double() + dim = torch.tensor(int(self.dim)).double().to(self.device) + signs = torch.tensor([1., -1.]).double().to(self.device).repeat(((int(dim)+1) // 2)*2)[:int(dim)].unsqueeze(-1).unsqueeze(-1).expand(int(dim), *self.scale.size()) + + k_float = rexpand(torch.arange(self.dim), *self.scale.size()).double().to(self.device) + s2 = torch.lgamma(dim) - torch.lgamma(k_float + 1) - torch.lgamma(dim - k_float) \ + + (dim - 1 - 2 * k_float).pow(2) * c * scale.pow(2) / 2 \ + + torch.log1p(torch.erf((dim - 1 - 2 * k_float) * c.sqrt() * scale / math.sqrt(2))) + 
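+        # the series terms alternate in sign, so they are combined in log-space
+        # with explicit signs (log_sum_exp_signs) to avoid overflow and cancellation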
S2 = log_sum_exp_signs(s2, signs, dim=0) + + log_arg = (1 + (dim - 1 - 2 * k_float).pow(2) * c * scale.pow(2)) * (1 + torch.erf((dim - 1 - 2 * k_float) * c.sqrt() * scale / math.sqrt(2))) + \ + (dim - 1 - 2 * k_float) * c.sqrt() * torch.exp(-(dim - 1 - 2 * k_float).pow(2) * c * scale.pow(2) / 2) * scale * math.sqrt(2 / math.pi) + log_arg_signs = torch.sign(log_arg) + s1 = torch.lgamma(dim) - torch.lgamma(k_float + 1) - torch.lgamma(dim - k_float) \ + + (dim - 1 - 2 * k_float).pow(2) * c * scale.pow(2) / 2 \ + + 2 * scale.log() \ + + torch.log(log_arg_signs * log_arg) + S1 = log_sum_exp_signs(s1, signs * log_arg_signs, dim=0) + + output = torch.exp(S1 - S2) + output = output.float() - self.mean.pow(2) + return output + + @property + def stddev(self): return self.variance.sqrt() + + def _log_normalizer(self): return _log_normalizer_closed_grad.apply(self.scale, self.c, self.dim) diff --git a/PVAE/distributions/hyperspherical_uniform.py b/PVAE/distributions/hyperspherical_uniform.py new file mode 100644 index 0000000000000000000000000000000000000000..8a31f12840af77f161816e0c3b2cc8fdaede3020 --- /dev/null +++ b/PVAE/distributions/hyperspherical_uniform.py @@ -0,0 +1,42 @@ +import math +import torch +from torch.distributions.utils import _standard_normal + +class HypersphericalUniform(torch.distributions.Distribution): + """ source: https://github.com/nicola-decao/s-vae-pytorch/blob/master/hyperspherical_vae/distributions/von_mises_fisher.py """ + + support = torch.distributions.constraints.real + has_rsample = False + _mean_carrier_measure = 0 + + @property + def dim(self): + return self._dim + + def __init__(self, dim, device='cpu', validate_args=None): + super(HypersphericalUniform, self).__init__(torch.Size([dim]), validate_args=validate_args) + self._dim = dim + self._device = device + + def sample(self, shape=torch.Size()): + with torch.no_grad(): + return self.rsample(shape) + + def rsample(self, sample_shape=torch.Size()): + shape = torch.Size([*sample_shape, self._dim + 1]) + output = _standard_normal(shape, dtype=torch.float, device=self._device) + + return output / output.norm(dim=-1, keepdim=True) + + def entropy(self): + return self.__log_surface_area() + + def log_prob(self, x): + return - torch.ones(x.shape[:-1]).to(self._device) * self._log_normalizer() + + def _log_normalizer(self): + return self._log_surface_area().to(self._device) + + def _log_surface_area(self): + return math.log(2) + ((self._dim + 1) / 2) * math.log(math.pi) - torch.lgamma( + torch.Tensor([(self._dim + 1) / 2])) diff --git a/PVAE/distributions/riemannian_normal.py b/PVAE/distributions/riemannian_normal.py new file mode 100644 index 0000000000000000000000000000000000000000..ea59144a3c11fa2be735c4c581de0269d84948d8 --- /dev/null +++ b/PVAE/distributions/riemannian_normal.py @@ -0,0 +1,49 @@ +import torch +import torch.distributions as dist +from torch.distributions import constraints +from numbers import Number +from Ghypeddings.PVAE.distributions.hyperbolic_radius import HyperbolicRadius +from Ghypeddings.PVAE.distributions.hyperspherical_uniform import HypersphericalUniform + + +class RiemannianNormal(dist.Distribution): + arg_constraints = {'loc': dist.constraints.interval(-1, 1), 'scale': dist.constraints.positive} + support = dist.constraints.interval(-1, 1) + has_rsample = True + + @property + def mean(self): + return self.loc + + def __init__(self, loc, scale, manifold, validate_args=None): + assert not (torch.isnan(loc).any() or torch.isnan(scale).any()) + self.manifold = manifold + self.loc = loc + 
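As a quick aside, the normalise-a-Gaussian construction used by HypersphericalUniform.rsample above can be checked in a few lines (sizes below are arbitrary):

import torch
d = 3
v = torch.randn(1000, d)
directions = v / v.norm(dim=-1, keepdim=True)   # uniform directions on the unit sphere
print(directions.norm(dim=-1)[:3])              # all ~1.0
print(directions.mean(dim=0))                   # ~0 by symmetry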
self.manifold.assert_check_point_on_manifold(self.loc) + self.scale = scale.clamp(min=0.1, max=7.) + self.radius = HyperbolicRadius(manifold.dim, manifold.c, self.scale) + self.direction = HypersphericalUniform(manifold.dim - 1, device=loc.device) + if isinstance(loc, Number) and isinstance(scale, Number): + batch_shape = torch.Size() + else: + batch_shape = self.loc.size() + super(RiemannianNormal, self).__init__(batch_shape, validate_args=validate_args) + + def sample(self, shape=torch.Size()): + with torch.no_grad(): + return self.rsample(shape) + + def rsample(self, sample_shape=torch.Size()): + shape = self._extended_shape(sample_shape) + alpha = self.direction.sample(torch.Size([*shape[:-1]])) + radius = self.radius.rsample(sample_shape) + # u = radius * alpha / self.manifold.lambda_x(self.loc, keepdim=True) + # res = self.manifold.expmap(self.loc, u) + res = self.manifold.expmap_polar(self.loc, alpha, radius) + return res + + def log_prob(self, value): + loc = self.loc.expand(value.shape) + radius_sq = self.manifold.dist(loc, value, keepdim=True).pow(2) + res = - radius_sq / 2 / self.scale.pow(2) - self.direction._log_normalizer() - self.radius.log_normalizer + return res diff --git a/PVAE/distributions/wrapped_normal.py b/PVAE/distributions/wrapped_normal.py new file mode 100644 index 0000000000000000000000000000000000000000..29566d92498a14c25d860d8ba3450780282b70c1 --- /dev/null +++ b/PVAE/distributions/wrapped_normal.py @@ -0,0 +1,65 @@ +import torch +from torch.nn import functional as F +from torch.distributions import Normal, Independent +from numbers import Number +from torch.distributions.utils import _standard_normal, broadcast_all + + +class WrappedNormal(torch.distributions.Distribution): + + arg_constraints = {'loc': torch.distributions.constraints.real, + 'scale': torch.distributions.constraints.positive} + support = torch.distributions.constraints.real + has_rsample = True + _mean_carrier_measure = 0 + + @property + def mean(self): + return self.loc + + @property + def stddev(self): + raise NotImplementedError + + @property + def scale(self): + return F.softplus(self._scale) if self.softplus else self._scale + + def __init__(self, loc, scale, manifold, validate_args=None, softplus=False): + self.dtype = loc.dtype + self.softplus = softplus + self.loc, self._scale = broadcast_all(loc, scale) + self.manifold = manifold + self.manifold.assert_check_point_on_manifold(self.loc) + self.device = loc.device + if isinstance(loc, Number) and isinstance(scale, Number): + batch_shape, event_shape = torch.Size(), torch.Size() + else: + batch_shape = self.loc.shape[:-1] + event_shape = torch.Size([self.manifold.dim]) + super(WrappedNormal, self).__init__(batch_shape, event_shape, validate_args=validate_args) + + def sample(self, shape=torch.Size()): + with torch.no_grad(): + return self.rsample(shape) + + def rsample(self, sample_shape=torch.Size()): + shape = self._extended_shape(sample_shape) + v = self.scale * _standard_normal(shape, dtype=self.loc.dtype, device=self.loc.device) + self.manifold.assert_check_vector_on_tangent(self.manifold.zero, v) + v = v / self.manifold.lambda_x(self.manifold.zero, keepdim=True) + u = self.manifold.transp(self.manifold.zero, self.loc, v) + z = self.manifold.expmap(self.loc, u) + return z + + def log_prob(self, x): + shape = x.shape + loc = self.loc.unsqueeze(0).expand(x.shape[0], *self.batch_shape, self.manifold.coord_dim) + if len(shape) < len(loc.shape): x = x.unsqueeze(1) + v = self.manifold.logmap(loc, x) + v = self.manifold.transp(loc, 
self.manifold.zero, v) + u = v * self.manifold.lambda_x(self.manifold.zero, keepdim=True) + norm_pdf = Normal(torch.zeros_like(self.scale), self.scale).log_prob(u).sum(-1, keepdim=True) + logdetexp = self.manifold.logdetexp(loc, x, keepdim=True) + result = norm_pdf - logdetexp + return result diff --git a/PVAE/manifolds/__init__.py b/PVAE/manifolds/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..cd1d10798fac6c83bf106a00d054ff2073df7b52 --- /dev/null +++ b/PVAE/manifolds/__init__.py @@ -0,0 +1,4 @@ +from Ghypeddings.PVAE.manifolds.euclidean import Euclidean +from Ghypeddings.PVAE.manifolds.poincareball import PoincareBall + +__all__ = [Euclidean, PoincareBall] \ No newline at end of file diff --git a/PVAE/manifolds/euclidean.py b/PVAE/manifolds/euclidean.py new file mode 100644 index 0000000000000000000000000000000000000000..a0b362b7818847a4928b6cc647c90f95de6847fc --- /dev/null +++ b/PVAE/manifolds/euclidean.py @@ -0,0 +1,42 @@ +import torch +from geoopt.manifolds import Euclidean as EuclideanParent + + +class Euclidean(EuclideanParent): + + def __init__(self, dim, c=0.): + super().__init__(1) + self.register_buffer("dim", torch.as_tensor(dim, dtype=torch.int)) + self.register_buffer("c", torch.as_tensor(c, dtype=torch.get_default_dtype())) + + @property + def coord_dim(self): + return int(self.dim) + + @property + def device(self): + return self.c.device + + @property + def zero(self): + return torch.zeros(1, self.dim).to(self.device) + + def logdetexp(self, x, y, is_vector=False, keepdim=False): + result = torch.zeros(x.shape[:-1]).to(x) + if keepdim: result = result.unsqueeze(-1) + return result + + def expmap0(self, u): + return u + + def logmap0(self, u): + return u + + def proju0(self, u): + return self.proju(self.zero.expand_as(u), u) + + def transp0(self, x, u): + return self.transp(self.zero.expand_as(u), x, u) + + def lambda_x(self, x, *, keepdim=False, dim=-1): + return torch.ones_like(x.sum(dim=dim, keepdim=keepdim)) diff --git a/PVAE/manifolds/poincareball.py b/PVAE/manifolds/poincareball.py new file mode 100644 index 0000000000000000000000000000000000000000..924511de237cf5d038ef82d39b7be0e6cb30503a --- /dev/null +++ b/PVAE/manifolds/poincareball.py @@ -0,0 +1,84 @@ +import torch +from geoopt.manifolds import PoincareBall as PoincareBallParent +from geoopt.manifolds.stereographic.math import _lambda_x, arsinh, tanh + +MIN_NORM = 1e-15 + + +class PoincareBall(PoincareBallParent): + + def __init__(self, dim, c=1.0): + super().__init__(c) + self.register_buffer("dim", torch.as_tensor(dim, dtype=torch.int)) + + def proju0(self, u): + return self.proju(self.zero.expand_as(u), u) + + @property + def coord_dim(self): + return int(self.dim) + + @property + def device(self): + return self.c.device + + @property + def zero(self): + return torch.zeros(1, self.dim).to(self.device) + + def logdetexp(self, x, y, is_vector=False, keepdim=False): + d = self.norm(x, y, keepdim=keepdim) if is_vector else self.dist(x, y, keepdim=keepdim) + d[d == 0] = 1e-15 + return (self.dim - 1) * (torch.sinh(self.c.sqrt()*d) / self.c.sqrt() / d).log() + + def inner(self, x, u, v=None, *, keepdim=False, dim=-1): + if v is None: v = u + return _lambda_x(x, self.c, keepdim=keepdim, dim=dim) ** 2 * (u * v).sum( + dim=dim, keepdim=keepdim + ) + + def expmap_polar(self, x, u, r, dim: int = -1): + sqrt_c = self.c ** 0.5 + u_norm = u.norm(dim=dim, p=2, keepdim=True).clamp_min(MIN_NORM) + second_term = ( + tanh(sqrt_c / 2 * r) + * u + / (sqrt_c * u_norm) + ) + gamma_1 = 
self.mobius_add(x, second_term, dim=dim) + return gamma_1 + + def normdist2plane(self, x, a, p, keepdim: bool = False, signed: bool = False, dim: int = -1, norm: bool = False): + c = self.c + sqrt_c = c ** 0.5 + diff = self.mobius_add(-p, x, dim=dim) + diff_norm2 = diff.pow(2).sum(dim=dim, keepdim=keepdim).clamp_min(MIN_NORM) + sc_diff_a = (diff * a).sum(dim=dim, keepdim=keepdim) + if not signed: + sc_diff_a = sc_diff_a.abs() + a_norm = a.norm(dim=dim, keepdim=keepdim, p=2).clamp_min(MIN_NORM) + num = 2 * sqrt_c * sc_diff_a + denom = (1 - c * diff_norm2) * a_norm + res = arsinh(num / denom.clamp_min(MIN_NORM)) / sqrt_c + if norm: + res = res * a_norm# * self.lambda_x(a, dim=dim, keepdim=keepdim) + return res + + + +class PoincareBallExact(PoincareBall): + __doc__ = r""" + See Also + -------- + :class:`PoincareBall` + Notes + ----- + The implementation of retraction is an exact exponential map, this retraction will be used in optimization + """ + + retr_transp = PoincareBall.expmap_transp + transp_follow_retr = PoincareBall.transp_follow_expmap + retr = PoincareBall.expmap + + def extra_repr(self): + return "exact" diff --git a/PVAE/models/__init__.py b/PVAE/models/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..bdb822cc3ef52a1adbb1e24f356b3857d5479067 --- /dev/null +++ b/PVAE/models/__init__.py @@ -0,0 +1,2 @@ +from Ghypeddings.PVAE.models.tabular import Tabular +__all__ = [Tabular] \ No newline at end of file diff --git a/PVAE/models/architectures.py b/PVAE/models/architectures.py new file mode 100644 index 0000000000000000000000000000000000000000..92a049661f17533ce909b5b3f3fe4f1b79c53595 --- /dev/null +++ b/PVAE/models/architectures.py @@ -0,0 +1,180 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from numpy import prod +from Ghypeddings.PVAE.utils import Constants +from Ghypeddings.PVAE.ops.manifold_layers import GeodesicLayer, MobiusLayer, LogZero, ExpZero +from torch.nn.modules.module import Module + +def get_dim_act(args): + """ + Helper function to get dimension and activation at every layer. + :param args: + :return: + """ + if not args.act: + act = lambda x: x + else: + act = getattr(F, args.act) + acts = [act] * (args.num_layers - 1) + dims = [args.feat_dim] + ([args.hidden_dim] * (args.num_layers - 1)) + + return dims, acts + + +class Encoder(nn.Module): + """ + Encoder abstract class. + """ + + def __init__(self, c): + super(Encoder, self).__init__() + self.c = c + + def encode(self, x, adj): + input = (x, adj) + output, _ = self.layers.forward(input) + return output + +class GraphConvolution(Module): + """ + Simple GCN layer. + """ + + def __init__(self, in_features, out_features, dropout, act, use_bias): + super(GraphConvolution, self).__init__() + self.dropout = dropout + self.linear = nn.Linear(in_features, out_features, use_bias) + self.act = act + self.in_features = in_features + self.out_features = out_features + + def forward(self, input): + x, adj = input + hidden = self.linear.forward(x) + hidden = F.dropout(hidden, self.dropout, training=self.training) + if adj.is_sparse: + support = torch.spmm(adj, hidden) + else: + support = torch.mm(adj, hidden) + output = self.act(support), adj + return output + + def extra_repr(self): + return 'input_dim={}, output_dim={}'.format( + self.in_features, self.out_features + ) + +class GCN(Encoder): + """ + Graph Convolution Networks. 
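The propagation rule implemented by GraphConvolution above reduces to act(A · (X W)), with dropout applied to the transformed features; a toy sketch with made-up sizes:

import torch
import torch.nn.functional as F

A = torch.tensor([[1., 1., 0.],
                  [1., 1., 1.],
                  [0., 1., 1.]])   # 3-node adjacency with self-loops
X = torch.randn(3, 4)              # node features
W = torch.randn(4, 2)              # weight matrix (random here, purely for illustration)
out = F.relu(A @ (X @ W))          # shape (3, 2): one hidden vector per node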
+ """ + + def __init__(self, c, args): + super(GCN, self).__init__(c) + assert args.num_layers > 0 + dims, acts = get_dim_act(args) + gc_layers = [] + for i in range(len(dims) - 1): + in_dim, out_dim = dims[i], dims[i + 1] + act = acts[i] + gc_layers.append(GraphConvolution(in_dim, out_dim, args.dropout, act, args.bias)) + self.layers = nn.Sequential(*gc_layers) + + +def extra_hidden_layer(hidden_dim, non_lin): + return nn.Sequential(nn.Linear(hidden_dim, hidden_dim), non_lin) + +class EncWrapped(nn.Module): + """ Usual encoder followed by an exponential map """ + def __init__(self,c,args, manifold, data_size, non_lin, num_hidden_layers, hidden_dim, prior_iso): + super(EncWrapped, self).__init__() + self.manifold = manifold + self.data_size = data_size + self.enc = GCN(c,args) + self.fc21 = nn.Linear(hidden_dim, manifold.coord_dim) + self.fc22 = nn.Linear(hidden_dim, manifold.coord_dim if not prior_iso else 1) + + def forward(self,adj,x): + e = self.enc.encode(x,adj) + mu = self.fc21(e) # flatten data + mu = self.manifold.expmap0(mu) + return mu, F.softplus(self.fc22(e)) + Constants.eta, self.manifold + + +class DecWrapped(nn.Module): + """ Usual encoder preceded by a logarithm map """ + def __init__(self, manifold, data_size, non_lin, num_hidden_layers, hidden_dim): + super(DecWrapped, self).__init__() + self.data_size = data_size + self.manifold = manifold + modules = [] + modules.append(nn.Sequential(nn.Linear(manifold.coord_dim, hidden_dim), non_lin)) + modules.extend([extra_hidden_layer(hidden_dim, non_lin) for _ in range(num_hidden_layers - 1)]) + self.dec = nn.Sequential(*modules) + # self.fc31 = nn.Linear(hidden_dim, prod(data_size)) + self.fc31 = nn.Linear(hidden_dim, data_size[1]) + + def forward(self, z): + z = self.manifold.logmap0(z) + d = self.dec(z) + # mu = self.fc31(d).view(*z.size()[:-1], *self.data_size) # reshape data + mu = self.fc31(d).view(*z.size()[:-1], 1, self.data_size[1]) + return mu, torch.ones_like(mu) + + +class DecGeo(nn.Module): + """ First layer is a Hypergyroplane followed by usual decoder """ + def __init__(self, manifold, data_size, non_lin, num_hidden_layers, hidden_dim): + super(DecGeo, self).__init__() + self.data_size = data_size + modules = [] + modules.append(nn.Sequential(GeodesicLayer(manifold.coord_dim, hidden_dim, manifold), non_lin)) + modules.extend([extra_hidden_layer(hidden_dim, non_lin) for _ in range(num_hidden_layers - 1)]) + self.dec = nn.Sequential(*modules) + self.fc31 = nn.Linear(hidden_dim, data_size[1]) + + def forward(self, z): + d = self.dec(z) + # mu = self.fc31(d).view(*z.size()[:-1], *self.data_size) # reshape data + mu = self.fc31(d).view(*z.size()[:-1], 1, self.data_size[1]) + return mu, torch.ones_like(mu) + + +class EncMob(nn.Module): + """ Last layer is a Mobius layers """ + def __init__(self,c,args, manifold, data_size, non_lin, num_hidden_layers, hidden_dim, prior_iso): + super(EncMob, self).__init__() + self.manifold = manifold + self.data_size = data_size + # modules = [] + # modules.append(nn.Sequential(nn.Linear(data_size[1], hidden_dim), non_lin)) + # modules.extend([extra_hidden_layer(hidden_dim, non_lin) for _ in range(num_hidden_layers - 1)]) + # self.enc = nn.Sequential(*modules) + self.enc = GCN(c,args) + self.fc21 = MobiusLayer(hidden_dim, manifold.coord_dim, manifold) + self.fc22 = nn.Linear(hidden_dim, manifold.coord_dim if not prior_iso else 1) + + def forward(self,adj,x): + #e = self.enc(x.view(*x.size()[:-len(self.data_size)], -1)) # flatten data + e = self.enc.encode(x,adj) + mu = self.fc21(e) # 
flatten data + mu = self.manifold.expmap0(mu) + return mu, F.softplus(self.fc22(e)) + Constants.eta, self.manifold + + +class DecMob(nn.Module): + """ First layer is a Mobius Matrix multiplication """ + def __init__(self, manifold, data_size, non_lin, num_hidden_layers, hidden_dim): + super(DecMob, self).__init__() + self.data_size = data_size + modules = [] + modules.append(nn.Sequential(MobiusLayer(manifold.coord_dim, hidden_dim, manifold), LogZero(manifold), non_lin)) + modules.extend([extra_hidden_layer(hidden_dim, non_lin) for _ in range(num_hidden_layers - 1)]) + self.dec = nn.Sequential(*modules) + self.fc31 = nn.Linear(hidden_dim, prod(data_size)) + + def forward(self, z): + d = self.dec(z) + mu = self.fc31(d).view(*z.size()[:-1], *self.data_size) # reshape data + return mu, torch.ones_like(mu) diff --git a/PVAE/models/tabular.py b/PVAE/models/tabular.py new file mode 100644 index 0000000000000000000000000000000000000000..2c5b4d571562880727795fea74d2e8560b793624 --- /dev/null +++ b/PVAE/models/tabular.py @@ -0,0 +1,36 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +import torch.distributions as dist +from torch.utils.data import DataLoader + +import math +from Ghypeddings.PVAE.models.vae import VAE + +from Ghypeddings.PVAE.distributions import RiemannianNormal, WrappedNormal +from torch.distributions import Normal +import Ghypeddings.PVAE.manifolds as manifolds +from Ghypeddings.PVAE.models.architectures import EncWrapped, DecWrapped, EncMob, DecMob, DecGeo +from Ghypeddings.PVAE.utils import get_activation + +class Tabular(VAE): + """ Derive a specific sub-class of a VAE for tabular data. """ + def __init__(self, params): + c = nn.Parameter(params.c * torch.ones(1), requires_grad=False) + manifold = getattr(manifolds, 'PoincareBall')(params.dim, c) + super(Tabular, self).__init__( + eval(params.prior), # prior distribution + eval(params.posterior), # posterior distribution + dist.Normal, # likelihood distribution + eval('Enc' + params.enc)(params.c,params,manifold, params.data_size, get_activation(params), params.num_layers, params.hidden_dim, params.prior_iso), + eval('Dec' + params.dec)(manifold, params.data_size, get_activation(params), params.num_layers, params.hidden_dim), + params + ) + self.manifold = manifold + self._pz_mu = nn.Parameter(torch.zeros(1, params.dim), requires_grad=False) + self._pz_logvar = nn.Parameter(torch.zeros(1, 1), requires_grad=params.learn_prior_std) + self.modelName = 'Tabular' + + @property + def pz_params(self): + return self._pz_mu.mul(1), F.softplus(self._pz_logvar).div(math.log(2)).mul(self.prior_std), self.manifold \ No newline at end of file diff --git a/PVAE/models/vae.py b/PVAE/models/vae.py new file mode 100644 index 0000000000000000000000000000000000000000..2cb79df5b30a7370118d1d77aa88f47c4b341e2f --- /dev/null +++ b/PVAE/models/vae.py @@ -0,0 +1,63 @@ +# Base VAE class definition + +import math +import torch +import torch.nn as nn +import torch.nn.functional as F +import torch.distributions as dist +from Ghypeddings.PVAE.utils import get_mean_param + +class VAE(nn.Module): + def __init__(self, prior_dist, posterior_dist, likelihood_dist, enc, dec, params): + super(VAE, self).__init__() + self.pz = prior_dist + self.px_z = likelihood_dist + self.qz_x = posterior_dist + self.enc = enc + self.dec = dec + self.modelName = None + self.params = params + self.data_size = params.data_size + self.prior_std = params.prior_std + + if self.px_z == dist.RelaxedBernoulli: + self.px_z.log_prob = lambda self, value: \ + 
-F.binary_cross_entropy_with_logits( + self.probs if value.dim() <= self.probs.dim() else self.probs.expand_as(value), + value.expand(self.batch_shape) if value.dim() <= self.probs.dim() else value, + reduction='none' + ) + + def generate(self, N, K): + self.eval() + with torch.no_grad(): + mean_pz = get_mean_param(self.pz_params) + mean = get_mean_param(self.dec(mean_pz)) + px_z_params = self.dec(self.pz(*self.pz_params).sample(torch.Size([N]))) + means = get_mean_param(px_z_params) + samples = self.px_z(*px_z_params).sample(torch.Size([K])) + + return mean, \ + means.view(-1, *means.size()[2:]), \ + samples.view(-1, *samples.size()[3:]) + + def reconstruct(self, data , edge_index): + self.eval() + with torch.no_grad(): + qz_x = self.qz_x(*self.enc(edge_index,data)) + px_z_params = self.dec(qz_x.rsample(torch.Size([1])).squeeze(0)) + + return get_mean_param(px_z_params) + + def forward(self, x , edge_index, K=1): + embeddings = self.enc(edge_index,x) + qz_x = self.qz_x(*embeddings) + zs = qz_x.rsample(torch.Size([K])) + px_z = self.px_z(*self.dec(zs)) + return qz_x, px_z, zs , embeddings + + @property + def pz_params(self): + return self._pz_mu.mul(1), F.softplus(self._pz_logvar).div(math.log(2)).mul(self.prior_std_scale) + + def init_last_layer_bias(self, dataset): pass diff --git a/PVAE/objectives.py b/PVAE/objectives.py new file mode 100644 index 0000000000000000000000000000000000000000..fd9afeabd8f589bb33659f7f7b1aae36264e4159 --- /dev/null +++ b/PVAE/objectives.py @@ -0,0 +1,46 @@ +import torch +import torch.distributions as dist +from numpy import prod +from Ghypeddings.PVAE.utils import has_analytic_kl, log_mean_exp +import torch.nn.functional as F + +def vae_objective(model, idx, x , graph, K=1, beta=1.0, components=False, analytical_kl=False, **kwargs): + """Computes E_{p(x)}[ELBO] """ + qz_x, px_z, zs , embeddings = model(x, graph,K) + _, B, D = zs.size() + flat_rest = torch.Size([*px_z.batch_shape[:2], -1]) + x = x.unsqueeze(0).unsqueeze(2) + lpx_z = px_z.log_prob(x.expand(px_z.batch_shape)).view(flat_rest).sum(-1) + pz = model.pz(*model.pz_params) + kld = dist.kl_divergence(qz_x, pz).unsqueeze(0).sum(-1) if \ + has_analytic_kl(type(qz_x), model.pz) and analytical_kl else \ + qz_x.log_prob(zs).sum(-1) - pz.log_prob(zs).sum(-1) + lpx_z_selected = lpx_z[:, idx] + kld_selected = kld[:, idx] + obj = -lpx_z_selected.mean(0).sum() + beta * kld_selected.mean(0).sum() + return (qz_x, px_z, lpx_z_selected, kld_selected, obj , embeddings) if components else obj + +def _iwae_objective_vec(model, x, K): + """Helper for IWAE estimate for log p_\theta(x) -- full vectorisation.""" + qz_x, px_z, zs = model(x, K) + flat_rest = torch.Size([*px_z.batch_shape[:2], -1]) + lpz = model.pz(*model.pz_params).log_prob(zs).sum(-1) + lpx_z = px_z.log_prob(x.expand(zs.size(0), *x.size())).view(flat_rest).sum(-1) + lqz_x = qz_x.log_prob(zs).sum(-1) + obj = lpz.squeeze(-1) + lpx_z.view(lpz.squeeze(-1).shape) - lqz_x.squeeze(-1) + return -log_mean_exp(obj).sum() + + +def iwae_objective(model, x, K): + """Computes an importance-weighted ELBO estimate for log p_\theta(x) + Iterates over the batch as necessary. 
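For reference, the K-sample importance-weighted bound that iwae_objective estimates can be written directly with logsumexp; log_w below stands in for log p(x, z_k) - log q(z_k | x) and its values are illustrative:

import math
import torch
K, B = 8, 4
log_w = torch.randn(K, B)                           # placeholder importance weights
iwae = torch.logsumexp(log_w, dim=0) - math.log(K)  # log (1/K) sum_k w_k, per datapoint
loss = -iwae.sum()                                  # negated for minimisation, as in the helper above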
+ Appropriate negation (for minimisation) happens in the helper + """ + split_size = int(x.size(0) / (K * prod(x.size()) / (3e7))) # rough heuristic + if split_size >= x.size(0): + obj = _iwae_objective_vec(model, x, K) + else: + obj = 0 + for bx in x.split(split_size): + obj = obj + _iwae_objective_vec(model, bx, K) + return obj diff --git a/PVAE/ops/__init__.py b/PVAE/ops/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/PVAE/ops/manifold_layers.py b/PVAE/ops/manifold_layers.py new file mode 100644 index 0000000000000000000000000000000000000000..643d80fa75b50dc7a9e92ff9a3a7b305afb440cd --- /dev/null +++ b/PVAE/ops/manifold_layers.py @@ -0,0 +1,90 @@ +import math +import torch +from torch import nn +from torch.nn.parameter import Parameter +from torch.nn import init +from Ghypeddings.PVAE.manifolds import PoincareBall, Euclidean +from geoopt import ManifoldParameter + + +class RiemannianLayer(nn.Module): + def __init__(self, in_features, out_features, manifold, over_param, weight_norm): + super(RiemannianLayer, self).__init__() + self.in_features = in_features + self.out_features = out_features + self.manifold = manifold + + self._weight = Parameter(torch.Tensor(out_features, in_features)) + self.over_param = over_param + self.weight_norm = weight_norm + if self.over_param: + self._bias = ManifoldParameter(torch.Tensor(out_features, in_features), manifold=manifold) + else: + self._bias = Parameter(torch.Tensor(out_features, 1)) + self.reset_parameters() + + @property + def weight(self): + return self.manifold.transp0(self.bias, self._weight) # weight \in T_0 => weight \in T_bias + + @property + def bias(self): + if self.over_param: + return self._bias + else: + return self.manifold.expmap0(self._weight * self._bias) # reparameterisation of a point on the manifold + + def reset_parameters(self): + init.kaiming_normal_(self._weight, a=math.sqrt(5)) + fan_in, _ = init._calculate_fan_in_and_fan_out(self._weight) + bound = 4 / math.sqrt(fan_in) + init.uniform_(self._bias, -bound, bound) + if self.over_param: + with torch.no_grad(): self._bias.set_(self.manifold.expmap0(self._bias)) + + +class GeodesicLayer(RiemannianLayer): + def __init__(self, in_features, out_features, manifold, over_param=False, weight_norm=False): + super(GeodesicLayer, self).__init__(in_features, out_features, manifold, over_param, weight_norm) + + def forward(self, input): + input = input.unsqueeze(-2).expand(*input.shape[:-(len(input.shape) - 2)], self.out_features, self.in_features) + res = self.manifold.normdist2plane(input, self.bias, self.weight, + signed=True, norm=self.weight_norm) + return res + + +class Linear(nn.Linear): + def __init__(self, in_features, out_features, **kwargs): + super(Linear, self).__init__( + in_features, + out_features, + ) + + +class MobiusLayer(RiemannianLayer): + def __init__(self, in_features, out_features, manifold, over_param=False, weight_norm=False): + super(MobiusLayer, self).__init__(in_features, out_features, manifold, over_param, weight_norm) + + def forward(self, input): + res = self.manifold.mobius_matvec(self.weight, input) + return res + + +class ExpZero(nn.Module): + def __init__(self, manifold): + super(ExpZero, self).__init__() + self.manifold = manifold + + def forward(self, input): + return self.manifold.expmap0(input) + + +class LogZero(nn.Module): + def __init__(self, manifold): + super(LogZero, self).__init__() + self.manifold = manifold + + def forward(self, input): + return 
self.manifold.logmap0(input) + diff --git a/PVAE/pvae.py b/PVAE/pvae.py new file mode 100644 index 0000000000000000000000000000000000000000..b1318928f98252ce80bf76225c45cbc63037d794 --- /dev/null +++ b/PVAE/pvae.py @@ -0,0 +1,211 @@ +import sys +sys.path.append(".") +sys.path.append("..") +import os +import datetime +from collections import defaultdict +import torch +from torch import optim +import numpy as np +import logging +import time + +from Ghypeddings.PVAE.utils import probe_infnan , process_data , create_args , get_classifier,get_clustering_algorithm,get_anomaly_detection_algorithm +import Ghypeddings.PVAE.objectives as objectives +from Ghypeddings.PVAE.models import Tabular + +from Ghypeddings.classifiers import calculate_metrics + +runId = datetime.datetime.now().isoformat().replace(':','_') +torch.backends.cudnn.benchmark = True + +class PVAE: + def __init__(self, + adj, + features, + labels, + dim, + hidden_dim, + num_layers=2, + c=1.0, + act='relu', + lr=0.01, + cuda=0, + epochs=50, + seed=42, + eval_freq=1, + val_prop=0., + test_prop=0.3, + dropout=0.1, + beta1=0.9, + beta2=.999, + K=1, + beta=.2, + analytical_kl=True, + posterior='WrappedNormal', + prior='WrappedNormal', + prior_iso=True, + prior_std=1., + learn_prior_std=True, + enc='Mob', + dec='Geo', + bias=True, + alpha=0.5, + classifier=None, + clusterer=None, + log_freq=1, + normalize_adj=False, + normalize_feats=True, + anomaly_detector=None + ): + + self.args = create_args(dim,hidden_dim,num_layers,c,act,lr,cuda,epochs,seed,eval_freq,val_prop,test_prop,dropout,beta1,beta2,K,beta,analytical_kl,posterior,prior,prior_iso,prior_std,learn_prior_std,enc,dec,bias,alpha,classifier,clusterer,log_freq,normalize_adj,normalize_feats,anomaly_detector) + self.args.n_classes = len(np.unique(labels)) + self.args.feat_dim = features.shape[1] + self.data = process_data(self.args,adj,features,labels) + self.args.data_size = [adj.shape[0],self.args.feat_dim] + self.args.batch_size=1 + + self.cls = None + + if int(self.args.cuda) >= 0: + torch.cuda.manual_seed(self.args.seed) + self.args.device = 'cuda:' + str(self.args.cuda) if int(self.args.cuda) >= 0 else 'cpu' + else: + self.args.device = 'cpu' + + self.args.prior_iso = self.args.prior_iso or self.args.posterior == 'RiemannianNormal' + + # Choosing and saving a random seed for reproducibility + if self.args.seed == 0: self.args.seed = int(torch.randint(0, 2**32 - 1, (1,)).item()) + torch.manual_seed(self.args.seed) + np.random.seed(self.args.seed) + torch.cuda.manual_seed_all(self.args.seed) + torch.manual_seed(self.args.seed) + torch.backends.cudnn.deterministic = True + self.model = Tabular(self.args).to(self.args.device) + self.optimizer = optim.Adam(self.model.parameters(), lr=self.args.lr, amsgrad=True, betas=(self.args.beta1, self.args.beta2)) + self.loss_function = getattr(objectives,'vae_objective') + + if self.args.cuda is not None and int(self.args.cuda) >= 0 : + os.environ['CUDA_VISIBLE_DEVICES'] = str(self.args.cuda) + self.model = self.model.to(self.args.device) + for x, val in self.data.items(): + if torch.is_tensor(self.data[x]): + self.data[x] = self.data[x].to(self.args.device) + + self.tb_embeddings = None + + + def fit(self): + + tot_params = sum([np.prod(p.size()) for p in self.model.parameters()]) + logging.info(f"Total number of parameters: {tot_params}") + + t_total = time.time() + agg = defaultdict(list) + b_loss, b_recon, b_kl , b_mlik , tb_loss = sys.float_info.max, sys.float_info.max ,sys.float_info.max,sys.float_info.max,sys.float_info.max + + best_losses 
= [] + train_losses = [] + val_losses = [] + + for epoch in range(self.args.epochs): + self.model.train() + self.optimizer.zero_grad() + + qz_x, px_z, lik, kl, loss , embeddings = self.loss_function(self.model,self.data['idx_train'], self.data['features'], self.data['adj_train'], K=self.args.K, beta=self.args.beta, components=True, analytical_kl=self.args.analytical_kl) + probe_infnan(loss, "Training loss:") + loss.backward() + self.optimizer.step() + + t_loss = loss.item() / len(self.data['idx_train']) + t_recon = -lik.mean(0).sum().item() / len(self.data['idx_train']) + t_kl = kl.sum(-1).mean(0).sum().item() / len(self.data['idx_train']) + + if(t_loss < b_loss): + b_loss = t_loss + b_recon = t_recon + b_kl = t_kl + + + agg['train_loss'].append(t_loss ) + agg['train_recon'].append(t_recon ) + agg['train_kl'].append(t_kl ) + + train_losses.append(t_recon) + if(len(best_losses) == 0): + best_losses.append(train_losses[0]) + elif (best_losses[-1] > train_losses[-1]): + best_losses.append(train_losses[-1]) + else: + best_losses.append(best_losses[-1]) + + if (epoch + 1) % self.args.log_freq == 0: + print('====> Epoch: {:03d} Loss: {:.2f} Recon: {:.2f} KL: {:.2f}'.format(epoch, agg['train_loss'][-1], agg['train_recon'][-1], agg['train_kl'][-1])) + + if (epoch + 1) % self.args.eval_freq == 0 and self.args.val_prop: + self.model.eval() + with torch.no_grad(): + qz_x, px_z, lik, kl, loss , embeddings= self.loss_function(self.model,self.data['idx_val'], self.data['features'],self.data['adj_train'], K=self.args.K, beta=self.args.beta, components=True) + tt_loss = loss.item() / len(self.data['idx_val']) + val_losses.append(tt_loss) + if(tt_loss < tb_loss): + tb_loss = tt_loss + self.tb_embeddings = embeddings[0] + + agg['test_loss'].append(tt_loss ) + print('====> Test loss: {:.4f}'.format(agg['test_loss'][-1])) + + + logging.info("Optimization Finished!") + logging.info("Total time elapsed: {:.4f}s".format(time.time() - t_total)) + print('====> Training: Best Loss: {:.2f} Best Recon: {:.2f} Best KL: {:.2f}'.format(b_loss,b_recon,b_kl)) + print('====> Testing: Best Loss: {:.2f}'.format(tb_loss)) + + train_idx = self.data['idx_train'] + val_idx = self.data['idx_val'] + idx = np.unique(np.concatenate((train_idx,val_idx))) + X = self.model.manifold.logmap0(self.tb_embeddings[idx]).cpu().detach().numpy() + y = self.data['labels'].cpu().reshape(-1,1)[idx] + + if(self.args.classifier): + self.cls = get_classifier(self.args, X,y) + acc,f1,recall,precision,roc_auc = calculate_metrics(self.cls,X,y) + elif self.args.clusterer: + y = y.reshape(-1,) + acc,f1,recall,precision,roc_auc = get_clustering_algorithm(self.args.clusterer,X,y)[6:] + elif self.args.anomaly_detector: + y = y.reshape(-1,) + acc,f1,recall,precision,roc_auc = get_anomaly_detection_algorithm(self.args.anomaly_detector,X,y)[6:] + + return {'train':train_losses,'best':best_losses,'val':val_losses},acc,f1,recall,precision,roc_auc,time.time() - t_total + + def predict(self): + self.model.eval() + with torch.no_grad(): + qz_x, px_z, lik, kl, loss , embeddings=self.loss_function(self.model,self.data['idx_test'], self.data['features'],self.data['adj_train'], K=self.args.K, beta=self.args.beta, components=True) + tt_loss = loss.item() / len(self.data['idx_test']) + test_idx = self.data['idx_test'] + data = self.model.manifold.logmap0(embeddings[0][test_idx]).cpu().detach().numpy() + labels = self.data['labels'].reshape(-1,1).cpu()[test_idx] + if self.args.classifier: + acc,f1,recall,precision,roc_auc = calculate_metrics(self.cls,data,labels) + elif 
self.args.clusterer: + labels = labels.reshape(-1,) + acc,f1,recall,precision,roc_auc = get_clustering_algorithm(self.args.clusterer,data,labels)[6:] + elif self.args.anomaly_detector: + labels = labels.reshape(-1,) + acc,f1,recall,precision,roc_auc = get_anomaly_detection_algorithm(self.args.anomaly_detector,data,labels)[6:] + self.tb_embeddings = embeddings[0] + return abs(tt_loss) , acc, f1 , recall,precision,roc_auc + + + def save_embeddings(self,directory): + tb_embeddings_euc = self.model.manifold.logmap0(self.tb_embeddings) + for_classification_hyp = np.hstack((self.tb_embeddings.cpu().detach().numpy(),self.data['labels'].reshape(-1,1).cpu())) + for_classification_euc = np.hstack((tb_embeddings_euc.cpu().detach().numpy(),self.data['labels'].reshape(-1,1).cpu())) + hyp_file_path = os.path.join(directory,'pvae_embeddings_hyp.csv') + euc_file_path = os.path.join(directory,'pvae_embeddings_euc.csv') + np.savetxt(hyp_file_path, for_classification_hyp, delimiter=',') + np.savetxt(euc_file_path, for_classification_euc, delimiter=',') diff --git a/PVAE/utils.py b/PVAE/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..2f935958fa8d55d60f320c19fc8f1dc8a183ef6a --- /dev/null +++ b/PVAE/utils.py @@ -0,0 +1,355 @@ +import sys +import math +import time +import os +import shutil +import torch +import torch.distributions as dist +from torch.autograd import Variable, Function, grad +from sklearn.preprocessing import MinMaxScaler +import pandas as pd +import numpy as np +import argparse +import torch.nn as nn +import scipy.sparse as sp + + +def lexpand(A, *dimensions): + """Expand tensor, adding new dimensions on left.""" + return A.expand(tuple(dimensions) + A.shape) + + +def rexpand(A, *dimensions): + """Expand tensor, adding new dimensions on right.""" + return A.view(A.shape + (1,)*len(dimensions)).expand(A.shape + tuple(dimensions)) + + +def assert_no_nan(name, g): + if torch.isnan(g).any(): raise Exception('nans in {}'.format(name)) + + +def assert_no_grad_nan(name, x): + if x.requires_grad: x.register_hook(lambda g: assert_no_nan(name, g)) + + +# Classes +class Constants(object): + eta = 1e-5 + log2 = math.log(2) + logpi = math.log(math.pi) + log2pi = math.log(2 * math.pi) + logceilc = 88 # largest cuda v s.t. exp(v) < inf + logfloorc = -104 # smallest cuda v s.t. exp(v) > 0 + invsqrt2pi = 1. 
/ math.sqrt(2 * math.pi) + sqrthalfpi = math.sqrt(math.pi/2) + + +def logsinh(x): + # torch.log(sinh(x)) + return x + torch.log(1 - torch.exp(-2 * x)) - Constants.log2 + + +def logcosh(x): + # torch.log(cosh(x)) + return x + torch.log(1 + torch.exp(-2 * x)) - Constants.log2 + + +class Arccosh(Function): + # https://github.com/facebookresearch/poincare-embeddings/blob/master/model.py + @staticmethod + def forward(ctx, x): + ctx.z = torch.sqrt(x * x - 1) + return torch.log(x + ctx.z) + + @staticmethod + def backward(ctx, g): + z = torch.clamp(ctx.z, min=Constants.eta) + z = g / z + return z + + +class Arcsinh(Function): + @staticmethod + def forward(ctx, x): + ctx.z = torch.sqrt(x * x + 1) + return torch.log(x + ctx.z) + + @staticmethod + def backward(ctx, g): + z = torch.clamp(ctx.z, min=Constants.eta) + z = g / z + return z + + +# https://stackoverflow.com/questions/14906764/how-to-redirect-stdout-to-both-file-and-console-with-scripting +class Logger(object): + def __init__(self, filename): + self.terminal = sys.stdout + self.log = open(filename, "a") + + def write(self, message): + self.terminal.write(message) + self.log.write(message) + + def flush(self): + # this flush method is needed for python 3 compatibility. + # this handles the flush command by doing nothing. + # you might want to specify some extra behavior here. + pass + + +class Timer: + def __init__(self, name): + self.name = name + + def __enter__(self): + self.begin = time.time() + return self + + def __exit__(self, *args): + self.end = time.time() + self.elapsed = self.end - self.begin + self.elapsedH = time.gmtime(self.elapsed) + print('====> [{}] Time: {:7.3f}s or {}' + .format(self.name, + self.elapsed, + time.strftime("%H:%M:%S", self.elapsedH))) + + +# Functions +def save_vars(vs, filepath): + """ + Saves variables to the given filepath in a safe manner. + """ + if os.path.exists(filepath): + shutil.copyfile(filepath, '{}.old'.format(filepath)) + torch.save(vs, filepath) + + +def save_model(model, filepath): + """ + To load a saved model, simply use + `model.load_state_dict(torch.load('path-to-saved-model'))`. + """ + save_vars(model.state_dict(), filepath) + + +def log_mean_exp(value, dim=0, keepdim=False): + return log_sum_exp(value, dim, keepdim) - math.log(value.size(dim)) + + +def log_sum_exp(value, dim=0, keepdim=False): + m, _ = torch.max(value, dim=dim, keepdim=True) + value0 = value - m + if keepdim is False: + m = m.squeeze(dim) + return m + torch.log(torch.sum(torch.exp(value0), dim=dim, keepdim=keepdim)) + + +def log_sum_exp_signs(value, signs, dim=0, keepdim=False): + m, _ = torch.max(value, dim=dim, keepdim=True) + value0 = value - m + if keepdim is False: + m = m.squeeze(dim) + return m + torch.log(torch.sum(signs * torch.exp(value0), dim=dim, keepdim=keepdim)) + + +def get_mean_param(params): + """Return the parameter used to show reconstructions or generations. + For example, the mean for Normal, or probs for Bernoulli. 
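log_sum_exp_signs above evaluates log(sum_i s_i exp(v_i)) by factoring out the maximum, which keeps the signed sums used in the HyperbolicRadius moments finite; a small check with values chosen so the signed sum is positive:

import torch
v = torch.tensor([2.0, 1.0])
s = torch.tensor([1.0, -1.0])
m = v.max()
stable = m + torch.log((s * torch.exp(v - m)).sum())   # log(e^2 - e^1), computed stably
direct = torch.log(torch.exp(v[0]) - torch.exp(v[1]))
print(stable.item(), direct.item())                    # both ~1.5413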
+ For Bernoulli, skip first parameter, as that's (scalar) temperature + """ + if params[0].dim() == 0: + return params[1] + # elif len(params) == 3: + # return params[1] + else: + return params[0] + + +def probe_infnan(v, name, extras={}): + nps = torch.isnan(v) + s = nps.sum().item() + if s > 0: + print('>>> {} >>>'.format(name)) + print(name, s) + print(v[nps]) + for k, val in extras.items(): + print(k, val, val.sum().item()) + quit() + + +def has_analytic_kl(type_p, type_q): + return (type_p, type_q) in torch.distributions.kl._KL_REGISTRY + + +def split_data(labels, test_prop,val_prop): + nb_nodes = labels.shape[0] + all_idx = np.arange(nb_nodes) + pos_idx = labels.nonzero()[0] + neg_idx = (1. - labels).nonzero()[0] + np.random.shuffle(pos_idx) + np.random.shuffle(neg_idx) + pos_idx = pos_idx.tolist() + neg_idx = neg_idx.tolist() + nb_pos_neg = min(len(pos_idx), len(neg_idx)) + nb_val = round(val_prop * nb_pos_neg) + nb_test = round(test_prop * nb_pos_neg) + idx_val_pos, idx_test_pos, idx_train_pos = pos_idx[:nb_val], pos_idx[nb_val:nb_val + nb_test], pos_idx[ + nb_val + nb_test:] + idx_val_neg, idx_test_neg, idx_train_neg = neg_idx[:nb_val], neg_idx[nb_val:nb_val + nb_test], neg_idx[ + nb_val + nb_test:] + + return idx_test_pos + idx_test_neg, idx_train_pos + idx_train_neg, idx_val_pos + idx_val_neg, + +def process_data(args, adj,features,labels): + data = process_data_nc(args,adj,features,labels) + data['adj_train'], data['features'] = process( + data['adj_train'], data['features'],args.normalize_adj,args.normalize_feats + ) + return data + +def process_data_nc(args,adj,features,labels): + idx_test, idx_train , idx_val= split_data(labels, args.test_prop,args.val_prop) + labels = torch.LongTensor(labels) + data = {'adj_train': sp.csr_matrix(adj), 'features': features, 'labels': labels, 'idx_train': idx_train, 'idx_test': idx_test , 'idx_val':idx_val} + return data + +def process(adj, features, normalize_adj, normalize_feats): + if sp.isspmatrix(features): + features = np.array(features.todense()) + if normalize_feats: + features = normalize(features) + features = torch.Tensor(features) + if normalize_adj: + adj = normalize(adj) + adj = sparse_mx_to_torch_sparse_tensor(adj) + return adj, features + + +def normalize(mx): + """Row-normalize sparse matrix.""" + rowsum = np.array(mx.sum(1)) + r_inv = np.power(rowsum, -1).flatten() + r_inv[np.isinf(r_inv)] = 0. 
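The row normalisation this helper performs is just D^{-1} A (the inf entries that empty rows would produce are zeroed above); on a small dense example:

import numpy as np
A = np.array([[1., 1., 0.],
              [0., 2., 2.]])
A_norm = A / A.sum(axis=1, keepdims=True)   # rows now sum to 1: [[0.5, 0.5, 0.], [0., 0.5, 0.5]]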
+ r_mat_inv = sp.diags(r_inv) + mx = r_mat_inv.dot(mx) + return mx + + +def sparse_mx_to_torch_sparse_tensor(sparse_mx): + """Convert a scipy sparse matrix to a torch sparse tensor.""" + sparse_mx = sparse_mx.tocoo() + indices = torch.from_numpy( + np.vstack((sparse_mx.row, sparse_mx.col)).astype(np.int64) + ) + values = torch.Tensor(sparse_mx.data) + shape = torch.Size(sparse_mx.shape) + return torch.sparse.FloatTensor(indices, values, shape) + +def create_args(*args): + parser = argparse.ArgumentParser() + parser.add_argument('--dim', type=int, default=args[0]) + parser.add_argument('--hidden_dim', type=int, default=args[1]) + parser.add_argument('--num_layers', type=int, default=args[2]) + parser.add_argument('--c', type=int, default=args[3]) + parser.add_argument('--act', type=str, default=args[4]) + parser.add_argument('--lr', type=float, default=args[5]) + parser.add_argument('--cuda', type=int, default=args[6]) + parser.add_argument('--epochs', type=int, default=args[7]) + parser.add_argument('--seed', type=int, default=args[8]) + parser.add_argument('--eval_freq', type=int, default=args[9]) + parser.add_argument('--val_prop', type=float, default=args[10]) + parser.add_argument('--test_prop', type=float, default=args[11]) + parser.add_argument('--dropout', type=float, default=args[12]) + parser.add_argument('--beta1', type=float, default=args[13]) + parser.add_argument('--beta2', type=float, default=args[14]) + parser.add_argument('--K', type=int, default=args[15]) + parser.add_argument('--beta', type=float, default=args[16]) + parser.add_argument('--analytical_kl', type=bool, default=args[17]) + parser.add_argument('--posterior', type=str, default=args[18]) + parser.add_argument('--prior', type=str, default=args[19]) + parser.add_argument('--prior_iso', type=bool, default=args[20]) + parser.add_argument('--prior_std', type=float, default=args[21]) + parser.add_argument('--learn_prior_std', type=bool, default=args[22]) + parser.add_argument('--enc', type=str, default=args[23]) + parser.add_argument('--dec', type=str, default=args[24]) + parser.add_argument('--bias', type=bool, default=args[25]) + parser.add_argument('--alpha', type=float, default=args[26]) + parser.add_argument('--classifier', type=str, default=args[27]) + parser.add_argument('--clusterer', type=str, default=args[28]) + parser.add_argument('--log_freq', type=int, default=args[29]) + parser.add_argument('--normalize_adj', type=bool, default=args[30]) + parser.add_argument('--normalize_feats', type=bool, default=args[31]) + parser.add_argument('--anomaly_detector', type=str, default=args[32]) + flags, unknown = parser.parse_known_args() + return flags + + +def get_activation(args): + if args.act == 'leaky_relu': + return nn.LeakyReLU(args.alpha) + elif args.act == 'rrelu': + return nn.RReLU() + elif args.act == 'relu': + return nn.ReLU() + elif args.act == 'elu': + return nn.ELU() + elif args.act == 'prelu': + return nn.PReLU() + elif args.act == 'selu': + return nn.SELU() + + +from Ghypeddings.classifiers import * +def get_classifier(args,X,y): + if(args.classifier): + if(args.classifier == 'svm'): + return SVM(X,y) + elif(args.classifier == 'mlp'): + return mlp(X,y,1,10,seed=args.seed) + elif(args.classifier == 'decision tree'): + return decision_tree(X,y) + elif(args.classifier == 'random forest'): + return random_forest(X,y,args.seed) + elif(args.classifier == 'adaboost'): + return adaboost(X,y,args.seed) + elif(args.classifier == 'knn'): + return KNN(X,y) + elif(args.classifier == 'naive bayes'): + return 
naive_bayes(X,y) + else: + raise NotImplementedError + + +from Ghypeddings.clusterers import * +def get_clustering_algorithm(clusterer,X,y): + if(clusterer == 'agglomerative_clustering'): + return agglomerative_clustering(X,y) + elif(clusterer == 'dbscan'): + return dbscan(X,y) + elif(clusterer == 'fuzzy_c_mean'): + return fuzzy_c_mean(X,y) + elif(clusterer == 'gaussian_mixture'): + return gaussian_mixture(X,y) + elif(clusterer == 'kmeans'): + return kmeans(X,y) + elif(clusterer == 'mean_shift'): + return mean_shift(X,y) + else: + raise NotImplementedError + +from Ghypeddings.anomaly_detection import * +def get_anomaly_detection_algorithm(algorithm,X,y): + if(algorithm == 'isolation_forest'): + return isolation_forest(X,y) + elif(algorithm == 'one_class_svm'): + return one_class_svm(X,y) + elif(algorithm == 'dbscan'): + return dbscan(X,y) + elif(algorithm == 'kmeans'): + return kmeans(X,y,n_clusters=2) + elif(algorithm == 'local_outlier_factor'): + return local_outlier_factor(X,y) + else: + raise NotImplementedError \ No newline at end of file diff --git a/Poincare/__init__.py b/Poincare/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..bfa83a015b9025ddbca2b7c1ed543c66fd3af3d9 --- /dev/null +++ b/Poincare/__init__.py @@ -0,0 +1,2 @@ +from __future__ import print_function +from __future__ import division diff --git a/Poincare/layers/__init__.py b/Poincare/layers/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/Poincare/layers/layers.py b/Poincare/layers/layers.py new file mode 100644 index 0000000000000000000000000000000000000000..94778f8a79b92f2383dddcb7a96fc60d0fad6b70 --- /dev/null +++ b/Poincare/layers/layers.py @@ -0,0 +1,43 @@ +"""Euclidean layers.""" +import math + +import torch +import torch.nn as nn +import torch.nn.functional as F +from torch.nn.modules.module import Module +from torch.nn.parameter import Parameter + + +def get_dim_act(args): + """ + Helper function to get dimension and activation at every layer. + :param args: + :return: + """ + if not args.act: + act = lambda x: x + else: + act = getattr(F, args.act) + acts = [act] * (args.num_layers - 1) + dims = [args.feat_dim] + ([args.dim] * (args.num_layers - 1)) + if args.task in ['lp', 'rec']: + dims += [args.dim] + acts += [act] + return dims, acts + +class Linear(Module): + """ + Simple Linear layer with dropout. 
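For a concrete picture of what get_dim_act above produces, under illustrative settings feat_dim=10, dim=16, num_layers=3, act='relu' and a task other than 'lp'/'rec':

import torch.nn as nn
import torch.nn.functional as F
dims, acts = [10, 16, 16], [F.relu, F.relu]          # what the helper returns for those settings
layers = [nn.Linear(dims[i], dims[i + 1]) for i in range(len(dims) - 1)]
# i.e. Linear(10 -> 16) and Linear(16 -> 16), each followed by its activation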
+ """ + + def __init__(self, in_features, out_features, dropout, act, use_bias): + super(Linear, self).__init__() + self.dropout = dropout + self.linear = nn.Linear(in_features, out_features, use_bias) + self.act = act + + def forward(self, x): + hidden = self.linear.forward(x) + hidden = F.dropout(hidden, self.dropout, training=self.training) + out = self.act(hidden) + return out diff --git a/Poincare/manifolds/__init__.py b/Poincare/manifolds/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..1ac57200dff3cf341b4148b750fe1ecadb88c620 --- /dev/null +++ b/Poincare/manifolds/__init__.py @@ -0,0 +1,3 @@ +from Ghypeddings.Poincare.manifolds.base import ManifoldParameter +from Ghypeddings.Poincare.manifolds.poincare import PoincareBall +from Ghypeddings.Poincare.manifolds.euclidean import Euclidean \ No newline at end of file diff --git a/Poincare/manifolds/base.py b/Poincare/manifolds/base.py new file mode 100644 index 0000000000000000000000000000000000000000..925d4a6b2a59dae47a3a8ca33a7dcdcb20e0f08e --- /dev/null +++ b/Poincare/manifolds/base.py @@ -0,0 +1,88 @@ +"""Base manifold.""" + +from torch.nn import Parameter + + +class Manifold(object): + """ + Abstract class to define operations on a manifold. + """ + + def __init__(self): + super().__init__() + self.eps = 10e-8 + + def sqdist(self, p1, p2, c): + """Squared distance between pairs of points.""" + raise NotImplementedError + + def egrad2rgrad(self, p, dp, c): + """Converts Euclidean Gradient to Riemannian Gradients.""" + raise NotImplementedError + + def proj(self, p, c): + """Projects point p on the manifold.""" + raise NotImplementedError + + def proj_tan(self, u, p, c): + """Projects u on the tangent space of p.""" + raise NotImplementedError + + def proj_tan0(self, u, c): + """Projects u on the tangent space of the origin.""" + raise NotImplementedError + + def expmap(self, u, p, c): + """Exponential map of u at point p.""" + raise NotImplementedError + + def logmap(self, p1, p2, c): + """Logarithmic map of point p1 at point p2.""" + raise NotImplementedError + + def expmap0(self, u, c): + """Exponential map of u at the origin.""" + raise NotImplementedError + + def logmap0(self, p, c): + """Logarithmic map of point p at the origin.""" + raise NotImplementedError + + def mobius_add(self, x, y, c, dim=-1): + """Adds points x and y.""" + raise NotImplementedError + + def mobius_matvec(self, m, x, c): + """Performs hyperboic martrix-vector multiplication.""" + raise NotImplementedError + + def init_weights(self, w, c, irange=1e-5): + """Initializes random weigths on the manifold.""" + raise NotImplementedError + + def inner(self, p, c, u, v=None, keepdim=False): + """Inner product for tangent vectors at point x.""" + raise NotImplementedError + + def ptransp(self, x, y, u, c): + """Parallel transport of u from x to y.""" + raise NotImplementedError + + def ptransp0(self, x, u, c): + """Parallel transport of u from the origin to y.""" + raise NotImplementedError + + +class ManifoldParameter(Parameter): + """ + Subclass of torch.nn.Parameter for Riemannian optimization. 
+ """ + def __new__(cls, data, requires_grad, manifold, c): + return Parameter.__new__(cls, data, requires_grad) + + def __init__(self, data, requires_grad, manifold, c): + self.c = c + self.manifold = manifold + + def __repr__(self): + return '{} Parameter containing:\n'.format(self.manifold.name) + super(Parameter, self).__repr__() diff --git a/Poincare/manifolds/euclidean.py b/Poincare/manifolds/euclidean.py new file mode 100644 index 0000000000000000000000000000000000000000..177ebb2bf8a03d211732408b84d5f5d8bbec962e --- /dev/null +++ b/Poincare/manifolds/euclidean.py @@ -0,0 +1,67 @@ +"""Euclidean manifold.""" + +from Ghypeddings.Poincare.manifolds.base import Manifold + + +class Euclidean(Manifold): + """ + Euclidean Manifold class. + """ + + def __init__(self): + super(Euclidean, self).__init__() + self.name = 'Euclidean' + + def normalize(self, p): + dim = p.size(-1) + p.view(-1, dim).renorm_(2, 0, 1.) + return p + + def sqdist(self, p1, p2, c): + return (p1 - p2).pow(2).sum(dim=-1) + + def egrad2rgrad(self, p, dp, c): + return dp + + def proj(self, p, c): + return p + + def proj_tan(self, u, p, c): + return u + + def proj_tan0(self, u, c): + return u + + def expmap(self, u, p, c): + return p + u + + def logmap(self, p1, p2, c): + return p2 - p1 + + def expmap0(self, u, c): + return u + + def logmap0(self, p, c): + return p + + def mobius_add(self, x, y, c, dim=-1): + return x + y + + def mobius_matvec(self, m, x, c): + mx = x @ m.transpose(-1, -2) + return mx + + def init_weights(self, w, c, irange=1e-5): + w.data.uniform_(-irange, irange) + return w + + def inner(self, p, c, u, v=None, keepdim=False): + if v is None: + v = u + return (u * v).sum(dim=-1, keepdim=keepdim) + + def ptransp(self, x, y, v, c): + return v + + def ptransp0(self, x, v, c): + return x + v diff --git a/Poincare/manifolds/poincare.py b/Poincare/manifolds/poincare.py new file mode 100644 index 0000000000000000000000000000000000000000..3f52cee6ada9b4a2db8f7ce5051907979a08c023 --- /dev/null +++ b/Poincare/manifolds/poincare.py @@ -0,0 +1,145 @@ +"""Poincare ball manifold.""" + +import torch + +from Ghypeddings.Poincare.manifolds.base import Manifold +from Ghypeddings.Poincare.utils.math_utils import artanh, tanh + + +class PoincareBall(Manifold): + """ + PoicareBall Manifold class. + + We use the following convention: x0^2 + x1^2 + ... + xd^2 < 1 / c + + Note that 1/sqrt(c) is the Poincare ball radius. + + """ + + def __init__(self, ): + super(PoincareBall, self).__init__() + self.name = 'PoincareBall' + self.min_norm = 1e-15 + self.eps = {torch.float32: 4e-3, torch.float64: 1e-5} + + def sqdist(self, p1, p2, c): + sqrt_c = c ** 0.5 + dist_c = artanh( + sqrt_c * self.mobius_add(-p1, p2, c, dim=-1).norm(dim=-1, p=2, keepdim=False) + ) + dist = dist_c * 2 / sqrt_c + return dist ** 2 + + def _lambda_x(self, x, c): + x_sqnorm = torch.sum(x.data.pow(2), dim=-1, keepdim=True) + return 2 / (1. 
- c * x_sqnorm).clamp_min(self.min_norm) + + def egrad2rgrad(self, p, dp, c): + lambda_p = self._lambda_x(p, c) + dp /= lambda_p.pow(2) + return dp + + def proj(self, x, c): + norm = torch.clamp_min(x.norm(dim=-1, keepdim=True, p=2), self.min_norm) + maxnorm = (1 - self.eps[x.dtype]) / (c ** 0.5) + cond = norm > maxnorm + projected = x / norm * maxnorm + return torch.where(cond, projected, x) + + def proj_tan(self, u, p, c): + return u + + def proj_tan0(self, u, c): + return u + + def expmap(self, u, p, c): + sqrt_c = c ** 0.5 + u_norm = u.norm(dim=-1, p=2, keepdim=True).clamp_min(self.min_norm) + second_term = ( + tanh(sqrt_c / 2 * self._lambda_x(p, c) * u_norm) + * u + / (sqrt_c * u_norm) + ) + gamma_1 = self.mobius_add(p, second_term, c) + return gamma_1 + + def logmap(self, p1, p2, c): + sub = self.mobius_add(-p1, p2, c) + sub_norm = sub.norm(dim=-1, p=2, keepdim=True).clamp_min(self.min_norm) + lam = self._lambda_x(p1, c) + sqrt_c = c ** 0.5 + return 2 / sqrt_c / lam * artanh(sqrt_c * sub_norm) * sub / sub_norm + + def expmap0(self, u, c): + sqrt_c = c ** 0.5 + u_norm = torch.clamp_min(u.norm(dim=-1, p=2, keepdim=True), self.min_norm) + gamma_1 = tanh(sqrt_c * u_norm) * u / (sqrt_c * u_norm) + return gamma_1 + + def logmap0(self, p, c): + sqrt_c = c ** 0.5 + p_norm = p.norm(dim=-1, p=2, keepdim=True).clamp_min(self.min_norm) + scale = 1. / sqrt_c * artanh(sqrt_c * p_norm) / p_norm + return scale * p + + def mobius_add(self, x, y, c, dim=-1): + x2 = x.pow(2).sum(dim=dim, keepdim=True) + y2 = y.pow(2).sum(dim=dim, keepdim=True) + xy = (x * y).sum(dim=dim, keepdim=True) + num = (1 + 2 * c * xy + c * y2) * x + (1 - c * x2) * y + denom = 1 + 2 * c * xy + c ** 2 * x2 * y2 + return num / denom.clamp_min(self.min_norm) + + def mobius_matvec(self, m, x, c): + sqrt_c = c ** 0.5 + x_norm = x.norm(dim=-1, keepdim=True, p=2).clamp_min(self.min_norm) + mx = x @ m.transpose(-1, -2) + mx_norm = mx.norm(dim=-1, keepdim=True, p=2).clamp_min(self.min_norm) + res_c = tanh(mx_norm / x_norm * artanh(sqrt_c * x_norm)) * mx / (mx_norm * sqrt_c) + cond = (mx == 0).prod(-1, keepdim=True, dtype=torch.uint8) + res_0 = torch.zeros(1, dtype=res_c.dtype, device=res_c.device) + res = torch.where(cond, res_0, res_c) + return res + + def init_weights(self, w, c, irange=1e-5): + w.data.uniform_(-irange, irange) + return w + + def _gyration(self, u, v, w, c, dim: int = -1): + u2 = u.pow(2).sum(dim=dim, keepdim=True) + v2 = v.pow(2).sum(dim=dim, keepdim=True) + uv = (u * v).sum(dim=dim, keepdim=True) + uw = (u * w).sum(dim=dim, keepdim=True) + vw = (v * w).sum(dim=dim, keepdim=True) + c2 = c ** 2 + a = -c2 * uw * v2 + c * vw + 2 * c2 * uv * vw + b = -c2 * vw * u2 - c * uw + d = 1 + 2 * c * uv + c2 * u2 * v2 + return w + 2 * (a * u + b * v) / d.clamp_min(self.min_norm) + + def inner(self, x, c, u, v=None, keepdim=False): + if v is None: + v = u + lambda_x = self._lambda_x(x, c) + return lambda_x ** 2 * (u * v).sum(dim=-1, keepdim=keepdim) + + def ptransp(self, x, y, u, c): + lambda_x = self._lambda_x(x, c) + lambda_y = self._lambda_x(y, c) + return self._gyration(y, -x, u, c) * lambda_x / lambda_y + + def ptransp_(self, x, y, u, c): + lambda_x = self._lambda_x(x, c) + lambda_y = self._lambda_x(y, c) + return self._gyration(y, -x, u, c) * lambda_x / lambda_y + + def ptransp0(self, x, u, c): + lambda_x = self._lambda_x(x, c) + return 2 * u / lambda_x.clamp_min(self.min_norm) + + def to_hyperboloid(self, x, c): + K = 1./ c + sqrtK = K ** 0.5 + sqnorm = torch.norm(x, p=2, dim=1, keepdim=True) ** 2 + return sqrtK * 
torch.cat([K + sqnorm, 2 * sqrtK * x], dim=1) / (K - sqnorm) + diff --git a/Poincare/models/__init__.py b/Poincare/models/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/Poincare/models/base_models.py b/Poincare/models/base_models.py new file mode 100644 index 0000000000000000000000000000000000000000..142b9371cf08248d096f0ab313dd70fa8707f768 --- /dev/null +++ b/Poincare/models/base_models.py @@ -0,0 +1,77 @@ +"""Base model class.""" + +import numpy as np +from sklearn.metrics import roc_auc_score, average_precision_score +import torch +import torch.nn as nn +import torch.nn.functional as F + +import Ghypeddings.Poincare.manifolds as manifolds +import Ghypeddings.Poincare.models.encoders as encoders +from Ghypeddings.Poincare.models.decoders import model2decoder +from Ghypeddings.Poincare.utils.eval_utils import acc_f1 + + +class BaseModel(nn.Module): + """ + Base model for graph embedding tasks. + """ + + def __init__(self, args): + super(BaseModel, self).__init__() + self.manifold_name = 'PoincareBall' + self.c = torch.tensor([1.0]) + if not args.cuda == -1: + self.c = self.c.to(args.device) + self.manifold = getattr(manifolds, self.manifold_name)() + self.nnodes = args.n_nodes + self.encoder = getattr(encoders, 'Shallow')(self.c, args) + + def encode(self, x): + h = self.encoder.encode(x) + return h + + def compute_metrics(self, embeddings, data, split): + raise NotImplementedError + + def init_metric_dict(self): + raise NotImplementedError + + def has_improved(self, m1, m2): + raise NotImplementedError + + +class NCModel(BaseModel): + """ + Base model for node classification task. + """ + + def __init__(self, args): + super(NCModel, self).__init__(args) + self.decoder = model2decoder(1.0, args) + if args.n_classes > 2: + self.f1_average = 'micro' + else: + self.f1_average = 'binary' + + self.weights = torch.Tensor([1.] * args.n_classes) + if not args.cuda == -1: + self.weights = self.weights.to(args.device) + + def decode(self, h, idx): + output = self.decoder.decode(h) + return F.log_softmax(output[idx], dim=1) + + def compute_metrics(self, embeddings, data, split): + idx = data[f'idx_{split}'] + output = self.decode(embeddings, idx) + loss = F.nll_loss(output, data['labels'][idx], self.weights) + acc, f1,recall,precision,roc_auc = acc_f1(output, data['labels'][idx], average=self.f1_average) + metrics = {'loss': loss, 'acc': acc, 'f1': f1,'recall':recall,'precision':precision,'roc_auc':roc_auc} + return metrics + + def init_metric_dict(self): + return {'acc': -1, 'f1': -1} + + def has_improved(self, m1, m2): + return m1["f1"] < m2["f1"] \ No newline at end of file diff --git a/Poincare/models/decoders.py b/Poincare/models/decoders.py new file mode 100644 index 0000000000000000000000000000000000000000..8532b62830f9b8d0a050d64b23f2dc1b84ab8bd1 --- /dev/null +++ b/Poincare/models/decoders.py @@ -0,0 +1,46 @@ +"""Graph decoders.""" +import Ghypeddings.Poincare.manifolds as manifolds +import torch.nn as nn +import torch.nn.functional as F +from Ghypeddings.Poincare.layers.layers import Linear +import torch + +class Decoder(nn.Module): + """ + Decoder abstract class for node classification tasks. + """ + + def __init__(self, c): + super(Decoder, self).__init__() + self.c = c + + def decode(self, x): + probs = self.cls.forward(x) + return probs + + +class LinearDecoder(Decoder): + """ + MLP Decoder for Hyperbolic/Euclidean node classification models. 
+ """ + + def __init__(self, c, args): + super(LinearDecoder, self).__init__(c) + self.manifold = getattr(manifolds, 'PoincareBall')() + self.input_dim = args.dim + args.feat_dim + self.output_dim = args.n_classes + self.bias = True + self.cls = Linear(self.input_dim, self.output_dim, args.dropout, lambda x: x, self.bias) + + def decode(self, x): + h = self.manifold.proj_tan0(self.manifold.logmap0(x, c=self.c), c=self.c) + return super(LinearDecoder, self).decode(h) + + def extra_repr(self): + return 'in_features={}, out_features={}, bias={}, c={}'.format( + self.input_dim, self.output_dim, self.bias, self.c + ) + + +model2decoder = LinearDecoder + diff --git a/Poincare/models/encoders.py b/Poincare/models/encoders.py new file mode 100644 index 0000000000000000000000000000000000000000..42e6504898f0f6e85db56f4fd597c467890e205a --- /dev/null +++ b/Poincare/models/encoders.py @@ -0,0 +1,42 @@ +"""Graph encoders.""" + +import numpy as np +import torch +import torch.nn as nn +import torch.nn.functional as F + +import Ghypeddings.Poincare.manifolds as manifolds + +class Encoder(nn.Module): + """ + Encoder abstract class. + """ + + def __init__(self, c): + super(Encoder, self).__init__() + self.c = c + + def encode(self, x): + pass + +class Shallow(Encoder): + """ + Shallow Embedding method. + Learns embeddings or loads pretrained embeddings and uses an MLP for classification. + """ + + def __init__(self, c, args): + super(Shallow, self).__init__(c) + self.manifold = getattr(manifolds, 'PoincareBall')() + weights = torch.Tensor(args.n_nodes, args.dim) + weights = self.manifold.init_weights(weights, self.c) + trainable = True + self.lt = manifolds.ManifoldParameter(weights, trainable, self.manifold, self.c) + self.all_nodes = torch.LongTensor(list(range(args.n_nodes))) + layers = [] + self.layers = nn.Sequential(*layers) + + def encode(self, x): + h = self.lt[self.all_nodes, :] + h = torch.cat((h, x), 1) + return h diff --git a/Poincare/optimizers/__init__.py b/Poincare/optimizers/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..6b0d929f33f4e20f83e7cc3ce87c9fa8fd359447 --- /dev/null +++ b/Poincare/optimizers/__init__.py @@ -0,0 +1,2 @@ +from torch.optim import Adam +from Ghypeddings.Poincare.optimizers.radam import RiemannianAdam diff --git a/Poincare/optimizers/radam.py b/Poincare/optimizers/radam.py new file mode 100644 index 0000000000000000000000000000000000000000..f4904422f52d271dc7de85ed3069ef9972f3015b --- /dev/null +++ b/Poincare/optimizers/radam.py @@ -0,0 +1,172 @@ +"""Riemannian adam optimizer geoopt implementation (https://github.com/geoopt/).""" +import torch.optim +from Ghypeddings.Poincare.manifolds import Euclidean, ManifoldParameter + +_default_manifold = Euclidean() + + +class OptimMixin(object): + def __init__(self, *args, stabilize=None, **kwargs): + self._stabilize = stabilize + super().__init__(*args, **kwargs) + + def stabilize_group(self, group): + pass + + def stabilize(self): + """Stabilize parameters if they are off-manifold due to numerical reasons + """ + for group in self.param_groups: + self.stabilize_group(group) + + +def copy_or_set_(dest, source): + """ + A workaround to respect strides of :code:`dest` when copying :code:`source` + (https://github.com/geoopt/geoopt/issues/70) + Parameters + ---------- + dest : torch.Tensor + Destination tensor where to store new data + source : torch.Tensor + Source data to put in the new tensor + Returns + ------- + dest + torch.Tensor, modified inplace + """ + if dest.stride() != source.stride(): 
+ return dest.copy_(source) + else: + return dest.set_(source) + + +class RiemannianAdam(OptimMixin, torch.optim.Adam): + r"""Riemannian Adam with the same API as :class:`torch.optim.Adam` + Parameters + ---------- + params : iterable + iterable of parameters to optimize or dicts defining + parameter groups + lr : float (optional) + learning rate (default: 1e-3) + betas : Tuple[float, float] (optional) + coefficients used for computing + running averages of gradient and its square (default: (0.9, 0.999)) + eps : float (optional) + term added to the denominator to improve + numerical stability (default: 1e-8) + weight_decay : float (optional) + weight decay (L2 penalty) (default: 0) + amsgrad : bool (optional) + whether to use the AMSGrad variant of this + algorithm from the paper `On the Convergence of Adam and Beyond`_ + (default: False) + Other Parameters + ---------------- + stabilize : int + Stabilize parameters if they are off-manifold due to numerical + reasons every ``stabilize`` steps (default: ``None`` -- no stabilize) + .. _On the Convergence of Adam and Beyond: + https://openreview.net/forum?id=ryQu7f-RZ + """ + + def step(self, closure=None): + """Performs a single optimization step. + Arguments + --------- + closure : callable (optional) + A closure that reevaluates the model + and returns the loss. + """ + loss = None + if closure is not None: + loss = closure() + with torch.no_grad(): + for group in self.param_groups: + if "step" not in group: + group["step"] = 0 + betas = group["betas"] + weight_decay = group["weight_decay"] + eps = group["eps"] + learning_rate = group["lr"] + amsgrad = group["amsgrad"] + for point in group["params"]: + grad = point.grad + if grad is None: + continue + if isinstance(point, (ManifoldParameter)): + manifold = point.manifold + c = point.c + else: + manifold = _default_manifold + c = None + if grad.is_sparse: + raise RuntimeError( + "Riemannian Adam does not support sparse gradients yet (PR is welcome)" + ) + + state = self.state[point] + + # State initialization + if len(state) == 0: + state["step"] = 0 + # Exponential moving average of gradient values + state["exp_avg"] = torch.zeros_like(point) + # Exponential moving average of squared gradient values + state["exp_avg_sq"] = torch.zeros_like(point) + if amsgrad: + # Maintains max of all exp. moving avg. of sq. grad. values + state["max_exp_avg_sq"] = torch.zeros_like(point) + # make local variables for easy access + exp_avg = state["exp_avg"] + exp_avg_sq = state["exp_avg_sq"] + # actual step + grad.add_(weight_decay, point) + grad = manifold.egrad2rgrad(point, grad, c) + exp_avg.mul_(betas[0]).add_(1 - betas[0], grad) + exp_avg_sq.mul_(betas[1]).add_( + 1 - betas[1], manifold.inner(point, c, grad, keepdim=True) + ) + if amsgrad: + max_exp_avg_sq = state["max_exp_avg_sq"] + # Maintains the maximum of all 2nd moment running avg. till now + torch.max(max_exp_avg_sq, exp_avg_sq, out=max_exp_avg_sq) + # Use the max. for normalizing running avg. 
of gradient + denom = max_exp_avg_sq.sqrt().add_(eps) + else: + denom = exp_avg_sq.sqrt().add_(eps) + group["step"] += 1 + bias_correction1 = 1 - betas[0] ** group["step"] + bias_correction2 = 1 - betas[1] ** group["step"] + step_size = ( + learning_rate * bias_correction2 ** 0.5 / bias_correction1 + ) + # copy the state, we need it for retraction + # get the direction for ascend + direction = exp_avg / denom + # transport the exponential averaging to the new point + new_point = manifold.proj(manifold.expmap(-step_size * direction, point, c), c) + exp_avg_new = manifold.ptransp(point, new_point, exp_avg, c) + # use copy only for user facing point + copy_or_set_(point, new_point) + exp_avg.set_(exp_avg_new) + + group["step"] += 1 + if self._stabilize is not None and group["step"] % self._stabilize == 0: + self.stabilize_group(group) + return loss + + @torch.no_grad() + def stabilize_group(self, group): + for p in group["params"]: + if not isinstance(p, ManifoldParameter): + continue + state = self.state[p] + if not state: # due to None grads + continue + manifold = p.manifold + c = p.c + exp_avg = state["exp_avg"] + copy_or_set_(p, manifold.proj(p, c)) + exp_avg.set_(manifold.proj_tan(exp_avg, u, c)) diff --git a/Poincare/poincare.py b/Poincare/poincare.py new file mode 100644 index 0000000000000000000000000000000000000000..018bf5b4053a4d26b1051d4df15598e8b95cfacc --- /dev/null +++ b/Poincare/poincare.py @@ -0,0 +1,156 @@ +from __future__ import division +from __future__ import print_function + +import logging +import os +import time + +import numpy as np +import Ghypeddings.Poincare.optimizers as optimizers +import torch +from Ghypeddings.Poincare.models.base_models import NCModel +from Ghypeddings.Poincare.utils.data_utils import process_data +from Ghypeddings.Poincare.utils.train_utils import format_metrics, create_args + + +class POINCARE: + def __init__(self, + adj, + features, + labels, + dim, + grad_clip=None, + weight_decay=0.01, + lr=0.1, + gamma=0.5, + lr_reduce_freq=500, + cuda=0, + epochs=50, + min_epochs=50, + patience=None, + seed=42, + log_freq=1, + eval_freq=1, + val_prop=0.15, + test_prop=0.15, + double_precision=0, + dropout=0.01, + normalize_adj=False, + normalize_feats=True): + + self.args = create_args(dim,grad_clip,weight_decay,lr,gamma,lr_reduce_freq,cuda,epochs,min_epochs,patience,seed,log_freq,eval_freq,val_prop,test_prop,double_precision,dropout,normalize_adj,normalize_feats) + self.args.n_nodes = adj.shape[0] + self.args.feat_dim = features.shape[1] + self.args.n_classes = len(np.unique(labels)) + self.data = process_data(self.args,adj,features,labels) + + np.random.seed(self.args.seed) + torch.manual_seed(self.args.seed) + if int(self.args.double_precision): + torch.set_default_dtype(torch.float64) + if int(self.args.cuda) >= 0: + torch.cuda.manual_seed(self.args.seed) + self.args.device = 'cuda:' + str(self.args.cuda) if int(self.args.cuda) >= 0 else 'cpu' + self.args.patience = self.args.epochs if not self.args.patience else int(self.args.patience) + if not self.args.lr_reduce_freq: + self.args.lr_reduce_freq = self.args.epochs + self.model = NCModel(self.args) + self.optimizer = getattr(optimizers, 'RiemannianAdam')(params=self.model.parameters(), lr=self.args.lr, + weight_decay=self.args.weight_decay) + self.lr_scheduler = torch.optim.lr_scheduler.StepLR( + self.optimizer, + step_size=int(self.args.lr_reduce_freq), + gamma=float(self.args.gamma) + ) + + if self.args.cuda is not None and int(self.args.cuda) >= 0 : + os.environ['CUDA_VISIBLE_DEVICES'] = 
str(self.args.cuda) + self.model = self.model.to(self.args.device) + for x, val in self.data.items(): + if torch.is_tensor(self.data[x]): + self.data[x] = self.data[x].to(self.args.device) + self.best_emb = None + + + def fit(self): + + logging.getLogger().setLevel(logging.INFO) + logging.info(str(self.model)) + tot_params = sum([np.prod(p.size()) for p in self.model.parameters()]) + logging.info(f"Total number of parameters: {tot_params}") + + t_total = time.time() + counter = 0 + best_val_metrics = self.model.init_metric_dict() + + best_losses = [] + train_losses = [] + val_losses = [] + for epoch in range(self.args.epochs): + t = time.time() + self.model.train() + self.optimizer.zero_grad() + embeddings = self.model.encode(self.data['features']) + assert not torch.isnan(embeddings).any() + train_metrics = self.model.compute_metrics(embeddings, self.data, 'train') + train_metrics['loss'].backward() + if self.args.grad_clip is not None: + max_norm = float(self.args.grad_clip) + all_params = list(self.model.parameters()) + for param in all_params: + torch.nn.utils.clip_grad_norm_(param, max_norm) + self.optimizer.step() + self.lr_scheduler.step() + + train_losses.append(train_metrics['loss'].item()) + if(len(best_losses) == 0): + best_losses.append(train_losses[0]) + elif (best_losses[-1] > train_losses[-1]): + best_losses.append(train_losses[-1]) + else: + best_losses.append(best_losses[-1]) + + + if (epoch + 1) % self.args.log_freq == 0: + logging.info(" ".join(['Epoch: {:04d}'.format(epoch + 1), + 'lr: {}'.format(self.lr_scheduler.get_lr()[0]), + format_metrics(train_metrics, 'train'), + 'time: {:.4f}s'.format(time.time() - t) + ])) + if (epoch + 1) % self.args.eval_freq == 0: + self.model.eval() + embeddings = self.model.encode(self.data['features']) + val_metrics = self.model.compute_metrics(embeddings, self.data, 'val') + val_losses.append(val_metrics['loss'].item()) + if (epoch + 1) % self.args.log_freq == 0: + logging.info(" ".join(['Epoch: {:04d}'.format(epoch + 1), format_metrics(val_metrics, 'val')])) + + if self.model.has_improved(best_val_metrics, val_metrics): + self.best_emb = embeddings + best_val_metrics = val_metrics + counter = 0 + else: + counter += 1 + if counter == self.args.patience and epoch > self.args.min_epochs: + logging.info("Early stopping") + break + + logging.info("Training Finished!") + logging.info("Total time elapsed: {:.4f}s".format(time.time() - t_total)) + + return {'train':train_losses,'best':best_losses,'val':val_losses},best_val_metrics['acc'],best_val_metrics['f1'],best_val_metrics['recall'],best_val_metrics['precision'],best_val_metrics['roc_auc'],time.time() - t_total + + def predict(self): + self.model.eval() + embeddings = self.model.encode(self.data['features']) + val_metrics = self.model.compute_metrics(embeddings, self.data, 'test') + return val_metrics['loss'].item(),val_metrics['acc'],val_metrics['f1'],val_metrics['recall'],val_metrics['precision'],val_metrics['roc_auc'] + + def save_embeddings(self): + tb_embeddings_euc = self.model.manifold.logmap0(self.best_emb,self.model.decoder.c) + for_classification_hyp = np.hstack((self.best_emb.cpu().detach().numpy(),self.data['labels'].cpu().reshape(-1,1))) + for_classification_euc = np.hstack((tb_embeddings_euc.cpu().detach().numpy(),self.data['labels'].cpu().reshape(-1,1))) + hyp_file_path = os.path.join(os.getcwd(),'poincare_embeddings_hyp.csv') + euc_file_path = os.path.join(os.getcwd(),'poincare_embeddings_euc.csv') + np.savetxt(hyp_file_path, for_classification_hyp, delimiter=',') + 
np.savetxt(euc_file_path, for_classification_euc, delimiter=',') \ No newline at end of file diff --git a/Poincare/utils/__init__.py b/Poincare/utils/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/Poincare/utils/data_utils.py b/Poincare/utils/data_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..bc5c634801fe17a9231ff2f582dfcae159377ad3 --- /dev/null +++ b/Poincare/utils/data_utils.py @@ -0,0 +1,83 @@ +"""Data utils functions for pre-processing and data loading.""" +import os +import pickle as pkl +import sys + +import networkx as nx +import numpy as np +import scipy.sparse as sp +import torch + + +def process_data(args, adj,features,labels): + data = process_data_nc(args,adj,features,labels) + data['adj_train_norm'], data['features'] = process( + data['adj_train'], data['features'], args.normalize_adj,args.normalize_feats + ) + return data + +def process(adj, features, normalize_adj, normalize_feats): + if sp.isspmatrix(features): + features = np.array(features.todense()) + if normalize_feats: + features = normalize(features) + features = torch.Tensor(features) + if normalize_adj: + adj = normalize(adj + sp.eye(adj.shape[0])) + adj = sparse_mx_to_torch_sparse_tensor(adj) + return adj, features + + +def normalize(mx): + """Row-normalize sparse matrix.""" + rowsum = np.array(mx.sum(1)) + r_inv = np.power(rowsum, -1).flatten() + r_inv[np.isinf(r_inv)] = 0. + r_mat_inv = sp.diags(r_inv) + mx = r_mat_inv.dot(mx) + return mx + + +def sparse_mx_to_torch_sparse_tensor(sparse_mx): + """Convert a scipy sparse matrix to a torch sparse tensor.""" + sparse_mx = sparse_mx.tocoo() + indices = torch.from_numpy( + np.vstack((sparse_mx.row, sparse_mx.col)).astype(np.int64) + ) + values = torch.Tensor(sparse_mx.data) + shape = torch.Size(sparse_mx.shape) + return torch.sparse.FloatTensor(indices, values, shape) + + +def augment(adj, features, normalize_feats=True): + deg = np.squeeze(np.sum(adj, axis=0).astype(int)) + deg[deg > 5] = 5 + deg_onehot = torch.tensor(np.eye(6)[deg], dtype=torch.float).squeeze() + const_f = torch.ones(features.size(0), 1) + features = torch.cat((features, deg_onehot, const_f), dim=1) + return features + +def split_data(labels, val_prop, test_prop, seed): + np.random.seed(seed) + nb_nodes = labels.shape[0] + all_idx = np.arange(nb_nodes) + pos_idx = labels.nonzero()[0] + neg_idx = (1. 
- labels).nonzero()[0] + np.random.shuffle(pos_idx) + np.random.shuffle(neg_idx) + pos_idx = pos_idx.tolist() + neg_idx = neg_idx.tolist() + nb_pos_neg = min(len(pos_idx), len(neg_idx)) + nb_val = round(val_prop * nb_pos_neg) + nb_test = round(test_prop * nb_pos_neg) + idx_val_pos, idx_test_pos, idx_train_pos = pos_idx[:nb_val], pos_idx[nb_val:nb_val + nb_test], pos_idx[ + nb_val + nb_test:] + idx_val_neg, idx_test_neg, idx_train_neg = neg_idx[:nb_val], neg_idx[nb_val:nb_val + nb_test], neg_idx[ + nb_val + nb_test:] + return idx_val_pos + idx_val_neg, idx_test_pos + idx_test_neg, idx_train_pos + idx_train_neg + +def process_data_nc(args,adj,features,labels): + idx_val, idx_test, idx_train = split_data(labels, args.val_prop, args.test_prop, seed=args.seed) + labels = torch.LongTensor(labels) + data = {'adj_train': sp.csr_matrix(adj), 'features': features, 'labels': labels, 'idx_train': idx_train, 'idx_val': idx_val, 'idx_test': idx_test} + return data diff --git a/Poincare/utils/eval_utils.py b/Poincare/utils/eval_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..7494c5f3e618155257bfa1f6af2a2c91acd2f526 --- /dev/null +++ b/Poincare/utils/eval_utils.py @@ -0,0 +1,14 @@ +from sklearn.metrics import accuracy_score, f1_score,precision_score,recall_score,roc_auc_score + +def acc_f1(output, labels, average='binary'): + preds = output.max(1)[1].type_as(labels) + if preds.is_cuda: + preds = preds.cpu() + labels = labels.cpu() + accuracy = accuracy_score(labels,preds) + recall = recall_score(labels,preds) + precision = precision_score(labels,preds) + roc_auc = roc_auc_score(labels,preds) + f1 = f1_score(labels,preds, average=average) + return accuracy, f1,recall,precision,roc_auc + diff --git a/Poincare/utils/math_utils.py b/Poincare/utils/math_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..a2fee953984adca2f6f271db79f2b5624d9ad5bd --- /dev/null +++ b/Poincare/utils/math_utils.py @@ -0,0 +1,69 @@ +"""Math utils functions.""" + +import torch + + +def cosh(x, clamp=15): + return x.clamp(-clamp, clamp).cosh() + + +def sinh(x, clamp=15): + return x.clamp(-clamp, clamp).sinh() + + +def tanh(x, clamp=15): + return x.clamp(-clamp, clamp).tanh() + + +def arcosh(x): + return Arcosh.apply(x) + + +def arsinh(x): + return Arsinh.apply(x) + + +def artanh(x): + return Artanh.apply(x) + + +class Artanh(torch.autograd.Function): + @staticmethod + def forward(ctx, x): + x = x.clamp(-1 + 1e-7, 1 - 1e-7) + ctx.save_for_backward(x) + z = x.double() + return (torch.log_(1 + z).sub_(torch.log_(1 - z))).mul_(0.5).to(x.dtype) + + @staticmethod + def backward(ctx, grad_output): + input, = ctx.saved_tensors + return grad_output / (1 - input ** 2) + + +class Arsinh(torch.autograd.Function): + @staticmethod + def forward(ctx, x): + ctx.save_for_backward(x) + z = x.double() + return (z + torch.sqrt_(1 + z.pow(2))).clamp_min_(1e-7).log_().to(x.dtype) + + @staticmethod + def backward(ctx, grad_output): + input, = ctx.saved_tensors + return grad_output / (1 + input ** 2) ** 0.5 + + +class Arcosh(torch.autograd.Function): + @staticmethod + def forward(ctx, x): + x = x.clamp(min=1.0 + 1e-7) + ctx.save_for_backward(x) + z = x.double() + return (z + torch.sqrt_(z.pow(2) - 1)).clamp_min_(1e-7).log_().to(x.dtype) + + @staticmethod + def backward(ctx, grad_output): + input, = ctx.saved_tensors + return grad_output / (input ** 2 - 1) ** 0.5 + diff --git a/Poincare/utils/train_utils.py b/Poincare/utils/train_utils.py new file mode 100644 index 
0000000000000000000000000000000000000000..fb43e0d3044366d8c4d1c0bded82fa2b4e477edd --- /dev/null +++ b/Poincare/utils/train_utils.py @@ -0,0 +1,38 @@ +import os + +import numpy as np +import torch +import torch.nn.functional as F +import torch.nn.modules.loss +import argparse + + +def format_metrics(metrics, split): + """Format metric in metric dict for logging.""" + return " ".join( + ["{}_{}: {:.4f}".format(split, metric_name, metric_val) for metric_name, metric_val in metrics.items()]) + + +def create_args(*args): + parser = argparse.ArgumentParser() + parser.add_argument('--dim', type=int, default=args[0]) + parser.add_argument('--grad_clip', type=float, default=args[1]) + parser.add_argument('--weight_decay', type=float, default=args[2]) + parser.add_argument('--lr', type=float, default=args[3]) + parser.add_argument('--gamma', type=float, default=args[4]) + parser.add_argument('--lr_reduce_freq', type=int, default=args[5]) + parser.add_argument('--cuda', type=int, default=args[6]) + parser.add_argument('--epochs', type=int, default=args[7]) + parser.add_argument('--min_epochs', type=int, default=args[8]) + parser.add_argument('--patience', type=int, default=args[9]) + parser.add_argument('--seed', type=int, default=args[10]) + parser.add_argument('--log_freq', type=int, default=args[11]) + parser.add_argument('--eval_freq', type=int, default=args[12]) + parser.add_argument('--val_prop', type=float, default=args[13]) + parser.add_argument('--test_prop', type=float, default=args[14]) + parser.add_argument('--double_precision', type=int, default=args[15]) + parser.add_argument('--dropout', type=float, default=args[16]) + parser.add_argument('--normalize_adj', type=bool, default=args[17]) + parser.add_argument('--normalize_feats', type=bool, default=args[18]) + flags, unknown = parser.parse_known_args() + return flags \ No newline at end of file diff --git a/README.md b/README.md index 5f5b435843eda7dbbd600e65b53df67afc90ac9c..12547f5ae679b6e820d89b7d3844bd70b19e4810 100644 --- a/README.md +++ b/README.md @@ -1,93 +1,71 @@ -# Ghypeddings +# G-Hypeddings +## 1. Overview +G-hypeddings is a **Python library** designed for **graph hyperbolic embeddings**, primarily utilized in **detecting cybersecurity anomalies**. It includes 06 distinct models with various configurations, all of which utilize **hyperbolic geometry** for their operations. The library is built on top of the [PyTorch framework](https://pytorch.org/). -## Getting started +### 1.1. Models -To make it easy for you to get started with GitLab, here's a list of recommended next steps. +The models can be divided into three main categories based on the model's overall architecture namely Shallow models (Poincaré), Convolutional-based models (HGCN & HGNN), and Autoencoder-based models (HGCAE & PVAE). -Already a pro? Just edit this README.md and make it your own. Want to make it easy? [Use the template at the bottom](#editing-this-readme)! 
+| Name | Year | Encoder | Decoder | Manifold | Ref | +|----------|----------|----------|---------|---------------------------|-------| +| Poincaré | 2017 | / | MLP | Poincaré Ball | [1] | +| HGNN | 2019 | HGCN | MLP | Poincaré Ball, Lorentz | [2] | +| HGCN | 2019 | HGCN | MLP | Lorentz | [3] | +| P-VAE | 2019 | GCN | MLP | Poincaré Ball | [4] | +| H2H-GCN | 2021 | HGCN | MLP | Lorentz | [5] | +| HGCAE | 2021 | HGCN | HGCN | Poincaré Ball | [6] | -## Add your files +In this library, we provide a variety of binary classifiers, clustering algorithms, and unsupervised anomaly detection algorithms to use with the autoencoder-based models (HGCAE & PVAE). All of these are [Scikit-learn](https://scikit-learn.org/) models tuned using the Grid-Search technique. -- [ ] [Create](https://docs.gitlab.com/ee/user/project/repository/web_editor.html#create-a-file) or [upload](https://docs.gitlab.com/ee/user/project/repository/web_editor.html#upload-a-file) files -- [ ] [Add files using the command line](https://docs.gitlab.com/ee/gitlab-basics/add-file.html#add-a-file-using-the-command-line) or push an existing Git repository with the following command: +| Name | Type | +|---------------------------------------------|-----------------------------| +| Support Vector Machine (SVM) | Binary Classifier | +| Multilayer Perceptrone (MLP) | Binary Classifier | +| Decision Tree | Binary Classifier | +| Random Forest | Binary Classifier | +| AdaBoost | Binary Classifier | +| K-Nearest Neighbors (KNN) | Binary Classifier | +| Naive Bayes | Binary Classifier | +| Agglomerative Hierarchical Clustering (AHC) | Clustering Algorithm | +| DBSCAN | Clustering Algorithm | +| Fuzzy C mean | Clustering Algorithm | +| Gaussian Mixture | Clustering Algorithm | +| K-means | Clustering Algorithm | +| Mean shift | Clustering Algorithm | +| Isolation Forest | Anomaly Detection Algorithm | +| One-class SVM | Anomaly Detection Algorithm | -``` -cd existing_repo -git remote add origin https://gitlab.liris.cnrs.fr/gladis/ghypeddings.git -git branch -M main -git push -uf origin main -``` +### 1.2. Datasets -## Integrate with your tools +The following intrusion detection datasets were used to test and evaluate the models. Our code includes all the pre-processing steps required to convert these datasets from tabular format into graphs. Due to usage restrictions, this library provides only a single graph of each dataset, with 5,000 nodes, already pre-processed and normalized. -- [ ] [Set up project integrations](https://gitlab.liris.cnrs.fr/gladis/ghypeddings/-/settings/integrations) +| Name | Ref | +|-----------------|-------| +| CIC-DDoS2019 | [7] | +| AWID3 | | -## Collaborate with your team -- [ ] [Invite team members and collaborators](https://docs.gitlab.com/ee/user/project/members/) -- [ ] [Create a new merge request](https://docs.gitlab.com/ee/user/project/merge_requests/creating_merge_requests.html) -- [ ] [Automatically close issues from merge requests](https://docs.gitlab.com/ee/user/project/issues/managing_issues.html#closing-issues-automatically) -- [ ] [Enable merge request approvals](https://docs.gitlab.com/ee/user/project/merge_requests/approvals/) -- [ ] [Set auto-merge](https://docs.gitlab.com/ee/user/project/merge_requests/merge_when_pipeline_succeeds.html) -## Test and Deploy +## 2. Installation -Use the built-in continuous integration in GitLab. +## 3. 
Usage -- [ ] [Get started with GitLab CI/CD](https://docs.gitlab.com/ee/ci/quick_start/index.html) -- [ ] [Analyze your code for known vulnerabilities with Static Application Security Testing (SAST)](https://docs.gitlab.com/ee/user/application_security/sast/) -- [ ] [Deploy to Kubernetes, Amazon EC2, or Amazon ECS using Auto Deploy](https://docs.gitlab.com/ee/topics/autodevops/requirements.html) -- [ ] [Use pull-based deployments for improved Kubernetes management](https://docs.gitlab.com/ee/user/clusters/agent/) -- [ ] [Set up protected environments](https://docs.gitlab.com/ee/ci/environments/protected_environments.html) +Training and evaluation a model using our library is done in 03 lines of code only! -*** +### 3.1. Models -# Editing this README +### 3.2. Datasets -When you're ready to make this README your own, just edit this file and use the handy template below (or feel free to structure it however you want - this is just a starting point!). Thanks to [makeareadme.com](https://www.makeareadme.com/) for this template. +## 4. Citation -## Suggestions for a good README +## 5. References -Every project is different, so consider which of these sections apply to yours. The sections used in the template are suggestions for most open source projects. Also keep in mind that while a README can be too long and detailed, too long is better than too short. If you think your README is too long, consider utilizing another form of documentation rather than cutting out information. - -## Name -Choose a self-explaining name for your project. - -## Description -Let people know what your project can do specifically. Provide context and add a link to any reference visitors might be unfamiliar with. A list of Features or a Background subsection can also be added here. If there are alternatives to your project, this is a good place to list differentiating factors. - -## Badges -On some READMEs, you may see small images that convey metadata, such as whether or not all the tests are passing for the project. You can use Shields to add some to your README. Many services also have instructions for adding a badge. - -## Visuals -Depending on what you are making, it can be a good idea to include screenshots or even a video (you'll frequently see GIFs rather than actual videos). Tools like ttygif can help, but check out Asciinema for a more sophisticated method. - -## Installation -Within a particular ecosystem, there may be a common way of installing things, such as using Yarn, NuGet, or Homebrew. However, consider the possibility that whoever is reading your README is a novice and would like more guidance. Listing specific steps helps remove ambiguity and gets people to using your project as quickly as possible. If it only runs in a specific context like a particular programming language version or operating system or has dependencies that have to be installed manually, also add a Requirements subsection. - -## Usage -Use examples liberally, and show the expected output if you can. It's helpful to have inline the smallest example of usage that you can demonstrate, while providing links to more sophisticated examples if they are too long to reasonably include in the README. - -## Support -Tell people where they can go to for help. It can be any combination of an issue tracker, a chat room, an email address, etc. - -## Roadmap -If you have ideas for releases in the future, it is a good idea to list them in the README. 
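To make the promise in the Usage section above concrete, here is a minimal sketch of the three-line workflow, using only the `CIC_DDoS2019` and `POINCARE` classes defined in this repository and assuming it is importable as the `Ghypeddings` package (the hyperparameters shown are illustrative, not tuned values):

```python
from Ghypeddings import CIC_DDoS2019, POINCARE

# Load the bundled, pre-processed 5,000-node graph (adjacency, node features, labels).
adj, features, labels = CIC_DDoS2019().load_samples()

# Train a shallow Poincare embedding, then evaluate it on the held-out test split.
model = POINCARE(adj, features, labels, dim=10, epochs=100)
losses, acc, f1, recall, precision, roc_auc, train_time = model.fit()
test_loss, acc, f1, recall, precision, roc_auc = model.predict()

# Optionally dump the learned embeddings (hyperbolic and tangent-space CSVs) to disk.
model.save_embeddings()
```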
- -## Contributing -State if you are open to contributions and what your requirements are for accepting them. - -For people who want to make changes to your project, it's helpful to have some documentation on how to get started. Perhaps there is a script that they should run or some environment variables that they need to set. Make these steps explicit. These instructions could also be useful to your future self. - -You can also document commands to lint the code or run tests. These steps help to ensure high code quality and reduce the likelihood that the changes inadvertently break something. Having instructions for running tests is especially helpful if it requires external setup, such as starting a Selenium server for testing in a browser. - -## Authors and acknowledgment -Show your appreciation to those who have contributed to the project. - -## License -For open source projects, say how it is licensed. - -## Project status -If you have run out of energy or time for your project, put a note at the top of the README saying that development has slowed down or stopped completely. Someone may choose to fork your project or volunteer to step in as a maintainer or owner, allowing your project to keep going. You can also make an explicit request for maintainers. +[1]: [Nickel, Maximillian, and Douwe Kiela. "Poincaré embeddings for learning hierarchical representations." Advances in neural information processing systems 30 (2017).](https://proceedings.neurips.cc/paper_files/paper/2017/hash/59dfa2df42d9e3d41f5b02bfc32229dd-Abstract.html) +[2]: [Liu, Qi, Maximilian Nickel, and Douwe Kiela. "Hyperbolic graph neural networks." Advances in neural information processing systems 32 (2019).](https://proceedings.neurips.cc/paper/2019/hash/103303dd56a731e377d01f6a37badae3-Abstract.html) +[3]: [Chami, Ines, et al. "Hyperbolic graph convolutional neural networks." Advances in neural information processing systems 32 (2019).](https://proceedings.neurips.cc/paper_files/paper/2019/hash/0415740eaa4d9decbc8da001d3fd805f-Abstract.html) +[4]: [Mathieu, Emile, et al. "Continuous hierarchical representations with poincaré variational auto-encoders." Advances in neural information processing systems 32 (2019).](https://proceedings.neurips.cc/paper/2019/hash/0ec04cb3912c4f08874dd03716f80df1-Abstract.html) +[5]: [Dai, Jindou, et al. "A hyperbolic-to-hyperbolic graph convolutional network." Proceedings of the IEEE/CVF conference on computer vision and pattern recognition. 2021.](https://www.computer.org/csdl/proceedings-article/cvpr/2021/450900a154/1yeJgfbgw6Y) +[6]: [Park, Jiwoong, et al. "Unsupervised hyperbolic representation learning via message passing auto-encoders." Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition. 
2021.](https://ieeexplore.ieee.org/document/9577649) +[7]: [CIC-DDoS2019](https://www.unb.ca/cic/datasets/ddos-2019.html) diff --git a/__init__.py b/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..37936737e7ef84fa8bc1299b9dbd2053d56c48b3 --- /dev/null +++ b/__init__.py @@ -0,0 +1,14 @@ +from Ghypeddings.H2HGCN.h2hgcn import H2HGCN +from Ghypeddings.HGCAE.hgcae import HGCAE +from Ghypeddings.HGCN.hgcn import HGCN +from Ghypeddings.HGNN.hgnn import HGNN +from Ghypeddings.Poincare.poincare import POINCARE +from Ghypeddings.PVAE.pvae import PVAE + +from Ghypeddings.datasets.datasets import CIC_DDoS2019 +from Ghypeddings.datasets.datasets import NF_CIC_IDS2018_v2 +from Ghypeddings.datasets.datasets import NF_UNSW_NB15_v2 +from Ghypeddings.datasets.datasets import Darknet +from Ghypeddings.datasets.datasets import AWID3 +from Ghypeddings.datasets.datasets import NF_TON_IoT_v2 +from Ghypeddings.datasets.datasets import NF_BOT_IoT_v2 \ No newline at end of file diff --git a/anomaly_detection/__init__.py b/anomaly_detection/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..41092b1e9bc9d3fa72849637f8a78dcda3a06fcf --- /dev/null +++ b/anomaly_detection/__init__.py @@ -0,0 +1,5 @@ +from Ghypeddings.anomaly_detection.isolation_forest import isolation_forest +from Ghypeddings.anomaly_detection.one_class_svm import one_class_svm +from Ghypeddings.anomaly_detection.dbscan import dbscan +from Ghypeddings.anomaly_detection.kmeans import kmeans +from Ghypeddings.anomaly_detection.local_outlier_factor import local_outlier_factor \ No newline at end of file diff --git a/anomaly_detection/dbscan.py b/anomaly_detection/dbscan.py new file mode 100644 index 0000000000000000000000000000000000000000..00bc3d669316442b5edb4595f65872f49361a755 --- /dev/null +++ b/anomaly_detection/dbscan.py @@ -0,0 +1,9 @@ +from sklearn.cluster import DBSCAN +from Ghypeddings.anomaly_detection.utils import calculate_metrics + + +def dbscan(X,y): + dbscan = DBSCAN(eps=0.5, min_samples=5) + labels = dbscan.fit_predict(X) + outliers = labels == -1 + return calculate_metrics(y,outliers) diff --git a/anomaly_detection/isolation_forest.py b/anomaly_detection/isolation_forest.py new file mode 100644 index 0000000000000000000000000000000000000000..52ea90463b1026ac8d482f240f9bb5b4a64219d4 --- /dev/null +++ b/anomaly_detection/isolation_forest.py @@ -0,0 +1,12 @@ +from Ghypeddings.anomaly_detection.utils import calculate_metrics + + +from sklearn.ensemble import IsolationForest + +def isolation_forest(X,y,anomalies_percentage = 0.1): + model = IsolationForest(contamination=anomalies_percentage) + model.fit(X) + y_pred = model.predict(X) + y_pred[y_pred == 1] = 0 + y_pred[y_pred == -1]= 1 + return calculate_metrics(y,y_pred) \ No newline at end of file diff --git a/anomaly_detection/kmeans.py b/anomaly_detection/kmeans.py new file mode 100644 index 0000000000000000000000000000000000000000..a5fbfc8343bda122043d33743def26ba18dfcd5c --- /dev/null +++ b/anomaly_detection/kmeans.py @@ -0,0 +1,12 @@ +from sklearn.cluster import KMeans +from Ghypeddings.anomaly_detection.utils import calculate_metrics +import numpy as np + +def kmeans(X,y,n_clusters,outlier_percentage=.1): + model = KMeans(n_clusters=n_clusters) + model.fit(X) + # y_pred = model.predict(X) + distances = model.transform(X).min(axis=1) + threshold = np.percentile(distances, 100 * (1 - outlier_percentage)) + outliers = distances > threshold + return calculate_metrics(y,outliers) \ No newline at end of file diff --git 
a/anomaly_detection/local_outlier_factor.py b/anomaly_detection/local_outlier_factor.py new file mode 100644 index 0000000000000000000000000000000000000000..36caa7022fafb9f826a7e3200d0b637fb9cf7679 --- /dev/null +++ b/anomaly_detection/local_outlier_factor.py @@ -0,0 +1,10 @@ +from sklearn.neighbors import LocalOutlierFactor +from Ghypeddings.anomaly_detection.utils import calculate_metrics +import numpy as np + +def local_outlier_factor(X,y,n_neighbors=20,outlier_percentage=.1): + lof = LocalOutlierFactor(n_neighbors=n_neighbors, contamination=outlier_percentage) + y_pred = lof.fit_predict(X) + y_pred[y_pred == 1] = 0 + y_pred[y_pred == -1] = 1 + return calculate_metrics(y,y_pred) \ No newline at end of file diff --git a/anomaly_detection/one_class_svm.py b/anomaly_detection/one_class_svm.py new file mode 100644 index 0000000000000000000000000000000000000000..c383e8d0e2fa979a6fd8b1aff173ba75b62de572 --- /dev/null +++ b/anomaly_detection/one_class_svm.py @@ -0,0 +1,11 @@ +from Ghypeddings.anomaly_detection.utils import calculate_metrics + + +from sklearn.svm import OneClassSVM + +def one_class_svm(X,y, kernel='rbf',nu=0.1): + model = OneClassSVM(kernel=kernel, nu=nu) + model.fit(X) + y_pred = model.predict(X) + y_pred[y_pred == -1]=0 + return calculate_metrics(y,y_pred) \ No newline at end of file diff --git a/anomaly_detection/utils.py b/anomaly_detection/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..dfb39f3c0065e4a7cb253e3d3b3af23da024a88d --- /dev/null +++ b/anomaly_detection/utils.py @@ -0,0 +1,22 @@ +## external evaluation metrics +from sklearn.metrics import adjusted_rand_score +from sklearn.metrics import normalized_mutual_info_score +from sklearn.metrics import fowlkes_mallows_score +## additional evaluation metrics +from sklearn.metrics import homogeneity_score, completeness_score, v_measure_score +## classification metrics +from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score, accuracy_score + +def calculate_metrics(y_true,y_pred): + ari = adjusted_rand_score(y_true, y_pred) + nmi = normalized_mutual_info_score(y_true, y_pred) + fmi = fowlkes_mallows_score(y_true, y_pred) + homogeneity = homogeneity_score(y_true, y_pred) + completeness = completeness_score(y_true, y_pred) + v_measure = v_measure_score(y_true, y_pred) + acc = accuracy_score(y_true,y_pred) + f1 = f1_score(y_true,y_pred) + rec = recall_score(y_true,y_pred) + pre = precision_score(y_true,y_pred) + roc = roc_auc_score(y_true,y_pred) + return ari,nmi,fmi,homogeneity,completeness,v_measure,acc,f1,rec,pre,roc \ No newline at end of file diff --git a/classifiers/__init__.py b/classifiers/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..fd5dc32496db620dbefc3f08e564b1747b1ced0b --- /dev/null +++ b/classifiers/__init__.py @@ -0,0 +1,19 @@ +from Ghypeddings.classifiers.svm import SVM +from Ghypeddings.classifiers.mlp import mlp +from Ghypeddings.classifiers.decision_tree import decision_tree +from Ghypeddings.classifiers.random_forest import random_forest +from Ghypeddings.classifiers.adaboost import adaboost +from Ghypeddings.classifiers.knn import KNN +from Ghypeddings.classifiers.naive_bayes import naive_bayes + +from sklearn.metrics import accuracy_score , f1_score , recall_score , precision_score , roc_auc_score + + +def calculate_metrics(clf,X,y): + y_pred = clf.predict(X) + accuracy = accuracy_score(y, y_pred) + f1 = f1_score(y, y_pred) + recall = recall_score(y, y_pred) + precision = precision_score(y, y_pred) + roc_auc = 
roc_auc_score(y, y_pred) + return accuracy,f1,recall,precision,roc_auc \ No newline at end of file diff --git a/classifiers/adaboost.py b/classifiers/adaboost.py new file mode 100644 index 0000000000000000000000000000000000000000..35c469fecdf7f5131d74a11a95a60c9e15a97bab --- /dev/null +++ b/classifiers/adaboost.py @@ -0,0 +1,5 @@ +from sklearn.ensemble import AdaBoostClassifier + +def adaboost(X,y,seed,n_estimators=2): + ada_boost = AdaBoostClassifier(n_estimators=n_estimators, random_state=seed) + return ada_boost.fit(X, y) \ No newline at end of file diff --git a/classifiers/decision_tree.py b/classifiers/decision_tree.py new file mode 100644 index 0000000000000000000000000000000000000000..c59108307d032307075bd02cb460ee3c1b7bac60 --- /dev/null +++ b/classifiers/decision_tree.py @@ -0,0 +1,5 @@ +from sklearn.tree import DecisionTreeClassifier + +def decision_tree(X,y,max_depth=2): + clf = DecisionTreeClassifier(max_depth=max_depth) + return clf.fit(X, y) \ No newline at end of file diff --git a/classifiers/knn.py b/classifiers/knn.py new file mode 100644 index 0000000000000000000000000000000000000000..70823c050a50d7ee03fe55a7996884e6d1dcbee7 --- /dev/null +++ b/classifiers/knn.py @@ -0,0 +1,5 @@ +from sklearn.neighbors import KNeighborsClassifier + +def KNN(X,y,k=5): + knn = KNeighborsClassifier(n_neighbors=k) + return knn.fit(X, y) \ No newline at end of file diff --git a/classifiers/mlp.py b/classifiers/mlp.py new file mode 100644 index 0000000000000000000000000000000000000000..0d76b43b2e60e163950aa876ba153b4335c52fe5 --- /dev/null +++ b/classifiers/mlp.py @@ -0,0 +1,7 @@ +from sklearn.neural_network import MLPClassifier +import time +import numpy as np + +def mlp(X,y,n_hidden_layers,hidden_dim,epochs=50,batch_size=64,seed=42): + mlp = MLPClassifier(hidden_layer_sizes=(n_hidden_layers, hidden_dim),learning_rate='adaptive',batch_size=batch_size ,activation='identity', solver='lbfgs', max_iter=epochs, random_state=seed) + return mlp.fit(X, y) \ No newline at end of file diff --git a/classifiers/naive_bayes.py b/classifiers/naive_bayes.py new file mode 100644 index 0000000000000000000000000000000000000000..62d6d9b575a3a72eea74687e4e38bbb8c10ed680 --- /dev/null +++ b/classifiers/naive_bayes.py @@ -0,0 +1,5 @@ +from sklearn.naive_bayes import GaussianNB + +def naive_bayes(X,y): + clf = GaussianNB() + return clf.fit(X, y) \ No newline at end of file diff --git a/classifiers/random_forest.py b/classifiers/random_forest.py new file mode 100644 index 0000000000000000000000000000000000000000..24c10c46fe8c1c0d507f0cf621e90c0f642481ae --- /dev/null +++ b/classifiers/random_forest.py @@ -0,0 +1,5 @@ +from sklearn.ensemble import RandomForestClassifier + +def random_forest(X,y,seed,n_estimators=10,max_depth=10,max_features='log2'): + clf = RandomForestClassifier(max_features=max_features,n_estimators=n_estimators, max_depth=max_depth, random_state=seed) + return clf.fit(X, y) \ No newline at end of file diff --git a/classifiers/svm.py b/classifiers/svm.py new file mode 100644 index 0000000000000000000000000000000000000000..a48c0a52af8cb9eb3500f79ce16c9c204bd7f044 --- /dev/null +++ b/classifiers/svm.py @@ -0,0 +1,6 @@ +from sklearn import svm + + +def SVM(X,y,kernel='rbf',gamma='scale',C=1): + cls = svm.SVC(kernel=kernel, gamma=gamma, C=C) + return cls.fit(X, y) \ No newline at end of file diff --git a/clusterers/__init__.py b/clusterers/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..5bb80fb0047624b6c0c404d8060daca7352dcaad --- /dev/null +++ b/clusterers/__init__.py 
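Both the binary-classifier wrappers above and the unsupervised detectors (as well as the clustering helpers that follow) take a plain feature matrix plus binary labels, so they can be pointed directly at the embedding CSVs written by `save_embeddings()`. A hedged sketch, where the file name is the one the Poincaré wrapper writes and reusing the full set as both train and test data is for illustration only:

import numpy as np
from Ghypeddings.anomaly_detection import isolation_forest
from Ghypeddings.classifiers import SVM, calculate_metrics

# save_embeddings() stores the embedding columns first and the label in the last column.
data = np.loadtxt('poincare_embeddings_hyp.csv', delimiter=',')
X, y = data[:, :-1], data[:, -1].astype(int)

# Unsupervised route: returns the 11-tuple from anomaly_detection.utils.calculate_metrics
# (ari, nmi, fmi, homogeneity, completeness, v_measure, acc, f1, recall, precision, roc_auc).
metrics = isolation_forest(X, y, anomalies_percentage=0.1)

# Supervised route: fit one of the scikit-learn wrappers, then score it
# (accuracy, f1, recall, precision, roc_auc).
clf = SVM(X, y)
print(calculate_metrics(clf, X, y))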
@@ -0,0 +1,6 @@ +from Ghypeddings.clusterers.ahc import agglomerative_clustering +from Ghypeddings.clusterers.dbscan import dbscan +from Ghypeddings.clusterers.fuzzy_c_mean import fuzzy_c_mean +from Ghypeddings.clusterers.gaussian_mixture import gaussian_mixture +from Ghypeddings.clusterers.kmeans import kmeans +from Ghypeddings.clusterers.mean_shift import mean_shift \ No newline at end of file diff --git a/clusterers/ahc.py b/clusterers/ahc.py new file mode 100644 index 0000000000000000000000000000000000000000..aee3bfd5933a498cb1ba8a3decbe687dd1aea4df --- /dev/null +++ b/clusterers/ahc.py @@ -0,0 +1,7 @@ +from sklearn.cluster import AgglomerativeClustering +from Ghypeddings.clusterers.utils import calculate_metrics + +def agglomerative_clustering(X,y,n_clusters =2, linkage = 'ward'): + model = AgglomerativeClustering(n_clusters=n_clusters,linkage=linkage) + labels = model.fit_predict(X) + return calculate_metrics(y,labels) \ No newline at end of file diff --git a/clusterers/dbscan.py b/clusterers/dbscan.py new file mode 100644 index 0000000000000000000000000000000000000000..0b17e5594b7b28d447f932ffc395d540405890ce --- /dev/null +++ b/clusterers/dbscan.py @@ -0,0 +1,13 @@ +from Ghypeddings.clusterers.utils import calculate_metrics +from sklearn.cluster import DBSCAN + +def dbscan(X,y,eps=1e-4,min_samples=300): + model = DBSCAN(eps=eps, min_samples=min_samples) + y_pred = model.fit_predict(X) + mask = y_pred != -1 + y_true_filtered = y[mask] + y_pred_filtered = y_pred[mask] + y_pred_filtered[y_pred_filtered>0] = -1 + y_pred_filtered[y_pred_filtered == 0] = 1 + y_pred_filtered[y_pred_filtered == -1]=0 + return calculate_metrics(y_true_filtered,y_pred_filtered) \ No newline at end of file diff --git a/clusterers/fuzzy_c_mean.py b/clusterers/fuzzy_c_mean.py new file mode 100644 index 0000000000000000000000000000000000000000..af934eead709269782adb39722cf6924de0bc768 --- /dev/null +++ b/clusterers/fuzzy_c_mean.py @@ -0,0 +1,9 @@ +from Ghypeddings.clusterers.utils import calculate_metrics +import skfuzzy as fuzz +import numpy as np + +def fuzzy_c_mean(X,y,n_clusters=5,power=2,error=0.005,maxiter=1000,init=None): + X_transposed = np.transpose(X) + cntr, u, u0, d, jm, p, fpc = fuzz.cluster.cmeans(X_transposed, n_clusters, power, error=error, maxiter=maxiter, init=init) + y_pred = np.argmax(u, axis=0) + return calculate_metrics(y,y_pred) \ No newline at end of file diff --git a/clusterers/gaussian_mixture.py b/clusterers/gaussian_mixture.py new file mode 100644 index 0000000000000000000000000000000000000000..3405e019bb8b4ab64ca406bb62fb3de7c5e83e40 --- /dev/null +++ b/clusterers/gaussian_mixture.py @@ -0,0 +1,7 @@ +from sklearn.mixture import GaussianMixture +from Ghypeddings.clusterers.utils import calculate_metrics + +def gaussian_mixture(X,y,n_components=2): + model = GaussianMixture(n_components=2) + y_pred = model.fit_predict(X) + return calculate_metrics(y,y_pred) \ No newline at end of file diff --git a/clusterers/kmeans.py b/clusterers/kmeans.py new file mode 100644 index 0000000000000000000000000000000000000000..848fef469ae29d55cdaf097e4a8df8057f89e2d4 --- /dev/null +++ b/clusterers/kmeans.py @@ -0,0 +1,11 @@ +from Ghypeddings.clusterers.utils import calculate_metrics + +from sklearn.cluster import KMeans + + +def kmeans(X,y,n_clusters=2,n_init=10): + model = KMeans(n_clusters=n_clusters,n_init=n_init) + model.fit(X) + y_pred = model.labels_ + y_pred[y_pred!=1]=0 + return calculate_metrics(y,y_pred) \ No newline at end of file diff --git a/clusterers/mean_shift.py 
b/clusterers/mean_shift.py new file mode 100644 index 0000000000000000000000000000000000000000..ba987548bf18300b323217b0f3e06df97188c92c --- /dev/null +++ b/clusterers/mean_shift.py @@ -0,0 +1,10 @@ +from Ghypeddings.clusterers.utils import calculate_metrics + +from sklearn.cluster import MeanShift + +def mean_shift(X,y): + y_pred = MeanShift().fit_predict(X) + y_pred[y_pred>0] = -1 + y_pred[y_pred == 0] = 1 + y_pred[y_pred == -1]=0 + return calculate_metrics(y,y_pred) \ No newline at end of file diff --git a/clusterers/utils.py b/clusterers/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..dfb39f3c0065e4a7cb253e3d3b3af23da024a88d --- /dev/null +++ b/clusterers/utils.py @@ -0,0 +1,22 @@ +## external evaluation metrics +from sklearn.metrics import adjusted_rand_score +from sklearn.metrics import normalized_mutual_info_score +from sklearn.metrics import fowlkes_mallows_score +## additional evaluation metrics +from sklearn.metrics import homogeneity_score, completeness_score, v_measure_score +## classification metrics +from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score, accuracy_score + +def calculate_metrics(y_true,y_pred): + ari = adjusted_rand_score(y_true, y_pred) + nmi = normalized_mutual_info_score(y_true, y_pred) + fmi = fowlkes_mallows_score(y_true, y_pred) + homogeneity = homogeneity_score(y_true, y_pred) + completeness = completeness_score(y_true, y_pred) + v_measure = v_measure_score(y_true, y_pred) + acc = accuracy_score(y_true,y_pred) + f1 = f1_score(y_true,y_pred) + rec = recall_score(y_true,y_pred) + pre = precision_score(y_true,y_pred) + roc = roc_auc_score(y_true,y_pred) + return ari,nmi,fmi,homogeneity,completeness,v_measure,acc,f1,rec,pre,roc \ No newline at end of file diff --git a/datasets/.gitignore b/datasets/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..d22b9a22608a3bacbc730c3ad7c080f98c9ead54 --- /dev/null +++ b/datasets/.gitignore @@ -0,0 +1,3 @@ +outlier_datasets.py + +repetition_datasets.py \ No newline at end of file diff --git a/datasets/__init__.py b/datasets/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/datasets/datasets.py b/datasets/datasets.py new file mode 100644 index 0000000000000000000000000000000000000000..71250482a063d6fbdf02672d7ce837170fc2864e --- /dev/null +++ b/datasets/datasets.py @@ -0,0 +1,310 @@ +import os + +import pandas as pd +import numpy as np +from sklearn.preprocessing import MinMaxScaler +from sklearn.preprocessing import StandardScaler +import pickle +from sklearn.preprocessing import LabelEncoder +import time +import datetime +import progressbar +import category_encoders as ce + +class Dataset: + def __init__(self,features_path='',adj_path='',labels_path='',directory=''): + self.features_path = features_path + self.adj_path = adj_path + self.labels_path = labels_path + self.directory = directory + + def _get_files(self): + return [os.path.join(self.directory,file) for file in os.listdir(self.directory) if os.path.isfile(os.path.join(self.directory, file)) and '.gitignore' not in file] + + def save_samples(self,adj,features,labels): + with open(self.adj_path,'wb') as f: + pickle.dump(adj,f) + print('The adjacency matrix is saved in',self.adj_path) + with open(self.features_path,'wb') as f: + pickle.dump(features,f) + print('The node features matrix is saved in',self.features_path) + with open(self.labels_path,'wb') as f: + pickle.dump(labels,f) + 
print('The labels are saved in ',self.labels_path) + + def load_samples(self): + with open(self.adj_path,'rb') as f: + adj = pickle.load(f) + print('The adjacency matrix has been loaded successfully') + with open(self.features_path,'rb') as f: + features = pickle.load(f) + print('The node features matrix has been loaded successfully') + with open(self.labels_path,'rb') as f: + labels = pickle.load(f) + print('The labels have been loaded successfully') + print('features shape:',features.shape,'adj shape',adj.shape,'labels shape',labels.shape) + return adj,features,labels + +class CIC_DDoS2019(Dataset): + def __init__(self): + super().__init__( + features_path=os.path.join(os.path.dirname(os.path.abspath(__file__)),'examples','CICDDoS2019','features.pkl'), + adj_path=os.path.join(os.path.dirname(os.path.abspath(__file__)),'examples','CICDDoS2019','adjacency.pkl'), + labels_path= os.path.join(os.path.dirname(os.path.abspath(__file__)),'examples','CICDDoS2019','labels.pkl'), + directory=os.path.join(os.path.dirname(os.path.abspath(__file__)),'examples','CICDDoS2019','original') + ) + + def build(self,n_nodes,n_classes=2): + df = self._create_file_bc(n_nodes,n_classes) + for column in df.columns: + max_value = df.loc[df[column] != np.inf, column].max() + min_value = df.loc[df[column] != -np.inf, column].min() + df.loc[df[column] == np.inf, column] = max_value + df.loc[df[column] == -np.inf, column] = min_value + adj = self._filling_adjacency_numpy(df) + labels = df[' Label'].apply(lambda x: 0 if x == 'BENIGN' else 1).to_numpy() + columns_to_exclude = ['Unnamed: 0', 'Flow ID', ' Source IP',' Source Port',' Destination Port',' Flow Duration',' Protocol', ' Destination IP', ' Timestamp', 'SimillarHTTP',' Inbound',' Label'] + df.drop(columns_to_exclude, axis=1, inplace=True) + features = df.to_numpy() + scaler = MinMaxScaler() + features = scaler.fit_transform(features) + self.save_samples(adj,features,labels) + return adj, features, labels + + def _load_file(self,path,max_per_class,list_classes=[]): + df = pd.read_csv(path,low_memory=False) + df.dropna(axis=0, inplace=True) + if(len(list_classes)): + df = df[df[' Label'].isin(list_classes)] + df = df.groupby([' Label']).apply(lambda x: x.sample(max_per_class)).reset_index(drop=True) + return df + + def _create_file_bc(self,n_nodes,n_classes): + file_paths = self._get_files() + max_per_class = int(n_nodes / (n_classes * len(file_paths))) +1 + df_list = [] + for path in file_paths: + class_name = path.split('\\')[-1].split('.')[0] + list_classes = ['BENIGN',class_name] + df_list.append(self._load_file(path,max_per_class,list_classes)) + print('finishing loading the file : {}'.format(path)) + df = pd.concat(df_list,ignore_index=True) + df = df.sample(n=n_nodes).reset_index(drop=True) + print(df.shape) + # print(df[' Label'].value_counts()) + # df = pd.read_csv(os.path.join(self.directory,'all.csv'),low_memory=False) + # df[' Label'] = df[' Label'].apply(lambda x: 0 if x == 'BENIGN' else 1) + # node_per_class = int(n_nodes/n_classes) + # df = df.groupby([' Label']).apply(lambda x: x.sample(node_per_class)).reset_index(drop=True) + return df + + def _filling_adjacency_numpy(self,data): + N = data.shape[0] + try: + adjacency = np.zeros((N,N), dtype=bool) + except Exception as e: + print(f"An error occurred: {e}") + + source_ips = data[' Source IP'].to_numpy() + destination_ips = data[' Destination IP'].to_numpy() + mask = ((source_ips[:, np.newaxis] == source_ips) | (source_ips[:, np.newaxis] == destination_ips) | (destination_ips[:, np.newaxis] == 
+
+class NetFlowDataset(Dataset):
+    def __init__(self,features_path,adj_path,labels_path,file):
+        super().__init__(features_path,adj_path,labels_path)
+        self.file = file
+
+    def build(self,n_nodes,n_classes=2):
+        df = pd.read_csv(self.file)
+        df = df.groupby(['Label']).apply(lambda x: x.sample(int(n_nodes/n_classes))).reset_index(drop=True)
+        df = df.sample(frac=1).reset_index(drop=True)
+        adj = self._filling_adjacency_numpy(df)
+        labels = df['Label'].to_numpy()
+        labels = labels.astype(np.bool_)
+        df.drop(['IPV4_SRC_ADDR','IPV4_DST_ADDR','Attack','Label','L4_SRC_PORT','L4_DST_PORT'],axis=1,inplace=True)
+        #df = pd.get_dummies(df,columns=['PROTOCOL','DNS_QUERY_TYPE','FTP_COMMAND_RET_CODE'])
+
+        encoder = ce.TargetEncoder(cols=['TCP_FLAGS','L7_PROTO','PROTOCOL'])
+        encoder.fit(df,labels)
+        df = encoder.transform(df)
+
+        features = df.to_numpy()
+        scaler = MinMaxScaler()
+        features = scaler.fit_transform(features)
+        print("features:",features.shape)
+        self.save_samples(adj,features,labels)
+        return adj,features,labels
+
+    def _filling_adjacency_numpy(self,data):
+        N = data.shape[0]
+        try:
+            adjacency = np.zeros((N,N), dtype=bool)
+        except Exception as e:
+            print(f"An error occurred: {e}")
+
+        if 'bot_iot' in self.file:
+            data['IPV4_SRC_ADDR'] = data['IPV4_SRC_ADDR'].apply(str)
+            data['IPV4_DST_ADDR'] = data['IPV4_DST_ADDR'].apply(str)
+            data['L4_SRC_PORT'] = data['L4_SRC_PORT'].apply(str)
+            data['L4_DST_PORT'] = data['L4_DST_PORT'].apply(str)
+            data['IPV4_SRC_ADDR'] = data['IPV4_SRC_ADDR']+':'+data['L4_SRC_PORT']
+            data['IPV4_DST_ADDR'] = data['IPV4_DST_ADDR']+':'+data['L4_DST_PORT']
+
+        source_ips = data['IPV4_SRC_ADDR'].to_numpy()
+        destination_ips = data['IPV4_DST_ADDR'].to_numpy()
+        mask = ((source_ips[:, np.newaxis] == source_ips) | (source_ips[:, np.newaxis] == destination_ips) | (destination_ips[:, np.newaxis] == source_ips) | (destination_ips[:, np.newaxis] == destination_ips))
+        adjacency[mask] = True
+        return adjacency
+
+class NF_CIC_IDS2018_v2(NetFlowDataset):
+    def __init__(self):
+        super().__init__(
+            features_path=os.path.join(os.path.dirname(os.path.abspath(__file__)),'examples','CIC_IDS2018','features.pkl'),
+            adj_path=os.path.join(os.path.dirname(os.path.abspath(__file__)),'examples','CIC_IDS2018','adjacency.pkl'),
+            labels_path= os.path.join(os.path.dirname(os.path.abspath(__file__)),'examples','CIC_IDS2018','labels.pkl'),
+            file = os.path.join(os.path.dirname(os.path.abspath(__file__)),'examples','CIC_IDS2018','original','cic_ids2018.csv')
+        )
+
+class NF_UNSW_NB15_v2(NetFlowDataset):
+    def __init__(self):
+        super().__init__(
+            features_path=os.path.join(os.path.dirname(os.path.abspath(__file__)),'examples','UNSW_NB15','features.pkl'),
+            adj_path=os.path.join(os.path.dirname(os.path.abspath(__file__)),'examples','UNSW_NB15','adjacency.pkl'),
+            labels_path= os.path.join(os.path.dirname(os.path.abspath(__file__)),'examples','UNSW_NB15','labels.pkl'),
+            file = os.path.join(os.path.dirname(os.path.abspath(__file__)),'examples','UNSW_NB15','original','unsw_nb15.csv')
+        )
+
+class Darknet(Dataset):
+    def __init__(self):
+        super().__init__(
+            features_path=os.path.join(os.path.dirname(os.path.abspath(__file__)),'examples','Darknet','features.pkl'),
+            adj_path=os.path.join(os.path.dirname(os.path.abspath(__file__)),'examples','Darknet','adjacency.pkl'),
+            labels_path= os.path.join(os.path.dirname(os.path.abspath(__file__)),'examples','Darknet','labels.pkl')
+        )
+        self.file = os.path.join(os.path.dirname(os.path.abspath(__file__)),'examples','Darknet','original','Darknet.csv')
+
+    def _to_binary_classification(self,x):
+        if 'Non' in x:
+            return 0
+        else:
+            return 1
+
+    def build(self,n_nodes,n_classes=2):
+        print('Starting building a graph of size ...',n_nodes)
+        df = pd.read_csv(self.file)
+        df.dropna(axis=0, inplace=True)
+        df['Label'] = df['Label'].apply(self._to_binary_classification)
+        df = df.groupby(['Label']).apply(lambda x: x.sample(int(n_nodes/n_classes))).reset_index(drop=True)
+        df = df.sample(n=n_nodes).reset_index(drop=True)
+        data = df.to_numpy()
+        print('finishing data preprocessing ...')
+        adj = self._filling_adjacency_numpy(data,1,3)
+        print('building the adjacency matrix ...')
+        labels = df['Label'].to_numpy()
+        columns_to_exclude = ['Flow ID', 'Src IP','Src Port', 'Dst IP','Dst Port', 'Timestamp','Label','Label.1','Protocol','Flow Duration']
+        df.drop(columns_to_exclude, axis=1, inplace=True)
+        features = df.to_numpy()
+        print('saving the graph in the current project ...')
+        self.save_samples(adj,features,labels)
+        print('Building a graph has been successfully finished !')
+        return adj,features,labels
+
+    def _filling_adjacency_numpy(self,data,source_ip_index, destination_ip_index):
+        N = data.shape[0]
+        try:
+            adjacency = np.zeros((N,N), dtype=bool)
+        except Exception as e:
+            print(f"An error occurred: {e}")
+        source_ips = data[:, source_ip_index]
+        destination_ips = data[:, destination_ip_index]
+        mask = ((source_ips[:, np.newaxis] == source_ips) | (source_ips[:, np.newaxis] == destination_ips) | (destination_ips[:, np.newaxis] == source_ips) | (destination_ips[:, np.newaxis] == destination_ips))
+        adjacency[mask] = True
+        return adjacency
+
+class NF_BOT_IoT_v2(NetFlowDataset):
+    def __init__(self):
+        super().__init__(
+            features_path=os.path.join(os.path.dirname(os.path.abspath(__file__)),'examples','BOT_IOT','features.pkl'),
+            adj_path=os.path.join(os.path.dirname(os.path.abspath(__file__)),'examples','BOT_IOT','adjacency.pkl'),
+            labels_path= os.path.join(os.path.dirname(os.path.abspath(__file__)),'examples','BOT_IOT','labels.pkl'),
+            file = os.path.join(os.path.dirname(os.path.abspath(__file__)),'examples','BOT_IOT','original','bot_iot.csv')
+        )
+
+class NF_TON_IoT_v2(NetFlowDataset):
+    def __init__(self):
+        # directory=os.path.join(os.path.dirname(os.path.abspath(__file__)),'examples','TON_IOT','original'),
+        super().__init__(
+            features_path=os.path.join(os.path.dirname(os.path.abspath(__file__)),'examples','TON_IOT','features.pkl'),
+            adj_path=os.path.join(os.path.dirname(os.path.abspath(__file__)),'examples','TON_IOT','adjacency.pkl'),
+            labels_path= os.path.join(os.path.dirname(os.path.abspath(__file__)),'examples','TON_IOT','labels.pkl'),
+            file = os.path.join(os.path.dirname(os.path.abspath(__file__)),'examples','TON_IOT','original','ton_iot.csv')
+        )
+
+class AWID3(Dataset):
+    def __init__(self):
+        super().__init__(
+            features_path=os.path.join(os.path.dirname(os.path.abspath(__file__)),'examples','AWID3','features.pkl'),
+            adj_path=os.path.join(os.path.dirname(os.path.abspath(__file__)),'examples','AWID3','adjacency.pkl'),
+            labels_path= os.path.join(os.path.dirname(os.path.abspath(__file__)),'examples','AWID3','labels.pkl'),
+            directory=os.path.join(os.path.dirname(os.path.abspath(__file__)),'examples','AWID3','original')
+        )
+
+    def _config_signal(self,x):
+        words = str(x).split('-')
+        return np.mean([float(i)*-1 for i in words if i!=''])
+
+    def build(self,n_nodes):
+        path = os.path.join(os.getcwd(),'Ghypeddings','datasets','examples','AWID3','original','awid3.csv')
+        df = pd.read_csv(path)
+        df['Label'] = df['Label'].apply(lambda x: 0 if 'Normal' in x else 1)
+        df = df.groupby(['Label']).apply(lambda x: x.sample(int(n_nodes/2))).reset_index(drop=True)
+        df = df.sample(frac=1).reset_index(drop=True)
+        data=df[['ip.src','ip.dst']]
+        df.dropna(axis=1, inplace=True)
+        to_drop = ['frame.number','frame.time','radiotap.timestamp.ts','frame.time_delta_displayed','frame.time_epoch','frame.time_relative','wlan.duration','wlan.ra']
+        df.drop(columns=to_drop,axis=1,inplace=True)
+        alone = []
+        for c in df.columns:
+            if(len(df[c].unique()) == 1):
+                alone.append(c)
+            elif len(df[c].unique()) == 2:
+                df = pd.get_dummies(df,columns=[c],drop_first=True)
+            elif len(df[c].unique()) <=8:
+                df = pd.get_dummies(df,columns=[c])
+            elif len(df[c].unique()) <=15:
+                labels = df['Label']
+                df.drop(columns=['Label'],axis=1,inplace=True)
+                encoder = ce.TargetEncoder(cols=[c])
+                encoder.fit(df,labels)
+                df = encoder.transform(df)
+                df['Label']=labels
+            else:
+                if(df[c].dtype == 'object' and c!='radiotap.dbm_antsignal'):
+                    print(c,df[c].unique(),len(df[c].unique()))
+        df.drop(columns=alone,axis=1,inplace=True)
+        df['radiotap.dbm_antsignal'] = df['radiotap.dbm_antsignal'].apply(self._config_signal) # It contains a list
+        labels = df['Label_1'].to_numpy()
+        adj = self._filling_adjacency_numpy(data)
+        df.drop(columns=['frame.time_delta','Label_1'],axis=1,inplace=True)
+        features = df.to_numpy()
+        scaler = StandardScaler()
+        features = scaler.fit_transform(features)
+        # scaler = MinMaxScaler()
+        # features = scaler.fit_transform(features)
+        self.save_samples(adj=adj,features=features,labels=labels)
+        return adj,features,labels
+
+    def _filling_adjacency_numpy(self,data):
+        N = data.shape[0]
+        try:
+            adjacency = np.zeros((N,N), dtype=bool)
+        except Exception as e:
+            print(f"An error occurred: {e}")
+        source_ips = data['ip.src'].to_numpy()
+        destination_ips = data['ip.dst'].to_numpy()
+        mask = ((source_ips[:, np.newaxis] == source_ips) | (source_ips[:, np.newaxis] == destination_ips) | (destination_ips[:, np.newaxis] == source_ips) | (destination_ips[:, np.newaxis] == destination_ips) )
+        adjacency[mask] = True
+        np.fill_diagonal(adjacency, True)
+        return adjacency
\ No newline at end of file
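All of the `_filling_adjacency_numpy` variants in `datasets/datasets.py` rely on the same broadcasted IP-matching rule: two flow records become adjacent whenever they share any source or destination address. A small self-contained sketch of that rule follows; the toy addresses and the commented driver calls are illustrative only, and building a real graph assumes the raw CSVs sit under `datasets/examples/<name>/original/` as the constructors expect.

# Illustrative sketch (not part of the diff): the IP-overlap adjacency rule on
# three toy flows. Flows 0 and 1 share an endpoint, flow 2 touches neither.
import numpy as np

src = np.array(['10.0.0.1', '10.0.0.2', '10.0.0.9'])
dst = np.array(['10.0.0.2', '10.0.0.3', '10.0.0.8'])
mask = ((src[:, np.newaxis] == src) | (src[:, np.newaxis] == dst) |
        (dst[:, np.newaxis] == src) | (dst[:, np.newaxis] == dst))
print(mask.astype(int))   # rows 0 and 1 are linked; row 2 only matches itself

# Typical driver calls, assuming the raw CSVs are in place:
# adj, features, labels = CIC_DDoS2019().build(n_nodes=5000)
# adj, features, labels = CIC_DDoS2019().load_samples()   # reuse the pickled graph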
diff --git a/datasets/examples/.gitignore b/datasets/examples/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..09677a39bd6efc31c50b089e0aaa46c95f30a293
--- /dev/null
+++ b/datasets/examples/.gitignore
@@ -0,0 +1 @@
+outlier/
\ No newline at end of file
diff --git a/datasets/utils.py b/datasets/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..a65a154f3cda8bc37e70344a727c1e8c690b6904
--- /dev/null
+++ b/datasets/utils.py
@@ -0,0 +1,32 @@
+import os
+import pickle as pkl
+import sys
+import time
+import scipy.sparse as sp
+import networkx as nx
+import numpy as np
+from tqdm import tqdm
+
+def hyperbolicity(adj, num_samples):
+    curr_time = time.time()
+    hyps = []
+    G = nx.from_numpy_array(adj)
+    for _ in tqdm(range(num_samples)):
+        node_tuple = np.random.choice(G.nodes(), 4, replace=False)
+        s = []
+        try:
+            d01 = nx.shortest_path_length(G, source=node_tuple[0], target=node_tuple[1], weight=None)
+            d23 = nx.shortest_path_length(G, source=node_tuple[2], target=node_tuple[3], weight=None)
+            d02 = nx.shortest_path_length(G, source=node_tuple[0], target=node_tuple[2], weight=None)
+            d13 = nx.shortest_path_length(G, source=node_tuple[1], target=node_tuple[3], weight=None)
+            d03 = nx.shortest_path_length(G, source=node_tuple[0], target=node_tuple[3], weight=None)
+            d12 = nx.shortest_path_length(G, source=node_tuple[1], target=node_tuple[2], weight=None)
+            s.append(d01 + d23)
+            s.append(d02 + d13)
+            s.append(d03 + d12)
+            s.sort()
+            hyps.append((s[-1] - s[-2]) / 2)
+        except Exception as e:
+            continue
+    print('Time for hyp: ', time.time() - curr_time , 'hyp:', max(hyps))
+    return max(hyps)
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..11e5ded51ddc2ffb101c513fca2f39b932241db7
Binary files /dev/null and b/requirements.txt differ
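`hyperbolicity` in `datasets/utils.py` estimates the sampled Gromov delta of a graph by drawing random 4-tuples of nodes, comparing the three pairwise distance sums, and keeping the largest observed delta; values close to zero indicate the tree-like structure that hyperbolic embeddings exploit. A minimal usage sketch, assuming the package is importable as `Ghypeddings` and a graph has already been built and pickled; the dataset class and sample count are placeholders.

# Illustrative sketch (not part of the diff): estimate the sampled
# delta-hyperbolicity of a previously built graph.
from Ghypeddings.datasets.datasets import Darknet
from Ghypeddings.datasets.utils import hyperbolicity

adj, features, labels = Darknet().load_samples()   # reuse the pickled graph
delta = hyperbolicity(adj, num_samples=5000)       # max delta over sampled 4-tuples
print('sampled delta-hyperbolicity:', delta)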