Commit 7725415a authored by yacinetouahria

final push

Showing with 778 additions and 0 deletions
"""Graph encoders."""
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import Ghypeddings.Poincare.manifolds as manifolds
class Encoder(nn.Module):
"""
Encoder abstract class.
"""
def __init__(self, c):
super(Encoder, self).__init__()
self.c = c
def encode(self, x):
pass
class Shallow(Encoder):
"""
Shallow Embedding method.
Learns embeddings or loads pretrained embeddings and uses an MLP for classification.
"""
def __init__(self, c, args):
super(Shallow, self).__init__(c)
self.manifold = getattr(manifolds, 'PoincareBall')()
weights = torch.Tensor(args.n_nodes, args.dim)
weights = self.manifold.init_weights(weights, self.c)
trainable = True
self.lt = manifolds.ManifoldParameter(weights, trainable, self.manifold, self.c)
self.all_nodes = torch.LongTensor(list(range(args.n_nodes)))
layers = []
self.layers = nn.Sequential(*layers)
def encode(self, x):
h = self.lt[self.all_nodes, :]
h = torch.cat((h, x), 1)
return h
from torch.optim import Adam
from Ghypeddings.Poincare.optimizers.radam import RiemannianAdam
"""Riemannian adam optimizer geoopt implementation (https://github.com/geoopt/)."""
import torch.optim
from Ghypeddings.Poincare.manifolds import Euclidean, ManifoldParameter
_default_manifold = Euclidean()
class OptimMixin(object):
def __init__(self, *args, stabilize=None, **kwargs):
self._stabilize = stabilize
super().__init__(*args, **kwargs)
def stabilize_group(self, group):
pass
def stabilize(self):
"""Stabilize parameters if they are off-manifold due to numerical reasons
"""
for group in self.param_groups:
self.stabilize_group(group)
def copy_or_set_(dest, source):
"""
A workaround to respect strides of :code:`dest` when copying :code:`source`
(https://github.com/geoopt/geoopt/issues/70)
Parameters
----------
dest : torch.Tensor
Destination tensor where to store new data
source : torch.Tensor
Source data to put in the new tensor
Returns
-------
dest
torch.Tensor, modified inplace
"""
if dest.stride() != source.stride():
return dest.copy_(source)
else:
return dest.set_(source)
class RiemannianAdam(OptimMixin, torch.optim.Adam):
r"""Riemannian Adam with the same API as :class:`torch.optim.Adam`
Parameters
----------
params : iterable
iterable of parameters to optimize or dicts defining
parameter groups
lr : float (optional)
learning rate (default: 1e-3)
betas : Tuple[float, float] (optional)
coefficients used for computing
running averages of gradient and its square (default: (0.9, 0.999))
eps : float (optional)
term added to the denominator to improve
numerical stability (default: 1e-8)
weight_decay : float (optional)
weight decay (L2 penalty) (default: 0)
amsgrad : bool (optional)
whether to use the AMSGrad variant of this
algorithm from the paper `On the Convergence of Adam and Beyond`_
(default: False)
Other Parameters
----------------
stabilize : int
Stabilize parameters if they are off-manifold due to numerical
reasons every ``stabilize`` steps (default: ``None`` -- no stabilize)
.. _On the Convergence of Adam and Beyond:
https://openreview.net/forum?id=ryQu7f-RZ
"""
def step(self, closure=None):
"""Performs a single optimization step.
Arguments
---------
closure : callable (optional)
A closure that reevaluates the model
and returns the loss.
"""
loss = None
if closure is not None:
loss = closure()
with torch.no_grad():
for group in self.param_groups:
if "step" not in group:
group["step"] = 0
betas = group["betas"]
weight_decay = group["weight_decay"]
eps = group["eps"]
learning_rate = group["lr"]
amsgrad = group["amsgrad"]
for point in group["params"]:
grad = point.grad
if grad is None:
continue
if isinstance(point, (ManifoldParameter)):
manifold = point.manifold
c = point.c
else:
manifold = _default_manifold
c = None
if grad.is_sparse:
raise RuntimeError(
"Riemannian Adam does not support sparse gradients yet (PR is welcome)"
)
state = self.state[point]
# State initialization
if len(state) == 0:
state["step"] = 0
# Exponential moving average of gradient values
state["exp_avg"] = torch.zeros_like(point)
# Exponential moving average of squared gradient values
state["exp_avg_sq"] = torch.zeros_like(point)
if amsgrad:
# Maintains max of all exp. moving avg. of sq. grad. values
state["max_exp_avg_sq"] = torch.zeros_like(point)
# make local variables for easy access
exp_avg = state["exp_avg"]
exp_avg_sq = state["exp_avg_sq"]
# actual step
grad.add_(point, alpha=weight_decay)
grad = manifold.egrad2rgrad(point, grad, c)
exp_avg.mul_(betas[0]).add_(grad, alpha=1 - betas[0])
exp_avg_sq.mul_(betas[1]).add_(
manifold.inner(point, c, grad, keepdim=True), alpha=1 - betas[1]
)
if amsgrad:
max_exp_avg_sq = state["max_exp_avg_sq"]
# Maintains the maximum of all 2nd moment running avg. till now
torch.max(max_exp_avg_sq, exp_avg_sq, out=max_exp_avg_sq)
# Use the max. for normalizing running avg. of gradient
denom = max_exp_avg_sq.sqrt().add_(eps)
else:
denom = exp_avg_sq.sqrt().add_(eps)
group["step"] += 1
bias_correction1 = 1 - betas[0] ** group["step"]
bias_correction2 = 1 - betas[1] ** group["step"]
step_size = (
learning_rate * bias_correction2 ** 0.5 / bias_correction1
)
# copy the state, we need it for retraction
# get the direction for ascend
direction = exp_avg / denom
# transport the exponential averaging to the new point
new_point = manifold.proj(manifold.expmap(-step_size * direction, point, c), c)
exp_avg_new = manifold.ptransp(point, new_point, exp_avg, c)
# use copy only for user facing point
copy_or_set_(point, new_point)
exp_avg.set_(exp_avg_new)
group["step"] += 1
if self._stabilize is not None and group["step"] % self._stabilize == 0:
self.stabilize_group(group)
return loss
@torch.no_grad()
def stabilize_group(self, group):
for p in group["params"]:
if not isinstance(p, ManifoldParameter):
continue
state = self.state[p]
if not state: # due to None grads
continue
manifold = p.manifold
c = p.c
exp_avg = state["exp_avg"]
copy_or_set_(p, manifold.proj(p, c))
# keep the moving average in the tangent space at p
exp_avg.set_(manifold.proj_tan(exp_avg, p, c))
from __future__ import division
from __future__ import print_function
import logging
import os
import time
import numpy as np
import Ghypeddings.Poincare.optimizers as optimizers
import torch
from Ghypeddings.Poincare.models.base_models import NCModel
from Ghypeddings.Poincare.utils.data_utils import process_data
from Ghypeddings.Poincare.utils.train_utils import format_metrics, create_args
class POINCARE:
def __init__(self,
adj,
features,
labels,
dim,
grad_clip=None,
weight_decay=0.01,
lr=0.1,
gamma=0.5,
lr_reduce_freq=500,
cuda=0,
epochs=50,
min_epochs=50,
patience=None,
seed=42,
log_freq=1,
eval_freq=1,
val_prop=0.15,
test_prop=0.15,
double_precision=0,
dropout=0.01,
normalize_adj=False,
normalize_feats=True):
self.args = create_args(dim,grad_clip,weight_decay,lr,gamma,lr_reduce_freq,cuda,epochs,min_epochs,patience,seed,log_freq,eval_freq,val_prop,test_prop,double_precision,dropout,normalize_adj,normalize_feats)
self.args.n_nodes = adj.shape[0]
self.args.feat_dim = features.shape[1]
self.args.n_classes = len(np.unique(labels))
self.data = process_data(self.args,adj,features,labels)
np.random.seed(self.args.seed)
torch.manual_seed(self.args.seed)
if int(self.args.double_precision):
torch.set_default_dtype(torch.float64)
if int(self.args.cuda) >= 0:
torch.cuda.manual_seed(self.args.seed)
self.args.device = 'cuda:' + str(self.args.cuda) if int(self.args.cuda) >= 0 else 'cpu'
self.args.patience = self.args.epochs if not self.args.patience else int(self.args.patience)
if not self.args.lr_reduce_freq:
self.args.lr_reduce_freq = self.args.epochs
self.model = NCModel(self.args)
self.optimizer = getattr(optimizers, 'RiemannianAdam')(params=self.model.parameters(), lr=self.args.lr,
weight_decay=self.args.weight_decay)
self.lr_scheduler = torch.optim.lr_scheduler.StepLR(
self.optimizer,
step_size=int(self.args.lr_reduce_freq),
gamma=float(self.args.gamma)
)
if self.args.cuda is not None and int(self.args.cuda) >= 0 :
os.environ['CUDA_VISIBLE_DEVICES'] = str(self.args.cuda)
self.model = self.model.to(self.args.device)
for x, val in self.data.items():
if torch.is_tensor(self.data[x]):
self.data[x] = self.data[x].to(self.args.device)
self.best_emb = None
def fit(self):
logging.getLogger().setLevel(logging.INFO)
logging.info(str(self.model))
tot_params = sum([np.prod(p.size()) for p in self.model.parameters()])
logging.info(f"Total number of parameters: {tot_params}")
t_total = time.time()
counter = 0
best_val_metrics = self.model.init_metric_dict()
best_losses = []
train_losses = []
val_losses = []
for epoch in range(self.args.epochs):
t = time.time()
self.model.train()
self.optimizer.zero_grad()
embeddings = self.model.encode(self.data['features'])
assert not torch.isnan(embeddings).any()
train_metrics = self.model.compute_metrics(embeddings, self.data, 'train')
train_metrics['loss'].backward()
if self.args.grad_clip is not None:
max_norm = float(self.args.grad_clip)
all_params = list(self.model.parameters())
for param in all_params:
torch.nn.utils.clip_grad_norm_(param, max_norm)
self.optimizer.step()
self.lr_scheduler.step()
train_losses.append(train_metrics['loss'].item())
if(len(best_losses) == 0):
best_losses.append(train_losses[0])
elif (best_losses[-1] > train_losses[-1]):
best_losses.append(train_losses[-1])
else:
best_losses.append(best_losses[-1])
if (epoch + 1) % self.args.log_freq == 0:
logging.info(" ".join(['Epoch: {:04d}'.format(epoch + 1),
'lr: {}'.format(self.lr_scheduler.get_last_lr()[0]),
format_metrics(train_metrics, 'train'),
'time: {:.4f}s'.format(time.time() - t)
]))
if (epoch + 1) % self.args.eval_freq == 0:
self.model.eval()
embeddings = self.model.encode(self.data['features'])
val_metrics = self.model.compute_metrics(embeddings, self.data, 'val')
val_losses.append(val_metrics['loss'].item())
if (epoch + 1) % self.args.log_freq == 0:
logging.info(" ".join(['Epoch: {:04d}'.format(epoch + 1), format_metrics(val_metrics, 'val')]))
if self.model.has_improved(best_val_metrics, val_metrics):
self.best_emb = embeddings
best_val_metrics = val_metrics
counter = 0
else:
counter += 1
if counter == self.args.patience and epoch > self.args.min_epochs:
logging.info("Early stopping")
break
logging.info("Training Finished!")
logging.info("Total time elapsed: {:.4f}s".format(time.time() - t_total))
return {'train':train_losses,'best':best_losses,'val':val_losses},best_val_metrics['acc'],best_val_metrics['f1'],best_val_metrics['recall'],best_val_metrics['precision'],best_val_metrics['roc_auc'],time.time() - t_total
def predict(self):
self.model.eval()
embeddings = self.model.encode(self.data['features'])
val_metrics = self.model.compute_metrics(embeddings, self.data, 'test')
return val_metrics['loss'].item(),val_metrics['acc'],val_metrics['f1'],val_metrics['recall'],val_metrics['precision'],val_metrics['roc_auc']
def save_embeddings(self):
tb_embeddings_euc = self.model.manifold.logmap0(self.best_emb,self.model.decoder.c)
for_classification_hyp = np.hstack((self.best_emb.cpu().detach().numpy(),self.data['labels'].cpu().reshape(-1,1)))
for_classification_euc = np.hstack((tb_embeddings_euc.cpu().detach().numpy(),self.data['labels'].cpu().reshape(-1,1)))
hyp_file_path = os.path.join(os.getcwd(),'poincare_embeddings_hyp.csv')
euc_file_path = os.path.join(os.getcwd(),'poincare_embeddings_euc.csv')
np.savetxt(hyp_file_path, for_classification_hyp, delimiter=',')
np.savetxt(euc_file_path, for_classification_euc, delimiter=',')
"""Data utils functions for pre-processing and data loading."""
import os
import pickle as pkl
import sys
import networkx as nx
import numpy as np
import scipy.sparse as sp
import torch
def process_data(args, adj,features,labels):
data = process_data_nc(args,adj,features,labels)
data['adj_train_norm'], data['features'] = process(
data['adj_train'], data['features'], args.normalize_adj,args.normalize_feats
)
return data
def process(adj, features, normalize_adj, normalize_feats):
if sp.isspmatrix(features):
features = np.array(features.todense())
if normalize_feats:
features = normalize(features)
features = torch.Tensor(features)
if normalize_adj:
adj = normalize(adj + sp.eye(adj.shape[0]))
adj = sparse_mx_to_torch_sparse_tensor(adj)
return adj, features
def normalize(mx):
"""Row-normalize sparse matrix."""
rowsum = np.array(mx.sum(1))
r_inv = np.power(rowsum, -1).flatten()
r_inv[np.isinf(r_inv)] = 0.
r_mat_inv = sp.diags(r_inv)
mx = r_mat_inv.dot(mx)
return mx
def sparse_mx_to_torch_sparse_tensor(sparse_mx):
"""Convert a scipy sparse matrix to a torch sparse tensor."""
sparse_mx = sparse_mx.tocoo()
indices = torch.from_numpy(
np.vstack((sparse_mx.row, sparse_mx.col)).astype(np.int64)
)
values = torch.Tensor(sparse_mx.data)
shape = torch.Size(sparse_mx.shape)
return torch.sparse.FloatTensor(indices, values, shape)
def augment(adj, features, normalize_feats=True):
deg = np.squeeze(np.sum(adj, axis=0).astype(int))
deg[deg > 5] = 5
deg_onehot = torch.tensor(np.eye(6)[deg], dtype=torch.float).squeeze()
const_f = torch.ones(features.size(0), 1)
features = torch.cat((features, deg_onehot, const_f), dim=1)
return features
def split_data(labels, val_prop, test_prop, seed):
np.random.seed(seed)
nb_nodes = labels.shape[0]
all_idx = np.arange(nb_nodes)
pos_idx = labels.nonzero()[0]
neg_idx = (1. - labels).nonzero()[0]
np.random.shuffle(pos_idx)
np.random.shuffle(neg_idx)
pos_idx = pos_idx.tolist()
neg_idx = neg_idx.tolist()
nb_pos_neg = min(len(pos_idx), len(neg_idx))
nb_val = round(val_prop * nb_pos_neg)
nb_test = round(test_prop * nb_pos_neg)
idx_val_pos, idx_test_pos, idx_train_pos = pos_idx[:nb_val], pos_idx[nb_val:nb_val + nb_test], pos_idx[
nb_val + nb_test:]
idx_val_neg, idx_test_neg, idx_train_neg = neg_idx[:nb_val], neg_idx[nb_val:nb_val + nb_test], neg_idx[
nb_val + nb_test:]
return idx_val_pos + idx_val_neg, idx_test_pos + idx_test_neg, idx_train_pos + idx_train_neg
def process_data_nc(args,adj,features,labels):
idx_val, idx_test, idx_train = split_data(labels, args.val_prop, args.test_prop, seed=args.seed)
labels = torch.LongTensor(labels)
data = {'adj_train': sp.csr_matrix(adj), 'features': features, 'labels': labels, 'idx_train': idx_train, 'idx_val': idx_val, 'idx_test': idx_test}
return data
from sklearn.metrics import accuracy_score, f1_score,precision_score,recall_score,roc_auc_score
def acc_f1(output, labels, average='binary'):
preds = output.max(1)[1].type_as(labels)
if preds.is_cuda:
preds = preds.cpu()
labels = labels.cpu()
accuracy = accuracy_score(labels,preds)
recall = recall_score(labels,preds)
precision = precision_score(labels,preds)
roc_auc = roc_auc_score(labels,preds)
f1 = f1_score(labels,preds, average=average)
return accuracy, f1,recall,precision,roc_auc
"""Math utils functions."""
import torch
def cosh(x, clamp=15):
return x.clamp(-clamp, clamp).cosh()
def sinh(x, clamp=15):
return x.clamp(-clamp, clamp).sinh()
def tanh(x, clamp=15):
return x.clamp(-clamp, clamp).tanh()
def arcosh(x):
return Arcosh.apply(x)
def arsinh(x):
return Arsinh.apply(x)
def artanh(x):
return Artanh.apply(x)
class Artanh(torch.autograd.Function):
@staticmethod
def forward(ctx, x):
x = x.clamp(-1 + 1e-7, 1 - 1e-7)
ctx.save_for_backward(x)
z = x.double()
return (torch.log_(1 + z).sub_(torch.log_(1 - z))).mul_(0.5).to(x.dtype)
@staticmethod
def backward(ctx, grad_output):
input, = ctx.saved_tensors
return grad_output / (1 - input ** 2)
class Arsinh(torch.autograd.Function):
@staticmethod
def forward(ctx, x):
ctx.save_for_backward(x)
z = x.double()
return (z + torch.sqrt_(1 + z.pow(2))).clamp_min_(1e-7).log_().to(x.dtype)
@staticmethod
def backward(ctx, grad_output):
input, = ctx.saved_tensors
return grad_output / (1 + input ** 2) ** 0.5
class Arcosh(torch.autograd.Function):
@staticmethod
def forward(ctx, x):
x = x.clamp(min=1.0 + 1e-7)
ctx.save_for_backward(x)
z = x.double()
return (z + torch.sqrt_(z.pow(2) - 1)).clamp_min_(1e-7).log_().to(x.dtype)
@staticmethod
def backward(ctx, grad_output):
input, = ctx.saved_tensors
return grad_output / (input ** 2 - 1) ** 0.5
import os
import numpy as np
import torch
import torch.nn.functional as F
import torch.nn.modules.loss
import argparse
def format_metrics(metrics, split):
"""Format metric in metric dict for logging."""
return " ".join(
["{}_{}: {:.4f}".format(split, metric_name, metric_val) for metric_name, metric_val in metrics.items()])
def create_args(*args):
parser = argparse.ArgumentParser()
parser.add_argument('--dim', type=int, default=args[0])
parser.add_argument('--grad_clip', type=float, default=args[1])
parser.add_argument('--weight_decay', type=float, default=args[2])
parser.add_argument('--lr', type=float, default=args[3])
parser.add_argument('--gamma', type=float, default=args[4])
parser.add_argument('--lr_reduce_freq', type=int, default=args[5])
parser.add_argument('--cuda', type=int, default=args[6])
parser.add_argument('--epochs', type=int, default=args[7])
parser.add_argument('--min_epochs', type=int, default=args[8])
parser.add_argument('--patience', type=int, default=args[9])
parser.add_argument('--seed', type=int, default=args[10])
parser.add_argument('--log_freq', type=int, default=args[11])
parser.add_argument('--eval_freq', type=int, default=args[12])
parser.add_argument('--val_prop', type=float, default=args[13])
parser.add_argument('--test_prop', type=float, default=args[14])
parser.add_argument('--double_precision', type=int, default=args[15])
parser.add_argument('--dropout', type=float, default=args[16])
parser.add_argument('--normalize_adj', type=bool, default=args[17])
parser.add_argument('--normalize_feats', type=bool, default=args[18])
flags, unknown = parser.parse_known_args()
return flags
# G-Hypeddings
## 1. Overview
G-Hypeddings is a **Python library** for **graph hyperbolic embeddings**, primarily used for **detecting cybersecurity anomalies**. It includes six distinct models with various configurations, all of which rely on **hyperbolic geometry**. The library is built on top of the [PyTorch framework](https://pytorch.org/).
### 1.1. Models
The models fall into three main categories based on their overall architecture: shallow models (Poincaré), convolution-based models (HGCN & HGNN), and autoencoder-based models (HGCAE & PVAE).
| Name | Year | Encoder | Decoder | Manifold | Ref |
|----------|----------|----------|---------|---------------------------|-------|
| Poincaré | 2017 | / | MLP | Poincaré Ball | [1] |
| HGNN | 2019 | HGCN | MLP | Poincaré Ball, Lorentz | [2] |
| HGCN | 2019 | HGCN | MLP | Lorentz | [3] |
| P-VAE | 2019 | GCN | MLP | Poincaré Ball | [4] |
| H2H-GCN | 2021 | HGCN | MLP | Lorentz | [5] |
| HGCAE | 2021 | HGCN | HGCN | Poincaré Ball | [6] |
In this library, we also provide a variety of binary classifiers, clustering algorithms, and unsupervised anomaly detection algorithms to use with the autoencoder-based models (HGCAE & PVAE). All of them are [Scikit-learn](https://scikit-learn.org/) models tuned with grid search; a usage sketch follows the table below.
| Name | Type |
|---------------------------------------------|-----------------------------|
| Support Vector Machine (SVM) | Binary Classifier |
| Multilayer Perceptron (MLP)                  | Binary Classifier           |
| Decision Tree | Binary Classifier |
| Random Forest | Binary Classifier |
| AdaBoost | Binary Classifier |
| K-Nearest Neighbors (KNN) | Binary Classifier |
| Naive Bayes | Binary Classifier |
| Agglomerative Hierarchical Clustering (AHC) | Clustering Algorithm |
| DBSCAN | Clustering Algorithm |
| Fuzzy C-means                                | Clustering Algorithm        |
| Gaussian Mixture | Clustering Algorithm |
| K-means | Clustering Algorithm |
| Mean shift | Clustering Algorithm |
| Isolation Forest | Anomaly Detection Algorithm |
| One-class SVM | Anomaly Detection Algorithm |
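
For instance, here is a minimal sketch of classifying learned embeddings with one of these helpers. It assumes `X` holds node embeddings and `y` holds binary anomaly labels as NumPy arrays, and that the `adaboost` and `calculate_metrics` helpers are exposed from `Ghypeddings.classifiers` as defined in this commit; the file names are hypothetical and for illustration only.

```python
import numpy as np
from Ghypeddings.classifiers import adaboost, calculate_metrics

# X: node embeddings produced by an autoencoder-based model (e.g. HGCAE or PVAE)
# y: binary anomaly labels; both assumed to be NumPy arrays of matching length
X = np.load('embeddings.npy')  # hypothetical paths, for illustration only
y = np.load('labels.npy')

clf = adaboost(X, y, seed=42, n_estimators=2)  # returns a fitted AdaBoostClassifier
accuracy, f1, recall, precision, roc_auc = calculate_metrics(clf, X, y)
print(f'acc={accuracy:.4f}  f1={f1:.4f}  roc_auc={roc_auc:.4f}')
```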
### 1.2. Datasets
The following intrusion detection datasets were used to test and evaluate the models. Our code includes all the pre-processing steps required to convert these datasets from tabular format into graphs. Due to usage restrictions, this library ships only a single pre-processed and normalized graph of 5,000 nodes per dataset.
| Name | Ref |
|-----------------|-------|
| CIC-DDoS2019 | [7] |
| AWID3 | |
## 2. Installation
## 3. Usage
Training and evaluating a model with our library takes only three lines of code, as sketched below.
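
A minimal sketch with the Poincaré model, assuming `adj`, `features`, and `labels` are already loaded as NumPy arrays (adjacency matrix, feature matrix, and binary label vector):

```python
from Ghypeddings import POINCARE

# adj: n x n adjacency matrix, features: n x d feature matrix, labels: binary label vector
model = POINCARE(adj, features, labels, dim=10, epochs=50, cuda=-1)  # cuda=-1 runs on CPU
losses, acc, f1, recall, precision, roc_auc, train_time = model.fit()
test_loss, acc, f1, recall, precision, roc_auc = model.predict()
```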
### 3.1. Models
### 3.2. Datasets
## 4. Citation
## 5. References
[1]: [Nickel, Maximillian, and Douwe Kiela. "Poincaré embeddings for learning hierarchical representations." Advances in neural information processing systems 30 (2017).](https://proceedings.neurips.cc/paper_files/paper/2017/hash/59dfa2df42d9e3d41f5b02bfc32229dd-Abstract.html)
[2]: [Liu, Qi, Maximilian Nickel, and Douwe Kiela. "Hyperbolic graph neural networks." Advances in neural information processing systems 32 (2019).](https://proceedings.neurips.cc/paper/2019/hash/103303dd56a731e377d01f6a37badae3-Abstract.html)
[3]: [Chami, Ines, et al. "Hyperbolic graph convolutional neural networks." Advances in neural information processing systems 32 (2019).](https://proceedings.neurips.cc/paper_files/paper/2019/hash/0415740eaa4d9decbc8da001d3fd805f-Abstract.html)
[4]: [Mathieu, Emile, et al. "Continuous hierarchical representations with poincaré variational auto-encoders." Advances in neural information processing systems 32 (2019).](https://proceedings.neurips.cc/paper/2019/hash/0ec04cb3912c4f08874dd03716f80df1-Abstract.html)
[5]: [Dai, Jindou, et al. "A hyperbolic-to-hyperbolic graph convolutional network." Proceedings of the IEEE/CVF conference on computer vision and pattern recognition. 2021.](https://www.computer.org/csdl/proceedings-article/cvpr/2021/450900a154/1yeJgfbgw6Y)
[6]: [Park, Jiwoong, et al. "Unsupervised hyperbolic representation learning via message passing auto-encoders." Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition. 2021.](https://ieeexplore.ieee.org/document/9577649)
[7]: [CIC-DDoS2019](https://www.unb.ca/cic/datasets/ddos-2019.html)
from Ghypeddings.H2HGCN.h2hgcn import H2HGCN
from Ghypeddings.HGCAE.hgcae import HGCAE
from Ghypeddings.HGCN.hgcn import HGCN
from Ghypeddings.HGNN.hgnn import HGNN
from Ghypeddings.Poincare.poincare import POINCARE
from Ghypeddings.PVAE.pvae import PVAE
from Ghypeddings.datasets.datasets import CIC_DDoS2019
from Ghypeddings.datasets.datasets import NF_CIC_IDS2018_v2
from Ghypeddings.datasets.datasets import NF_UNSW_NB15_v2
from Ghypeddings.datasets.datasets import Darknet
from Ghypeddings.datasets.datasets import AWID3
from Ghypeddings.datasets.datasets import NF_TON_IoT_v2
from Ghypeddings.datasets.datasets import NF_BOT_IoT_v2
from Ghypeddings.anomaly_detection.isolation_forest import isolation_forest
from Ghypeddings.anomaly_detection.one_class_svm import one_class_svm
from Ghypeddings.anomaly_detection.dbscan import dbscan
from Ghypeddings.anomaly_detection.kmeans import kmeans
from Ghypeddings.anomaly_detection.local_outlier_factor import local_outlier_factor
from sklearn.cluster import DBSCAN
from Ghypeddings.anomaly_detection.utils import calculate_metrics
def dbscan(X,y):
dbscan = DBSCAN(eps=0.5, min_samples=5)
labels = dbscan.fit_predict(X)
outliers = labels == -1
return calculate_metrics(y,outliers)
from Ghypeddings.anomaly_detection.utils import calculate_metrics
from sklearn.ensemble import IsolationForest
def isolation_forest(X,y,anomalies_percentage = 0.1):
model = IsolationForest(contamination=anomalies_percentage)
model.fit(X)
y_pred = model.predict(X)
y_pred[y_pred == 1] = 0
y_pred[y_pred == -1]= 1
return calculate_metrics(y,y_pred)
from sklearn.cluster import KMeans
from Ghypeddings.anomaly_detection.utils import calculate_metrics
import numpy as np
def kmeans(X,y,n_clusters,outlier_percentage=.1):
model = KMeans(n_clusters=n_clusters)
model.fit(X)
# y_pred = model.predict(X)
distances = model.transform(X).min(axis=1)
threshold = np.percentile(distances, 100 * (1 - outlier_percentage))
outliers = distances > threshold
return calculate_metrics(y,outliers)
from sklearn.neighbors import LocalOutlierFactor
from Ghypeddings.anomaly_detection.utils import calculate_metrics
import numpy as np
def local_outlier_factor(X,y,n_neighbors=20,outlier_percentage=.1):
lof = LocalOutlierFactor(n_neighbors=n_neighbors, contamination=outlier_percentage)
y_pred = lof.fit_predict(X)
y_pred[y_pred == 1] = 0
y_pred[y_pred == -1] = 1
return calculate_metrics(y,y_pred)
from Ghypeddings.anomaly_detection.utils import calculate_metrics
from sklearn.svm import OneClassSVM
def one_class_svm(X,y, kernel='rbf',nu=0.1):
model = OneClassSVM(kernel=kernel, nu=nu)
model.fit(X)
y_pred = model.predict(X)
y_pred[y_pred == 1] = 0   # inliers -> normal
y_pred[y_pred == -1] = 1  # outliers -> anomaly
return calculate_metrics(y,y_pred)
## external evaluation metrics
from sklearn.metrics import adjusted_rand_score
from sklearn.metrics import normalized_mutual_info_score
from sklearn.metrics import fowlkes_mallows_score
## additional evaluation metrics
from sklearn.metrics import homogeneity_score, completeness_score, v_measure_score
## classification metrics
from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score, accuracy_score
def calculate_metrics(y_true,y_pred):
ari = adjusted_rand_score(y_true, y_pred)
nmi = normalized_mutual_info_score(y_true, y_pred)
fmi = fowlkes_mallows_score(y_true, y_pred)
homogeneity = homogeneity_score(y_true, y_pred)
completeness = completeness_score(y_true, y_pred)
v_measure = v_measure_score(y_true, y_pred)
acc = accuracy_score(y_true,y_pred)
f1 = f1_score(y_true,y_pred)
rec = recall_score(y_true,y_pred)
pre = precision_score(y_true,y_pred)
roc = roc_auc_score(y_true,y_pred)
return ari,nmi,fmi,homogeneity,completeness,v_measure,acc,f1,rec,pre,roc
from Ghypeddings.classifiers.svm import SVM
from Ghypeddings.classifiers.mlp import mlp
from Ghypeddings.classifiers.decision_tree import decision_tree
from Ghypeddings.classifiers.random_forest import random_forest
from Ghypeddings.classifiers.adaboost import adaboost
from Ghypeddings.classifiers.knn import KNN
from Ghypeddings.classifiers.naive_bayes import naive_bayes
from sklearn.metrics import accuracy_score , f1_score , recall_score , precision_score , roc_auc_score
def calculate_metrics(clf,X,y):
y_pred = clf.predict(X)
accuracy = accuracy_score(y, y_pred)
f1 = f1_score(y, y_pred)
recall = recall_score(y, y_pred)
precision = precision_score(y, y_pred)
roc_auc = roc_auc_score(y, y_pred)
return accuracy,f1,recall,precision,roc_auc
from sklearn.ensemble import AdaBoostClassifier
def adaboost(X,y,seed,n_estimators=2):
ada_boost = AdaBoostClassifier(n_estimators=n_estimators, random_state=seed)
return ada_boost.fit(X, y)