Commit 7725415a authored by yacinetouahria

final push

Showing with 778 additions and 0 deletions
"""Graph encoders."""
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import Ghypeddings.Poincare.manifolds as manifolds
class Encoder(nn.Module):
"""
Encoder abstract class.
"""
def __init__(self, c):
super(Encoder, self).__init__()
self.c = c
def encode(self, x):
pass
class Shallow(Encoder):
"""
Shallow Embedding method.
Learns embeddings or loads pretrained embeddings and uses an MLP for classification.
"""
def __init__(self, c, args):
super(Shallow, self).__init__(c)
self.manifold = getattr(manifolds, 'PoincareBall')()
weights = torch.Tensor(args.n_nodes, args.dim)
weights = self.manifold.init_weights(weights, self.c)
trainable = True
self.lt = manifolds.ManifoldParameter(weights, trainable, self.manifold, self.c)
self.all_nodes = torch.LongTensor(list(range(args.n_nodes)))
layers = []
self.layers = nn.Sequential(*layers)
def encode(self, x):
h = self.lt[self.all_nodes, :]
h = torch.cat((h, x), 1)
return h
from torch.optim import Adam
from Ghypeddings.Poincare.optimizers.radam import RiemannianAdam
"""Riemannian adam optimizer geoopt implementation (https://github.com/geoopt/)."""
import torch.optim
from Ghypeddings.Poincare.manifolds import Euclidean, ManifoldParameter
_default_manifold = Euclidean()
class OptimMixin(object):
def __init__(self, *args, stabilize=None, **kwargs):
self._stabilize = stabilize
super().__init__(*args, **kwargs)
def stabilize_group(self, group):
pass
def stabilize(self):
"""Stabilize parameters if they are off-manifold due to numerical reasons
"""
for group in self.param_groups:
self.stabilize_group(group)
def copy_or_set_(dest, source):
"""
A workaround to respect strides of :code:`dest` when copying :code:`source`
(https://github.com/geoopt/geoopt/issues/70)
Parameters
----------
dest : torch.Tensor
Destination tensor where to store new data
source : torch.Tensor
Source data to put in the new tensor
Returns
-------
dest
torch.Tensor, modified inplace
"""
if dest.stride() != source.stride():
return dest.copy_(source)
else:
return dest.set_(source)
class RiemannianAdam(OptimMixin, torch.optim.Adam):
r"""Riemannian Adam with the same API as :class:`torch.optim.Adam`
Parameters
----------
params : iterable
iterable of parameters to optimize or dicts defining
parameter groups
lr : float (optional)
learning rate (default: 1e-3)
betas : Tuple[float, float] (optional)
coefficients used for computing
running averages of gradient and its square (default: (0.9, 0.999))
eps : float (optional)
term added to the denominator to improve
numerical stability (default: 1e-8)
weight_decay : float (optional)
weight decay (L2 penalty) (default: 0)
amsgrad : bool (optional)
whether to use the AMSGrad variant of this
algorithm from the paper `On the Convergence of Adam and Beyond`_
(default: False)
Other Parameters
----------------
stabilize : int
Stabilize parameters if they are off-manifold due to numerical
reasons every ``stabilize`` steps (default: ``None`` -- no stabilize)
.. _On the Convergence of Adam and Beyond:
https://openreview.net/forum?id=ryQu7f-RZ
"""
def step(self, closure=None):
"""Performs a single optimization step.
Arguments
---------
closure : callable (optional)
A closure that reevaluates the model
and returns the loss.
"""
loss = None
if closure is not None:
loss = closure()
with torch.no_grad():
for group in self.param_groups:
if "step" not in group:
group["step"] = 0
betas = group["betas"]
weight_decay = group["weight_decay"]
eps = group["eps"]
learning_rate = group["lr"]
amsgrad = group["amsgrad"]
for point in group["params"]:
grad = point.grad
if grad is None:
continue
if isinstance(point, (ManifoldParameter)):
manifold = point.manifold
c = point.c
else:
manifold = _default_manifold
c = None
if grad.is_sparse:
raise RuntimeError(
"Riemannian Adam does not support sparse gradients yet (PR is welcome)"
)
state = self.state[point]
# State initialization
if len(state) == 0:
state["step"] = 0
# Exponential moving average of gradient values
state["exp_avg"] = torch.zeros_like(point)
# Exponential moving average of squared gradient values
state["exp_avg_sq"] = torch.zeros_like(point)
if amsgrad:
# Maintains max of all exp. moving avg. of sq. grad. values
state["max_exp_avg_sq"] = torch.zeros_like(point)
# make local variables for easy access
exp_avg = state["exp_avg"]
exp_avg_sq = state["exp_avg_sq"]
# actual step
grad.add_(point, alpha=weight_decay)
grad = manifold.egrad2rgrad(point, grad, c)
exp_avg.mul_(betas[0]).add_(grad, alpha=1 - betas[0])
exp_avg_sq.mul_(betas[1]).add_(
manifold.inner(point, c, grad, keepdim=True), alpha=1 - betas[1]
)
if amsgrad:
max_exp_avg_sq = state["max_exp_avg_sq"]
# Maintains the maximum of all 2nd moment running avg. till now
torch.max(max_exp_avg_sq, exp_avg_sq, out=max_exp_avg_sq)
# Use the max. for normalizing running avg. of gradient
denom = max_exp_avg_sq.sqrt().add_(eps)
else:
denom = exp_avg_sq.sqrt().add_(eps)
group["step"] += 1
bias_correction1 = 1 - betas[0] ** group["step"]
bias_correction2 = 1 - betas[1] ** group["step"]
step_size = (
learning_rate * bias_correction2 ** 0.5 / bias_correction1
)
# copy the state, we need it for retraction
# get the direction for ascend
direction = exp_avg / denom
# transport the exponential averaging to the new point
new_point = manifold.proj(manifold.expmap(-step_size * direction, point, c), c)
exp_avg_new = manifold.ptransp(point, new_point, exp_avg, c)
# use copy only for user facing point
copy_or_set_(point, new_point)
exp_avg.set_(exp_avg_new)
group["step"] += 1
if self._stabilize is not None and group["step"] % self._stabilize == 0:
self.stabilize_group(group)
return loss
@torch.no_grad()
def stabilize_group(self, group):
for p in group["params"]:
if not isinstance(p, ManifoldParameter):
continue
state = self.state[p]
if not state: # due to None grads
continue
manifold = p.manifold
c = p.c
exp_avg = state["exp_avg"]
copy_or_set_(p, manifold.proj(p, c))
# keep the moving average in the tangent space at p
exp_avg.set_(manifold.proj_tan(exp_avg, p, c))
from __future__ import division
from __future__ import print_function
import logging
import os
import time
import numpy as np
import Ghypeddings.Poincare.optimizers as optimizers
import torch
from Ghypeddings.Poincare.models.base_models import NCModel
from Ghypeddings.Poincare.utils.data_utils import process_data
from Ghypeddings.Poincare.utils.train_utils import format_metrics, create_args
class POINCARE:
def __init__(self,
adj,
features,
labels,
dim,
grad_clip=None,
weight_decay=0.01,
lr=0.1,
gamma=0.5,
lr_reduce_freq=500,
cuda=0,
epochs=50,
min_epochs=50,
patience=None,
seed=42,
log_freq=1,
eval_freq=1,
val_prop=0.15,
test_prop=0.15,
double_precision=0,
dropout=0.01,
normalize_adj=False,
normalize_feats=True):
self.args = create_args(dim,grad_clip,weight_decay,lr,gamma,lr_reduce_freq,cuda,epochs,min_epochs,patience,seed,log_freq,eval_freq,val_prop,test_prop,double_precision,dropout,normalize_adj,normalize_feats)
self.args.n_nodes = adj.shape[0]
self.args.feat_dim = features.shape[1]
self.args.n_classes = len(np.unique(labels))
self.data = process_data(self.args,adj,features,labels)
np.random.seed(self.args.seed)
torch.manual_seed(self.args.seed)
if int(self.args.double_precision):
torch.set_default_dtype(torch.float64)
if int(self.args.cuda) >= 0:
torch.cuda.manual_seed(self.args.seed)
self.args.device = 'cuda:' + str(self.args.cuda) if int(self.args.cuda) >= 0 else 'cpu'
self.args.patience = self.args.epochs if not self.args.patience else int(self.args.patience)
if not self.args.lr_reduce_freq:
self.args.lr_reduce_freq = self.args.epochs
self.model = NCModel(self.args)
self.optimizer = getattr(optimizers, 'RiemannianAdam')(params=self.model.parameters(), lr=self.args.lr,
weight_decay=self.args.weight_decay)
self.lr_scheduler = torch.optim.lr_scheduler.StepLR(
self.optimizer,
step_size=int(self.args.lr_reduce_freq),
gamma=float(self.args.gamma)
)
if self.args.cuda is not None and int(self.args.cuda) >= 0 :
os.environ['CUDA_VISIBLE_DEVICES'] = str(self.args.cuda)
self.model = self.model.to(self.args.device)
for x, val in self.data.items():
if torch.is_tensor(self.data[x]):
self.data[x] = self.data[x].to(self.args.device)
self.best_emb = None
def fit(self):
logging.getLogger().setLevel(logging.INFO)
logging.info(str(self.model))
tot_params = sum([np.prod(p.size()) for p in self.model.parameters()])
logging.info(f"Total number of parameters: {tot_params}")
t_total = time.time()
counter = 0
best_val_metrics = self.model.init_metric_dict()
best_losses = []
train_losses = []
val_losses = []
for epoch in range(self.args.epochs):
t = time.time()
self.model.train()
self.optimizer.zero_grad()
embeddings = self.model.encode(self.data['features'])
assert not torch.isnan(embeddings).any()
train_metrics = self.model.compute_metrics(embeddings, self.data, 'train')
train_metrics['loss'].backward()
if self.args.grad_clip is not None:
max_norm = float(self.args.grad_clip)
all_params = list(self.model.parameters())
for param in all_params:
torch.nn.utils.clip_grad_norm_(param, max_norm)
self.optimizer.step()
self.lr_scheduler.step()
train_losses.append(train_metrics['loss'].item())
if(len(best_losses) == 0):
best_losses.append(train_losses[0])
elif (best_losses[-1] > train_losses[-1]):
best_losses.append(train_losses[-1])
else:
best_losses.append(best_losses[-1])
if (epoch + 1) % self.args.log_freq == 0:
logging.info(" ".join(['Epoch: {:04d}'.format(epoch + 1),
'lr: {}'.format(self.lr_scheduler.get_last_lr()[0]),
format_metrics(train_metrics, 'train'),
'time: {:.4f}s'.format(time.time() - t)
]))
if (epoch + 1) % self.args.eval_freq == 0:
self.model.eval()
embeddings = self.model.encode(self.data['features'])
val_metrics = self.model.compute_metrics(embeddings, self.data, 'val')
val_losses.append(val_metrics['loss'].item())
if (epoch + 1) % self.args.log_freq == 0:
logging.info(" ".join(['Epoch: {:04d}'.format(epoch + 1), format_metrics(val_metrics, 'val')]))
if self.model.has_improved(best_val_metrics, val_metrics):
self.best_emb = embeddings
best_val_metrics = val_metrics
counter = 0
else:
counter += 1
if counter == self.args.patience and epoch > self.args.min_epochs:
logging.info("Early stopping")
break
logging.info("Training Finished!")
logging.info("Total time elapsed: {:.4f}s".format(time.time() - t_total))
return {'train':train_losses,'best':best_losses,'val':val_losses},best_val_metrics['acc'],best_val_metrics['f1'],best_val_metrics['recall'],best_val_metrics['precision'],best_val_metrics['roc_auc'],time.time() - t_total
def predict(self):
self.model.eval()
embeddings = self.model.encode(self.data['features'])
val_metrics = self.model.compute_metrics(embeddings, self.data, 'test')
return val_metrics['loss'].item(),val_metrics['acc'],val_metrics['f1'],val_metrics['recall'],val_metrics['precision'],val_metrics['roc_auc']
def save_embeddings(self):
tb_embeddings_euc = self.model.manifold.logmap0(self.best_emb,self.model.decoder.c)
for_classification_hyp = np.hstack((self.best_emb.cpu().detach().numpy(),self.data['labels'].cpu().reshape(-1,1)))
for_classification_euc = np.hstack((tb_embeddings_euc.cpu().detach().numpy(),self.data['labels'].cpu().reshape(-1,1)))
hyp_file_path = os.path.join(os.getcwd(),'poincare_embeddings_hyp.csv')
euc_file_path = os.path.join(os.getcwd(),'poincare_embeddings_euc.csv')
np.savetxt(hyp_file_path, for_classification_hyp, delimiter=',')
np.savetxt(euc_file_path, for_classification_euc, delimiter=',')
"""Data utils functions for pre-processing and data loading."""
import os
import pickle as pkl
import sys
import networkx as nx
import numpy as np
import scipy.sparse as sp
import torch
def process_data(args, adj,features,labels):
data = process_data_nc(args,adj,features,labels)
data['adj_train_norm'], data['features'] = process(
data['adj_train'], data['features'], args.normalize_adj,args.normalize_feats
)
return data
def process(adj, features, normalize_adj, normalize_feats):
if sp.isspmatrix(features):
features = np.array(features.todense())
if normalize_feats:
features = normalize(features)
features = torch.Tensor(features)
if normalize_adj:
adj = normalize(adj + sp.eye(adj.shape[0]))
adj = sparse_mx_to_torch_sparse_tensor(adj)
return adj, features
def normalize(mx):
"""Row-normalize sparse matrix."""
rowsum = np.array(mx.sum(1))
r_inv = np.power(rowsum, -1).flatten()
r_inv[np.isinf(r_inv)] = 0.
r_mat_inv = sp.diags(r_inv)
mx = r_mat_inv.dot(mx)
return mx
def sparse_mx_to_torch_sparse_tensor(sparse_mx):
"""Convert a scipy sparse matrix to a torch sparse tensor."""
sparse_mx = sparse_mx.tocoo()
indices = torch.from_numpy(
np.vstack((sparse_mx.row, sparse_mx.col)).astype(np.int64)
)
values = torch.Tensor(sparse_mx.data)
shape = torch.Size(sparse_mx.shape)
return torch.sparse.FloatTensor(indices, values, shape)
def augment(adj, features, normalize_feats=True):
deg = np.squeeze(np.sum(adj, axis=0).astype(int))
deg[deg > 5] = 5
deg_onehot = torch.tensor(np.eye(6)[deg], dtype=torch.float).squeeze()
const_f = torch.ones(features.size(0), 1)
features = torch.cat((features, deg_onehot, const_f), dim=1)
return features
def split_data(labels, val_prop, test_prop, seed):
np.random.seed(seed)
nb_nodes = labels.shape[0]
all_idx = np.arange(nb_nodes)
pos_idx = labels.nonzero()[0]
neg_idx = (1. - labels).nonzero()[0]
np.random.shuffle(pos_idx)
np.random.shuffle(neg_idx)
pos_idx = pos_idx.tolist()
neg_idx = neg_idx.tolist()
nb_pos_neg = min(len(pos_idx), len(neg_idx))
nb_val = round(val_prop * nb_pos_neg)
nb_test = round(test_prop * nb_pos_neg)
idx_val_pos, idx_test_pos, idx_train_pos = pos_idx[:nb_val], pos_idx[nb_val:nb_val + nb_test], pos_idx[
nb_val + nb_test:]
idx_val_neg, idx_test_neg, idx_train_neg = neg_idx[:nb_val], neg_idx[nb_val:nb_val + nb_test], neg_idx[
nb_val + nb_test:]
return idx_val_pos + idx_val_neg, idx_test_pos + idx_test_neg, idx_train_pos + idx_train_neg
def process_data_nc(args,adj,features,labels):
idx_val, idx_test, idx_train = split_data(labels, args.val_prop, args.test_prop, seed=args.seed)
labels = torch.LongTensor(labels)
data = {'adj_train': sp.csr_matrix(adj), 'features': features, 'labels': labels, 'idx_train': idx_train, 'idx_val': idx_val, 'idx_test': idx_test}
return data
from sklearn.metrics import accuracy_score, f1_score,precision_score,recall_score,roc_auc_score
def acc_f1(output, labels, average='binary'):
preds = output.max(1)[1].type_as(labels)
if preds.is_cuda:
preds = preds.cpu()
labels = labels.cpu()
accuracy = accuracy_score(labels,preds)
recall = recall_score(labels,preds)
precision = precision_score(labels,preds)
roc_auc = roc_auc_score(labels,preds)
f1 = f1_score(labels,preds, average=average)
return accuracy, f1,recall,precision,roc_auc
"""Math utils functions."""
import torch
def cosh(x, clamp=15):
return x.clamp(-clamp, clamp).cosh()
def sinh(x, clamp=15):
return x.clamp(-clamp, clamp).sinh()
def tanh(x, clamp=15):
return x.clamp(-clamp, clamp).tanh()
def arcosh(x):
return Arcosh.apply(x)
def arsinh(x):
return Arsinh.apply(x)
def artanh(x):
return Artanh.apply(x)
class Artanh(torch.autograd.Function):
@staticmethod
def forward(ctx, x):
x = x.clamp(-1 + 1e-7, 1 - 1e-7)
ctx.save_for_backward(x)
z = x.double()
return (torch.log_(1 + z).sub_(torch.log_(1 - z))).mul_(0.5).to(x.dtype)
@staticmethod
def backward(ctx, grad_output):
input, = ctx.saved_tensors
return grad_output / (1 - input ** 2)
class Arsinh(torch.autograd.Function):
@staticmethod
def forward(ctx, x):
ctx.save_for_backward(x)
z = x.double()
return (z + torch.sqrt_(1 + z.pow(2))).clamp_min_(1e-7).log_().to(x.dtype)
@staticmethod
def backward(ctx, grad_output):
input, = ctx.saved_tensors
return grad_output / (1 + input ** 2) ** 0.5
class Arcosh(torch.autograd.Function):
@staticmethod
def forward(ctx, x):
x = x.clamp(min=1.0 + 1e-7)
ctx.save_for_backward(x)
z = x.double()
return (z + torch.sqrt_(z.pow(2) - 1)).clamp_min_(1e-7).log_().to(x.dtype)
@staticmethod
def backward(ctx, grad_output):
input, = ctx.saved_tensors
return grad_output / (input ** 2 - 1) ** 0.5
import os
import numpy as np
import torch
import torch.nn.functional as F
import torch.nn.modules.loss
import argparse
def format_metrics(metrics, split):
"""Format metric in metric dict for logging."""
return " ".join(
["{}_{}: {:.4f}".format(split, metric_name, metric_val) for metric_name, metric_val in metrics.items()])
def create_args(*args):
parser = argparse.ArgumentParser()
parser.add_argument('--dim', type=int, default=args[0])
parser.add_argument('--grad_clip', type=float, default=args[1])
parser.add_argument('--weight_decay', type=float, default=args[2])
parser.add_argument('--lr', type=float, default=args[3])
parser.add_argument('--gamma', type=float, default=args[4])
parser.add_argument('--lr_reduce_freq', type=int, default=args[5])
parser.add_argument('--cuda', type=int, default=args[6])
parser.add_argument('--epochs', type=int, default=args[7])
parser.add_argument('--min_epochs', type=int, default=args[8])
parser.add_argument('--patience', type=int, default=args[9])
parser.add_argument('--seed', type=int, default=args[10])
parser.add_argument('--log_freq', type=int, default=args[11])
parser.add_argument('--eval_freq', type=int, default=args[12])
parser.add_argument('--val_prop', type=float, default=args[13])
parser.add_argument('--test_prop', type=float, default=args[14])
parser.add_argument('--double_precision', type=int, default=args[15])
parser.add_argument('--dropout', type=float, default=args[16])
parser.add_argument('--normalize_adj', type=bool, default=args[17])
parser.add_argument('--normalize_feats', type=bool, default=args[18])
flags, unknown = parser.parse_known_args()
return flags
# G-Hypeddings
## 1. Overview
G-Hypeddings is a **Python library** for **graph hyperbolic embeddings**, primarily used for **detecting cybersecurity anomalies**. It includes six distinct models with various configurations, all of which rely on **hyperbolic geometry**. The library is built on top of the [PyTorch framework](https://pytorch.org/).
### 1.1. Models
The models fall into three main categories based on their overall architecture: shallow models (Poincaré), convolution-based models (HGCN & HGNN), and autoencoder-based models (HGCAE & PVAE).
| Name | Year | Encoder | Decoder | Manifold | Ref |
|----------|----------|----------|---------|---------------------------|-------|
| Poincaré | 2017 | / | MLP | Poincaré Ball | [1] |
| HGNN | 2019 | HGCN | MLP | Poincaré Ball, Lorentz | [2] |
| HGCN | 2019 | HGCN | MLP | Lorentz | [3] |
| P-VAE | 2019 | GCN | MLP | Poincaré Ball | [4] |
| H2H-GCN | 2021 | HGCN | MLP | Lorentz | [5] |
| HGCAE | 2021 | HGCN | HGCN | Poincaré Ball | [6] |
In this library, we also provide a variety of binary classifiers, clustering algorithms, and unsupervised anomaly detection algorithms to use with the autoencoder-based models (HGCAE & PVAE). All of them are [Scikit-learn](https://scikit-learn.org/) models tuned with grid search; a usage sketch follows the table below.
| Name | Type |
|---------------------------------------------|-----------------------------|
| Support Vector Machine (SVM) | Binary Classifier |
| Multilayer Perceptron (MLP)                  | Binary Classifier           |
| Decision Tree | Binary Classifier |
| Random Forest | Binary Classifier |
| AdaBoost | Binary Classifier |
| K-Nearest Neighbors (KNN) | Binary Classifier |
| Naive Bayes | Binary Classifier |
| Agglomerative Hierarchical Clustering (AHC) | Clustering Algorithm |
| DBSCAN | Clustering Algorithm |
| Fuzzy C-means                                | Clustering Algorithm        |
| Gaussian Mixture | Clustering Algorithm |
| K-means | Clustering Algorithm |
| Mean shift | Clustering Algorithm |
| Isolation Forest | Anomaly Detection Algorithm |
| One-class SVM | Anomaly Detection Algorithm |
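
For instance, here is a minimal sketch of classifying learned embeddings with one of these helpers. It assumes `X` holds node embeddings and `y` holds binary anomaly labels as NumPy arrays, and that the `adaboost` and `calculate_metrics` helpers are exposed from `Ghypeddings.classifiers` as defined in this commit; the file names are hypothetical and for illustration only.

```python
import numpy as np
from Ghypeddings.classifiers import adaboost, calculate_metrics

# X: node embeddings produced by an autoencoder-based model (e.g. HGCAE or PVAE)
# y: binary anomaly labels; both assumed to be NumPy arrays of matching length
X = np.load('embeddings.npy')  # hypothetical paths, for illustration only
y = np.load('labels.npy')

clf = adaboost(X, y, seed=42, n_estimators=2)  # returns a fitted AdaBoostClassifier
accuracy, f1, recall, precision, roc_auc = calculate_metrics(clf, X, y)
print(f'acc={accuracy:.4f}  f1={f1:.4f}  roc_auc={roc_auc:.4f}')
```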
### 1.2. Datasets
The following intrusion detection datasets were used to test and evaluate the models. Our code includes all the pre-processing steps required to convert these datasets from tabular format into graphs. Due to usage restrictions, this library ships only a single pre-processed and normalized graph of 5,000 nodes per dataset.
| Name | Ref |
|-----------------|-------|
| CIC-DDoS2019 | [7] |
| AWID3 | |
## 2. Installation
## 3. Usage
Training and evaluating a model with our library takes only three lines of code, as sketched below.
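
A minimal sketch with the Poincaré model, assuming `adj`, `features`, and `labels` are already loaded as NumPy arrays (adjacency matrix, feature matrix, and binary label vector):

```python
from Ghypeddings import POINCARE

# adj: n x n adjacency matrix, features: n x d feature matrix, labels: binary label vector
model = POINCARE(adj, features, labels, dim=10, epochs=50, cuda=-1)  # cuda=-1 runs on CPU
losses, acc, f1, recall, precision, roc_auc, train_time = model.fit()
test_loss, acc, f1, recall, precision, roc_auc = model.predict()
```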
### 3.1. Models
### 3.2. Datasets
## 4. Citation
## 5. References
[1]: [Nickel, Maximillian, and Douwe Kiela. "Poincaré embeddings for learning hierarchical representations." Advances in neural information processing systems 30 (2017).](https://proceedings.neurips.cc/paper_files/paper/2017/hash/59dfa2df42d9e3d41f5b02bfc32229dd-Abstract.html)
[2]: [Liu, Qi, Maximilian Nickel, and Douwe Kiela. "Hyperbolic graph neural networks." Advances in neural information processing systems 32 (2019).](https://proceedings.neurips.cc/paper/2019/hash/103303dd56a731e377d01f6a37badae3-Abstract.html)
[3]: [Chami, Ines, et al. "Hyperbolic graph convolutional neural networks." Advances in neural information processing systems 32 (2019).](https://proceedings.neurips.cc/paper_files/paper/2019/hash/0415740eaa4d9decbc8da001d3fd805f-Abstract.html)
[4]: [Mathieu, Emile, et al. "Continuous hierarchical representations with poincaré variational auto-encoders." Advances in neural information processing systems 32 (2019).](https://proceedings.neurips.cc/paper/2019/hash/0ec04cb3912c4f08874dd03716f80df1-Abstract.html)
[5]: [Dai, Jindou, et al. "A hyperbolic-to-hyperbolic graph convolutional network." Proceedings of the IEEE/CVF conference on computer vision and pattern recognition. 2021.](https://www.computer.org/csdl/proceedings-article/cvpr/2021/450900a154/1yeJgfbgw6Y)
[6]: [Park, Jiwoong, et al. "Unsupervised hyperbolic representation learning via message passing auto-encoders." Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition. 2021.](https://ieeexplore.ieee.org/document/9577649)
[7]: [CIC-DDoS2019](https://www.unb.ca/cic/datasets/ddos-2019.html)
from Ghypeddings.H2HGCN.h2hgcn import H2HGCN
from Ghypeddings.HGCAE.hgcae import HGCAE
from Ghypeddings.HGCN.hgcn import HGCN
from Ghypeddings.HGNN.hgnn import HGNN
from Ghypeddings.Poincare.poincare import POINCARE
from Ghypeddings.PVAE.pvae import PVAE
from Ghypeddings.datasets.datasets import CIC_DDoS2019
from Ghypeddings.datasets.datasets import NF_CIC_IDS2018_v2
from Ghypeddings.datasets.datasets import NF_UNSW_NB15_v2
from Ghypeddings.datasets.datasets import Darknet
from Ghypeddings.datasets.datasets import AWID3
from Ghypeddings.datasets.datasets import NF_TON_IoT_v2
from Ghypeddings.datasets.datasets import NF_BOT_IoT_v2
from Ghypeddings.anomaly_detection.isolation_forest import isolation_forest
from Ghypeddings.anomaly_detection.one_class_svm import one_class_svm
from Ghypeddings.anomaly_detection.dbscan import dbscan
from Ghypeddings.anomaly_detection.kmeans import kmeans
from Ghypeddings.anomaly_detection.local_outlier_factor import local_outlier_factor
from sklearn.cluster import DBSCAN
from Ghypeddings.anomaly_detection.utils import calculate_metrics
def dbscan(X,y):
dbscan = DBSCAN(eps=0.5, min_samples=5)
labels = dbscan.fit_predict(X)
outliers = labels == -1
return calculate_metrics(y,outliers)
from Ghypeddings.anomaly_detection.utils import calculate_metrics
from sklearn.ensemble import IsolationForest
def isolation_forest(X,y,anomalies_percentage = 0.1):
model = IsolationForest(contamination=anomalies_percentage)
model.fit(X)
y_pred = model.predict(X)
y_pred[y_pred == 1] = 0
y_pred[y_pred == -1]= 1
return calculate_metrics(y,y_pred)
from sklearn.cluster import KMeans
from Ghypeddings.anomaly_detection.utils import calculate_metrics
import numpy as np
def kmeans(X,y,n_clusters,outlier_percentage=.1):
model = KMeans(n_clusters=n_clusters)
model.fit(X)
# y_pred = model.predict(X)
distances = model.transform(X).min(axis=1)
threshold = np.percentile(distances, 100 * (1 - outlier_percentage))
outliers = distances > threshold
return calculate_metrics(y,outliers)
from sklearn.neighbors import LocalOutlierFactor
from Ghypeddings.anomaly_detection.utils import calculate_metrics
import numpy as np
def local_outlier_factor(X,y,n_neighbors=20,outlier_percentage=.1):
lof = LocalOutlierFactor(n_neighbors=n_neighbors, contamination=outlier_percentage)
y_pred = lof.fit_predict(X)
y_pred[y_pred == 1] = 0
y_pred[y_pred == -1] = 1
return calculate_metrics(y,y_pred)
from Ghypeddings.anomaly_detection.utils import calculate_metrics
from sklearn.svm import OneClassSVM
def one_class_svm(X,y, kernel='rbf',nu=0.1):
model = OneClassSVM(kernel=kernel, nu=nu)
model.fit(X)
y_pred = model.predict(X)
y_pred[y_pred == 1] = 0   # inliers -> normal
y_pred[y_pred == -1] = 1  # outliers -> anomaly
return calculate_metrics(y,y_pred)
## external evaluation metrics
from sklearn.metrics import adjusted_rand_score
from sklearn.metrics import normalized_mutual_info_score
from sklearn.metrics import fowlkes_mallows_score
## additional evaluation metrics
from sklearn.metrics import homogeneity_score, completeness_score, v_measure_score
## classification metrics
from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score, accuracy_score
def calculate_metrics(y_true,y_pred):
ari = adjusted_rand_score(y_true, y_pred)
nmi = normalized_mutual_info_score(y_true, y_pred)
fmi = fowlkes_mallows_score(y_true, y_pred)
homogeneity = homogeneity_score(y_true, y_pred)
completeness = completeness_score(y_true, y_pred)
v_measure = v_measure_score(y_true, y_pred)
acc = accuracy_score(y_true,y_pred)
f1 = f1_score(y_true,y_pred)
rec = recall_score(y_true,y_pred)
pre = precision_score(y_true,y_pred)
roc = roc_auc_score(y_true,y_pred)
return ari,nmi,fmi,homogeneity,completeness,v_measure,acc,f1,rec,pre,roc
from Ghypeddings.classifiers.svm import SVM
from Ghypeddings.classifiers.mlp import mlp
from Ghypeddings.classifiers.decision_tree import decision_tree
from Ghypeddings.classifiers.random_forest import random_forest
from Ghypeddings.classifiers.adaboost import adaboost
from Ghypeddings.classifiers.knn import KNN
from Ghypeddings.classifiers.naive_bayes import naive_bayes
from sklearn.metrics import accuracy_score , f1_score , recall_score , precision_score , roc_auc_score
def calculate_metrics(clf,X,y):
y_pred = clf.predict(X)
accuracy = accuracy_score(y, y_pred)
f1 = f1_score(y, y_pred)
recall = recall_score(y, y_pred)
precision = precision_score(y, y_pred)
roc_auc = roc_auc_score(y, y_pred)
return accuracy,f1,recall,precision,roc_auc
from sklearn.ensemble import AdaBoostClassifier
def adaboost(X,y,seed,n_estimators=2):
ada_boost = AdaBoostClassifier(n_estimators=n_estimators, random_state=seed)
return ada_boost.fit(X, y)