Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • gladis/hvgae-ad
1 result
Show changes
Commits on Source (2)
Showing
with 326 additions and 0 deletions
from HVGAE_AD.hvgae_ad import HVGAE_AD
\ No newline at end of file
File added
File added
File added
File added
import argparse
import torch
import os
parser = argparse.ArgumentParser(description='HVAENCDG, hyperbolic variational autoencoder for node classification of dynamic graphs')
parser.add_argument('--gamma', type=float, default=0.00001, help='reconstruction importance')# ddos = 0.001 , darknet = 0.00001 (0 don't give better result) , ton iot = 0.0001
parser.add_argument('--nfeat', type=int, default=75, help='dim of input feature')
parser.add_argument('--nb_window', type=int, default=5)
parser.add_argument('--nhid', type=int, default=75, help='dim of hidden embedding')
parser.add_argument('--nout', type=int, default=2, help='dim of output embedding')
parser.add_argument('--act', type=str, default='relu')
parser.add_argument('--timelength', type=int, default=26, help='total number of snapshots')
parser.add_argument('--testlength', type=int, default=.3, help='number of test snapshots')
parser.add_argument('--num_nodes', type=int, default=1000, help='number of nodes per graph')
parser.add_argument('--nclasses', type=int, default=2, help='number of classes')
parser.add_argument('--lr', type=float, default=0.001, help='learning rate') # ddos,darknet,ton_iot = 0.001
parser.add_argument('--max_epoch', type=int, default=20, help='number of epochs to train.')
parser.add_argument('--patience', type=int, default=20, help='patience for early stop')
parser.add_argument('--min_epoch', type=int, default=5, help='min epoch')
parser.add_argument('--weight_decay', type=float, default=0.01, help='weight for L2 loss on basic model.')
parser.add_argument('--dropout', type=float, default=0.1, help='dropout rate (1 - keep probability).')
parser.add_argument('--curvature', type=float, default=1.0, help='curvature value')
parser.add_argument('--num_layers', type=int, default=2)
parser.add_argument('--beta1', type=float, default=0.9)
parser.add_argument('--beta2', type=float, default=0.999)
parser.add_argument('--K', type=int, default=1)
parser.add_argument('--beta', type=float, default=.2)
parser.add_argument('--analytical_kl', type=bool, default=True)
parser.add_argument('--posterior', type=str, default='WrappedNormal')
parser.add_argument('--prior', type=str, default='WrappedNormal')
parser.add_argument('--prior_iso', type=bool, default=True)
parser.add_argument('--prior_std', type=float, default=1.0)
parser.add_argument('--learn_prior_std', type=bool, default=True)
parser.add_argument('--enc', type=str, default='Mob')
parser.add_argument('--dec', type=str, default='Geo')
parser.add_argument('--bias', type=bool, default=True)
parser.add_argument('--alpha', type=float, default=.5)
parser.add_argument('--data_pt_path', type=str, default='./data/', help='parent path of dataset')
parser.add_argument('--device', type=int, default=0, help='gpu id, -1 for cpu')
parser.add_argument('--seed', type=int, default=42, help='random seed')
parser.add_argument('--output_pt_path', type=str, default='./output/', help='parent path of output')
parser.add_argument('--manifold', type=str, default='PoincareBall', help='hyperbolic model')
parser.add_argument('--eps', type=float, default=1e-15, help='eps')
parser.add_argument('--eval_while_testing',type=bool,default=True)
args, unknown = parser.parse_known_args()
if args.device >= 0 and torch.cuda.is_available():
args.device = torch.device('cuda:{}'.format(args.device))
else:
args.device = torch.device('cpu')
args.output_path = os.path.join(args.output_pt_path, 'ddos2019')
if not os.path.isdir(args.output_path):
os.makedirs(args.output_path)
args.log_file = os.path.join(args.output_path, '{}.log'.format('HVGAE_AD'))
args.emb_file = os.path.join(args.output_path, '{}.emb'.format('HVGAE_AD'))
import torch
import numpy as np
import time
from math import isnan
from torch import optim
import torch.optim.lr_scheduler as lr_scheduler
from HVGAE_AD.config import args
from HVGAE_AD.utils.data_utils import prepare
from HVGAE_AD.utils.util import set_random, logger
from HVGAE_AD.model.PVAE.models.pvae import PVAE
from HVGAE_AD.loss import Loss
from sklearn.metrics import accuracy_score, f1_score, recall_score,precision_score,roc_auc_score
import warnings
warnings.filterwarnings('ignore')
class HVGAE_AD(object):
    """Trainer/evaluator wrapping a hyperbolic VAE (PVAE) for node
    classification / anomaly detection over dynamic graph snapshots.

    Snapshots are split chronologically: the first (1 - testlength) fraction
    trains the model, the rest is held out for evaluation.
    """

    def __init__(self,data_path):
        # Root directory of the per-snapshot dataset files read by prepare().
        self.data_path = data_path
        args.data_size = [args.num_nodes,args.nfeat]
        # args.testlength is a fraction of args.timelength (see config).
        self.train_shots = list(range(0, args.timelength - int(args.testlength * args.timelength)))
        self.test_shots = list(range(int(args.testlength * args.timelength), args.timelength))
        self.model = PVAE(args).to(args.device)
        self.optimizer = optim.Adam(self.model.parameters(), lr=args.lr, amsgrad=True, betas=(args.beta1, args.beta2))
        # Decay LR by 10x every 20 steps (stepped once per epoch in fit()).
        self.scheduler = lr_scheduler.StepLR(self.optimizer, step_size=20, gamma=0.1)
        self.loss = Loss(args, self.model.c,self.model)
        set_random(args.seed)

    def calculate_metrics(self,preds,labels):
        """Compute binary classification metrics.

        preds: (N, nclasses) tensor of per-class scores; argmax -> hard label.
        labels: (N,) tensor of ground-truth class ids.
        Returns (f1, accuracy, precision, recall, roc_auc).
        """
        labels = labels.cpu().detach().numpy()
        preds = preds.cpu().detach().numpy()
        preds = np.argmax(preds,axis=1)
        f1 = f1_score(labels, preds)
        accuracy = accuracy_score(labels, preds)
        recall = recall_score(labels, preds)
        precision = precision_score(labels, preds)
        # NOTE(review): roc_auc_score is given hard argmax labels rather than
        # continuous scores, which typically underestimates AUC — confirm
        # whether class-1 probabilities were intended here.
        roc_auc = roc_auc_score(labels,preds )
        return f1,accuracy,precision,recall,roc_auc

    def fit(self):
        """Train over all training snapshots for up to args.max_epoch epochs.

        Per epoch: reset recurrent hidden state, step through the snapshots in
        order (the model carries state across snapshots via
        update_hiddens_all_with), then step the LR scheduler. Early-stops when
        the mean epoch loss has not improved for args.patience epochs (after
        args.min_epoch) or when the loss becomes NaN.
        """
        print('Using device {} to train the model ...'.format(args.device))
        t_total0 = time.time()
        # test_results placeholder is sized 5 for the metrics logged below;
        # predict() itself returns 7 values (metrics + memory + time).
        test_results, min_loss = [0] * 5, 10**7
        self.model.train()
        patience = 0
        for epoch in range(1, args.max_epoch + 1):
            t0 = time.time()
            epoch_losses = []
            # Fresh recurrent state at the start of every epoch.
            self.model.init_hiddens()
            self.model.train()
            for t in self.train_shots:
                edge_index,features,labels= prepare(t,args.device,self.data_path)
                self.optimizer.zero_grad()
                qz_x, px_z, zs,z = self.model(features, edge_index,args.K)
                epoch_loss = self.loss(z,labels,features,qz_x, px_z, zs)
                logger.info('Epoch:{}, Snapshot: {}; Loss: {:.4f}'.format(epoch, t, epoch_loss.item()))
                epoch_loss.backward()
                self.optimizer.step()
                epoch_losses.append(epoch_loss.item())
                # Carry this snapshot's embeddings into the next step's state.
                self.model.update_hiddens_all_with(z)
            self.scheduler.step()
            lr = self.optimizer.param_groups[0]["lr"]
            average_epoch_loss = np.mean(epoch_losses)
            gpu_mem_alloc = torch.cuda.max_memory_allocated() / 1000000 if torch.cuda.is_available() else 0
            logger.info('==' * 45)
            logger.info("Epoch:{}, LR: {:.4f}, Loss: {:.4f}, Time: {:.3f}, GPU: {:.1f}MiB".format(epoch, lr,average_epoch_loss,time.time() - t0,gpu_mem_alloc))
            if average_epoch_loss < min_loss:
                min_loss = average_epoch_loss
                if args.eval_while_testing:
                    # Evaluate on the held-out snapshots whenever training
                    # loss improves; predict() returns (f1, acc, pre, rec,
                    # roc, mem, time) — logged in acc/f1/pre/rec/roc order.
                    test_results = self.predict()
                    logger.info('Epoch:{}, Accuracy: {:.4f}; F1: {:.4f}; Precision: {:.4f}; Recall: {:.4f}; ROC AUC: {:.4f}'.format(epoch, test_results[1], test_results[0],test_results[2],test_results[3],test_results[4]))
                patience = 0
            else:
                patience += 1
                if epoch > args.min_epoch and patience > args.patience:
                    print('early stopping')
                    break
            # Abort on divergence (epoch_loss is the last snapshot's loss).
            if isnan(epoch_loss):
                print('nan loss')
                break
        logger.info('>> Total time : %6.2f' % (time.time() - t_total0))
        logger.info(">> Parameters: lr:%.4f |Dim:%d |Window:%d |" % (args.lr, args.nout, args.nb_window))

    def predict(self):
        """Evaluate on the held-out snapshots.

        Returns means over test snapshots of
        (f1, accuracy, precision, recall, roc_auc, peak GPU MiB, forward time).
        """
        f1_list,acc_list,pre_list,rec_list,roc_list,memory,ptime = [], [], [], [], [],[],[]
        self.model.eval()
        # NOTE(review): no torch.no_grad() here — gradients are still tracked
        # during evaluation, inflating memory/time. Confirm if intentional.
        for t in self.test_shots:
            edge_index, features,labels = prepare(t,args.device,self.data_path)
            start = time.time()
            qz_x, px_z, zs, embeddings = self.model(features,edge_index,args.K)
            preds = self.model.decode(embeddings)
            ptime.append(time.time() - start)
            memory.append(torch.cuda.max_memory_allocated() / 1000000 if torch.cuda.is_available() else 0)
            f1,accuracy,precision,recall,roc_auc = self.calculate_metrics(preds, labels)
            f1_list.append(f1)
            acc_list.append(accuracy)
            pre_list.append(precision)
            rec_list.append(recall)
            roc_list.append(roc_auc)
        return np.mean(f1_list), np.mean(acc_list), np.mean(pre_list), np.mean(rec_list), np.mean(roc_list),np.mean(memory),np.mean(ptime)
\ No newline at end of file
import torch.nn as nn
import torch.nn.functional as F
import HVGAE_AD.model.PVAE.objectives as objectives
class Loss(nn.Module):
    """Combined training objective for HVGAE_AD.

    Total loss = gamma * VAE reconstruction objective + NLL classification
    loss on the decoded embeddings.
    """

    def __init__(self, args, c, model):
        super().__init__()
        self.args = args
        self.device = self.args.device
        self.c = c
        self.model = model
        # Resolve the reconstruction objective by name from the objectives
        # module (vae_objective).
        self.reconstruction_loss = getattr(objectives, 'vae_objective')

    def forward(self, embeddings, labels, features, qz_x, px_z, zs):
        """Return the weighted sum of reconstruction and classification loss."""
        rec_term = self.reconstruction_loss(
            model=self.model,
            x=features,
            qz_x=qz_x,
            px_z=px_z,
            zs=zs,
            beta=self.args.beta,
            analytical_kl=self.args.analytical_kl,
        )
        print('Reconstruction loss:', rec_term.item())
        log_probs = self.model.decode(embeddings)
        cls_term = F.nll_loss(log_probs, labels)
        print('Classification loss:', cls_term.item())
        return self.args.gamma * rec_term + cls_term
\ No newline at end of file
File added
File added
File added
from HVGAE_AD.model.PVAE.distributions.riemannian_normal import RiemannianNormal
from HVGAE_AD.model.PVAE.distributions.hyperbolic_radius import HyperbolicRadius
from HVGAE_AD.model.PVAE.distributions.wrapped_normal import WrappedNormal
from HVGAE_AD.model.PVAE.distributions.hyperspherical_uniform import HypersphericalUniform
File added
File added
File added
import torch

# Positive infinity as a tensor; used below as the default (unbounded)
# lower/upper domain limits for ARS.
infty = torch.tensor(float('Inf'))
def diff(x):
    """Adjacent differences along dim 1: out[:, i] = x[:, i+1] - x[:, i]."""
    return torch.diff(x, dim=1)
class ARS():
    '''
    This class implements the Adaptive Rejection Sampling technique of Gilks and Wild '92.
    Where possible, naming convention has been borrowed from this paper.
    The PDF must be log-concave.
    Currently does not exploit the lower hull described in the paper - which is fine for
    drawing only a small number of samples at a time.

    All state is batched: hulls are maintained for B independent densities at
    once (B = xi.size(0)).
    '''

    def __init__(self, logpdf, grad_logpdf, device, xi, lb=-infty, ub=infty, use_lower=False, ns=50, **fargs):
        '''
        Initialize the upper (and if needed lower) hulls with the specified params.

        Parameters
        ==========
        logpdf: function computing log(f(u, ...)) for given u, where f(u) is
            proportional to the density we want to sample from
        grad_logpdf: d/du log(f(u, ...))
        device: torch device all hull tensors are allocated on
        xi: (B, K) ordered tensor of starting abscissae at which log(f(u, ...))
            is defined, used to initialize the hulls
        use_lower: True means the lower squeezing will be used; which is more
            efficient for drawing large numbers of samples (currently unused)
        lb: lower bound of the domain
        ub: upper bound of the domain
        ns: maximum number of points defining the hulls
        fargs: extra keyword arguments forwarded to logpdf and grad_logpdf
        '''
        self.device = device
        self.lb = lb
        self.ub = ub
        self.logpdf = logpdf
        self.grad_logpdf = grad_logpdf
        self.fargs = fargs
        #set limit on how many points to maintain on hull
        self.ns = ns
        self.xi = xi.to(self.device) # initialize x, the vector of abscissae at which the function h has been evaluated
        self.B, self.K = self.xi.size() # hull size
        # Pre-allocated (B, ns) buffers; only the first K columns are live.
        self.h = torch.zeros(self.B, ns).to(self.device)
        self.hprime = torch.zeros(self.B, ns).to(self.device)
        self.x = torch.zeros(self.B, ns).to(self.device)
        self.h[:, :self.K] = self.logpdf(self.xi, **self.fargs)
        self.hprime[:, :self.K] = self.grad_logpdf(self.xi, **self.fargs)
        self.x[:, :self.K] = self.xi
        # Avoid under/overflow errors. the envelope and pdf are only
        # proportional to the true pdf, so can choose any constant of proportionality.
        self.offset = self.h.max(-1)[0].view(-1, 1)
        self.h = self.h - self.offset
        # Derivative at first point in xi must be > 0
        # Derivative at last point in xi must be < 0
        # NOTE(review): IOError is an odd exception type for a precondition
        # failure — ValueError would be conventional; left unchanged here.
        if not (self.hprime[:, 0] > 0).all(): raise IOError('initial anchor points must span mode of PDF (left)')
        if not (self.hprime[:, self.K-1] < 0).all(): raise IOError('initial anchor points must span mode of PDF (right)')
        self.insert()

    def sample(self, shape=torch.Size()):
        '''
        Draw N samples per batch row by rejection sampling from the upper
        hull, looping until every requested sample has been accepted.
        Returns a tensor of shape (*shape, B, 1).
        '''
        shape = shape if isinstance(shape, torch.Size) else torch.Size([shape])
        samples = torch.ones(self.B, *shape).to(self.device)
        # bool_mask tracks which (row, sample) slots are still unfilled.
        bool_mask = (torch.ones(self.B, *shape) == 1).to(self.device)
        count = 0
        while bool_mask.sum() != 0:
            count += 1
            # Propose from the piecewise-exponential upper hull.
            xt, i = self.sampleUpper(shape)
            ht = self.logpdf(xt, **self.fargs)
            # hprimet = self.grad_logpdf(xt, **self.fargs)
            ht = ht - self.offset
            # ut: value of the upper hull (tangent line of segment i) at xt.
            ut = self.h.gather(1, i) + (xt - self.x.gather(1, i)) * self.hprime.gather(1, i)
            # Accept sample? Standard rejection test u < f(xt) / envelope(xt).
            u = torch.rand(shape).to(self.device)
            accept = u < torch.exp(ht - ut)
            reject = ~accept
            samples[bool_mask * accept] = xt[bool_mask * accept]
            bool_mask[bool_mask * accept] = reject[bool_mask * accept]
            # Update hull with new function evaluations
            # NOTE(review): the adaptive hull refinement below is disabled;
            # the hull stays fixed at its initial K anchor points.
            # if self.K < self.ns:
            #     nb_insert = self.ns - self.K
            #     self.insert(nb_insert, xt[:, :nb_insert], ht[:, :nb_insert], hprimet[:, :nb_insert])
        return samples.t().unsqueeze(-1)

    def insert(self, nbnew=0, xnew=None, hnew=None, hprimenew=None):
        '''
        Update hulls with new point(s); if none given, just recalculate the
        hull from the existing x, h, hprime.
        '''
        # if xnew is not None:
        #     self.x[:, self.K:self.K+nbnew] = xnew
        #     self.x, idx = self.x.sort()
        #     self.h[:, self.K:self.K+nbnew] = hnew
        #     self.h = self.h.gather(1, idx)
        #     self.hprime[:, self.K:self.K+nbnew] = hprimenew
        #     self.hprime = self.hprime.gather(1, idx)
        #     self.K += xnew.size(-1)
        # z: K+1 segment boundaries — domain bounds at the ends, tangent-line
        # intersections between consecutive anchor points in the interior.
        self.z = torch.zeros(self.B, self.K + 1).to(self.device)
        self.z[:, 0] = self.lb; self.z[:, self.K] = self.ub
        self.z[:, 1:self.K] = (diff(self.h[:, :self.K]) - diff(self.x[:, :self.K] * self.hprime[:, :self.K])) / -diff(self.hprime[:, :self.K])
        idx = [0]+list(range(self.K))
        # u: upper-hull value at every boundary z.
        self.u = self.h[:, idx] + self.hprime[:, idx] * (self.z-self.x[:, idx])
        # s: unnormalized mass of each piecewise-exponential segment.
        self.s = diff(torch.exp(self.u)) / self.hprime[:, :self.K]
        self.s[self.hprime[:, :self.K] == 0.] = 0. # should be 0 when gradient is 0
        # cs: cumulative segment masses (with leading 0); cu: total mass.
        self.cs = torch.cat((torch.zeros(self.B, 1).to(self.device), torch.cumsum(self.s, dim=-1)), dim=-1)
        self.cu = self.cs[:, -1]

    def sampleUpper(self, shape=torch.Size()):
        '''
        Return values randomly sampled from the upper hull (inverse-CDF over
        the piecewise-exponential segments) and the index of each segment.
        '''
        u = torch.rand(self.B, *shape).to(self.device)
        # i: per-sample boolean table of which cumulative masses u exceeds;
        # summing gives the segment index each u falls into.
        i = (self.cs/self.cu.unsqueeze(-1)).unsqueeze(-1) <= u.unsqueeze(1).expand(*self.cs.shape, *shape)
        idx = i.sum(1) - 1
        # Invert the segment's exponential CDF analytically.
        xt = self.x.gather(1, idx) + (-self.h.gather(1, idx) + torch.log(self.hprime.gather(1, idx)*(self.cu.unsqueeze(-1)*u - self.cs.gather(1, idx)) +
        torch.exp(self.u.gather(1, idx)))) / self.hprime.gather(1, idx)
        return xt, idx