diff --git a/H2HGCN/h2hgcn.py b/H2HGCN/h2hgcn.py
index ce5b879408717446eb8481839d7af23a09136650..4d92fa94092b6f9e1ba716be17264f30d4765d7e 100644
--- a/H2HGCN/h2hgcn.py
+++ b/H2HGCN/h2hgcn.py
@@ -45,8 +45,8 @@ class H2HGCN:
                 seed=42,
                 log_freq=1,
                 eval_freq=1,
-                val_prop=.2,
-                test_prop=0.3,
+                val_prop=0.15,
+                test_prop=0.15,
                 double_precision=0,
                 dropout=0.1,
                 normalize_adj=False,
diff --git a/HGCAE/hgcae.py b/HGCAE/hgcae.py
index e113fd08266c92747dc5ad9dfad6dbab6536a513..614885ff479917f83335decb0a7ceb0a90e54a14 100644
--- a/HGCAE/hgcae.py
+++ b/HGCAE/hgcae.py
@@ -6,7 +6,7 @@ import os
 import time
 from Ghypeddings.HGCAE.utils.train_utils import get_dir_name, format_metrics
 from Ghypeddings.HGCAE.utils.data_utils import process_data
-from Ghypeddings.HGCAE.utils.train_utils import create_args , get_classifier,get_clustering_algorithm,get_anomaly_detection_algorithm
+from Ghypeddings.HGCAE.utils.train_utils import create_args, get_classifier, get_clustering_algorithm, get_anomaly_detection_algorithm
 import Ghypeddings.HGCAE.optimizers as optimizers
 from Ghypeddings.HGCAE.utils.data_utils import sparse_mx_to_torch_sparse_tensor
 
@@ -26,7 +26,7 @@ class HGCAE(object):
                 grad_clip=None,
                 optimizer='RiemannianAdam',
                 weight_decay=0.01,
-                lr=0.01,
+                lr=0.001,
                 gamma=0.5,
                 lr_reduce_freq=500,
                 cuda=0,
@@ -34,9 +34,9 @@ class HGCAE(object):
                 min_epochs=50,
                 patience=None,
                 seed=42,
-                log_freq=0,
+                log_freq=1,
                 eval_freq=1,
-                val_prop=.2,
+                val_prop=0.0002,
                 test_prop=0.3,
                 double_precision=0,
                 dropout=0.1,
@@ -134,6 +134,7 @@ class HGCAE(object):
             self.optimizer.zero_grad()
             embeddings = self.model.encode(self.data['features'], self.adj_train_enc)
             train_metrics = self.model.compute_metrics(embeddings, self.data, 'train', epoch)
+            print(train_metrics)
             train_metrics['loss'].backward()
             if self.args.grad_clip is not None:
                 max_norm = float(self.args.grad_clip)
@@ -162,40 +163,41 @@ class HGCAE(object):
                 if (epoch + 1) % self.args.eval_freq == 0:
                     self.model.eval()
                     embeddings = self.model.encode(self.data['features'], self.adj_train_enc)
-                    val_metrics = self.model.compute_metrics(embeddings, self.data, 'val')
-                    val_losses.append(val_metrics['loss'].item())
-                    if (epoch + 1) % self.args.log_freq == 0:
-                        logging.info(" ".join(['Epoch: {:04d}'.format(epoch + 1), format_metrics(val_metrics, 'val')]))
-                    if self.model.has_improved(best_val_metrics, val_metrics):
-                        self.best_emb = embeddings
-                        best_val_metrics = val_metrics
-                        counter = 0
-                    else:
-                        counter += 1
-                        if counter == self.args.patience and epoch > self.args.min_epochs:
-                            logging.info("Early stopping")
-                            break
+                    #val_metrics = self.model.compute_metrics(embeddings, self.data, 'val')
+                    # val_losses.append(val_metrics['loss'].item())
+                    # if (epoch + 1) % self.args.log_freq == 0:
+                    #     logging.info(" ".join(['Epoch: {:04d}'.format(epoch + 1), format_metrics(val_metrics, 'val')]))
+                    # if self.model.has_improved(best_val_metrics, val_metrics):
+                    #     self.best_emb = embeddings
+                    #     best_val_metrics = val_metrics
+                    #     counter = 0
+                    # else:
+                    #     counter += 1
+                    #     if counter == self.args.patience and epoch > self.args.min_epochs:
+                    #         logging.info("Early stopping")
+                    #         break
 
         logging.info("Training Finished!")
         logging.info("Total time elapsed: {:.4f}s".format(time.time() - t_total))
 
-        train_idx = np.unique(self.data['train_edges'][:,0].cpu().detach().numpy())
-        val_idx = np.unique(self.data['val_edges'][:,0].cpu().detach().numpy())
-        idx = np.unique(np.concatenate((train_idx,val_idx)))
-        X = self.model.manifold.logmap0(self.best_emb[idx],self.model.encoder.curvatures[-1]).cpu().detach().numpy()
-        y = self.data['labels'].reshape(-1,1)[idx]
-
-        if(self.args.classifier):
-            self.cls = get_classifier(self.args, X,y)
-            acc,f1,recall,precision,roc_auc = calculate_metrics(self.cls,X,y)
-        elif self.args.clusterer:
-            y = y.reshape(-1,)
-            acc,f1,recall,precision,roc_auc = get_clustering_algorithm(self.args.clusterer,X,y)[6:]
-        elif self.args.anomaly_detector:
-            y = y.reshape(-1,)
-            acc,f1,recall,precision,roc_auc = get_anomaly_detection_algorithm(self.args.anomaly_detector,X,y)[6:]
+        # train_idx = np.unique(self.data['train_edges'][:,0].cpu().detach().numpy())
+        # val_idx = np.unique(self.data['val_edges'][:,0].cpu().detach().numpy())
+        # idx = np.unique(np.concatenate((train_idx,val_idx)))
+        # X = self.model.manifold.logmap0(self.best_emb[idx],self.model.encoder.curvatures[-1]).cpu().detach().numpy()
+        # y = self.data['labels'].reshape(-1,1)[idx]
+
+        # if(self.args.classifier):
+        #     self.cls = get_classifier(self.args, X,y)
+        #     acc,f1,recall,precision,roc_auc = calculate_metrics(self.cls,X,y)
+        # elif self.args.clusterer:
+        #     y = y.reshape(-1,)
+        #     acc,f1,recall,precision,roc_auc = get_clustering_algorithm(self.args.clusterer,X,y)[6:]
+        # elif self.args.anomaly_detector:
+        #     y = y.reshape(-1,)
+        #     acc,f1,recall,precision,roc_auc = get_anomaly_detection_algorithm(self.args.anomaly_detector,X,y)[6:]
         
-        return {'train':train_losses,'best':best_losses,'val':val_losses},acc,f1,recall,precision,roc_auc , time.time() - t_total
+        # return {'train':train_losses,'best':best_losses,'val':val_losses},acc,f1,recall,precision,roc_auc , time.time() - t_total
+        return {'train':train_losses,'best':best_losses,'val':val_losses}, time.time() - t_total
 
     def predict(self):
         self.model.eval()
@@ -209,14 +211,20 @@ class HGCAE(object):
         elif self.args.clusterer:
             labels = labels.reshape(-1,)
             acc,f1,recall,precision,roc_auc = get_clustering_algorithm(self.args.clusterer,data,labels)[6:]
+        elif self.args.anomaly_detector:
+            labels = labels.reshape(-1,)
+            acc,f1,recall,precision,roc_auc = get_anomaly_detection_algorithm(self.args.anomaly_detector,data,labels)[6:]
+        self.tb_embeddings = embeddings
         return val_metrics['loss'].item(),acc,f1,recall,precision,roc_auc
 
                     
-    def save_embeddings(self,directory,prefix):
-        tb_embeddings_euc = self.model.manifold.logmap0(self.best_emb,self.model.encoder.curvatures[-1])
-        for_classification_hyp = np.hstack((self.best_emb.cpu().detach().numpy(),self.data['labels'].reshape(-1,1)))
+    def save_embeddings(self,directory):
+        self.model.eval()
+        embeddings = self.model.encode(self.data['features'], self.adj_train_enc)
+        tb_embeddings_euc = self.model.manifold.logmap0(embeddings,self.model.encoder.curvatures[-1])
+        for_classification_hyp = np.hstack((embeddings.cpu().detach().numpy(),self.data['labels'].reshape(-1,1)))
         for_classification_euc = np.hstack((tb_embeddings_euc.cpu().detach().numpy(),self.data['labels'].reshape(-1,1)))
-        hyp_file_path = os.path.join(directory,f'{prefix}_embeddings_hyp.csv')
-        euc_file_path = os.path.join(directory,f'{prefix}_embeddings_euc.csv')
+        hyp_file_path = os.path.join(directory,'hgcae_embeddings_hyp.csv')
+        euc_file_path = os.path.join(directory,'hgcae_embeddings_euc.csv')
         np.savetxt(hyp_file_path, for_classification_hyp, delimiter=',')
         np.savetxt(euc_file_path, for_classification_euc, delimiter=',')
\ No newline at end of file
diff --git a/HGCAE/utils/train_utils.py b/HGCAE/utils/train_utils.py
index 41c1eaa1c0b36f53a74856ecc12447bb42364bd2..42026c479f9cbe09851ff95469669c1cd292b1f0 100644
--- a/HGCAE/utils/train_utils.py
+++ b/HGCAE/utils/train_utils.py
@@ -215,5 +215,11 @@ def get_anomaly_detection_algorithm(algorithm,X,y):
         return isolation_forest(X,y)
     elif(algorithm == 'one_class_svm'):
         return one_class_svm(X,y)
+    elif(algorithm == 'dbscan'):
+        return dbscan(X,y)
+    elif(algorithm == 'kmeans'):
+        return kmeans(X,y,n_clusters=2)
+    elif(algorithm == 'local_outlier_factor'):
+        return local_outlier_factor(X,y)
     else:
         raise NotImplementedError
\ No newline at end of file
diff --git a/HGCN/hgcn.py b/HGCN/hgcn.py
index ac5bffb4b27db6b9f934a44e1a21ba8a12441c8e..84c735f9a3aae0aeb53bf20e2c72fc1ad8762f53 100644
--- a/HGCN/hgcn.py
+++ b/HGCN/hgcn.py
@@ -26,11 +26,11 @@ class HGCN:
                 num_layers=2,
                 bias=True,
                 act='relu',
-                select_manifold='Hyperboloid',
+                select_manifold='Euclidean',  # Euclidean, Hyperboloid
                 grad_clip=1.0,
-                optimizer='RiemannianAdam',
+                optimizer='Adam',  # Adam, RiemannianAdam
                 weight_decay=0.01,
-                lr=0.009,
+                lr=0.1,  # 0.009
                 gamma=0.5,
                 lr_reduce_freq=200,
                 cuda=0,
@@ -38,14 +38,14 @@ class HGCN:
                 min_epochs=50,
                 patience=None,
                 seed=42,
-                log_freq=0,
+                log_freq=1,
                 eval_freq=1,
-                val_prop=.2,
-                test_prop=0.3,
+                val_prop=0.15,
+                test_prop=0.15,
                 double_precision=0,
                 dropout=0.1,
                 use_att= True,
-                alpha=0.2,
+                alpha=0.5,
                 local_agg = False,
                 normalize_adj=False,
                 normalize_feats=True
diff --git a/HGCN/layers/layers.py b/HGCN/layers/layers.py
index c7682cfd33b7a7dcd723558c4722e93a92ff4510..c2eeb70eafdda7141eb6047a7ffbb37c14f6a910 100644
--- a/HGCN/layers/layers.py
+++ b/HGCN/layers/layers.py
@@ -23,4 +23,49 @@ class Linear(Module):
         hidden = F.dropout(hidden, self.dropout, training=self.training)
         out = self.act(hidden)
         return out
-    
\ No newline at end of file
+
+
+
+def get_dim_act(args):
+    """
+    Helper function to get dimension and activation at every layer.
+    :param args:
+    :return:
+    """
+    if not args.act:
+        act = lambda x: x
+    else:
+        act = getattr(F, args.act)
+    acts = [act] * (args.num_layers - 1)
+    dims = [args.feat_dim] + ([args.dim] * (args.num_layers - 1))
+    return dims, acts
+
+
+class GraphConvolution(Module):
+    """
+    Simple GCN layer.
+    """
+
+    def __init__(self, in_features, out_features, dropout, act, use_bias):
+        super(GraphConvolution, self).__init__()
+        self.dropout = dropout
+        self.linear = nn.Linear(in_features, out_features, use_bias)
+        self.act = act
+        self.in_features = in_features
+        self.out_features = out_features
+
+    def forward(self, input):
+        x, adj = input
+        hidden = self.linear.forward(x)
+        hidden = F.dropout(hidden, self.dropout, training=self.training)
+        if adj.is_sparse:
+            support = torch.spmm(adj, hidden)
+        else:
+            support = torch.mm(adj, hidden)
+        output = self.act(support), adj
+        return output
+
+    def extra_repr(self):
+        return 'input_dim={}, output_dim={}'.format(
+                self.in_features, self.out_features
+        )
diff --git a/HGCN/models/base_models.py b/HGCN/models/base_models.py
index e9acf7a144744da2739339a436c9212629053479..00f5628c36362d96a94bd153b0e5abfb44ad20ff 100644
--- a/HGCN/models/base_models.py
+++ b/HGCN/models/base_models.py
@@ -30,7 +30,7 @@ class BaseModel(nn.Module):
         if self.manifold.name == 'Hyperboloid':
             args.feat_dim = args.feat_dim + 1
         self.nnodes = args.n_nodes
-        self.encoder = getattr(encoders, 'HGCN')(self.c, args)
+        self.encoder = getattr(encoders, args.model)(self.c, args)
 
     def encode(self, x, adj):
         if self.manifold.name == 'Hyperboloid':
diff --git a/HGCN/models/encoders.py b/HGCN/models/encoders.py
index 344b8dd35f7d76f0783daeddaa6243beb5393680..c82c611b2bae12bbf192813e906daa001eac822f 100644
--- a/HGCN/models/encoders.py
+++ b/HGCN/models/encoders.py
@@ -9,6 +9,8 @@ import Ghypeddings.HGCN.manifolds as manifolds
 import Ghypeddings.HGCN.layers.hyp_layers as hyp_layers
 import Ghypeddings.HGCN.utils.math_utils as pmath
 
+from Ghypeddings.HGCN.layers.layers import GraphConvolution, Linear, get_dim_act
+from Ghypeddings.HGCN.layers.att_layers import GraphAttentionLayer
 
 class Encoder(nn.Module):
     """
@@ -27,6 +29,45 @@ class Encoder(nn.Module):
             output = self.layers.forward(x)
         return output
 
+class GCN(Encoder):
+    """
+    Graph Convolution Networks.
+    """
+
+    def __init__(self, c, args):
+        super(GCN, self).__init__(c)
+        assert args.num_layers > 0
+        dims, acts = get_dim_act(args)
+        gc_layers = []
+        for i in range(len(dims) - 1):
+            in_dim, out_dim = dims[i], dims[i + 1]
+            act = acts[i]
+            gc_layers.append(GraphConvolution(in_dim, out_dim, args.dropout, act, args.bias))
+        self.layers = nn.Sequential(*gc_layers)
+        self.encode_graph = True
+
+class GAT(Encoder):
+    """
+    Graph Attention Networks.
+    """
+
+    def __init__(self, c, args):
+        super(GAT, self).__init__(c)
+        assert args.num_layers > 0
+        dims, acts = get_dim_act(args)
+        gat_layers = []
+        for i in range(len(dims) - 1):
+            in_dim, out_dim = dims[i], dims[i + 1]
+            act = acts[i]
+            assert dims[i + 1] % args.n_heads == 0
+            out_dim = dims[i + 1] // args.n_heads
+            concat = True
+            gat_layers.append(
+                    GraphAttentionLayer(in_dim, out_dim, args.dropout, act, args.alpha, args.n_heads, concat))
+        self.layers = nn.Sequential(*gat_layers)
+        self.encode_graph = True
+
+
 class HGCN(Encoder):
     """
     Hyperbolic-GCN.
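# Illustrative sketch, assuming the Ghypeddings package above is importable and an
# args namespace carrying the fields the new encoders read (values below are example
# choices, not repository defaults). BaseModel now resolves the encoder class by name,
# mirroring `getattr(encoders, args.model)(self.c, args)` in HGCN/models/base_models.py.
import argparse
import torch
import Ghypeddings.HGCN.models.encoders as encoders

args = argparse.Namespace(
    model='GAT',                     # 'GCN', 'GAT' or 'HGCN'
    num_layers=2, act='relu', feat_dim=76, dim=16,
    dropout=0.1, bias=True, n_heads=1, alpha=0.5,
)
c = torch.tensor([1.])
encoder = getattr(encoders, args.model)(c, args)
# For 'GAT', args.dim must be divisible by args.n_heads (see the assert in GAT.__init__).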
diff --git a/HGCN/utils/data_utils.py b/HGCN/utils/data_utils.py
index 3f037648e2dbf1127ac5808707c459fa2d6989d7..5169f98c576ae898769d8d3ffe5f4133283af93f 100644
--- a/HGCN/utils/data_utils.py
+++ b/HGCN/utils/data_utils.py
@@ -64,8 +64,6 @@ def augment(adj, features, normalize_feats=True):
 
 def split_data(labels, val_prop, test_prop, seed):
     np.random.seed(seed)
-    nb_nodes = labels.shape[0]
-    all_idx = np.arange(nb_nodes)
     pos_idx = labels.nonzero()[0]
     neg_idx = (1. - labels).nonzero()[0]
     np.random.shuffle(pos_idx)
diff --git a/HGCN/utils/train_utils.py b/HGCN/utils/train_utils.py
index 6e4385c5c977b1ea47ee9ffb6afe1d7f013c7fcc..296451e4795df11d93ffae8d345da6c1aba740fe 100644
--- a/HGCN/utils/train_utils.py
+++ b/HGCN/utils/train_utils.py
@@ -41,5 +41,7 @@ def create_args(*args):
     parser.add_argument('--local_agg', type=bool, default=args[25])
     parser.add_argument('--normalize_adj', type=bool, default=args[26])
     parser.add_argument('--normalize_feats', type=bool, default=args[27])
+    parser.add_argument('--model', type=str, default='GAT')  # GCN, GAT, HGCN
+    parser.add_argument('--n_heads', type=int, default=1)  # only used by the GAT encoder
     flags, unknown = parser.parse_known_args()
     return flags
\ No newline at end of file
diff --git a/HGNN/hgnn.py b/HGNN/hgnn.py
index c5b1fe60d15606a68e9a83a7d3fc4a8ee5d9a2ad..d8702dec1502ad91bef985a6c3d4045929c17ff5 100644
--- a/HGNN/hgnn.py
+++ b/HGNN/hgnn.py
@@ -33,8 +33,8 @@ class HGNN:
                 seed=42,
                 log_freq=1,
                 eval_freq=1,
-                val_prop=.2,
-                test_prop=0.3,
+                val_prop=0.15,
+                test_prop=0.15,
                 double_precision=0,
                 dropout=0.01,
                 normalize_adj=False,
diff --git a/PVAE/pvae.py b/PVAE/pvae.py
index 86b89f05e9b0cbab91f746f1a1a7a6cfe946c482..b1318928f98252ce80bf76225c45cbc63037d794 100644
--- a/PVAE/pvae.py
+++ b/PVAE/pvae.py
@@ -28,18 +28,18 @@ class PVAE:
                 hidden_dim,
                 num_layers=2,
                 c=1.0,
-                act='leaky_relu',
+                act='relu',
                 lr=0.01,
                 cuda=0,
                 epochs=50,
                 seed=42,
                 eval_freq=1,
-                val_prop=0.2,
+                val_prop=0.,
                 test_prop=0.3,
                 dropout=0.1,
                 beta1=0.9,
                 beta2=.999,
-                K=10,
+                K=1,
                 beta=.2,
                 analytical_kl=True,
                 posterior='WrappedNormal',
@@ -53,7 +53,7 @@ class PVAE:
                 alpha=0.5,
                 classifier=None,
                 clusterer=None,
-                log_freq=0,
+                log_freq=1,
                 normalize_adj=False,
                 normalize_feats=True,
                 anomaly_detector=None
@@ -78,7 +78,6 @@ class PVAE:
 
         # Choosing and saving a random seed for reproducibility
         if self.args.seed == 0: self.args.seed = int(torch.randint(0, 2**32 - 1, (1,)).item())
-        print('seed', self.args.seed)
         torch.manual_seed(self.args.seed)
         np.random.seed(self.args.seed)
         torch.cuda.manual_seed_all(self.args.seed)
@@ -145,7 +144,7 @@ class PVAE:
             if (epoch + 1) % self.args.log_freq == 0:
                 print('====> Epoch: {:03d} Loss: {:.2f} Recon: {:.2f} KL: {:.2f}'.format(epoch, agg['train_loss'][-1], agg['train_recon'][-1], agg['train_kl'][-1]))
 
-            if (epoch + 1) % self.args.eval_freq == 0:
+            if (epoch + 1) % self.args.eval_freq == 0 and self.args.val_prop:
                 self.model.eval()
                 with torch.no_grad():
                     qz_x, px_z, lik, kl, loss , embeddings= self.loss_function(self.model,self.data['idx_val'], self.data['features'],self.data['adj_train'], K=self.args.K, beta=self.args.beta, components=True)
@@ -178,7 +177,7 @@ class PVAE:
             acc,f1,recall,precision,roc_auc = get_clustering_algorithm(self.args.clusterer,X,y)[6:]
         elif self.args.anomaly_detector:
             y = y.reshape(-1,)
-            acc,f1,recall,precision,roc_auc = get_anomaly_detection_algorithm(self.args.clusterer,X,y)[6:]
+            acc,f1,recall,precision,roc_auc = get_anomaly_detection_algorithm(self.args.anomaly_detector,X,y)[6:]
 
         return {'train':train_losses,'best':best_losses,'val':val_losses},acc,f1,recall,precision,roc_auc,time.time() - t_total
 
@@ -197,16 +196,16 @@ class PVAE:
             acc,f1,recall,precision,roc_auc = get_clustering_algorithm(self.args.clusterer,data,labels)[6:]
         elif self.args.anomaly_detector:
             labels = labels.reshape(-1,)
-            acc,f1,recall,precision,roc_auc = get_anomaly_detection_algorithm(self.args.clusterer,data,labels)[6:]
-
+            acc,f1,recall,precision,roc_auc = get_anomaly_detection_algorithm(self.args.anomaly_detector,data,labels)[6:]
+        self.tb_embeddings = embeddings[0]
         return abs(tt_loss) , acc, f1 , recall,precision,roc_auc
 
 
-    def save_embeddings(self,directory,prefix):
+    def save_embeddings(self,directory):
         tb_embeddings_euc = self.model.manifold.logmap0(self.tb_embeddings)
         for_classification_hyp = np.hstack((self.tb_embeddings.cpu().detach().numpy(),self.data['labels'].reshape(-1,1).cpu()))
         for_classification_euc = np.hstack((tb_embeddings_euc.cpu().detach().numpy(),self.data['labels'].reshape(-1,1).cpu()))
-        hyp_file_path = os.path.join(directory,f'{prefix}_embeddings_hyp.csv')
-        euc_file_path = os.path.join(directory,f'{prefix}_embeddings_euc.csv')
+        hyp_file_path = os.path.join(directory,'pvae_embeddings_hyp.csv')
+        euc_file_path = os.path.join(directory,'pvae_embeddings_euc.csv')
         np.savetxt(hyp_file_path, for_classification_hyp, delimiter=',')
         np.savetxt(euc_file_path, for_classification_euc, delimiter=',')
diff --git a/PVAE/utils.py b/PVAE/utils.py
index 36d85574d9584cf5b56c9aa160acce357be1ecfe..2f935958fa8d55d60f320c19fc8f1dc8a183ef6a 100644
--- a/PVAE/utils.py
+++ b/PVAE/utils.py
@@ -199,7 +199,8 @@ def split_data(labels, test_prop,val_prop):
                                                                                                    nb_val + nb_test:]
     idx_val_neg, idx_test_neg, idx_train_neg = neg_idx[:nb_val], neg_idx[nb_val:nb_val + nb_test], neg_idx[
                                                                                                    nb_val + nb_test:]
-    return idx_val_pos + idx_val_neg, idx_test_pos + idx_test_neg, idx_train_pos + idx_train_neg
+
+    return idx_test_pos + idx_test_neg, idx_train_pos + idx_train_neg, idx_val_pos + idx_val_neg
 
 def process_data(args, adj,features,labels):
     data = process_data_nc(args,adj,features,labels)
@@ -344,5 +345,11 @@ def get_anomaly_detection_algorithm(algorithm,X,y):
         return isolation_forest(X,y)
     elif(algorithm == 'one_class_svm'):
         return one_class_svm(X,y)
+    elif(algorithm == 'dbscan'):
+        return dbscan(X,y)
+    elif(algorithm == 'kmeans'):
+        return kmeans(X,y,n_clusters=2)
+    elif(algorithm == 'local_outlier_factor'):
+        return local_outlier_factor(X,y)
     else:
         raise NotImplementedError
\ No newline at end of file
diff --git a/Poincare/poincare.py b/Poincare/poincare.py
index c2eeecc00b27433a1f35d964619dd5d230841a21..018bf5b4053a4d26b1051d4df15598e8b95cfacc 100644
--- a/Poincare/poincare.py
+++ b/Poincare/poincare.py
@@ -31,12 +31,13 @@ class POINCARE:
                 seed=42,
                 log_freq=1,
                 eval_freq=1,
-                val_prop=0.2,
-                test_prop=0.3,
+                val_prop=0.15,
+                test_prop=0.15,
                 double_precision=0,
                 dropout=0.01,
                 normalize_adj=False,
                 normalize_feats=True):
+        
         self.args = create_args(dim,grad_clip,weight_decay,lr,gamma,lr_reduce_freq,cuda,epochs,min_epochs,patience,seed,log_freq,eval_freq,val_prop,test_prop,double_precision,dropout,normalize_adj,normalize_feats)
         self.args.n_nodes = adj.shape[0]
         self.args.feat_dim = features.shape[1]
diff --git a/README.md b/README.md
index f8a1bea3df8f0b83f20034c24584eb4d8bf4243b..929877df60fb0bf94e6a5483c54afbd94e57fde9 100644
--- a/README.md
+++ b/README.md
@@ -41,10 +41,10 @@ In this library, we provide a variety of binary classifiers, clustering algorith
 
 The following intrusion detection datasets were used to test and evaluate the models. Our code includes all the pre-processing steps required to convert these datasets from tabular format into graphs. Due to usage restrictions, this library provides only a single graph of each dataset, with 5,000 nodes, already pre-processed and normalized.
 
-| Name            | Features | Used features  | Hyperbolicity | Ref   |
-|-----------------|----------|----------------|---------------|-------|
-| CIC-DDoS2019    | 80       | 76             | 1.0           | [7]   |
-| AWID3           | Cell 5   | Cell 6         | Cell 7        |       |
+| Name            | Ref   |
+|-----------------|-------|
+| CIC-DDoS2019    | [7]   |
+| AWID3           |       |
 
 
 
diff --git a/__init__.py b/__init__.py
index de2f3397b350d33db935f55fb55b45efa4a65367..37936737e7ef84fa8bc1299b9dbd2053d56c48b3 100644
--- a/__init__.py
+++ b/__init__.py
@@ -6,9 +6,9 @@ from Ghypeddings.Poincare.poincare import POINCARE
 from Ghypeddings.PVAE.pvae import PVAE
 
 from Ghypeddings.datasets.datasets import CIC_DDoS2019
-from Ghypeddings.datasets.datasets import CIC_IDS2018
-from Ghypeddings.datasets.datasets import UNSW_NB15
+from Ghypeddings.datasets.datasets import NF_CIC_IDS2018_v2
+from Ghypeddings.datasets.datasets import NF_UNSW_NB15_v2
 from Ghypeddings.datasets.datasets import Darknet
 from Ghypeddings.datasets.datasets import AWID3
-from Ghypeddings.datasets.datasets import TON_IoT
-from Ghypeddings.datasets.datasets import BOT_IoT
\ No newline at end of file
+from Ghypeddings.datasets.datasets import NF_TON_IoT_v2
+from Ghypeddings.datasets.datasets import NF_BOT_IoT_v2
\ No newline at end of file
diff --git a/anomaly_detection/__init__.py b/anomaly_detection/__init__.py
index 5fc56d706bb25fdf9eda05d19bfd06260f617f21..41092b1e9bc9d3fa72849637f8a78dcda3a06fcf 100644
--- a/anomaly_detection/__init__.py
+++ b/anomaly_detection/__init__.py
@@ -1,2 +1,5 @@
 from Ghypeddings.anomaly_detection.isolation_forest import isolation_forest
-from Ghypeddings.anomaly_detection.one_class_svm import one_class_svm
\ No newline at end of file
+from Ghypeddings.anomaly_detection.one_class_svm import one_class_svm
+from Ghypeddings.anomaly_detection.dbscan import dbscan
+from Ghypeddings.anomaly_detection.kmeans import kmeans
+from Ghypeddings.anomaly_detection.local_outlier_factor import local_outlier_factor
\ No newline at end of file
diff --git a/anomaly_detection/dbscan.py b/anomaly_detection/dbscan.py
new file mode 100644
index 0000000000000000000000000000000000000000..00bc3d669316442b5edb4595f65872f49361a755
--- /dev/null
+++ b/anomaly_detection/dbscan.py
@@ -0,0 +1,9 @@
+from sklearn.cluster import DBSCAN
+from Ghypeddings.anomaly_detection.utils import calculate_metrics
+
+
+def dbscan(X,y):
+    model = DBSCAN(eps=0.5, min_samples=5)
+    labels = model.fit_predict(X)
+    outliers = labels == -1  # DBSCAN labels noise points as -1
+    return calculate_metrics(y,outliers)
diff --git a/anomaly_detection/isolation_forest.py b/anomaly_detection/isolation_forest.py
index f8c81c7809b0cadccb1c86fe6adda2be5cdfa95c..52ea90463b1026ac8d482f240f9bb5b4a64219d4 100644
--- a/anomaly_detection/isolation_forest.py
+++ b/anomaly_detection/isolation_forest.py
@@ -3,9 +3,10 @@ from Ghypeddings.anomaly_detection.utils import calculate_metrics
 
 from sklearn.ensemble import IsolationForest
 
-def isolation_forest(X,y,anomalies_percentage = 0.5):
+def isolation_forest(X,y,anomalies_percentage = 0.1):
     model = IsolationForest(contamination=anomalies_percentage)
     model.fit(X)
     y_pred = model.predict(X)
-    y_pred[y_pred == -1]=0
+    y_pred[y_pred == 1] = 0
+    y_pred[y_pred == -1]= 1
     return calculate_metrics(y,y_pred)
\ No newline at end of file
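# Illustrative sketch: scikit-learn's IsolationForest.predict returns +1 for inliers and
# -1 for outliers, while the datasets here label benign traffic as 0 and attacks as 1,
# hence the remapping above. Minimal standalone check with synthetic data:
import numpy as np
from sklearn.ensemble import IsolationForest

X = np.vstack([np.random.randn(95, 2), np.random.randn(5, 2) + 6.0])   # 5 injected outliers
y_pred = IsolationForest(contamination=0.1, random_state=0).fit(X).predict(X)
y_pred[y_pred == 1] = 0    # inliers  -> benign
y_pred[y_pred == -1] = 1   # outliers -> attack
print(int(y_pred.sum()), "points flagged as outliers")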
diff --git a/anomaly_detection/kmeans.py b/anomaly_detection/kmeans.py
new file mode 100644
index 0000000000000000000000000000000000000000..a5fbfc8343bda122043d33743def26ba18dfcd5c
--- /dev/null
+++ b/anomaly_detection/kmeans.py
@@ -0,0 +1,12 @@
+from sklearn.cluster import KMeans
+from Ghypeddings.anomaly_detection.utils import calculate_metrics
+import numpy as np
+
+def kmeans(X,y,n_clusters=2,outlier_percentage=.1):
+    model = KMeans(n_clusters=n_clusters)
+    model.fit(X)
+    # flag points whose distance to the nearest centroid is in the top outlier_percentage
+    distances = model.transform(X).min(axis=1)
+    threshold = np.percentile(distances, 100 * (1 - outlier_percentage))
+    outliers = distances > threshold
+    return calculate_metrics(y,outliers)
\ No newline at end of file
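# Illustrative usage sketch of the new KMeans-based detector: points whose distance to
# the nearest centroid falls in the top `outlier_percentage` fraction are flagged as
# anomalies. Assumes the Ghypeddings package is importable; X and y are synthetic stand-ins.
import numpy as np
from Ghypeddings.anomaly_detection.kmeans import kmeans

X = np.vstack([np.random.randn(90, 4), np.random.randn(10, 4) + 8.0])
y = np.array([0] * 90 + [1] * 10)       # 1 = anomaly, matching the dataset labels
metrics = kmeans(X, y, n_clusters=2)    # same call shape as get_anomaly_detection_algorithm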
diff --git a/anomaly_detection/local_outlier_factor.py b/anomaly_detection/local_outlier_factor.py
new file mode 100644
index 0000000000000000000000000000000000000000..36caa7022fafb9f826a7e3200d0b637fb9cf7679
--- /dev/null
+++ b/anomaly_detection/local_outlier_factor.py
@@ -0,0 +1,10 @@
+from sklearn.neighbors import LocalOutlierFactor
+from Ghypeddings.anomaly_detection.utils import calculate_metrics
+import numpy as np
+
+def local_outlier_factor(X,y,n_neighbors=20,outlier_percentage=.1):
+    lof = LocalOutlierFactor(n_neighbors=n_neighbors, contamination=outlier_percentage)
+    y_pred = lof.fit_predict(X)
+    y_pred[y_pred == 1] = 0
+    y_pred[y_pred == -1] = 1
+    return calculate_metrics(y,y_pred)
\ No newline at end of file
diff --git a/classifiers/random_forest.py b/classifiers/random_forest.py
index 1e044f3bdee8aab4377cf55fb9a8b050c03e6caf..24c10c46fe8c1c0d507f0cf621e90c0f642481ae 100644
--- a/classifiers/random_forest.py
+++ b/classifiers/random_forest.py
@@ -1,5 +1,5 @@
 from sklearn.ensemble import RandomForestClassifier
 
-def random_forest(X,y,seed,n_estimators=2,max_depth=2,max_features=None):
+def random_forest(X,y,seed,n_estimators=10,max_depth=10,max_features='log2'):
     clf = RandomForestClassifier(max_features=max_features,n_estimators=n_estimators, max_depth=max_depth, random_state=seed)
     return clf.fit(X, y)
\ No newline at end of file
diff --git a/clusterers/kmeans.py b/clusterers/kmeans.py
index 59605e7ef034d6ca970e073582ee11f6b5aebbef..848fef469ae29d55cdaf097e4a8df8057f89e2d4 100644
--- a/clusterers/kmeans.py
+++ b/clusterers/kmeans.py
@@ -3,7 +3,7 @@ from Ghypeddings.clusterers.utils import calculate_metrics
 from sklearn.cluster import KMeans
 
 
-def kmeans(X,y,n_clusters=5,n_init=10):
+def kmeans(X,y,n_clusters=2,n_init=10):
     model = KMeans(n_clusters=n_clusters,n_init=n_init)
     model.fit(X)
     y_pred = model.labels_
diff --git a/datasets/.gitignore b/datasets/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..d22b9a22608a3bacbc730c3ad7c080f98c9ead54
--- /dev/null
+++ b/datasets/.gitignore
@@ -0,0 +1,3 @@
+outlier_datasets.py
+
+repetition_datasets.py
\ No newline at end of file
diff --git a/datasets/datasets.py b/datasets/datasets.py
index 857e3a9ce7152c9156c92f0e998eed315b2d9051..db7cd2c8d4b13fe88533e9cfb6219fd5afbcc2c1 100644
--- a/datasets/datasets.py
+++ b/datasets/datasets.py
@@ -111,7 +111,7 @@ class NetFlowDataset(Dataset):
         self.file = file
 
     def build(self,n_nodes,n_classes=2):
-        df = pd.read_csv(self.file)  
+        df = pd.read_csv(self.file)
         df = df.groupby(['Label']).apply(lambda x: x.sample(int(n_nodes/n_classes))).reset_index(drop=True) 
         df = df.sample(frac=1).reset_index(drop=True)
         adj = self._filling_adjacency_numpy(df)
@@ -152,7 +152,7 @@ class NetFlowDataset(Dataset):
         adjacency[mask] = True
         return adjacency
 
-class CIC_IDS2018(NetFlowDataset):
+class NF_CIC_IDS2018_v2(NetFlowDataset):
     def __init__(self):
         super().__init__(
             features_path=os.path.join(os.path.dirname(os.path.abspath(__file__)),'examples','CIC_IDS2018','features.pkl'),
@@ -161,7 +161,7 @@ class CIC_IDS2018(NetFlowDataset):
             file = os.path.join(os.path.dirname(os.path.abspath(__file__)),'examples','CIC_IDS2018','original','cic_ids2018.csv')
         )   
 
-class UNSW_NB15(NetFlowDataset):
+class NF_UNSW_NB15_v2(NetFlowDataset):
     def __init__(self):
         super().__init__(
             features_path=os.path.join(os.path.dirname(os.path.abspath(__file__)),'examples','UNSW_NB15','features.pkl'),
@@ -198,7 +198,6 @@ class Darknet(Dataset):
         df.drop(columns_to_exclude, axis=1, inplace=True)
         features = df.to_numpy()
         self.save_samples(adj,features,labels)
-        print('features:',features.shape)
         return adj,features,labels
     
     def _filling_adjacency_numpy(self,data,source_ip_index, destination_ip_index):
@@ -213,7 +212,7 @@ class Darknet(Dataset):
         adjacency[mask] = True
         return adjacency
 
-class BOT_IoT(NetFlowDataset):
+class NF_BOT_IoT_v2(NetFlowDataset):
     def __init__(self):
         super().__init__(
             features_path=os.path.join(os.path.dirname(os.path.abspath(__file__)),'examples','BOT_IOT','features.pkl'),
@@ -222,7 +221,7 @@ class BOT_IoT(NetFlowDataset):
             file = os.path.join(os.path.dirname(os.path.abspath(__file__)),'examples','BOT_IOT','original','bot_iot.csv')
         )
 
-class TON_IoT(NetFlowDataset):
+class NF_TON_IoT_v2(NetFlowDataset):
     def __init__(self):
         # directory=os.path.join(os.path.dirname(os.path.abspath(__file__)),'examples','TON_IOT','original'),
         super().__init__(
@@ -274,7 +273,7 @@ class AWID3(Dataset):
                 if(df[c].dtype == 'object' and c!='radiotap.dbm_antsignal'):
                     print(c,df[c].unique(),len(df[c].unique()))
         df.drop(columns=alone,axis=1,inplace=True)
-        df['radiotap.dbm_antsignal'] = df['radiotap.dbm_antsignal'].apply(self._config_signal)
+        df['radiotap.dbm_antsignal'] = df['radiotap.dbm_antsignal'].apply(self._config_signal) # It contains a list
         labels = df['Label_1'].to_numpy()
         adj = self._filling_adjacency_numpy(data)
         df.drop(columns=['frame.time_delta','Label_1'],axis=1,inplace=True)
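# Illustrative sketch of the transform referenced in the comment above: the
# 'radiotap.dbm_antsignal' field packs several dBm readings into one '-'-separated
# string, and _config_signal averages them as negative floats (input value is made up).
import numpy as np

def _config_signal(x):
    words = str(x).split('-')
    return np.mean([float(i) * -1 for i in words if i != ''])

print(_config_signal('-44-46-45'))   # -45.0, the mean of -44, -46 and -45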
diff --git a/datasets/examples/AWID3/adjacency.pkl b/datasets/examples/AWID3/adjacency.pkl
deleted file mode 100644
index a5c68577c90860acf85655f1d89e74cf3d462728..0000000000000000000000000000000000000000
Binary files a/datasets/examples/AWID3/adjacency.pkl and /dev/null differ
diff --git a/datasets/examples/AWID3/features.pkl b/datasets/examples/AWID3/features.pkl
deleted file mode 100644
index ca50813bf5fa740f9601769af166bf8e64173b35..0000000000000000000000000000000000000000
Binary files a/datasets/examples/AWID3/features.pkl and /dev/null differ
diff --git a/datasets/examples/AWID3/labels.pkl b/datasets/examples/AWID3/labels.pkl
deleted file mode 100644
index 19c62897db47f98b6a9ecb913a78eaf9837ffad6..0000000000000000000000000000000000000000
Binary files a/datasets/examples/AWID3/labels.pkl and /dev/null differ
diff --git a/datasets/examples/BOT_IOT/adjacency.pkl b/datasets/examples/BOT_IOT/adjacency.pkl
deleted file mode 100644
index 41d152eba638d00fe1910f6f08c6c2002c422564..0000000000000000000000000000000000000000
Binary files a/datasets/examples/BOT_IOT/adjacency.pkl and /dev/null differ
diff --git a/datasets/examples/BOT_IOT/features.pkl b/datasets/examples/BOT_IOT/features.pkl
deleted file mode 100644
index b7150b31c3ca033c31801862ab1180ce3eb9ffac..0000000000000000000000000000000000000000
Binary files a/datasets/examples/BOT_IOT/features.pkl and /dev/null differ
diff --git a/datasets/examples/BOT_IOT/labels.pkl b/datasets/examples/BOT_IOT/labels.pkl
deleted file mode 100644
index fc0994e3acb87f0c08963108b62dac185d310024..0000000000000000000000000000000000000000
Binary files a/datasets/examples/BOT_IOT/labels.pkl and /dev/null differ
diff --git a/datasets/examples/CICDDoS2019/adjacency.pkl b/datasets/examples/CICDDoS2019/adjacency.pkl
deleted file mode 100644
index fb90ecc0ac2651f2ebe51c6e08200173db2fc3fa..0000000000000000000000000000000000000000
Binary files a/datasets/examples/CICDDoS2019/adjacency.pkl and /dev/null differ
diff --git a/datasets/examples/CICDDoS2019/features.pkl b/datasets/examples/CICDDoS2019/features.pkl
deleted file mode 100644
index 62bec0a41cfa78ae0522fde10b22aa7a96b80dbc..0000000000000000000000000000000000000000
Binary files a/datasets/examples/CICDDoS2019/features.pkl and /dev/null differ
diff --git a/datasets/examples/CICDDoS2019/labels.pkl b/datasets/examples/CICDDoS2019/labels.pkl
deleted file mode 100644
index e544bb17247e5ba8c6f12b5e975b3f302e815ed0..0000000000000000000000000000000000000000
Binary files a/datasets/examples/CICDDoS2019/labels.pkl and /dev/null differ
diff --git a/datasets/examples/CIC_IDS2018/adjacency.pkl b/datasets/examples/CIC_IDS2018/adjacency.pkl
deleted file mode 100644
index a5b645030e7ddde2597ae85a9f40b0ae28b944b9..0000000000000000000000000000000000000000
Binary files a/datasets/examples/CIC_IDS2018/adjacency.pkl and /dev/null differ
diff --git a/datasets/examples/CIC_IDS2018/features.pkl b/datasets/examples/CIC_IDS2018/features.pkl
deleted file mode 100644
index f38c60cce71eb09a0c26b63fc2be001d3bee7b5f..0000000000000000000000000000000000000000
Binary files a/datasets/examples/CIC_IDS2018/features.pkl and /dev/null differ
diff --git a/datasets/examples/CIC_IDS2018/labels.pkl b/datasets/examples/CIC_IDS2018/labels.pkl
deleted file mode 100644
index f522aee22d8ab127fd585443b0036b43d1b67ce6..0000000000000000000000000000000000000000
Binary files a/datasets/examples/CIC_IDS2018/labels.pkl and /dev/null differ
diff --git a/datasets/examples/Darknet/adjacency.pkl b/datasets/examples/Darknet/adjacency.pkl
deleted file mode 100644
index 17e3b4d1cb240f0f5d6e5a7cc0eaf0236d43c0da..0000000000000000000000000000000000000000
Binary files a/datasets/examples/Darknet/adjacency.pkl and /dev/null differ
diff --git a/datasets/examples/Darknet/features.pkl b/datasets/examples/Darknet/features.pkl
deleted file mode 100644
index e9b20fe5f12fed056ee107dbeb727de4fd38045a..0000000000000000000000000000000000000000
Binary files a/datasets/examples/Darknet/features.pkl and /dev/null differ
diff --git a/datasets/examples/Darknet/labels.pkl b/datasets/examples/Darknet/labels.pkl
deleted file mode 100644
index 66930070c334ac355b0d98a994ca4780fac9bfad..0000000000000000000000000000000000000000
Binary files a/datasets/examples/Darknet/labels.pkl and /dev/null differ
diff --git a/datasets/examples/TON_IOT/adjacency.pkl b/datasets/examples/TON_IOT/adjacency.pkl
deleted file mode 100644
index af0f05afb3ab0644b3a1f90e1ded51ec3a5d2299..0000000000000000000000000000000000000000
Binary files a/datasets/examples/TON_IOT/adjacency.pkl and /dev/null differ
diff --git a/datasets/examples/TON_IOT/features.pkl b/datasets/examples/TON_IOT/features.pkl
deleted file mode 100644
index 5da2741d9aa45cd0e33148deb84e5a594a943eb1..0000000000000000000000000000000000000000
Binary files a/datasets/examples/TON_IOT/features.pkl and /dev/null differ
diff --git a/datasets/examples/TON_IOT/labels.pkl b/datasets/examples/TON_IOT/labels.pkl
deleted file mode 100644
index e25710708f915cd06907902fce4dc9bd80c4937d..0000000000000000000000000000000000000000
Binary files a/datasets/examples/TON_IOT/labels.pkl and /dev/null differ
diff --git a/datasets/examples/UNSW_NB15/adjacency.pkl b/datasets/examples/UNSW_NB15/adjacency.pkl
deleted file mode 100644
index f28432bfb630f092a824a2001611062582babdba..0000000000000000000000000000000000000000
Binary files a/datasets/examples/UNSW_NB15/adjacency.pkl and /dev/null differ
diff --git a/datasets/examples/UNSW_NB15/features.pkl b/datasets/examples/UNSW_NB15/features.pkl
deleted file mode 100644
index 7322f3a5730b2a77f0df3c0c5b47b0d8d65e62c4..0000000000000000000000000000000000000000
Binary files a/datasets/examples/UNSW_NB15/features.pkl and /dev/null differ
diff --git a/datasets/examples/UNSW_NB15/labels.pkl b/datasets/examples/UNSW_NB15/labels.pkl
deleted file mode 100644
index de373d5412775e6f7d186578fa1790b46751e1f1..0000000000000000000000000000000000000000
Binary files a/datasets/examples/UNSW_NB15/labels.pkl and /dev/null differ
diff --git a/datasets/examples/UNSW_NB15/original/.gitignore b/datasets/examples/UNSW_NB15/original/.gitignore
deleted file mode 100644
index f59ec20aabf5842d237244ece8c81ab184faeac1..0000000000000000000000000000000000000000
--- a/datasets/examples/UNSW_NB15/original/.gitignore
+++ /dev/null
@@ -1 +0,0 @@
-*
\ No newline at end of file
diff --git a/datasets/outlier_datasets.py b/datasets/outlier_datasets.py
new file mode 100644
index 0000000000000000000000000000000000000000..10c13045e61c03151708b4fd59779543b47047dd
--- /dev/null
+++ b/datasets/outlier_datasets.py
@@ -0,0 +1,314 @@
+import os
+
+import pandas as pd
+import numpy as np
+from sklearn.preprocessing import MinMaxScaler
+from sklearn.preprocessing import StandardScaler
+import pickle
+from sklearn.preprocessing import LabelEncoder
+import time
+import datetime
+import category_encoders as ce
+
+class Dataset:
+    def __init__(self,features_path='',adj_path='',labels_path='',directory=''):
+        self.features_path = features_path
+        self.adj_path = adj_path
+        self.labels_path = labels_path
+        self.directory = directory
+
+    def _get_files(self):
+        return [os.path.join(self.directory,file) for file in os.listdir(self.directory) if os.path.isfile(os.path.join(self.directory, file)) and '.gitignore' not in file]
+
+    def save_samples(self,adj,features,labels):
+
+        with open(self.adj_path,'wb') as f:
+            pickle.dump(adj,f)
+        with open(self.features_path,'wb') as f:
+            pickle.dump(features,f)
+        with open(self.labels_path,'wb') as f:
+            pickle.dump(labels,f)
+
+    def load_samples(self):
+        with open(self.adj_path,'rb') as f:
+            adj = pickle.load(f)
+        with open(self.features_path,'rb') as f:
+            features = pickle.load(f)
+        with open(self.labels_path,'rb') as f:
+            labels = pickle.load(f)
+        print('features:',features.shape,'adj',adj.shape,'labels',labels.shape)
+        return adj,features,labels
+
+class CIC_DDoS2019(Dataset):
+    def __init__(self):
+        super().__init__(
+            features_path=os.path.join(os.path.dirname(os.path.abspath(__file__)),'examples', 'outlier','CICDDoS2019','features.pkl'),
+            adj_path=os.path.join(os.path.dirname(os.path.abspath(__file__)),'examples', 'outlier','CICDDoS2019','adjacency.pkl'),
+            labels_path= os.path.join(os.path.dirname(os.path.abspath(__file__)),'examples', 'outlier','CICDDoS2019','labels.pkl'),
+            directory=os.path.join(os.path.dirname(os.path.abspath(__file__)),'examples','CICDDoS2019','original')
+        )
+
+    def build(self,n_nodes,n_classes=2):
+        df = self._create_file_bc(n_nodes,n_classes)
+        for column in df.columns:
+            max_value = df.loc[df[column] != np.inf, column].max()
+            min_value = df.loc[df[column] != -np.inf, column].min()
+            df.loc[df[column] == np.inf, column] = max_value
+            df.loc[df[column] == -np.inf, column] = min_value
+        adj = self._filling_adjacency_numpy(df)
+        labels = df[' Label'].apply(lambda x: 0 if x == 'BENIGN' else 1).to_numpy()
+        columns_to_exclude = ['Unnamed: 0', 'Flow ID', ' Source IP',' Source Port',' Destination Port',' Flow Duration',' Protocol', ' Destination IP', ' Timestamp', 'SimillarHTTP',' Inbound',' Label']
+        df.drop(columns_to_exclude, axis=1, inplace=True)
+        features = df.to_numpy()
+        scaler = MinMaxScaler()
+        features = scaler.fit_transform(features)
+        self.save_samples(adj,features,labels)
+        return adj, features, labels
+    
+    def _load_file(self,path,max_per_class,list_classes=[]):
+        df = pd.read_csv(path,low_memory=False)
+        df.dropna(axis=0, inplace=True)
+        normal_df = df[df[' Label'] == 'BENIGN']
+        if(len(list_classes)):
+            df = df[df[' Label'].isin(list_classes)]
+            df = df.groupby([' Label']).apply(lambda x: x.sample(max_per_class)).reset_index(drop=True)
+        return df , normal_df
+        
+    def _create_file_bc(self,n_nodes,n_classes):
+        outlier_percentage = .1
+        file_paths = self._get_files()
+        max_per_class = int(n_nodes * outlier_percentage /  len(file_paths)) +1
+        df_list = []
+        benign_df = pd.DataFrame([])
+        for path in file_paths:
+            class_name = os.path.splitext(os.path.basename(path))[0]
+            tmp = self._load_file(path,max_per_class,[class_name])
+            df_list.append(tmp[0])
+            benign_df = pd.concat([benign_df,tmp[1]],ignore_index=True)
+            print('finishing loading the file : {}'.format(path))
+        df = pd.concat(df_list,ignore_index=True)
+        print(df.shape)
+        print(benign_df.shape)
+        benign_df = benign_df.sample(n=int(n_nodes * (1-outlier_percentage))).reset_index(drop=True)
+        print(benign_df.shape)
+        df = pd.concat([benign_df,df],ignore_index=True)
+        print(df.shape)
+        df = df.sample(n=n_nodes).reset_index(drop=True)
+        print(df.shape)
+        # print(df[' Label'].value_counts())
+        # df = pd.read_csv(os.path.join(self.directory,'all.csv'),low_memory=False)
+        # df[' Label'] = df[' Label'].apply(lambda x: 0 if x == 'BENIGN' else 1)
+        # node_per_class = int(n_nodes/n_classes)
+        # df = df.groupby([' Label']).apply(lambda x: x.sample(node_per_class)).reset_index(drop=True)
+        return df
+
+    def _filling_adjacency_numpy(self,data):
+        N = data.shape[0]
+        try:
+            adjacency = np.zeros((N,N), dtype=bool)
+        except Exception as e:
+            print(f"An error occurred: {e}")
+
+        source_ips = data[' Source IP'].to_numpy()
+        destination_ips = data[' Destination IP'].to_numpy()
+        mask = ((source_ips[:, np.newaxis] == source_ips) | (source_ips[:, np.newaxis] == destination_ips) | (destination_ips[:, np.newaxis] == source_ips)| (destination_ips[:, np.newaxis] == destination_ips) )
+        adjacency[mask] = True
+        return adjacency
+
+class NetFlowDataset(Dataset):
+    def __init__(self,features_path,adj_path,labels_path,file):
+        super().__init__(features_path,adj_path,labels_path)
+        self.file = file
+
+    def build(self,n_nodes):
+        outlier_percentage = .1
+        df = pd.read_csv(self.file)
+        df = df.groupby(['Label']).apply(lambda x: x.sample(int(n_nodes * (1-outlier_percentage))) if pd.unique(x['Label'])[0] == 0 else x.sample(int(n_nodes * outlier_percentage))).reset_index(drop=True) 
+        df = df.sample(frac=1).reset_index(drop=True)
+        print(df['Label'].value_counts())
+        adj = self._filling_adjacency_numpy(df)
+        labels = df['Label'].to_numpy()
+        labels = labels.astype(np.bool_)
+        df.drop(['IPV4_SRC_ADDR','IPV4_DST_ADDR','Attack','Label','L4_SRC_PORT','L4_DST_PORT'],axis=1,inplace=True)
+        #df = pd.get_dummies(df,columns=['PROTOCOL','DNS_QUERY_TYPE','FTP_COMMAND_RET_CODE'])
+
+        encoder = ce.TargetEncoder(cols=['TCP_FLAGS','L7_PROTO','PROTOCOL'])
+        encoder.fit(df,labels)
+        df = encoder.transform(df)
+ 
+        features = df.to_numpy()
+        scaler = MinMaxScaler()
+        features = scaler.fit_transform(features)
+        print("features:",features.shape)
+        self.save_samples(adj,features,labels)
+        return adj,features,labels
+
+    def _filling_adjacency_numpy(self,data):
+        N = data.shape[0]
+        try:
+            adjacency = np.zeros((N,N), dtype=bool)
+        except Exception as e:
+            print(f"An error occurred: {e}")
+
+        if 'bot_iot' in self.file:
+            data['IPV4_SRC_ADDR'] = data['IPV4_SRC_ADDR'].apply(str)
+            data['IPV4_DST_ADDR'] = data['IPV4_DST_ADDR'].apply(str)
+            data['L4_SRC_PORT'] = data['L4_SRC_PORT'].apply(str)
+            data['L4_DST_PORT'] = data['L4_DST_PORT'].apply(str)
+            data['IPV4_SRC_ADDR'] = data['IPV4_SRC_ADDR']+':'+data['L4_SRC_PORT']
+            data['IPV4_DST_ADDR'] = data['IPV4_DST_ADDR']+':'+data['L4_DST_PORT']
+
+        source_ips = data['IPV4_SRC_ADDR'].to_numpy()
+        destination_ips = data['IPV4_DST_ADDR'].to_numpy()
+        mask = ((source_ips[:, np.newaxis] == source_ips) | (source_ips[:, np.newaxis] == destination_ips) | (destination_ips[:, np.newaxis] == source_ips) | (destination_ips[:, np.newaxis] == destination_ips))
+        adjacency[mask] = True
+        return adjacency
+
+class NF_CIC_IDS2018_v2(NetFlowDataset):
+    def __init__(self):
+        super().__init__(
+            features_path=os.path.join(os.path.dirname(os.path.abspath(__file__)),'examples', 'outlier','CIC_IDS2018','features.pkl'),
+            adj_path=os.path.join(os.path.dirname(os.path.abspath(__file__)),'examples', 'outlier','CIC_IDS2018','adjacency.pkl'),
+            labels_path= os.path.join(os.path.dirname(os.path.abspath(__file__)),'examples', 'outlier','CIC_IDS2018','labels.pkl'),
+            file = os.path.join(os.path.dirname(os.path.abspath(__file__)),'examples','CIC_IDS2018','original','cic_ids2018.csv')
+        )   
+
+class NF_UNSW_NB15_v2(NetFlowDataset):
+    def __init__(self):
+        super().__init__(
+            features_path=os.path.join(os.path.dirname(os.path.abspath(__file__)),'examples', 'outlier','UNSW_NB15','features.pkl'),
+            adj_path=os.path.join(os.path.dirname(os.path.abspath(__file__)),'examples', 'outlier','UNSW_NB15','adjacency.pkl'),
+            labels_path= os.path.join(os.path.dirname(os.path.abspath(__file__)),'examples', 'outlier','UNSW_NB15','labels.pkl'),
+            file = os.path.join(os.path.dirname(os.path.abspath(__file__)),'examples', 'UNSW_NB15','original','unsw_nb15.csv')
+        )
+
+class Darknet(Dataset):
+    def __init__(self):
+        super().__init__(
+            features_path=os.path.join(os.path.dirname(os.path.abspath(__file__)),'examples', 'outlier','Darknet','features.pkl'),
+            adj_path=os.path.join(os.path.dirname(os.path.abspath(__file__)),'examples', 'outlier','Darknet','adjacency.pkl'),
+            labels_path= os.path.join(os.path.dirname(os.path.abspath(__file__)),'examples', 'outlier','Darknet','labels.pkl')
+        )
+        self.file = os.path.join(os.path.dirname(os.path.abspath(__file__)),'examples','Darknet','original','Darknet.csv')
+
+    def _to_binary_classification(self,x):
+        if 'Non' in x:
+            return 0
+        else:
+            return 1
+
+    def build(self,n_nodes,n_classes=2):
+        df = pd.read_csv(self.file)
+        df.dropna(axis=0, inplace=True)
+        df['Label'] = df['Label'].apply(self._to_binary_classification)
+        df = df.groupby(['Label']).apply(lambda x: x.sample(int(n_nodes/n_classes))).reset_index(drop=True)
+        df = df.sample(n=n_nodes).reset_index(drop=True)
+        data = df.to_numpy()
+        adj = self._filling_adjacency_numpy(data,1,3)
+        labels = df['Label'].to_numpy()
+        columns_to_exclude = ['Flow ID', 'Src IP','Src Port', 'Dst IP','Dst Port', 'Timestamp','Label','Label.1','Protocol','Flow Duration']
+        df.drop(columns_to_exclude, axis=1, inplace=True)
+        features = df.to_numpy()
+        self.save_samples(adj,features,labels)
+        print('features:',features.shape)
+        return adj,features,labels
+    
+    def _filling_adjacency_numpy(self,data,source_ip_index, destination_ip_index):
+        N = data.shape[0]
+        try:
+            adjacency = np.zeros((N,N), dtype=bool)
+        except Exception as e:
+            print(f"An error occurred: {e}")
+        source_ips = data[:, source_ip_index]
+        destination_ips = data[:, destination_ip_index]
+        mask = ((source_ips[:, np.newaxis] == source_ips) | (source_ips[:, np.newaxis] == destination_ips) | (destination_ips[:, np.newaxis] == source_ips) | (destination_ips[:, np.newaxis] == destination_ips))
+        adjacency[mask] = True
+        return adjacency
+
+class NF_BOT_IoT_v2(NetFlowDataset):
+    def __init__(self):
+        super().__init__(
+            features_path=os.path.join(os.path.dirname(os.path.abspath(__file__)),'examples', 'outlier','BOT_IOT','features.pkl'),
+            adj_path=os.path.join(os.path.dirname(os.path.abspath(__file__)),'examples', 'outlier','BOT_IOT','adjacency.pkl'),
+            labels_path= os.path.join(os.path.dirname(os.path.abspath(__file__)),'examples', 'outlier','BOT_IOT','labels.pkl'),
+            file = os.path.join(os.path.dirname(os.path.abspath(__file__)),'examples','BOT_IOT','original','bot_iot.csv')
+        )
+
+class NF_TON_IoT_v2(NetFlowDataset):
+    def __init__(self):
+        # directory=os.path.join(os.path.dirname(os.path.abspath(__file__)),'examples', 'outlier','TON_IOT','original'),
+        super().__init__(
+            features_path=os.path.join(os.path.dirname(os.path.abspath(__file__)),'examples', 'outlier','TON_IOT','features.pkl'),
+            adj_path=os.path.join(os.path.dirname(os.path.abspath(__file__)),'examples', 'outlier','TON_IOT','adjacency.pkl'),
+            labels_path= os.path.join(os.path.dirname(os.path.abspath(__file__)),'examples', 'outlier','TON_IOT','labels.pkl'),
+            file = os.path.join(os.path.dirname(os.path.abspath(__file__)),'examples','TON_IOT','original','ton_iot.csv')
+        )
+
+class AWID3(Dataset):
+    def __init__(self):
+        super().__init__(
+            features_path=os.path.join(os.path.dirname(os.path.abspath(__file__)),'examples', 'outlier','AWID3','features.pkl'),
+            adj_path=os.path.join(os.path.dirname(os.path.abspath(__file__)),'examples', 'outlier','AWID3','adjacency.pkl'),
+            labels_path= os.path.join(os.path.dirname(os.path.abspath(__file__)),'examples', 'outlier','AWID3','labels.pkl'),
+            directory=os.path.join(os.path.dirname(os.path.abspath(__file__)),'examples', 'AWID3','original')
+        )
+
+    def _config_signal(self,x):
+        words = str(x).split('-')
+        return np.mean([float(i)*-1 for i in words if i!=''])
+    
+    def build(self,n_nodes):
+        outlier_percentage = .1
+        path = os.path.join(os.getcwd(),'Ghypeddings','datasets','examples','AWID3','original','awid3.csv')
+        df = pd.read_csv(path)
+        df['Label'] = df['Label'].apply(lambda x: 0 if 'Normal' in x else 1)
+        df = df.groupby(['Label']).apply(lambda x: x.sample(int(n_nodes*(1-outlier_percentage))) if pd.unique(x['Label'])[0] == 0 else  x.sample(int(n_nodes*outlier_percentage)) ).reset_index(drop=True)
+        print(df['Label'].value_counts())
+        df = df.sample(frac=1).reset_index(drop=True)
+        data=df[['ip.src','ip.dst']]
+        df.dropna(axis=1, inplace=True)
+        to_drop = ['frame.number','frame.time','radiotap.timestamp.ts','frame.time_delta_displayed','frame.time_epoch','frame.time_relative','wlan.duration','wlan.ra']
+        df.drop(columns=to_drop,axis=1,inplace=True)
+        alone = []
+        for c in df.columns:
+            if(len(df[c].unique()) == 1):
+                alone.append(c)
+            elif len(df[c].unique()) == 2:
+                df = pd.get_dummies(df,columns=[c],drop_first=True)
+            elif len(df[c].unique()) <=8:
+                df = pd.get_dummies(df,columns=[c])
+            elif len(df[c].unique()) <=15:
+                labels = df['Label']
+                df.drop(columns=['Label'],axis=1,inplace=True)
+                encoder = ce.TargetEncoder(cols=[c])
+                encoder.fit(df,labels)
+                df = encoder.transform(df)
+                df['Label']=labels
+            else:
+                if(df[c].dtype == 'object' and c!='radiotap.dbm_antsignal'):
+                    print(c,df[c].unique(),len(df[c].unique()))
+        df.drop(columns=alone,axis=1,inplace=True)
+        df['radiotap.dbm_antsignal'] = df['radiotap.dbm_antsignal'].apply(self._config_signal)
+        labels = df['Label_1'].to_numpy()
+        adj = self._filling_adjacency_numpy(data)
+        df.drop(columns=['frame.time_delta','Label_1'],axis=1,inplace=True)
+        features = df.to_numpy()
+        scaler = StandardScaler()
+        features = scaler.fit_transform(features)
+        # scaler = MinMaxScaler()
+        # features = scaler.fit_transform(features)
+        self.save_samples(adj=adj,features=features,labels=labels)
+        return adj,features,labels
+    
+    def _filling_adjacency_numpy(self,data):
+        N = data.shape[0]
+        try:
+            adjacency = np.zeros((N,N), dtype=bool)
+        except Exception as e:
+            print(f"An error occurred: {e}")
+        source_ips = data['ip.src'].to_numpy()
+        destination_ips = data['ip.dst'].to_numpy()
+        mask = ((source_ips[:, np.newaxis] == source_ips) | (source_ips[:, np.newaxis] == destination_ips) | (destination_ips[:, np.newaxis] == source_ips) | (destination_ips[:, np.newaxis] == destination_ips) )
+        adjacency[mask] = True
+        np.fill_diagonal(adjacency, True)
+        return adjacency
\ No newline at end of file
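# Illustrative sketch of the adjacency construction used by these dataset builders:
# two flows are connected whenever they share a source or destination address, and the
# pairwise comparison is done with NumPy broadcasting rather than a double loop.
# The addresses below are made up for the example.
import numpy as np

src = np.array(['10.0.0.1', '10.0.0.2', '10.0.0.3'])
dst = np.array(['10.0.0.9', '10.0.0.1', '10.0.0.9'])
adjacency = np.zeros((3, 3), dtype=bool)
mask = ((src[:, np.newaxis] == src) | (src[:, np.newaxis] == dst) |
        (dst[:, np.newaxis] == src) | (dst[:, np.newaxis] == dst))
adjacency[mask] = True
# Rows 0 and 1 are linked (10.0.0.1 appears in both), as are rows 0 and 2 (shared 10.0.0.9).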
diff --git a/datasets/test_dataset.py b/datasets/repetition_datasets.py
similarity index 90%
rename from datasets/test_dataset.py
rename to datasets/repetition_datasets.py
index 97c72e89efdca8e20b5d0f25696146b88096ced5..14b125651b8695dbe55af39c24aa011ca5ec4cf6 100644
--- a/datasets/test_dataset.py
+++ b/datasets/repetition_datasets.py
@@ -8,7 +8,6 @@ import pickle
 from sklearn.preprocessing import LabelEncoder
 import time
 import datetime
-import progressbar
 import category_encoders as ce
 
 class Dataset:
@@ -21,10 +20,15 @@ class Dataset:
     def _get_files(self):
         return [os.path.join(self.directory,file) for file in os.listdir(self.directory) if os.path.isfile(os.path.join(self.directory, file)) and '.gitignore' not in file]
 
-    def save_samples(self,adj,features,labels,dim):
-        features_path=os.path.join(os.path.dirname(os.path.abspath(__file__)),'examples','CICDDoS2019',f'features_{dim}.pkl')
-        adj_path=os.path.join(os.path.dirname(os.path.abspath(__file__)),'examples','CICDDoS2019',f'adjacency_{dim}.pkl')
-        labels_path= os.path.join(os.path.dirname(os.path.abspath(__file__)),'examples','CICDDoS2019',f'labels_{dim}.pkl')
+    def save_samples(self,adj,features,labels,repetition):
+        features_path=os.path.join(os.path.dirname(os.path.abspath(__file__)),'examples','CICDDoS2019',f'features_{repetition}.pkl')
+        adj_path=os.path.join(os.path.dirname(os.path.abspath(__file__)),'examples','CICDDoS2019',f'adjacency_{repetition}.pkl')
+        labels_path= os.path.join(os.path.dirname(os.path.abspath(__file__)),'examples','CICDDoS2019',f'labels_{repetition}.pkl')
 
         with open(adj_path,'wb') as f:
             pickle.dump(adj,f)
@@ -33,10 +37,10 @@ class Dataset:
         with open(labels_path,'wb') as f:
             pickle.dump(labels,f)
 
-    def load_samples(self,dim):
-        features_path=os.path.join(os.path.dirname(os.path.abspath(__file__)),'examples','CICDDoS2019','features_{}.pkl'.format(dim))
-        adj_path=os.path.join(os.path.dirname(os.path.abspath(__file__)),'examples','CICDDoS2019','adjacency_{}.pkl'.format(dim))
-        labels_path= os.path.join(os.path.dirname(os.path.abspath(__file__)),'examples','CICDDoS2019','labels_{}.pkl'.format(dim))
+    def load_samples(self,repetition):
+        features_path=os.path.join(os.path.dirname(os.path.abspath(__file__)),'examples','TON_IOT',f'features_{repetition}.pkl')
+        adj_path=os.path.join(os.path.dirname(os.path.abspath(__file__)),'examples','TON_IOT',f'adjacency_{repetition}.pkl')
+        labels_path= os.path.join(os.path.dirname(os.path.abspath(__file__)),'examples','TON_IOT',f'labels_{repetition}.pkl')
         with open(adj_path,'rb') as f:
             adj = pickle.load(f)
         with open(features_path,'rb') as f:
@@ -52,7 +56,7 @@ class CIC_DDoS2019(Dataset):
             directory=os.path.join(os.path.dirname(os.path.abspath(__file__)),'examples','CICDDoS2019','original')
         )
 
-    def build(self,n_nodes,n_classes=2,dim=20):
+    def build(self,n_nodes,n_classes=2,repetition=1):
         df = self._create_file_bc(n_nodes,n_classes)
         for column in df.columns:
             max_value = df.loc[df[column] != np.inf, column].max()
@@ -66,7 +70,7 @@ class CIC_DDoS2019(Dataset):
         features = df.to_numpy()
         scaler = MinMaxScaler()
         features = scaler.fit_transform(features)
-        self.save_samples(adj,features,labels,dim)
+        self.save_samples(adj,features,labels,repetition)
         return adj, features, labels
     
     def _load_file(self,path,max_per_class,list_classes=[]):
@@ -114,8 +118,8 @@ class NetFlowDataset(Dataset):
         super().__init__(features_path,adj_path,labels_path)
         self.file = file
 
-    def build(self,n_nodes,n_classes=2):
-        df = pd.read_csv(self.file)  
+    def build(self,n_nodes,n_classes=2,repetition=1):
+        df = pd.read_csv(self.file)
         df = df.groupby(['Label']).apply(lambda x: x.sample(int(n_nodes/n_classes))).reset_index(drop=True) 
         df = df.sample(frac=1).reset_index(drop=True)
         adj = self._filling_adjacency_numpy(df)
@@ -132,7 +136,7 @@ class NetFlowDataset(Dataset):
         scaler = MinMaxScaler()
         features = scaler.fit_transform(features)
         print("features:",features.shape)
-        self.save_samples(adj,features,labels)
+        self.save_samples(adj,features,labels,repetition)
         return adj,features,labels
 
     def _filling_adjacency_numpy(self,data):
@@ -156,7 +160,7 @@ class NetFlowDataset(Dataset):
         adjacency[mask] = True
         return adjacency
 
-class CIC_IDS2018(NetFlowDataset):
+class NF_CIC_IDS2018_v2(NetFlowDataset):
     def __init__(self):
         super().__init__(
             features_path=os.path.join(os.path.dirname(os.path.abspath(__file__)),'examples','CIC_IDS2018','features.pkl'),
@@ -165,7 +169,7 @@ class CIC_IDS2018(NetFlowDataset):
             file = os.path.join(os.path.dirname(os.path.abspath(__file__)),'examples','CIC_IDS2018','original','cic_ids2018.csv')
         )   
 
-class UNSW_NB15(NetFlowDataset):
+class NF_UNSW_NB15_v2(NetFlowDataset):
     def __init__(self):
         super().__init__(
             features_path=os.path.join(os.path.dirname(os.path.abspath(__file__)),'examples','UNSW_NB15','features.pkl'),
@@ -189,7 +193,7 @@ class Darknet(Dataset):
         else:
             return 1
 
-    def build(self,n_nodes,n_classes=2):
+    def build(self,n_nodes,n_classes=2,repetition=1):
         df = pd.read_csv(self.file)
         df.dropna(axis=0, inplace=True)
         df['Label'] = df['Label'].apply(self._to_binary_classification)
@@ -201,7 +205,7 @@ class Darknet(Dataset):
         columns_to_exclude = ['Flow ID', 'Src IP','Src Port', 'Dst IP','Dst Port', 'Timestamp','Label','Label.1','Protocol','Flow Duration']
         df.drop(columns_to_exclude, axis=1, inplace=True)
         features = df.to_numpy()
-        self.save_samples(adj,features,labels)
+        self.save_samples(adj,features,labels,repetition)
         print('features:',features.shape)
         return adj,features,labels
     
@@ -217,7 +221,7 @@ class Darknet(Dataset):
         adjacency[mask] = True
         return adjacency
 
-class BOT_IoT(NetFlowDataset):
+class NF_BOT_IoT_v2(NetFlowDataset):
     def __init__(self):
         super().__init__(
             features_path=os.path.join(os.path.dirname(os.path.abspath(__file__)),'examples','BOT_IOT','features.pkl'),
@@ -226,7 +230,7 @@ class BOT_IoT(NetFlowDataset):
             file = os.path.join(os.path.dirname(os.path.abspath(__file__)),'examples','BOT_IOT','original','bot_iot.csv')
         )
 
-class TON_IoT(NetFlowDataset):
+class NF_TON_IoT_v2(NetFlowDataset):
     def __init__(self):
         # directory=os.path.join(os.path.dirname(os.path.abspath(__file__)),'examples','TON_IOT','original'),
         super().__init__(
@@ -249,7 +253,7 @@ class AWID3(Dataset):
         words = str(x).split('-')
         return np.mean([float(i)*-1 for i in words if i!=''])
     
-    def build(self,n_nodes):
+    def build(self,n_nodes,repetition=1):
         path = os.path.join(os.getcwd(),'Ghypeddings','datasets','examples','AWID3','original','awid3.csv')
         df = pd.read_csv(path)
         df['Label'] = df['Label'].apply(lambda x: 0 if 'Normal' in x else 1)
@@ -287,7 +291,7 @@ class AWID3(Dataset):
         features = scaler.fit_transform(features)
         # scaler = MinMaxScaler()
         # features = scaler.fit_transform(features)
-        self.save_samples(adj=adj,features=features,labels=labels)
+        self.save_samples(adj=adj,features=features,labels=labels,repetition=repetition)
         return adj,features,labels
     
     def _filling_adjacency_numpy(self,data):
diff --git a/datasets/utils.py b/datasets/utils.py
index cab0b352d0ee12f4fbb580e29583e2e676adfb81..a65a154f3cda8bc37e70344a727c1e8c690b6904 100644
--- a/datasets/utils.py
+++ b/datasets/utils.py
@@ -11,7 +11,7 @@ def hyperbolicity(adj, num_samples):
     curr_time = time.time()
     hyps = []
     G = nx.from_numpy_array(adj)
-    for i in tqdm(range(num_samples)):
+    for _ in tqdm(range(num_samples)):
         node_tuple = np.random.choice(G.nodes(), 4, replace=False)
         s = []
         try:
@@ -28,5 +28,5 @@ def hyperbolicity(adj, num_samples):
             hyps.append((s[-1] - s[-2]) / 2)
         except Exception as e:
             continue
-    print('Time for hyp: ', time.time() - curr_time)
+    print('Time for hyp: ', time.time() - curr_time, 'hyp:', max(hyps))
     return max(hyps)