Skip to content
Snippets Groups Projects
Commit 4d4c153c authored by Mohamed Lamine Messai's avatar Mohamed Lamine Messai
Browse files

Update 3 files

- /Flows-to-Graphs/migrate.ini
- /Flows-to-Graphs/migrate.py
- /Flows-to-Graphs/normalization_parameters.ini
parent bc2416dc
No related branches found
No related tags found
No related merge requests found
[DIRECTORIES]
# PATH of the original (not IGNNITION compatible) dataset
#original_dataset_path: ../preprocess_dataset/preprocessed_IDS2017/TRAIN
original_dataset_path: ./
# Output PATH of the migrated dataset
#output_path: ./data/train
output_path: ./data/test
"""
Copyright 2020 Universitat Politècnica de Catalunya
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import configparser
import csv
import glob
import json
import math
import os
import sys
import tarfile
from random import random

import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import tensorflow as tf
from networkx.readwrite import json_graph
# Module-level configuration, loaded once at import time.
#   params_norm -> ./normalization_parameters.ini (per-feature statistics)
#   params      -> ./migrate.ini                  (input/output directories)
# ExtendedInterpolation is requested through the public constructor argument
# instead of assigning the private ``_interpolation`` attribute afterwards.
# NOTE: ConfigParser.read() silently skips files it cannot open, and both
# paths are relative to the current working directory, so a missing file only
# shows up later as a KeyError on the first section lookup.
params_norm = configparser.ConfigParser(
    interpolation=configparser.ExtendedInterpolation())
params_norm.read('./normalization_parameters.ini')

params = configparser.ConfigParser(
    interpolation=configparser.ExtendedInterpolation())
params.read('./migrate.ini')
# --------------------------------------
# IDS 2017
# MAP THAT TELLS US, GIVEN A FEATURE, ITS POSITION (IDS 2017)
# Column names of one flow record, in file order; the position of a name in
# this list is the index used to read that column from a CSV row.
features = [
    'Flow ID', 'Source IP', 'Source Port', 'Destination IP', 'Destination Port',
    'Protocol', 'Timestamp', 'Flow Duration',
    'Total Fwd Packets', 'Total Backward Packets',
    'Total Length of Fwd Packets', 'Total Length of Bwd Packets',
    'Fwd Packet Length Max', 'Fwd Packet Length Min',
    'Fwd Packet Length Mean', 'Fwd Packet Length Std',
    'Bwd Packet Length Max', 'Bwd Packet Length Min',
    'Bwd Packet Length Mean', 'Bwd Packet Length Std',
    'Flow Bytes/s', 'Flow Packets/s',
    'Flow IAT Mean', 'Flow IAT Std', 'Flow IAT Max', 'Flow IAT Min',
    'Fwd IAT Total', 'Fwd IAT Mean', 'Fwd IAT Std', 'Fwd IAT Max', 'Fwd IAT Min',
    'Bwd IAT Total', 'Bwd IAT Mean', 'Bwd IAT Std', 'Bwd IAT Max', 'Bwd IAT Min',
    'Fwd PSH Flags', 'Bwd PSH Flags', 'Fwd URG Flags', 'Bwd URG Flags',
    'Fwd Header Length', 'Bwd Header Length',
    'Fwd Packets/s', 'Bwd Packets/s',
    'Min Packet Length', 'Max Packet Length',
    'Packet Length Mean', 'Packet Length Std', 'Packet Length Variance',
    'FIN Flag Count', 'SYN Flag Count', 'RST Flag Count', 'PSH Flag Count',
    'ACK Flag Count', 'URG Flag Count', 'CWE Flag Count', 'ECE Flag Count',
    'Down/Up Ratio', 'Average Packet Size',
    'Avg Fwd Segment Size', 'Avg Bwd Segment Size',
    'Fwd Avg Bytes/Bulk', 'Fwd Avg Packets/Bulk', 'Fwd Avg Bulk Rate',
    'Bwd Avg Bytes/Bulk', 'Bwd Avg Packets/Bulk', 'Bwd Avg Bulk Rate',
    'Subflow Fwd Packets', 'Subflow Fwd Bytes',
    'Subflow Bwd Packets', 'Subflow Bwd Bytes',
    'Init_Win_bytes_forward', 'Init_Win_bytes_backward',
    'act_data_pkt_fwd', 'min_seg_size_forward',
    'Active Mean', 'Active Std', 'Active Max', 'Active Min',
    'Idle Mean', 'Idle Std', 'Idle Max', 'Idle Min',
    'Label',
]
# feature name -> column index
features_dict = {name: position for position, name in enumerate(features)}

# ATTACKS IDS 2017
# Class names; the position of a name is its slot in the one-hot label vector.
attack_names = [
    'HTTP Get Flood',
    'ICMP Frag Flood',
    'TCP Flood',
    'UDP Flood',
    'Port Scanning',
    'Brute Force',
    'Normal',
]
# class name -> class index
attacks_dict = {name: position for position, name in enumerate(attack_names)}

# Features copied, in this order, into each connection node's feature vector.
# NOTE: 'Total Length of Fwd Packets' is listed twice, so the vector has 27
# entries while chosen_features_dict has 26 keys (the later index, 7, wins).
chosen_connection_features = [
    'Source Port', 'Destination Port', 'Protocol',
    'Bwd Packet Length Min', 'Subflow Fwd Packets',
    'Total Length of Fwd Packets', 'Fwd Packet Length Mean',
    'Total Length of Fwd Packets',
    'Fwd Packet Length Std', 'Fwd IAT Min', 'Flow IAT Min', 'Flow IAT Mean',
    'Bwd Packet Length Std',
    'Subflow Fwd Bytes', 'Flow Duration', 'Flow IAT Std',
    'Active Min', 'Active Mean', 'Bwd IAT Mean',
    'Subflow Bwd Bytes', 'Init_Win_bytes_forward', 'ACK Flag Count',
    'Fwd PSH Flags', 'SYN Flag Count',
    'Flow Packets/s', 'PSH Flag Count', 'Average Packet Size',
]
# chosen feature name -> position inside the connection feature vector
chosen_features_dict = {
    name: position for position, name in enumerate(chosen_connection_features)
}
# possible_protocols = {'6':[0.0,0.0,1.0],'17':[0.0,1.0,0.0], '0':[1.0,0.0,0.0],'':[0.0,0.0,0.0]}
# --------------------------------------
def normalization_function(feature, name):
    """Standardise one feature value with the statistics from ``[PARAMS]``.

    Computes ``(feature - mean) / std`` using the ``<name>_mean`` and
    ``<name>_std`` options of ``params_norm['PARAMS']``.

    The value is returned unchanged when any of the following holds:
      * ``name`` is not one of ``chosen_connection_features``;
      * the ``_mean`` or ``_std`` option is missing;
      * the mean is 0 (condition kept from the original implementation);
      * the std is 0 (would otherwise raise ZeroDivisionError).

    :param feature: numeric feature value.
    :param name: feature name, e.g. ``'Flow Duration'``.
    :return: the standardised value, or ``feature`` itself when skipped.
    """
    if name not in chosen_connection_features:
        return feature

    stats = params_norm['PARAMS']
    mean_key = name + '_mean'
    std_key = name + '_std'
    # Both statistics must be present; previously only the mean was checked
    # and a missing std surfaced as a KeyError.
    if mean_key not in stats or std_key not in stats:
        return feature

    mean = float(stats[mean_key])
    std = float(stats[std_key])
    if mean == 0 or std == 0:
        return feature
    return (feature - mean) / std
def transform_ips(ip):
    """Encode a dotted IPv4 string as a list of 12 floats, one per digit.

    Every octet is left-padded with zeros to three characters, the four
    octets are concatenated, and each character is converted to a float,
    e.g. ``'10.0.0.5'`` -> ``[0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5]`` (as floats).

    Anything that is not four dot-separated parts of at most three characters
    each, or that contains a character ``float()`` rejects, yields twelve
    zeros, so the result always has length 12.

    :param ip: address string.
    :return: list of 12 floats.
    """
    zeros = [0.0] * 12
    octets = ip.split('.')
    # Wrong number of parts or an over-long part cannot give a 12-digit code.
    if len(octets) != 4 or any(len(octet) > 3 for octet in octets):
        return zeros
    digits = ''.join(octet.rjust(3, '0') for octet in octets)
    try:
        return [float(ch) for ch in digits]
    except ValueError:
        # Non-numeric character somewhere in the address.
        return zeros
def get_feature(trace, feature_name, parse=True):
    """Read one feature from a flow record, optionally converting it.

    :param trace: one CSV row (sequence of strings) laid out as ``features``.
    :param feature_name: a key of ``features_dict``.
    :param parse: when False the raw cell is returned untouched.
    :return: with ``parse=True``:
        * ``'Label'``: ``attacks_dict.get(trace[-1])``, i.e. the class index,
          or None when the label text is not a known class;
        * names containing ``'ID'``: the raw cell;
        * names containing ``'IP'``: the digit vector from ``transform_ips``;
        * anything else: the cell as a float, or 0 when it cannot be
          converted or is NaN / +inf / -inf.
    :raises KeyError: if ``feature_name`` is not in ``features_dict``.
    """
    if parse and feature_name == 'Label':
        # The label is read from the last column, not via features_dict.
        return attacks_dict.get(trace[-1])

    raw = trace[features_dict[feature_name]]
    if not parse or 'ID' in feature_name:
        return raw
    if 'IP' in feature_name:
        return transform_ips(raw)

    # elif feature_name == 'Protocol':
    #     # Transform to a one-hot encoding
    #     return possible_protocols.get(feature)
    try:
        value = float(raw)
    except (TypeError, ValueError):
        return 0
    # ``value != float('nan')`` is always True, so the old test let NaN
    # through; isfinite rejects NaN and both infinities.
    return value if math.isfinite(value) else 0
# constructs a dictionary with all the chosen features of the ids 2017
def get_connection_features(trace, final_feature):
    """Build the attribute dictionary of a connection node.

    Each name in ``chosen_connection_features`` is read with ``get_feature``
    and passed through ``normalization_function``; the results keep the order
    of that list.

    :param trace: one flow record.
    :param final_feature: label value stored under ``'Label'``.
    :return: ``{'Label': final_feature, 'conect_feats': [...]}``.
    """
    normalised = [
        normalization_function(get_feature(trace, feat_name), feat_name)
        for feat_name in chosen_connection_features
    ]
    return {'Label': final_feature, 'conect_feats': normalised}
def traces_to_graph(traces):
    """Turn a window of flow records into one directed graph.

    Nodes:
      * one ``entity='ip'`` node per distinct raw source/destination IP
        string, with a placeholder ``ip_feats`` vector of 128 ones;
      * one ``entity='connection'`` node ``'con_<i>'`` per record, carrying
        the one-hot ``'Label'`` and the ``'conect_feats'`` vector.
    Edges: each connection node is linked in both directions with its
    destination IP node and its source IP node.

    A label that is not a key of ``attacks_dict`` produces an all-zero label
    vector. Previously the None returned for such labels was used as a NumPy
    index, which set every class to 1.

    :param traces: sequence of flow records (CSV rows).
    :return: ``networkx.DiGraph``.
    """
    G = nx.DiGraph()
    # G = nx.MultiDiGraph()
    # G = nx.MultiGraph()
    n_classes = len(attacks_dict)

    for i, trace in enumerate(traces):
        dst_ip = get_feature(trace, 'Destination IP', parse=False)
        src_ip = get_feature(trace, 'Source IP', parse=False)

        # For now the IP features are a placeholder list of 128 ones.
        for ip in (dst_ip, src_ip):
            if ip not in G:
                G.add_node(ip, entity='ip', ip_feats=np.ones(128).tolist())

        label_num = get_feature(trace, 'Label')
        final_label = np.zeros(n_classes)
        if label_num is not None:  # known class -> set its one-hot slot
            final_label[label_num] = 1

        connection_features = get_connection_features(trace, final_label.tolist())
        connection_features['entity'] = 'connection'

        con_id = 'con_' + str(i)
        G.add_node(con_id, **connection_features)

        # Link the connection with both endpoints, in both directions.
        G.add_edge(con_id, dst_ip)
        G.add_edge(con_id, src_ip)
        G.add_edge(dst_ip, con_id)
        G.add_edge(src_ip, con_id)
    return G
# This function must return the corresponding graphs
def generator(path, window_size=100, drop_last=True):
    """Yield one graph per window of flow records for every CSV under ``path``.

    Every ``*.csv`` directly inside ``path`` is read row by row. Rows with at
    most one column are skipped. Each group of ``window_size`` consecutive
    remaining rows of a file is converted with ``traces_to_graph`` and
    yielded; windows never span two files.

    A window is emitted as soon as it is full. Previously it was emitted only
    when the following row arrived, so the last complete window of a file
    whose row count is an exact multiple of the window size was lost.

    NOTE(review): no header row is skipped here; if the CSVs carry a header
    line it is treated as a flow record. Confirm against the dataset.

    :param path: directory containing the CSV files.
    :param window_size: number of records per graph (default 100).
    :param drop_last: when True (default) an incomplete window left at the
        end of a file is discarded; when False it is yielded as a smaller
        graph.
    :raises ValueError: if ``window_size`` is smaller than 1.
    """
    if window_size < 1:
        raise ValueError("window_size must be at least 1")

    for file in glob.glob(path + '/*.csv'):
        print("Processing file:", file)
        with open(file, encoding="utf8", errors='ignore') as csvfile:
            rows = csv.reader(csvfile, delimiter=',', quotechar='|')
            window = []
            for row in rows:
                if len(row) <= 1:
                    continue
                window.append(row)
                if len(window) == window_size:
                    yield traces_to_graph(window)
                    window = []
            if window and not drop_last:
                yield traces_to_graph(window)
def migrate_dataset(input_path, output_path, max_per_file=999999):
    """Convert every graph produced by ``generator`` to node-link JSON files.

    Graphs are serialised with ``json_graph.node_link_data`` and written to
    ``<output_path>/data_<k>.json`` with ``k`` counting up from 0.

    Changes from the previous implementation:
      * the generator is consumed with a ``for`` loop, so only exhaustion
        ends the run; errors raised while building or serialising a graph
        now propagate instead of being swallowed by a bare ``except`` and
        silently leaving a truncated dataset behind;
      * a file holds at most ``max_per_file`` graphs (it used to hold
        ``max_per_file + 1``);
      * an empty trailing file is no longer written after a file was filled
        exactly. ``data_0.json`` is still written (as ``[]``) when there are
        no graphs at all.

    :param input_path: directory with the source CSV files.
    :param output_path: existing directory that receives the JSON files.
    :param max_per_file: maximum graphs per JSON file; None means one file.
    """
    print("Starting to do the migration...")
    batch = []
    file_ctr = 0
    for G in generator(input_path):
        batch.append(json_graph.node_link_data(G))
        # nx.draw(G, with_labels = True) #test
        # plt.savefig("graph_" + str(counter) + "_" + str(file_ctr) + ".png") #test
        # plt.clf() #test
        if max_per_file is not None and len(batch) >= max_per_file:
            _dump_graph_batch(batch, output_path, file_ctr)
            batch = []
            file_ctr += 1

    # When finished, save all the remaining graphs.
    if batch or file_ctr == 0:
        _dump_graph_batch(batch, output_path, file_ctr)


def _dump_graph_batch(graphs, output_path, file_index):
    """Write ``graphs`` as JSON to ``data_<file_index>.json`` in ``output_path``."""
    target = os.path.join(output_path, 'data_' + str(file_index) + '.json')
    with open(target, 'w') as json_file:
        json.dump(graphs, json_file)
if __name__ == "__main__":
    # Both directories come from the [DIRECTORIES] section of migrate.ini and
    # are made absolute relative to the current working directory.
    input_path = os.path.abspath(params['DIRECTORIES']['original_dataset_path'])
    output_path = os.path.abspath(params['DIRECTORIES']['output_path'])

    # Create the output directory if necessary. exist_ok replaces the separate
    # existence check, which could race with another process creating it.
    os.makedirs(output_path, exist_ok=True)

    migrate_dataset(input_path, output_path)
[PARAMS]
Source Port_mean : 18167.63276
Destination Port_mean : 16094.79501
Protocol_mean : 0.447406838
Flow Duration_mean : 13610976.15
Total Fwd Packets_mean : 646.8312988
Total Backward Packets_mean : 955.8530285
Total Length of Fwd Packets_mean : 740210.5056
Total Length of Bwd Packets_mean : 1358409.19
Fwd Packet Length Max_mean : 252.4158673
Fwd Packet Length Min_mean : 67.52189701
Fwd Packet Length Mean_mean : 117.2691805
Fwd Packet Length Std_mean : 104.0511745
Bwd Packet Length Max_mean : 429.795201
Bwd Packet Length Min_mean : 70.07692911
Bwd Packet Length Mean_mean : 254.6167126
Bwd Packet Length Std_mean : 187.9319257
Flow Packets/s_mean : 17289.45029
Flow IAT Mean_mean : 2470068.371
Flow IAT Std_mean : 1028601.795
Flow IAT Max_mean : 5983190.561
Flow IAT Min_mean : 2430760.049
Fwd IAT Total_mean : 13313137.19
Fwd IAT Mean_mean : 692093.849
Fwd IAT Std_mean : 1244410.611
Fwd IAT Max_mean : 5561940.112
Fwd IAT Min_mean : 429052.5789
Bwd IAT Total_mean : 11865658.7
Bwd IAT Mean_mean : 660545.2231
Bwd IAT Std_mean : 1296381.833
Bwd IAT Max_mean : 4841263.865
Bwd IAT Min_mean : 340033.4224
Fwd Header Length_mean : 12935.77323
Bwd Header Length_mean : 19115.75058
Fwd Packets/s_mean : 9129.609587
Bwd Packets/s_mean : 8288.446874
Min Packet Length_mean : 31.64339014
Max Packet Length_mean : 435.3076046
Packet Length Mean_mean : 172.6083011
Packet Length Std_mean : 178.2846176
Packet Length Variance_mean : 111099.4291
FIN Flag Count_mean : 0.224771763
SYN Flag Count_mean : 0.499794728
RST Flag Count_mean : 0.123331882
PSH Flag Count_mean : 0.163616302
ACK Flag Count_mean : 0.499997159
Down/Up Ratio_mean : 1.331238751
Average Packet Size_mean : 186.2530563
Avg Fwd Segment Size_mean : 117.2691805
Avg Bwd Segment Size_mean : 254.6167126
Subflow Bwd Packets_mean : 646.8312988
Subflow Bwd Bytes_mean : 740210.5056
Init_Win_bytes_backward_mean : 29654.02749
act_data_pkt_fwd_mean : 507.7052312
Active Mean_mean : 2331317.526
Active Std_mean : 1571032.325
Active Max_mean : 3443968.906
Active Min_mean : 1897302.523
Idle Mean_mean : 4620657.243
Idle Std_mean : 2167540.689
Idle Max_mean : 5752134.989
Idle Min_mean : 3982483.244
Source Port_std : 44061.95506
Destination Port_std : 6209.519101
Protocol_std : 6.010393258
Flow Duration_std : 2897878.887
Total Fwd Packets_std : 45.6011236
Total Backward Packets_std : 68.79353933
Total Length of Fwd Packets_std : 24903.37472
Total Length of Bwd Packets_std : 83519.32163
Fwd Packet Length Max_std : 151.0941011
Fwd Packet Length Min_std : 15.09269663
Fwd Packet Length Mean_std : 60.31023126
Fwd Packet Length Std_std : 65.91337046
Bwd Packet Length Max_std : 316.7985955
Bwd Packet Length Min_std : 4.556741573
Bwd Packet Length Mean_std : 115.2953941
Bwd Packet Length Std_std : 134.7314455
Flow Packets/s_std : 1076.759428
Flow IAT Mean_std : 315402.3116
Flow IAT Std_std : 316010.858
Flow IAT Max_std : 1390749.078
Flow IAT Min_std : 169225.2489
Fwd IAT Total_std : 2356534
Fwd IAT Mean_std : 193229.1379
Fwd IAT Std_std : 199001.5513
Fwd IAT Max_std : 972956.4795
Fwd IAT Min_std : 102790.4461
Bwd IAT Total_std : 2129542.496
Bwd IAT Mean_std : 183055.4255
Bwd IAT Std_std : 275767.2574
Bwd IAT Max_std : 938252.2438
Bwd IAT Min_std : 42909.85449
Fwd Header Length_std : 925.0988764
Bwd Header Length_std : 1410.752809
Fwd Packets/s_std : 570.2192404
Bwd Packets/s_std : 506.5401875
Min Packet Length_std : 1.83988764
Max Packet Length_std : 333.5238764
Packet Length Mean_std : 86.7986432
Packet Length Std_std : 129.5051456
Packet Length Variance_std : 48556.98763
FIN Flag Count_std : 0.053370787
SYN Flag Count_std : 0.485674157
RST Flag Count_std : 0.015449438
PSH Flag Count_std : 0.02752809
ACK Flag Count_std : 0.498314607
Down/Up Ratio_std : 1.178932584
Average Packet Size_std : 96.07454031
Avg Fwd Segment Size_std : 60.31023126
Avg Bwd Segment Size_std : 115.2953941
Subflow Bwd Packets_std : 45.6011236
Subflow Bwd Bytes_std : 24903.37472
Init_Win_bytes_backward_std : 20786.8559
act_data_pkt_fwd_std : 18.74494382
Active Mean_std : 192130.6199
Active Std_std : 107419.2795
Active Max_std : 295761.361
Active Min_std : 114022.5817
Idle Mean_std : 543921.4623
Idle Std_std : 186999.9591
Idle Max_std : 700302.3222
Idle Min_std : 405814.1747
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment