From 44b3775097cc0a51f3c2116a6746d56fae460a9b Mon Sep 17 00:00:00 2001
From: Athmane Mansour Bahar <ja_mansourbahar@esi.dz>
Date: Thu, 15 Aug 2024 17:50:29 +0000
Subject: [PATCH] Upload New File

---
 trainer/utils/streamspot_parser.py | 53 ++++++++++++++++++++++++++++++
 1 file changed, 53 insertions(+)
 create mode 100644 trainer/utils/streamspot_parser.py

diff --git a/trainer/utils/streamspot_parser.py b/trainer/utils/streamspot_parser.py
new file mode 100644
index 0000000..04438eb
--- /dev/null
+++ b/trainer/utils/streamspot_parser.py
@@ -0,0 +1,86 @@
+"""Split the StreamSpot edge list into one node-link JSON file per graph.
+
+Reads ``raw_path + 'all.tsv'``, where every line holds six tab-separated
+fields: source id, source type, destination id, destination type, edge type
+and graph id.  Edges whose node or edge type is not in the tables below are
+skipped.  Graph ids are expected to start at 0 and increase by one; the graph
+with id ``k`` is then written to ``raw_path + str(k + 1) + '.json'``.
+"""
+import networkx as nx
+from tqdm import tqdm
+import json
+raw_path = '../data/streamspot/'
+
+NUM_GRAPHS = 600
+node_type_dict = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h']
+edge_type_dict = ['i', 'j', 'k', 'l', 'm', 'n', 'o', 'p',
+                  'q', 't', 'u', 'v', 'w', 'y', 'z', 'A', 'C', 'D', 'E', 'G']
+node_type_set = set(node_type_dict)
+edge_type_set = set(edge_type_dict)
+# Type label -> integer code (the label's position in the lists above).
+node_type_index = {label: i for i, label in enumerate(node_type_dict)}
+edge_type_index = {label: i for i, label in enumerate(edge_type_dict)}
+
+
+def _write_graph(graph, file_number):
+    """Encode type labels as integers and dump ``graph`` as node-link JSON.
+
+    The ``type`` attribute of every node and edge is replaced in place by its
+    integer code, then the graph is written to
+    ``raw_path + str(file_number) + '.json'``.
+    """
+    for n in graph.nodes():
+        graph.nodes[n]['type'] = node_type_index[graph.nodes[n]['type']]
+    for e in graph.edges():
+        graph.edges[e]['type'] = edge_type_index[graph.edges[e]['type']]
+    path = raw_path + str(file_number) + '.json'
+    # Context manager so the file is flushed and closed right away instead of
+    # leaking one open handle per graph.
+    with open(path, 'w', encoding='utf-8') as out:
+        json.dump(nx.node_link_data(graph), out)
+
+
+count_graph = 0
+with open(raw_path + 'all.tsv', 'r', encoding='utf-8') as f:
+    g = nx.DiGraph()
+    node_map = {}
+    count_node = 0
+    # Stream the file line by line rather than loading the whole edge list
+    # into memory; the progress bar therefore shows a running count only.
+    for line in tqdm(f):
+        line = line.strip('\n')
+        if not line:
+            continue  # tolerate blank lines, e.g. a stray trailing newline
+        src, src_type, dst, dst_type, etype, graph_id = line.split('\t')
+        graph_id = int(graph_id)
+        if src_type not in node_type_set or dst_type not in node_type_set:
+            continue
+        if etype not in edge_type_set:
+            continue
+        if graph_id != count_graph:
+            # First kept edge of the next graph: flush the finished one.
+            count_graph += 1
+            _write_graph(g, count_graph)
+            if graph_id != count_graph:
+                raise ValueError(
+                    'expected graph id %d, got %d' % (count_graph, graph_id))
+            g = nx.DiGraph()
+            # Node numbering restarts with each graph, so the id -> index map
+            # must restart too; a stale entry would skip add_node() and leave
+            # an untyped node behind.
+            node_map = {}
+            count_node = 0
+        if src not in node_map:
+            node_map[src] = count_node
+            g.add_node(count_node, type=src_type)
+            count_node += 1
+        if dst not in node_map:
+            node_map[dst] = count_node
+            g.add_node(count_node, type=dst_type)
+            count_node += 1
+        # Only the first edge seen for an ordered node pair is kept.
+        if not g.has_edge(node_map[src], node_map[dst]):
+            g.add_edge(node_map[src], node_map[dst], type=etype)
+    # Flush the last graph, which has no following graph id to trigger it.
+    count_graph += 1
+    _write_graph(g, count_graph)
-- 
GitLab