From 1091bce01a6a1de1277765b3f8d23ed7bd95203d Mon Sep 17 00:00:00 2001
From: Tetiana Yemelianenko <tyemel.mzeom@gmail.com>
Date: Fri, 30 Aug 2024 09:36:32 +0000
Subject: [PATCH] Upload script for the dataset creation

---
 create_owl_dataset.py | 319 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 319 insertions(+)
 create mode 100644 create_owl_dataset.py

diff --git a/create_owl_dataset.py b/create_owl_dataset.py
new file mode 100644
index 0000000..28caf1b
--- /dev/null
+++ b/create_owl_dataset.py
@@ -0,0 +1,319 @@
+import os
+from PIL import Image
+from glob import glob
+import pandas as pd
+import shutil
+from annoy import AnnoyIndex
+from transformers import Owlv2Processor, Owlv2ForObjectDetection
+import torch
+import numpy as np
+from transformers.utils.constants import OPENAI_CLIP_MEAN, OPENAI_CLIP_STD
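+
+# Overview of what this script does (summary of the code below):
+#   1. detect objects with OWLv2 in request images annotated at image level,
+#   2. retrieve visually similar regions from the WikiArt dataset via an ANNOY index,
+#   3. keep a retrieved region only if OWLv2 re-detects the object there with IoU > 0.8,
+#   4. copy the selected images and write YOLO-style label files.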
+
+TOP_COUNT = 30     # number of nearest neighbours retrieved from the ANNOY index
+feature_dim = 512  # dimensionality of the OWLv2 class embeddings stored in the index
+parent_dir = 'path_to_the_main_dir'
+
+# path to the directory with images annotated at image level
+parentpath = os.path.join(parent_dir, 'dataset/')
+
+# directory in which the selected images are saved
+owlpath = os.path.join(parent_dir, 'owl_dataset')
+ext = '.jpg'  # image file extension (.jpg or .png)
+
+#path to the directory with non-annotated data
+base_dir = 'path_to_the_non_annotated_dataset'
+
+os.makedirs(owlpath, exist_ok=True)
+
+im_path = os.path.join(owlpath, 'images')
+os.makedirs(im_path, exist_ok=True)
+lb_path = os.path.join(owlpath, 'labels')
+os.makedirs(lb_path, exist_ok=True)
+
+device = "cuda:0" if torch.cuda.is_available() else "cpu"
+processor = Owlv2Processor.from_pretrained("google/owlv2-base-patch16-ensemble")
+model = Owlv2ForObjectDetection.from_pretrained("google/owlv2-base-patch16-ensemble").to(device)
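+# note: from_pretrained() returns the model in eval mode; inference below runs under torch.no_grad()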
+
+# path to the csv file with the TOP 50 regions of interest and their coordinates,
+# pre-computed for each painting in the WikiArt dataset
+df = pd.read_csv("path_to_the_objectness_file/objectness_wikiart.csv")
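+# the columns used below are 'file_path', 'cx', 'cy', 'w' and 'h' (normalized box centers and sizes)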
+
+# ANNOY index for the WikiArt dataset with information about the similarity of the
+# objects in the images; the 'angular' metric corresponds to cosine distance
+t = AnnoyIndex(feature_dim, metric='angular')
+t.load('path_to_the_annoy_index/annoy_wikiart.ann')
+
+
+# retrieve the TOP_COUNT most similar objects using ANNOY
+def get_similar_images_annoy(base_vector):
+	indices, dists = t.get_nns_by_vector(base_vector, TOP_COUNT, include_distances=True)
+	return indices, dists
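+# For example (illustrative): given a 512-d class embedding q from detectobject(),
+# get_similar_images_annoy(q) returns the row indices into df of the TOP_COUNT
+# nearest stored regions, together with their angular distances.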
+
+# undo the CLIP normalization applied by the processor to recover the (padded)
+# image the model actually sees
+def get_preprocessed_image(pixel_values):
+	pixel_values = pixel_values.detach().cpu().squeeze().numpy()
+	unnormalized_image = (pixel_values * np.array(OPENAI_CLIP_STD)[:, None, None]) + np.array(OPENAI_CLIP_MEAN)[:, None, None]
+	unnormalized_image = (unnormalized_image * 255).astype(np.uint8)
+	unnormalized_image = np.moveaxis(unnormalized_image, 0, -1)
+	unnormalized_image = Image.fromarray(unnormalized_image)
+	return unnormalized_image
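+# Note: the OWLv2 processor pads non-square images to a square before resizing,
+# so the preprocessed image's aspect ratio differs from the original's;
+# rescale_owl() below compensates for this padding when mapping boxes back to pixels.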
+
+# object detection using OWLv2
+def detectobject(imgpath, label, threshold=0.4):
+	"""
+	Parameters
+	----------
+	imgpath: string, path to the image
+	label: string, text query for the searched object
+	threshold: float, minimum detection score
+
+	Returns
+	-------
+	boxes: list of found bounding boxes in normalized (cx, cy, w, h) format
+	query_embeddings: list of class embeddings of the found objects
+	"""
+	
+	raw_image = Image.open(imgpath)
+	texts = [[label]]
+	inputs = processor(text=texts, images=raw_image, return_tensors="pt").to(device)
+	with torch.no_grad():
+		outputs = model(**inputs)
+	unnormalized_image = get_preprocessed_image(inputs.pixel_values)
+
+	# convert outputs (bounding boxes and class logits)
+	target_sizes = torch.Tensor([unnormalized_image.size[::-1]])
+	# 3600 = the 60x60 patch grid of the base-patch16 model at 960x960 input
+	image_features = outputs.image_embeds.reshape(3600, 768)
+	# class_predictor returns (logits, class embeddings); keep the embeddings
+	source_class_embeddings = model.class_predictor(image_features)[1]
+	probs = torch.max(outputs.logits, dim=-1)
+	pred_boxes = outputs.pred_boxes[0]
+	scores_ = torch.sigmoid(probs.values)[0]
+	query_embeddings = []
+	
+	boxes = []
+	for i in range(len(scores_)):
+		if scores_[i] > threshold:
+			query_embeddings.append(source_class_embeddings[i])
+			boxes.append(pred_boxes[i].detach().cpu().numpy())
+	raw_image.close()
+
+	return boxes, query_embeddings
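+# Example usage (illustrative, hypothetical path):
+#   boxes, embeddings = detectobject("dataset/cat/img001.jpg", "a cat", threshold=0.4)
+# boxes holds normalized (cx, cy, w, h) boxes; embeddings holds the matching
+# class embeddings that can be queried against the ANNOY index.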
+
+# calculate the Intersection over Union (IoU) of two bounding boxes
+def calculate_iou(box1, box2):
+	"""
+	Parameters
+	----------
+	box1 : list, tuple or array-like
+		The (x1, y1, x2, y2) coordinates of the first bounding box.
+	box2 : list, tuple or array-like
+		The (x1, y1, x2, y2) coordinates of the second bounding box.
+
+	Returns
+	-------
+	float
+		The IoU of box1 and box2.
+	"""
+
+	x1_1, y1_1, x2_1, y2_1 = box1
+	x1_2, y1_2, x2_2, y2_2 = box2
+
+	# coordinates of the intersection rectangle
+	xi1 = max(x1_1, x1_2)
+	yi1 = max(y1_1, y1_2)
+	xi2 = min(x2_1, x2_2)
+	yi2 = min(y2_1, y2_2)
+
+	# area of the intersection rectangle
+	inter_width = max(xi2 - xi1, 0)
+	inter_height = max(yi2 - yi1, 0)
+	inter_area = inter_width * inter_height
+
+	# areas of both bounding boxes
+	box1_area = (x2_1 - x1_1) * (y2_1 - y1_1)
+	box2_area = (x2_2 - x1_2) * (y2_2 - y1_2)
+
+	# area of the union
+	union_area = box1_area + box2_area - inter_area
+
+	iou = inter_area / union_area if union_area != 0 else 0
+
+	return iou
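+# Worked example: box1 = (0, 0, 2, 2) and box2 = (1, 1, 3, 3) intersect in a
+# 1x1 square, so IoU = 1 / (4 + 4 - 1) = 1/7 ≈ 0.143.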
+
+# convert a (cx, cy, w, h) box to (x1, y1, x2, y2) corners, clipped to [0, 1]
+def center_to_corners_format(box):
+	center_x, center_y, width, height = box
+	x1 = max(center_x - 0.5 * width, 0)
+	y1 = max(center_y - 0.5 * height, 0)
+	x2 = min(center_x + 0.5 * width, 1)
+	y2 = min(center_y + 0.5 * height, 1)
+
+	bboxes_corners = [x1, y1, x2, y2]
+	return bboxes_corners
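+# For example, center_to_corners_format([0.5, 0.5, 0.4, 0.2]) returns [0.3, 0.4, 0.7, 0.6].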
+
+# rescale normalized box coordinates to pixel coordinates, compensating for the
+# square padding that the OWLv2 processor applies to non-square images
+def rescale_owl(raw_image, box):
+    img_h = raw_image.height
+    img_w = raw_image.width
+
+    width_ratio = 1
+    height_ratio = 1
+
+    if img_w < img_h:
+        width_ratio = img_w / img_h
+    elif img_h < img_w:
+        height_ratio = img_h / img_w
+
+    img_w = img_w / width_ratio
+    img_h = img_h / height_ratio
+
+    scale_fct = [img_w, img_h, img_w, img_h]
+    box = np.array(box) * np.array(scale_fct)
+    
+    return box
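+# For example: for a 200x100 image, height_ratio = 0.5 and the scale factors
+# become [200, 200, 200, 200]; i.e. boxes are scaled on both axes by the padded
+# square side max(width, height), matching the square input the model saw.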
+
+# build a zero-padded file name such as "e000123"
+def create_new_name(counter):
+	fname = "e" + str(counter).zfill(6)
+	return fname
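+# For example, create_new_name(12) returns "e000012".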
+
+# creation of annotations
+def owl_annoy_annotation(labels, folders):
+	"""
+	Parameters
+	----------
+	labels: list of labels used for the annotations
+	folders: list of folders with files pre-selected and annotated at image level
+
+	The function copies images and creates txt files with annotations in YOLO style.
+	"""
+
+	# all objects with their bounding boxes
+	annotations = []
+
+	for i in range(len(labels)):
+		imgpaths = glob(parentpath + folders[i] + "/*" + ext)
+		label = labels[i]
+		folder = folders[i]
+		print(label)
+		processed = 0
+		# for all images of the current label
+		for file in imgpaths:
+			processed += 1
+			print(processed)
+			try:
+				boxes, query_embeddings = detectobject(file, label, 0.4)
+			except Exception:
+				print(file)
+				continue
+			# if the current type of object wasn't detected with OWLv2, skip this image
+			if len(boxes) == 0:
+				continue
+
+			# for every found object, use its embedding to retrieve similar objects from the WikiArt dataset via ANNOY
+			for j in range(len(query_embeddings)):
+				similar_img_ids, distances = get_similar_images_annoy(query_embeddings[j])
+				df_selected = df.iloc[similar_img_ids]
+				# paths to the retrieved images
+				similar_images = list(df_selected['file_path'])
+				# coordinates of the bounding box of each retrieved object
+				cxs = list(df_selected['cx'])
+				cys = list(df_selected['cy'])
+				ws = list(df_selected['w'])
+				hs = list(df_selected['h'])
+
+				for k in range(TOP_COUNT):
+					found_file = os.path.join(base_dir, similar_images[k])
+
+					raw_image = Image.open(found_file)
+					width, height = raw_image.size
+
+					try:
+						found_boxes, _ = detectobject(found_file, label, 0.4)
+					except Exception:
+						print(found_file)
+						raw_image.close()
+						continue
+
+					if len(found_boxes) == 0:
+						raw_image.close()
+						continue
+
+					box = center_to_corners_format([cxs[k], cys[k], ws[k], hs[k]])
+
+					# keep the candidate only if OWLv2 re-detects the object at (almost) the same position
+					max_iou = 0
+					for p in range(len(found_boxes)):
+						iou = calculate_iou(box, center_to_corners_format(found_boxes[p]))
+						if iou > max_iou:
+							max_iou = iou
+
+					if max_iou > 0.8:
+						# save the found bounding box in YOLO format
+						box = rescale_owl(raw_image, box)
+						x = (box[0] + box[2]) / 2 / width
+						y = (box[1] + box[3]) / 2 / height
+						w = (box[2] - box[0]) / width
+						h = (box[3] - box[1]) / height
+						annotations.append([found_file, folder, x, y, w, h])
+					raw_image.close()
+
+				# save the box for the request image too
+				raw_image = Image.open(file)
+				width, height = raw_image.size
+				box = center_to_corners_format(boxes[j])
+				box = rescale_owl(raw_image, box)
+				raw_image.close()
+				x = (box[0] + box[2]) / 2 / width
+				y = (box[1] + box[3]) / 2 / height
+				w = (box[2] - box[0]) / width
+				h = (box[3] - box[1]) / height
+				annotations.append([file, folder, x, y, w, h])
+
+	# remove duplicates and sort the final list by file name
+	annotations = [list(x) for x in set(tuple(row) for row in annotations)]
+	annotations = sorted(annotations, key=lambda l: l[0])
+
+	# path to the file with the list of classes
+	with open("/home/tetiana/yolo/DEArt/deart_classes.txt", "r") as f:
+		lines = f.readlines()
+
+	# map class name -> numeric class id (its line number in the classes file)
+	codes = dict()
+	count = 0
+	for line in lines:
+		codes[line.strip()] = count
+		count += 1
+
+	counter = 0
+	previous_filename = ""
+	new_name = ""
+
+	# copy images and create files with annotations in YOLO style:
+	# each label line is "<class id> <x center> <y center> <width> <height>"
+	for annotation in annotations:
+		filename = annotation[0]
+
+		if filename != previous_filename:
+			if not os.path.exists(filename):
+				continue
+			counter += 1
+			new_name = create_new_name(counter)
+			shutil.copyfile(filename, os.path.join(owlpath, "images", new_name + ".jpg"))
+
+			with open(os.path.join(owlpath, "labels", new_name + ".txt"), "w") as f:
+				f.write("%d %.06f %.06f %.06f %.06f\n" % (codes[annotation[1]], annotation[2], annotation[3], annotation[4], annotation[5]))
+		else:
+			with open(os.path.join(owlpath, "labels", new_name + ".txt"), "a") as f:
+				f.write("%d %.06f %.06f %.06f %.06f\n" % (codes[annotation[1]], annotation[2], annotation[3], annotation[4], annotation[5]))
+
+		previous_filename = filename
+
+
+# list of labels (text queries for OWLv2)
+labels = ['an apple', 'a banana', 'a butterfly', 'a boat', 'a cat', 'a cow', 'a crucifixion', 'a deer', 'a dog', 'a white dove', 'an eagle', 'a horse', 'a monkey', 'an orange',
+          'a nude', 'a rooster', 'a serpent', 'a skull', 'a sheep', 'a swan', 'a trumpet']
+
+# names of the folders with previously collected request images, annotated at image level
+folders = ['apple', 'banana', 'butterfly', 'boat', 'cat', 'cow', 'crucifixion', 'deer', 'dog', 'dove', 'eagle', 'horse', 'monkey', 'orange', 
+           'nude', 'rooster', 'serpent', 'skull', 'sheep', 'swan', 'trumpet']
+
+owl_annoy_annotation(labels, folders)
-- 
GitLab