Upload New File

e97ba167 · Tetiana Yemelianenko · d84c52e7 · e97ba167
Commit e97ba167 authored 9 months ago by Tetiana Yemelianenko
--- a/create_owl_dataset.py
+++ b/create_owl_dataset.py
+import os
+from PIL import Image
+from glob import glob
+import pandas as pd
+import shutil
+from annoy import AnnoyIndex
+from transformers import Owlv2Processor, Owlv2ForObjectDetection
+import torch
+import numpy as np
+from transformers.utils.constants import OPENAI_CLIP_MEAN, OPENAI_CLIP_STD
+
+# number of nearest neighbours requested from the ANNOY index for every query embedding
+TOP_COUNT = 10
+# dimensionality of the vectors stored in the ANNOY index
+feature_dim = 512
+# root directory of the project data (placeholder, set it before running)
+parent_dir = 'path_to_the_main_dir'
+
+#path to the directory with images annotated on image level (one sub-folder per class)
+parentpath = os.path.join(parent_dir, 'dataset/')
+
+#directory in which we save the selected images and their label files
+owlpath = os.path.join(parent_dir, 'owl_dataset')
+ext = '.jpg' # extension of the request images collected from parentpath [.jpg or .png]
+
+#path to the directory with non-annotated data; the 'file_path' values of the CSV are joined to it
+base_dir = 'path_to_the_non_annotated_dataset'
+
+# Output tree: <owlpath>/images for the copied pictures, <owlpath>/labels for the YOLO txt files.
+im_path = os.path.join(owlpath, 'images')
+lb_path = os.path.join(owlpath, 'labels')
+# makedirs(exist_ok=True) also creates a missing parent_dir and does not fail
+# when a directory already exists (no check-then-create race).
+for out_dir in (owlpath, im_path, lb_path):
+    os.makedirs(out_dir, exist_ok=True)
+
+# Run on the first CUDA device when torch can see one, otherwise on the CPU.
+if torch.cuda.is_available():
+    device = "cuda:0"
+else:
+    device = "cpu"
+
+# The processor and the detector are loaded from the same pretrained OWLv2 checkpoint.
+owl_checkpoint = "google/owlv2-base-patch16-ensemble"
+processor = Owlv2Processor.from_pretrained(owl_checkpoint)
+model = Owlv2ForObjectDetection.from_pretrained(owl_checkpoint).to(device)
+
+# CSV with the pre-calculated TOP 50 regions of interest (and their coordinates) for each painting
+# of the WikiArt dataset. The columns read below are 'file_path', 'cx', 'cy', 'w' and 'h'; rows are
+# addressed by position with the item ids returned by the ANNOY index.
+df = pd.read_csv("path_to_the_objectnesses_fle/objectness_wikiart.csv")
+
+# ANNOY index for the WikiArt dataset with information about the similarity of the objects in the images
+t = AnnoyIndex(feature_dim, metric='angular')
+# the original line ended with a doubled quote, which made the whole script unparsable
+t.load('path_to_the_annoy_index/annoy_wikiart.ann')
+
+
+#nearest-neighbour lookup in the ANNOY index
+def get_similar_images_annoy(base_vector):
+    """
+    Parameters
+    ----------
+    base_vector: query vector handed to the ANNOY index
+
+    Returns
+    -------
+    ids and distances of the TOP_COUNT items of the index that are closest to base_vector
+    """
+    neighbour_ids, neighbour_dists = t.get_nns_by_vector(
+        base_vector,
+        TOP_COUNT,
+        include_distances=True,
+    )
+    return neighbour_ids, neighbour_dists
+
+def get_preprocessed_image(pixel_values):
+    """
+    Parameters
+    ----------
+    pixel_values: tensor produced by the processor
+                  (assumes one channels-first image after squeeze - TODO confirm)
+
+    Returns
+    -------
+    PIL image obtained by undoing the mean/std normalization (OPENAI_CLIP_MEAN / OPENAI_CLIP_STD)
+    and scaling the values to 8-bit
+    """
+    channels_first = pixel_values.detach().cpu().squeeze().numpy()
+
+    # per-channel statistics, shaped so that they broadcast over the two spatial axes
+    std = np.array(OPENAI_CLIP_STD)[:, None, None]
+    mean = np.array(OPENAI_CLIP_MEAN)[:, None, None]
+
+    restored = (channels_first * std) + mean
+    as_bytes = (restored * 255).astype(np.uint8)
+
+    # PIL expects the channel axis last
+    return Image.fromarray(np.moveaxis(as_bytes, 0, -1))
+
+#object detection using OWLv2
+def detectobject(imgpath, label, threshold = 0.4):
+    """
+    Detect the object described by `label` in one image.
+
+    Parameters
+    ----------
+    imgpath: string with the path to the image
+    label: string with the searched object (text query, e.g. 'a cat')
+    threshold: a prediction is kept when the sigmoid of its best class logit is above this value
+
+    Returns
+    -------
+    boxes: list of numpy arrays, one per kept prediction, taken unchanged from the model's pred_boxes
+           (the callers treat them as (cx, cy, w, h))
+    query_embeddings: list of 1-D CPU tensors with the class embedding of each kept prediction,
+                      in the same order as boxes
+    """
+    # The context manager closes the file even when the processor raises.
+    with Image.open(imgpath) as raw_image:
+        # Grayscale / palette / RGBA files are converted so that the processor always gets three channels.
+        inputs = processor(text=[[label]], images=raw_image.convert("RGB"), return_tensors="pt").to(device)
+
+    # Everything that touches the network runs without autograd: the embeddings are only used for
+    # nearest-neighbour search, so keeping a graph alive would just waste memory.
+    with torch.no_grad():
+        outputs = model(**inputs)
+        # Flatten the patch grid to (patches, features) without hard-coding the grid or feature size.
+        patch_features = outputs.image_embeds.reshape(-1, outputs.image_embeds.shape[-1])
+        class_embeddings = model.class_predictor(patch_features)[1]
+
+    # best score over the text queries for every predicted box of the (single) image
+    scores = torch.sigmoid(torch.max(outputs.logits, dim=-1).values)[0]
+    keep = scores > threshold
+
+    boxes = list(outputs.pred_boxes[0][keep].detach().cpu().numpy())
+    query_embeddings = list(class_embeddings[keep].detach().cpu())
+
+    return boxes, query_embeddings
+
+#Intersection over Union (IoU) of two axis-aligned bounding boxes
+def calculate_iou(box1, box2):
+    """
+    Parameters
+    ----------
+    box1 : list, tuple or array-like
+        The (x1, y1, x2, y2) coordinates of the first bounding box.
+    box2 : list, tuple or array-like
+        The (x1, y1, x2, y2) coordinates of the second bounding box.
+
+    Returns
+    -------
+    float
+        Overlap area divided by the area of the union; 0 when the union has no area.
+    """
+    left_a, top_a, right_a, bottom_a = box1
+    left_b, top_b, right_b, bottom_b = box2
+
+    # sides of the overlap rectangle; a negative extent means the boxes do not overlap
+    overlap_w = max(min(right_a, right_b) - max(left_a, left_b), 0)
+    overlap_h = max(min(bottom_a, bottom_b) - max(top_a, top_b), 0)
+    overlap = overlap_w * overlap_h
+
+    area_a = (right_a - left_a) * (bottom_a - top_a)
+    area_b = (right_b - left_b) * (bottom_b - top_b)
+    total = area_a + area_b - overlap
+
+    if total == 0:
+        return 0
+    return overlap / total
+
+#(cx, cy, w, h) -> [x1, y1, x2, y2], kept inside the unit square
+def center_to_corners_format(box):
+    """
+    Parameters
+    ----------
+    box: (center_x, center_y, width, height) of a bounding box
+
+    Returns
+    -------
+    list [x1, y1, x2, y2]; the upper-left corner is not allowed below 0
+    and the lower-right corner is not allowed above 1
+    """
+    cx, cy, box_w, box_h = box
+    half_w = 0.5 * box_w
+    half_h = 0.5 * box_h
+
+    left = max(cx - half_w, 0)
+    top = max(cy - half_h, 0)
+    right = min(cx + half_w, 1)
+    bottom = min(cy + half_h, 1)
+
+    return [left, top, right, bottom]
+
+def rescale_owl(box, width, height):
+    """
+    Parameters
+    ----------
+    box: four coordinates [x1, y1, x2, y2]
+    width, height of the image
+
+    Returns
+    -------
+    numpy array with the coordinates multiplied by the rescaled image sides; the shorter side is
+    divided by the short/long ratio, so both factors are (up to float rounding) the longer side
+    """
+    # ratio of the shorter side to the longer one; the longer side (or a square image) keeps 1
+    width_ratio = width / height if width < height else 1
+    height_ratio = height / width if height < width else 1
+
+    scaled_w = width / width_ratio
+    scaled_h = height / height_ratio
+
+    factors = np.array([scaled_w, scaled_h, scaled_w, scaled_h])
+    return np.array(box) * factors
+
+def create_new_name(counter):
+    """
+    Parameters
+    ----------
+    counter: running number of the image
+
+    Returns
+    -------
+    string "e" followed by the number, left-padded with zeros to at least six characters
+    """
+    return "e" + str(counter).rjust(6, "0")
+
+def to_yolo_format(box, width, height):
+    """
+    Convert a box from OWL to YOLO format.
+
+    Parameters
+    ----------
+    box: bounding box to convert, (center_x, center_y, width, height)
+    width, height of the image
+
+    Returns
+    -------
+    x, y, w, h: center and size of the box divided by the image width / height, all inside [0, 1]
+    """
+    corners = center_to_corners_format(box)
+    x1, y1, x2, y2 = rescale_owl(corners, width, height)
+
+    # rescale_owl multiplies both axes by the longer image side, so on the shorter axis a box
+    # can reach past the real image border (presumably into the padding the model adds to make
+    # the input square - confirm against the OWLv2 docs). Clip to the image, otherwise the
+    # normalized values below could be larger than 1.
+    x1 = min(max(x1, 0), width)
+    x2 = min(max(x2, 0), width)
+    y1 = min(max(y1, 0), height)
+    y2 = min(max(y2, 0), height)
+
+    x = (x1 + x2) / 2 / width
+    y = (y1 + y2) / 2 / height
+    w = (x2 - x1) / width
+    h = (y2 - y1) / height
+    return x, y, w, h
+
+def _image_size(path):
+    """Return (width, height) of the image stored at path; the file is closed right away."""
+    with Image.open(path) as img:
+        return img.size
+
+
+def find_for_label(label, folder, imgpaths, threshold=0.4, iou_threshold=0.8):
+    """
+    Create annotations using the files from a folder annotated on image level.
+
+    For every request image the label is detected with detectobject; the embedding of every
+    detection is used to look up similar regions with ANNOY. A region is accepted when the
+    detector, run on the image of that region, finds a box whose IoU with the stored region
+    is above iou_threshold.
+
+    Parameters
+    ----------
+    label: current label for annotation
+    folder: current folder with images annotated on image level (stored in every annotation)
+    imgpaths: list with paths to the images
+    threshold: current detection threshold
+    iou_threshold: minimal IoU between a stored region and a detection (default 0.8)
+
+    Returns
+    -------
+    annot: list of [file, folder, x, y, w, h] with the box in YOLO format
+    count: number of entries in annot
+    """
+    annot = []
+
+    for position, file in enumerate(imgpaths, start=1):
+        # progress output
+        print(position)
+        try:
+            boxes, query_embeddings = detectobject(file, label, threshold)
+        except Exception as err:
+            # best effort: report the unreadable image and go on (Ctrl-C is no longer swallowed)
+            print(file, err)
+            continue
+        #if current type of object wasn't detected with OWL, we skip this image
+        if len(boxes) == 0:
+            continue
+
+        # the size of the request image is the same for all of its boxes, read it once
+        request_width, request_height = _image_size(file)
+
+        #for all found objects we use their embeddings to find similar objects in WikiArt dataset using ANNOY
+        for request_box, embedding in zip(boxes, query_embeddings):
+            similar_img_ids, _ = get_similar_images_annoy(embedding)
+            neighbours = df.iloc[similar_img_ids]
+
+            # iterate over what ANNOY actually returned, it can be fewer than TOP_COUNT rows
+            for rel_path, cx, cy, w, h in zip(neighbours['file_path'], neighbours['cx'],
+                                               neighbours['cy'], neighbours['w'], neighbours['h']):
+                found_file = os.path.join(base_dir, rel_path)
+                try:
+                    width, height = _image_size(found_file)
+                    found_boxes, _ = detectobject(found_file, label, threshold)
+                except Exception as err:
+                    # skip this neighbour; without the continue the boxes of the previous
+                    # neighbour (or an unbound name) would be used below
+                    print(found_file, err)
+                    continue
+
+                stored_box = [cx, cy, w, h]
+                stored_corners = center_to_corners_format(stored_box)
+                max_iou = max(
+                    (calculate_iou(stored_corners, center_to_corners_format(found)) for found in found_boxes),
+                    default=0,
+                )
+
+                if max_iou > iou_threshold:
+                    #save found boundary boxes in YOLO format
+                    annot.append([found_file, folder, *to_yolo_format(stored_box, width, height)])
+
+            #save boxes for the request image too
+            annot.append([file, folder, *to_yolo_format(request_box, request_width, request_height)])
+
+    return annot, len(annot)
+
+
+#creation of annotations
+def owl_annoy_annotation(labels, folders, classes_file="/home/tetiana/yolo/DEArt/deart_classes.txt", request_count=45):
+    """
+    Copy the selected images and create txt files with annotations in YOLO style.
+
+    Parameters
+    ----------
+    labels: list of labels used for the annotations
+    folders: list of folders with files pre-selected and annotated on image level;
+             folders[i] belongs to labels[i]
+    classes_file: text file with one class name per line; the line number is the class id
+    request_count: number of annotations per label after which the threshold is not lowered further
+
+    Raises
+    ------
+    ValueError when labels and folders have different lengths
+    KeyError when an annotated folder is not listed in classes_file
+    """
+    # The two lists are used in parallel; with different lengths every pair after the first
+    # missing entry would silently get the wrong class.
+    if len(labels) != len(folders):
+        raise ValueError("labels (%d) and folders (%d) must have the same length" % (len(labels), len(folders)))
+
+    # read the class list first, so that a wrong path fails before the long detection runs
+    with open(classes_file, "r") as classes:
+        codes = {line.strip(): index for index, line in enumerate(classes)}
+
+    #all objects with their boundary boxes
+    annotations = []
+
+    for label, folder in zip(labels, folders):
+        imgpaths = glob(parentpath + folder + "/*" + ext)
+        print('Current label: ' + label)
+
+        # Start strict and relax the threshold while too few objects are found. Explicit values
+        # avoid the float drift of repeated "-= 0.1". The result of the last round is kept.
+        for selected_threshold in (0.4, 0.3, 0.2):
+            print('next round')
+            cur_annot, found_count = find_for_label(label, folder, imgpaths, selected_threshold)
+            print('found on this round: ' + str(found_count))
+            if found_count >= request_count:
+                break
+
+        annotations += cur_annot
+
+    #get rid of the duplicates; sorting the whole rows groups them by file name in a reproducible order
+    annotations = sorted(set(tuple(row) for row in annotations))
+
+    # collect the label lines of every file (dicts keep the sorted insertion order)
+    lines_by_file = {}
+    for filename, folder, x, y, w, h in annotations:
+        lines_by_file.setdefault(filename, []).append(
+            "%d %.06f %.06f %.06f %.06f\n" % (codes[folder], x, y, w, h))
+
+    #copy images and create files with annotations in YOLO style
+    for counter, (filename, lines) in enumerate(lines_by_file.items(), start=1):
+        new_name = create_new_name(counter)
+        # NOTE(review): the copy is always named .jpg, whatever the extension of the source file is
+        shutil.copyfile(filename, os.path.join(owlpath, "images", new_name + ".jpg"))
+        with open(os.path.join(owlpath, "labels", new_name + ".txt"), "w") as label_file:
+            label_file.writelines(lines)
+
+
+#(label used as text query, folder with previously collected request images annotated on image level)
+#Keeping both values in one pair guarantees that labels[i] always belongs to folders[i].
+#NOTE(review): the folder 'orange' had no label, which shifted every label after 'a monkey' to the
+#wrong folder; 'an orange' was added for it - confirm that this class is wanted.
+label_folder_pairs = [
+    ('an apple', 'apple'),
+    ('a banana', 'banana'),
+    ('a butterfly', 'butterfly'),
+    ('a boat', 'boat'),
+    ('a cat', 'cat'),
+    ('a cow', 'cow'),
+    ('a crucifixion', 'crucifixion'),
+    ('a deer', 'deer'),
+    ('a dog', 'dog'),
+    ('a white dove', 'dove'),
+    ('an eagle', 'eagle'),
+    ('a horse', 'horse'),
+    ('a monkey', 'monkey'),
+    ('an orange', 'orange'),
+    ('a nude', 'nude'),
+    ('a rooster', 'rooster'),
+    ('a serpent', 'serpent'),
+    ('a skull', 'skull'),
+    ('a sheep', 'sheep'),
+    ('a swan', 'swan'),
+    ('a trumpet', 'trumpet'),
+]
+
+#list of labels
+labels = [label for label, _ in label_folder_pairs]
+#name of folders with previously collected request images annotated on image level
+folders = [folder for _, folder in label_folder_pairs]
+
+owl_annoy_annotation(labels, folders)