From 1091bce01a6a1de1277765b3f8d23ed7bd95203d Mon Sep 17 00:00:00 2001
From: Tetiana Yemelianenko <tyemel.mzeom@gmail.com>
Date: Fri, 30 Aug 2024 09:36:32 +0000
Subject: [PATCH] Upload script for the dataset creation

---
 create_owl_dataset.py | 319 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 319 insertions(+)
 create mode 100644 create_owl_dataset.py

diff --git a/create_owl_dataset.py b/create_owl_dataset.py
new file mode 100644
index 0000000..28caf1b
--- /dev/null
+++ b/create_owl_dataset.py
@@ -0,0 +1,319 @@
+import os
+from PIL import Image
+from glob import glob
+import pandas as pd
+import shutil
+from annoy import AnnoyIndex
+from transformers import Owlv2Processor, Owlv2ForObjectDetection
+import torch
+import numpy as np
+from transformers.utils.constants import OPENAI_CLIP_MEAN, OPENAI_CLIP_STD
+
+TOP_COUNT = 30
+feature_dim = 512
+parent_dir = 'path_to_the_main_dir'
+
+#path to the directory with images annotated on image level
+parentpath = os.path.join(parent_dir, 'dataset/')
+
+#directory in which we save the selected images
+owlpath = os.path.join(parent_dir, 'owl_dataset')
+ext = '.jpg' #image file extension [.jpg or .png]
+
+#path to the directory with non-annotated data
+base_dir = 'path_to_the_non_annotated_dataset'
+
+if not os.path.isdir(owlpath):
+    os.mkdir(owlpath)
+
+im_path = os.path.join(owlpath, 'images')
+if not os.path.isdir(im_path):
+    os.mkdir(im_path)
+lb_path = os.path.join(owlpath, 'labels')
+if not os.path.isdir(lb_path):
+    os.mkdir(lb_path)
+
+device = "cuda:0" if torch.cuda.is_available() else "cpu"
+processor = Owlv2Processor.from_pretrained("google/owlv2-base-patch16-ensemble")
+model = Owlv2ForObjectDetection.from_pretrained("google/owlv2-base-patch16-ensemble").to(device)
+
+#path to the csv file with the top 50 regions of interest and their coordinates, pre-computed for each painting of the WikiArt dataset
+df = pd.read_csv("path_to_the_objectnesses_fle/objectness_wikiart.csv")
+
+#path to the ANNOY index file for the WikiArt dataset, used to look up similar objects across images
+t = AnnoyIndex(feature_dim, metric='angular')
+t.load('path_to_the_annoy_index/annoy_wikiart.ann')
+
+
+#retrieve the most similar regions using ANNOY
+def get_similar_images_annoy(base_vector):
+    indices, dists = t.get_nns_by_vector(base_vector, TOP_COUNT, include_distances=True)
+    return indices, dists
+
+def get_preprocessed_image(pixel_values):
+    pixel_values = pixel_values.detach().cpu().squeeze().numpy()
+    unnormalized_image = (pixel_values * np.array(OPENAI_CLIP_STD)[:, None, None]) + np.array(OPENAI_CLIP_MEAN)[:, None, None]
+    unnormalized_image = (unnormalized_image * 255).astype(np.uint8)
+    unnormalized_image = np.moveaxis(unnormalized_image, 0, -1)
+    unnormalized_image = Image.fromarray(unnormalized_image)
+    return unnormalized_image
+
+#object detection using OWLv2
+def detectobject(imgpath, label, threshold=0.4):
+    """
+    Parameters
+    ----------
+    imgpath: string with the path to the image
+    label: string with the searched object
+    threshold: minimum detection score for a box to be kept
+
+    Returns
+    -------
+    boxes: list of found bounding boxes
+    query_embeddings: list of embeddings of the found objects
+    """
+
+    raw_image = Image.open(imgpath)
+    texts = [[label]]
+    inputs = processor(text=texts, images=raw_image, return_tensors="pt").to(device)
+    with torch.no_grad():
+        outputs = model(**inputs)
+    unnormalized_image = get_preprocessed_image(inputs.pixel_values)
+
+    # Convert outputs (bounding boxes and class logits)
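+    # outputs.image_embeds holds one embedding per image patch (a 60x60 grid of
+    # 768-d vectors for the padded 960x960 input, i.e. 3600 patches); the class
+    # prediction head projects them into the same 512-d class-embedding space the
+    # ANNOY index was built on, and the sigmoid of the max logit gives the score
+    # of the text query for each predicted box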
+    target_sizes = torch.Tensor([unnormalized_image.size[::-1]])
+    image_features = outputs.image_embeds.reshape(3600, 768)
+    source_class_embeddings = model.class_predictor(image_features)[1]
+    probs = torch.max(outputs.logits, dim=-1)
+    pred_boxes = outputs.pred_boxes[0]
+    scores_ = torch.sigmoid(probs.values)[0]
+    query_embeddings = []
+
+    boxes = []
+    for i in range(len(scores_)):
+        if scores_[i] > threshold:
+            query_embeddings.append(source_class_embeddings[i])
+            boxes.append(pred_boxes[i].detach().cpu().numpy())
+    raw_image.close()
+
+    return boxes, query_embeddings
+
+#Calculate the Intersection over Union (IoU) of two bounding boxes
+def calculate_iou(box1, box2):
+    """
+    Parameters
+    ----------
+    box1 : list, tuple or array-like
+        The (x1, y1, x2, y2) coordinates of the first bounding box.
+    box2 : list, tuple or array-like
+        The (x1, y1, x2, y2) coordinates of the second bounding box.
+
+    Returns
+    -------
+    float
+        The IoU of box1 and box2.
+    """
+
+    x1_1, y1_1, x2_1, y2_1 = box1
+    x1_2, y1_2, x2_2, y2_2 = box2
+
+    # Calculate the coordinates of the intersection rectangle
+    xi1 = max(x1_1, x1_2)
+    yi1 = max(y1_1, y1_2)
+    xi2 = min(x2_1, x2_2)
+    yi2 = min(y2_1, y2_2)
+
+    # Calculate the area of the intersection rectangle
+    inter_width = max(xi2 - xi1, 0)
+    inter_height = max(yi2 - yi1, 0)
+    inter_area = inter_width * inter_height
+
+    # Calculate the area of both bounding boxes
+    box1_area = (x2_1 - x1_1) * (y2_1 - y1_1)
+    box2_area = (x2_2 - x1_2) * (y2_2 - y1_2)
+
+    # Calculate the area of the union
+    union_area = box1_area + box2_area - inter_area
+
+    # Calculate the IoU
+    iou = inter_area / union_area if union_area != 0 else 0
+
+    return iou
+
+#convert a normalized (cx, cy, w, h) box into (x1, y1, x2, y2) corners clamped to [0, 1]
+def center_to_corners_format(box):
+    center_x, center_y, width, height = box
+    x1 = max(center_x - 0.5 * width, 0)
+    y1 = max(center_y - 0.5 * height, 0)
+    x2 = min(center_x + 0.5 * width, 1)
+    y2 = min(center_y + 0.5 * height, 1)
+
+    bboxes_corners = [x1, y1, x2, y2]
+    return bboxes_corners
+
+def rescale_owl(raw_image, box):
+    # rescale normalized OWLv2 coordinates (relative to the square padded input) back to the original image size
+    img_h = raw_image.height
+    img_w = raw_image.width
+
+    width_ratio = 1
+    height_ratio = 1
+
+    if img_w < img_h:
+        width_ratio = img_w / img_h
+    elif img_h < img_w:
+        height_ratio = img_h / img_w
+
+    img_w = img_w / width_ratio
+    img_h = img_h / height_ratio
+
+    scale_fct = [img_w, img_h, img_w, img_h]
+    box = np.array(box) * np.array(scale_fct)
+
+    return box
+
+#build a zero-padded file name such as e000001
+def create_new_name(counter):
+    fname = "e"
+    fname += "0" * (6 - len(str(counter)))
+    fname += str(counter)
+
+    return fname
+
+#creation of the annotations
+def owl_annoy_annotation(labels, folders):
+    """
+    Parameters
+    ----------
+    labels: list of labels used for the annotations
+    folders: list of folders with files pre-selected and annotated on image level
+
+    ----------
+    The function copies images and creates txt files with annotations in YOLO style.
+    """
+
+    #all objects with their bounding boxes
+    annotations = []
+
+    for i in range(len(labels)):
+        imgpaths = glob(parentpath + folders[i] + "/*" + ext)
+        label = labels[i]
+        folder = folders[i]
+        print(label)
+        img_counter = 0
+        #for all images of the current label
+        for file in imgpaths:
+            img_counter += 1
+            print(img_counter)
+            try:
+                boxes, query_embeddings = detectobject(file, label, 0.4)
+            except Exception:
+                print(file)
+                continue
+            #if the current type of object wasn't detected with OWL, skip this image
+            if len(boxes) == 0:
+                continue
+
+            #for all found objects, use their embeddings to find similar objects in the WikiArt dataset using ANNOY
+            for j in range(len(query_embeddings)):
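+                #the class embedding of the detected object is used as the query vector;
+                #the TOP_COUNT nearest WikiArt regions returned by ANNOY are treated as
+                #candidate pseudo-labels and are verified below by re-running the detector
+                #on each candidate image and requiring IoU > 0.8 with the stored region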
+                #search objects similar to the found embeddings using ANNOY
+                similar_img_ids, distances = get_similar_images_annoy(query_embeddings[j])
+                df_selected = df.iloc[similar_img_ids]
+                #paths to the similar images
+                similar_images = list(df_selected['file_path'])
+                #coordinates of the bounding box of the object
+                cxs = list(df_selected['cx'])
+                cys = list(df_selected['cy'])
+                ws = list(df_selected['w'])
+                hs = list(df_selected['h'])
+
+                for k in range(TOP_COUNT):
+                    found_file = os.path.join(base_dir, similar_images[k])
+
+                    raw_image = Image.open(found_file)
+                    width, height = raw_image.size
+
+                    try:
+                        found_boxes, _ = detectobject(found_file, label, 0.4)
+                    except Exception:
+                        print(found_file)
+                        raw_image.close()
+                        continue
+
+                    if len(found_boxes) == 0:
+                        raw_image.close()
+                        continue
+
+                    #pre-computed candidate region in normalized corner coordinates
+                    box = center_to_corners_format([cxs[k], cys[k], ws[k], hs[k]])
+
+                    max_iou = 0
+                    for p in range(len(found_boxes)):
+                        iou = calculate_iou(box, center_to_corners_format(found_boxes[p]))
+                        if iou > max_iou:
+                            max_iou = iou
+
+                    if max_iou > 0.8:
+                        #save the found bounding box in YOLO format
+                        box = rescale_owl(raw_image, box)
+                        x = (box[0] + box[2]) / 2 / width
+                        y = (box[1] + box[3]) / 2 / height
+                        w = (box[2] - box[0]) / width
+                        h = (box[3] - box[1]) / height
+                        annotations.append([found_file, folder, x, y, w, h])
+                    raw_image.close()
+
+                #save the box of the request image too
+                raw_image = Image.open(file)
+                width, height = raw_image.size
+                box = center_to_corners_format(boxes[j])
+                box = rescale_owl(raw_image, box)
+                x = (box[0] + box[2]) / 2 / width
+                y = (box[1] + box[3]) / 2 / height
+                w = (box[2] - box[0]) / width
+                h = (box[3] - box[1]) / height
+                raw_image.close()
+                annotations.append([file, folder, x, y, w, h])
+
+    #remove duplicates and sort the final list by file name
+    annotations = [list(x) for x in set(tuple(row) for row in annotations)]
+    annotations = sorted(annotations, key=lambda l: l[0])
+
+    #read the list of classes and map each class name to its index
+    with open("/home/tetiana/yolo/DEArt/deart_classes.txt", "r") as f:
+        lines = f.readlines()
+
+    codes = dict()
+    for count, line in enumerate(lines):
+        codes[line.strip()] = count
+
+    counter = 0
+    previous_filename = ""
+    new_name = ""
+
+    #copy images and create files with annotations in YOLO style
+    for annotation in annotations:
+        filename = annotation[0]
+
+        if filename != previous_filename:
+            #a new source image: copy it and start a new label file
+            counter += 1
+            new_name = create_new_name(counter)
+            shutil.copyfile(filename, os.path.join(owlpath, "images", new_name + ".jpg"))
+            mode = "w"
+        else:
+            #another box for the same image: append to its label file
+            mode = "a"
+
+        with open(os.path.join(owlpath, "labels", new_name + ".txt"), mode) as lb_file:
+            lb_file.write("%d %.06f %.06f %.06f %.06f\n" % (codes[annotation[1]], annotation[2], annotation[3], annotation[4], annotation[5]))
+
+        previous_filename = filename
+
+
+#list of text labels (queries for OWLv2)
+labels = ['an apple', 'a banana', 'a butterfly', 'a boat', 'a cat', 'a cow', 'a crucifixion', 'a deer', 'a dog', 'a white dove', 'an eagle', 'a horse', 'a monkey', 'an orange',
+          'a nude', 'a rooster', 'a serpent', 'a skull', 'a sheep', 'a swan', 'a trumpet']
+
+#names of the folders with previously collected request images annotated on image level
+folders = ['apple', 'banana', 'butterfly', 'boat', 'cat', 'cow', 'crucifixion', 'deer', 'dog', 'dove', 'eagle', 'horse', 'monkey', 'orange',
+           'nude', 'rooster', 'serpent', 'skull', 'sheep', 'swan', 'trumpet']
+
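+#run the whole pipeline: detect the queried objects in the request images, transfer their
+#boxes to similar WikiArt images via the ANNOY index, and write the resulting images and
+#YOLO-style label files into the owl_dataset directory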
+owl_annoy_annotation(labels, folders)
--
GitLab
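Not part of the patch: a minimal sketch of how the generated owl_dataset could be sanity-checked after running the script. It assumes the owl_dataset/images and owl_dataset/labels layout created above and a local copy of the class list (the owlpath and classes_file values below are placeholders to adjust, not paths defined by the patch).

    import os
    from glob import glob

    owlpath = 'path_to_the_main_dir/owl_dataset'   # same placeholder as in the script
    classes_file = 'deart_classes.txt'             # assumed local copy of the class list

    # number of classes = number of lines in the class list used by the script
    with open(classes_file) as f:
        num_classes = len(f.readlines())

    for label_file in glob(os.path.join(owlpath, 'labels', '*.txt')):
        name = os.path.splitext(os.path.basename(label_file))[0]
        image_file = os.path.join(owlpath, 'images', name + '.jpg')
        if not os.path.exists(image_file):
            print('missing image for', label_file)
        with open(label_file) as f:
            for line in f:
                cls, x, y, w, h = line.split()
                if not 0 <= int(cls) < num_classes:
                    print('invalid class id in', label_file, cls)
                # YOLO boxes store normalized centre/size values, normally within [0, 1]
                if not all(0.0 <= float(v) <= 1.0 for v in (x, y, w, h)):
                    print('box coordinates outside [0, 1] in', label_file)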