diff --git a/create_owl_dataset.py b/create_owl_dataset.py
deleted file mode 100644
index e0bf99e01f507467333b73b3490233744a67a3b7..0000000000000000000000000000000000000000
--- a/create_owl_dataset.py
+++ /dev/null
@@ -1,364 +0,0 @@
-import os
-from PIL import Image
-from glob import glob
-import pandas as pd
-import shutil
-from annoy import AnnoyIndex
-from transformers import Owlv2Processor, Owlv2ForObjectDetection
-import torch
-import numpy as np
-from transformers.utils.constants import OPENAI_CLIP_MEAN, OPENAI_CLIP_STD
-
-TOP_COUNT = 10
-feature_dim = 512
-parent_dir = '/home/tetiana/owl/'
-
-# path to the directory with images annotated on image level
-parentpath = os.path.join(parent_dir, 'test/')
-
-# directory in which we save the selected images
-owlpath = os.path.join(parent_dir, 't_dataset')
-ext = '.jpg'  # image file extension [.jpg or .png]
-
-# path to the directory with non-annotated data
-base_dir = '/home/tetiana/ty/projects/Archive_/'
-
-if not os.path.isdir(owlpath):
-    os.mkdir(owlpath)
-
-im_path = os.path.join(owlpath, 'images')
-if not os.path.isdir(im_path):
-    os.mkdir(im_path)
-lb_path = os.path.join(owlpath, 'labels')
-if not os.path.isdir(lb_path):
-    os.mkdir(lb_path)
-
-device = "cuda:0" if torch.cuda.is_available() else "cpu"
-processor = Owlv2Processor.from_pretrained("google/owlv2-base-patch16-ensemble")
-model = Owlv2ForObjectDetection.from_pretrained("google/owlv2-base-patch16-ensemble").to(device)
-
-# path to the csv file with the TOP 50 regions of interest (and their coordinates) pre-calculated for each painting of the WikiArt dataset
-df = pd.read_csv("/home/tetiana/ty/projects/Archive_/objectness_wikiart_final_all_new.csv")
-
-# path to the ANNOY index file for the WikiArt dataset with information about the similarity of the objects in the images
-t = AnnoyIndex(feature_dim, metric='angular')
-t.load('/home/tetiana/ty/projects/Archive_/annoy_new.ann')
-
-
-# retrieve similar images using ANNOY
-def get_similar_images_annoy(base_vector):
-    indices, dists = t.get_nns_by_vector(base_vector, TOP_COUNT, include_distances=True)
-    return indices, dists
-
-def get_preprocessed_image(pixel_values):
-    pixel_values = pixel_values.detach().cpu().squeeze().numpy()
-    unnormalized_image = (pixel_values * np.array(OPENAI_CLIP_STD)[:, None, None]) + np.array(OPENAI_CLIP_MEAN)[:, None, None]
-    unnormalized_image = (unnormalized_image * 255).astype(np.uint8)
-    unnormalized_image = np.moveaxis(unnormalized_image, 0, -1)
-    unnormalized_image = Image.fromarray(unnormalized_image)
-    return unnormalized_image
-
-# object detection using OWL-ViT
-def detectobject(imgpath, label, threshold=0.4):
-    """
-    Parameters
-    ----------
-    imgpath: string with the path to the image
-    label: string with the searched object
-    threshold: minimum detection score
-
-    Returns
-    -------
-    boxes: list of found bounding boxes
-    query_embeddings: list of embeddings of the found objects
-    """
-
-    raw_image = Image.open(imgpath)
-    texts = [[label]]
-    inputs = processor(text=texts, images=raw_image, return_tensors="pt").to(device)
-    with torch.no_grad():
-        outputs = model(**inputs)
-    unnormalized_image = get_preprocessed_image(inputs.pixel_values)
-
-    # Convert outputs (bounding boxes and class logits)
-    target_sizes = torch.Tensor([unnormalized_image.size[::-1]])
-    image_features = outputs.image_embeds.reshape(3600, 768)
-    source_class_embeddings = model.class_predictor(image_features)[1]
-    probs = torch.max(outputs.logits, dim=-1)
-    pred_boxes = outputs.pred_boxes[0]
-    scores_ = torch.sigmoid(probs.values)[0]
-    query_embeddings = []
-    boxes = []
-    for i in range(len(scores_)):
-        if scores_[i] > threshold:
-            query_embeddings.append(source_class_embeddings[i])
-            boxes.append(pred_boxes[i].detach().cpu().numpy())
-    raw_image.close()
-
-    return boxes, query_embeddings
-
-# Calculate the Intersection over Union (IoU) of two bounding boxes
-def calculate_iou(box1, box2):
-    """
-    Parameters
-    ----------
-    box1 : list, tuple or array-like
-        The (x1, y1, x2, y2) coordinates of the first bounding box.
-    box2 : list, tuple or array-like
-        The (x1, y1, x2, y2) coordinates of the second bounding box.
-
-    Returns
-    -------
-    float
-        The IoU of box1 and box2.
-    """
-
-    x1_1, y1_1, x2_1, y2_1 = box1
-    x1_2, y1_2, x2_2, y2_2 = box2
-
-    # Calculate the coordinates of the intersection rectangle
-    xi1 = max(x1_1, x1_2)
-    yi1 = max(y1_1, y1_2)
-    xi2 = min(x2_1, x2_2)
-    yi2 = min(y2_1, y2_2)
-
-    # Calculate the area of the intersection rectangle
-    inter_width = max(xi2 - xi1, 0)
-    inter_height = max(yi2 - yi1, 0)
-    inter_area = inter_width * inter_height
-
-    # Calculate the area of both bounding boxes
-    box1_area = (x2_1 - x1_1) * (y2_1 - y1_1)
-    box2_area = (x2_2 - x1_2) * (y2_2 - y1_2)
-
-    # Calculate the area of the union
-    union_area = box1_area + box2_area - inter_area
-
-    # Calculate the IoU
-    iou = inter_area / union_area if union_area != 0 else 0
-
-    return iou
-
-# convert a normalized box from (cx, cy, w, h) to corner format
-def center_to_corners_format(box):
-    center_x, center_y, width, height = box
-    x1 = max(center_x - 0.5 * width, 0)
-    y1 = max(center_y - 0.5 * height, 0)
-    x2 = min(center_x + 0.5 * width, 1)
-    y2 = min(center_y + 0.5 * height, 1)
-
-    bboxes_corners = [x1, y1, x2, y2]
-    return bboxes_corners
-
-def rescale_owl(box, width, height):
-    # rescale normalized OWL coordinates (relative to the padded square input) to pixel coordinates of the original image
-    img_h = height
-    img_w = width
-
-    width_ratio = 1
-    height_ratio = 1
-
-    if img_w < img_h:
-        width_ratio = img_w / img_h
-    elif img_h < img_w:
-        height_ratio = img_h / img_w
-
-    img_w = img_w / width_ratio
-    img_h = img_h / height_ratio
-
-    scale_fct = [img_w, img_h, img_w, img_h]
-    box = np.array(box) * np.array(scale_fct)
-
-    return box
-
-def create_new_name(counter):
-    fname = "e"
-    fname += "0" * (6 - len(str(counter)))
-    fname += str(counter)
-
-    return fname
-
-def to_yolo_format(box, width, height):
-    """
-    Parameters
-    ----------
-    box: bounding box to convert
-    width, height: size of the image
-
-    ----------
-    converts a box from the OWL format to the YOLO format
-    """
-    box = center_to_corners_format(box)
-    box = rescale_owl(box, width, height)
-    x = (box[0] + box[2]) / 2 / width
-    y = (box[1] + box[3]) / 2 / height
-    w = (box[2] - box[0]) / width
-    h = (box[3] - box[1]) / height
-    return x, y, w, h
-
-def find_for_label(label, folder, imgpaths, threshold=0.4):
-    """
-    Parameters
-    ----------
-    label: current label for the annotation
-    folder: current folder with images annotated on image level
-    imgpaths: list with paths to the images
-    threshold: current detection threshold
-    ----------
-    creates annotations using the files from a folder annotated on image level
-    """
-    annot = []
-    count = 0
-    iter = 0
-
-    for file in imgpaths:
-        iter += 1
-        print(iter)
-        try:
-            boxes, query_embeddings = detectobject(file, label, threshold)
-        except:
-            print(file)
-            continue
-        # if the current type of object wasn't detected with OWL, we skip this image
-        if len(boxes) == 0:
-            continue
-
-        # for all found objects we use their embeddings to find similar objects in the WikiArt dataset using ANNOY
-        for j in range(len(query_embeddings)):
-            # search objects similar to the found embeddings using ANNOY
-            similar_img_ids, distances = get_similar_images_annoy(query_embeddings[j])
-            df_selected = df.iloc[similar_img_ids]
-            # paths to the images
-            similar_images = list(df_selected['file_path'])
-            # coordinates of the bounding box of the object
-            cxs = list(df_selected['cx'])
-            cys = list(df_selected['cy'])
-            ws = list(df_selected['w'])
-            hs = list(df_selected['h'])
-
-            for k in range(TOP_COUNT):
-                found_file = os.path.join(base_dir, similar_images[k])
-                raw_image = Image.open(found_file)
-                width, height = raw_image.size
-                raw_image.close()
-
-                try:
-                    found_boxes, _ = detectobject(found_file, label, threshold)
-                except:
-                    print(found_file)
-                    continue
-
-                if len(found_boxes) == 0:
-                    continue
-
-                box = center_to_corners_format([cxs[k], cys[k], ws[k], hs[k]])
-
-                max_iou = 0
-                for p in range(len(found_boxes)):
-                    iou = calculate_iou(box, center_to_corners_format(found_boxes[p]))
-                    if iou > max_iou:
-                        max_iou = iou
-
-                if max_iou > 0.8:
-                    # save the found bounding box in YOLO format
-                    x, y, w, h = to_yolo_format([cxs[k], cys[k], ws[k], hs[k]], width, height)
-                    annot.append([found_file, folder, x, y, w, h])
-                    count += 1
-
-            # save the box of the request image too
-            raw_image = Image.open(file)
-            width, height = raw_image.size
-            raw_image.close()
-            x, y, w, h = to_yolo_format(boxes[j], width, height)
-            annot.append([file, folder, x, y, w, h])
-            count += 1
-
-    return annot, count
-
-
-# creation of the annotations
-def owl_annoy_annotation(labels, folders):
-    """
-    Parameters
-    ----------
-    labels: list of labels used for the annotations
-    folders: list of folders with files pre-selected and annotated on image level
-
-    ----------
-    copies the images and creates txt files with annotations in YOLO style
-    """
-
-    # all objects with their bounding boxes
-    annotations = []
-
-    for i in range(len(labels)):
-        imgpaths = glob(parentpath + folders[i] + "/*" + ext)
-        label = labels[i]
-        folder = folders[i]
-        print('Current label: ' + label)
-        # request_count = round(len(imgpaths)*1.2, 0)
-        request_count = 45
-        selected_threshold = 0.4
-
-        not_enough_count = True
-        while not_enough_count:
-            print('next round')
-            cur_annot, found_count = find_for_label(label, folder, imgpaths, selected_threshold)
-            print('found on this round: ' + str(found_count))
-
-            if found_count < request_count:
-                selected_threshold -= 0.1
-                if selected_threshold < 0.2:
-                    not_enough_count = False
-            else:
-                not_enough_count = False
-
-        annotations += cur_annot
-
-    # get rid of the duplicates and sort the final list by file name
-    annotations = [list(x) for x in set(tuple(row) for row in annotations)]
-    annotations = sorted(annotations, key=lambda l: l[0])
-
-    # path to the file with the list of classes
-    f = open("/home/tetiana/yolo/DEArt/deart_classes.txt", "r")
-    lines = f.readlines()
-    codes = dict()
-
-    count = 0
-    for line in lines:
-        codes[line.strip()] = count
-        count += 1
-
-    counter = 0
-    previous_filename = ""
-
-    # copy the images and create the files with annotations in YOLO style
-    for annotation in annotations:
-        filename = annotation[0]
-
-        if filename != previous_filename:
-            counter += 1
-            new_name = create_new_name(counter)
-            shutil.copyfile(filename, os.path.join(owlpath, "images", new_name + ".jpg"))
-
-        # create the label file for the first box of an image, append the following boxes of the same image
-        if filename != previous_filename:
-            with open(os.path.join(owlpath, "labels", new_name + ".txt"), "w") as f:
-                f.write("%d %.06f %.06f %.06f %.06f\n" % (codes[annotation[1]], annotation[2], annotation[3], annotation[4], annotation[5]))
-        else:
-            with open(os.path.join(owlpath, "labels", new_name + ".txt"), "a") as f:
-                f.write("%d %.06f %.06f %.06f %.06f\n" % (codes[annotation[1]], annotation[2], annotation[3], annotation[4], annotation[5]))
-
-        previous_filename = filename
-
-
-#list of labels
-#labels = ['an apple', 'a banana', 'a butterfly', 'a boat', 'a cat', 'a cow', 'a crucifixion', 'a deer', 'a dog', 'a white dove', 'an eagle', 'a horse', 'a monkey', >
-#          'a nude', 'a rooster', 'a serpent', 'a skull', 'a sheep', 'a swan', 'a trumpet']
-labels = ['a dog', 'a swan']
-#name of folders with previously collected request images annotated on image level
-#folders = ['apple', 'banana', 'butterfly', 'boat', 'cat', 'cow', 'crucifixion', 'deer', 'dog', 'dove', 'eagle', 'horse', 'monkey', 'orange',
-#           'nude', 'rooster', 'serpent', 'skull', 'sheep', 'swan', 'trumpet']
-
-folders = ['dog', 'swan']
-
-owl_annoy_annotation(labels, folders)