remove ycb

b24a4a40 · Guillaume Duret · 7e21c51d · 7e21c51d · 7e21c51d · 7e21c51d
Commit b24a4a40 authored 2 years ago by Guillaume Duret
--- a/datasets/ycb/dataset.py
+++ b/datasets/ycb/dataset.py
-import torch.utils.data as data
-from PIL import Image
-import os
-import os.path
-import torch
-import numpy as np
-import torchvision.transforms as transforms
-import argparse
-import time
-import random
-from lib.transformations import quaternion_from_euler, euler_matrix, random_quaternion, quaternion_matrix
-import numpy.ma as ma
-import copy
-import scipy.misc
-import scipy.io as scio
-class PoseDataset(data.Dataset):
-    def __init__(self, mode, num_pt, add_noise, root, noise_trans, refine):
-        if mode == 'train':
-            self.path = 'datasets/ycb/dataset_config/train_data_list.txt'
-        elif mode == 'test':
-            self.path = 'datasets/ycb/dataset_config/test_data_list.txt'
-        self.num_pt = num_pt
-        self.root = root
-        self.add_noise = add_noise
-        self.noise_trans = noise_trans
-        self.list = []
-        self.real = []
-        self.syn = []
-        input_file = open(self.path)
-        while 1:
-            input_line = input_file.readline()
-            if not input_line:
-                break
-            if input_line[-1:] == '\n':
-                input_line = input_line[:-1]
-            if input_line[:5] == 'data/':
-                self.real.append(input_line)
-            else:
-                self.syn.append(input_line)
-            self.list.append(input_line)
-        input_file.close()
-        self.length = len(self.list)
-        self.len_real = len(self.real)
-        self.len_syn = len(self.syn)
-        class_file = open('datasets/ycb/dataset_config/classes.txt')
-        class_id = 1
-        self.cld = {}
-        while 1:
-            class_input = class_file.readline()
-            if not class_input:
-                break
-            input_file = open('{0}/models/{1}/points.xyz'.format(self.root, class_input[:-1]))
-            self.cld[class_id] = []
-            while 1:
-                input_line = input_file.readline()
-                if not input_line:
-                    break
-                input_line = input_line[:-1].split(' ')
-                self.cld[class_id].append([float(input_line[0]), float(input_line[1]), float(input_line[2])])
-            self.cld[class_id] = np.array(self.cld[class_id])
-            input_file.close()
-            class_id += 1
-        self.cam_cx_1 = 312.9869
-        self.cam_cy_1 = 241.3109
-        self.cam_fx_1 = 1066.778
-        self.cam_fy_1 = 1067.487
-        self.cam_cx_2 = 323.7872
-        self.cam_cy_2 = 279.6921
-        self.cam_fx_2 = 1077.836
-        self.cam_fy_2 = 1078.189
-        self.xmap = np.array([[j for i in range(640)] for j in range(480)])
-        self.ymap = np.array([[i for i in range(640)] for j in range(480)])
-        self.trancolor = transforms.ColorJitter(0.2, 0.2, 0.2, 0.05)
-        self.noise_img_loc = 0.0
-        self.noise_img_scale = 7.0
-        self.minimum_num_pt = 50
-        self.norm = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
-        self.symmetry_obj_idx = [12, 15, 18, 19, 20]
-        self.num_pt_mesh_small = 500
-        self.num_pt_mesh_large = 2600
-        self.refine = refine
-        self.front_num = 2
-        print(len(self.list))
-    def __getitem__(self, index):
-        img = Image.open('{0}/{1}-color.png'.format(self.root, self.list[index]))
-        depth = np.array(Image.open('{0}/{1}-depth.png'.format(self.root, self.list[index])))
-        label = np.array(Image.open('{0}/{1}-label.png'.format(self.root, self.list[index])))
-        meta = scio.loadmat('{0}/{1}-meta.mat'.format(self.root, self.list[index]))
-        if self.list[index][:8] != 'data_syn' and int(self.list[index][5:9]) >= 60:
-            cam_cx = self.cam_cx_2
-            cam_cy = self.cam_cy_2
-            cam_fx = self.cam_fx_2
-            cam_fy = self.cam_fy_2
-        else:
-            cam_cx = self.cam_cx_1
-            cam_cy = self.cam_cy_1
-            cam_fx = self.cam_fx_1
-            cam_fy = self.cam_fy_1
-        mask_back = ma.getmaskarray(ma.masked_equal(label, 0))
-        add_front = False
-        if self.add_noise:
-            for k in range(5):
-                seed = random.choice(self.syn)
-                front = np.array(self.trancolor(Image.open('{0}/{1}-color.png'.format(self.root, seed)).convert("RGB")))
-                front = np.transpose(front, (2, 0, 1))
-                f_label = np.array(Image.open('{0}/{1}-label.png'.format(self.root, seed)))
-                front_label = np.unique(f_label).tolist()[1:]
-                if len(front_label) < self.front_num:
-                   continue
-                front_label = random.sample(front_label, self.front_num)
-                for f_i in front_label:
-                    mk = ma.getmaskarray(ma.masked_not_equal(f_label, f_i))
-                    if f_i == front_label[0]:
-                        mask_front = mk
-                    else:
-                        mask_front = mask_front * mk
-                t_label = label * mask_front
-                if len(t_label.nonzero()[0]) > 1000:
-                    label = t_label
-                    add_front = True
-                    break
-        obj = meta['cls_indexes'].flatten().astype(np.int32)
-        while 1:
-            idx = np.random.randint(0, len(obj))
-            mask_depth = ma.getmaskarray(ma.masked_not_equal(depth, 0))
-            mask_label = ma.getmaskarray(ma.masked_equal(label, obj[idx]))
-            mask = mask_label * mask_depth
-            if len(mask.nonzero()[0]) > self.minimum_num_pt:
-                break
-        if self.add_noise:
-            img = self.trancolor(img)
-        rmin, rmax, cmin, cmax = get_bbox(mask_label)
-        img = np.transpose(np.array(img)[:, :, :3], (2, 0, 1))[:, rmin:rmax, cmin:cmax]
-        if self.list[index][:8] == 'data_syn':
-            seed = random.choice(self.real)
-            back = np.array(self.trancolor(Image.open('{0}/{1}-color.png'.format(self.root, seed)).convert("RGB")))
-            back = np.transpose(back, (2, 0, 1))[:, rmin:rmax, cmin:cmax]
-            img_masked = back * mask_back[rmin:rmax, cmin:cmax] + img
-        else:
-            img_masked = img
-        if self.add_noise and add_front:
-            img_masked = img_masked * mask_front[rmin:rmax, cmin:cmax] + front[:, rmin:rmax, cmin:cmax] * ~(mask_front[rmin:rmax, cmin:cmax])
-        if self.list[index][:8] == 'data_syn':
-            img_masked = img_masked + np.random.normal(loc=0.0, scale=7.0, size=img_masked.shape)
-        # p_img = np.transpose(img_masked, (1, 2, 0))
-        # scipy.misc.imsave('temp/{0}_input.png'.format(index), p_img)
-        # scipy.misc.imsave('temp/{0}_label.png'.format(index), mask[rmin:rmax, cmin:cmax].astype(np.int32))
-        target_r = meta['poses'][:, :, idx][:, 0:3]
-        target_t = np.array([meta['poses'][:, :, idx][:, 3:4].flatten()])
-        add_t = np.array([random.uniform(-self.noise_trans, self.noise_trans) for i in range(3)])
-        choose = mask[rmin:rmax, cmin:cmax].flatten().nonzero()[0]
-        if len(choose) > self.num_pt:
-            c_mask = np.zeros(len(choose), dtype=int)
-            c_mask[:self.num_pt] = 1
-            np.random.shuffle(c_mask)
-            choose = choose[c_mask.nonzero()]
-        else:
-            choose = np.pad(choose, (0, self.num_pt - len(choose)), 'wrap')
-        depth_masked = depth[rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32)
-        xmap_masked = self.xmap[rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32)
-        ymap_masked = self.ymap[rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32)
-        choose = np.array([choose])
-        cam_scale = meta['factor_depth'][0][0]
-        pt2 = depth_masked / cam_scale
-        pt0 = (ymap_masked - cam_cx) * pt2 / cam_fx
-        pt1 = (xmap_masked - cam_cy) * pt2 / cam_fy
-        cloud = np.concatenate((pt0, pt1, pt2), axis=1)
-        if self.add_noise:
-            cloud = np.add(cloud, add_t)
-        # fw = open('temp/{0}_cld.xyz'.format(index), 'w')
-        # for it in cloud:
-        #    fw.write('{0} {1} {2}\n'.format(it[0], it[1], it[2]))
-        # fw.close()
-        dellist = [j for j in range(0, len(self.cld[obj[idx]]))]
-        if self.refine:
-            dellist = random.sample(dellist, len(self.cld[obj[idx]]) - self.num_pt_mesh_large)
-        else:
-            dellist = random.sample(dellist, len(self.cld[obj[idx]]) - self.num_pt_mesh_small)
-        model_points = np.delete(self.cld[obj[idx]], dellist, axis=0)
-        # fw = open('temp/{0}_model_points.xyz'.format(index), 'w')
-        # for it in model_points:
-        #    fw.write('{0} {1} {2}\n'.format(it[0], it[1], it[2]))
-        # fw.close()
-        target = np.dot(model_points, target_r.T)
-        if self.add_noise:
-            target = np.add(target, target_t + add_t)
-        else:
-            target = np.add(target, target_t)
-        # fw = open('temp/{0}_tar.xyz'.format(index), 'w')
-        # for it in target:
-        #    fw.write('{0} {1} {2}\n'.format(it[0], it[1], it[2]))
-        # fw.close()
-        return torch.from_numpy(cloud.astype(np.float32)), \
-               torch.LongTensor(choose.astype(np.int32)), \
-               self.norm(torch.from_numpy(img_masked.astype(np.float32))), \
-               torch.from_numpy(target.astype(np.float32)), \
-               torch.from_numpy(model_points.astype(np.float32)), \
-               torch.LongTensor([int(obj[idx]) - 1])
-    def __len__(self):
-        return self.length
-    def get_sym_list(self):
-        return self.symmetry_obj_idx
-    def get_num_points_mesh(self):
-        if self.refine:
-            return self.num_pt_mesh_large
-        else:
-            return self.num_pt_mesh_small
-border_list = [-1, 40, 80, 120, 160, 200, 240, 280, 320, 360, 400, 440, 480, 520, 560, 600, 640, 680]
-img_width = 480
-img_length = 640
-def get_bbox(label):
-    rows = np.any(label, axis=1)
-    cols = np.any(label, axis=0)
-    rmin, rmax = np.where(rows)[0][[0, -1]]
-    cmin, cmax = np.where(cols)[0][[0, -1]]
-    rmax += 1
-    cmax += 1
-    r_b = rmax - rmin
-    for tt in range(len(border_list)):
-        if r_b > border_list[tt] and r_b < border_list[tt + 1]:
-            r_b = border_list[tt + 1]
-            break
-    c_b = cmax - cmin
-    for tt in range(len(border_list)):
-        if c_b > border_list[tt] and c_b < border_list[tt + 1]:
-            c_b = border_list[tt + 1]
-            break
-    center = [int((rmin + rmax) / 2), int((cmin + cmax) / 2)]
-    rmin = center[0] - int(r_b / 2)
-    rmax = center[0] + int(r_b / 2)
-    cmin = center[1] - int(c_b / 2)
-    cmax = center[1] + int(c_b / 2)
-    if rmin < 0:
-        delt = -rmin
-        rmin = 0
-        rmax += delt
-    if cmin < 0:
-        delt = -cmin
-        cmin = 0
-        cmax += delt
-    if rmax > img_width:
-        delt = rmax - img_width
-        rmax = img_width
-        rmin -= delt
-    if cmax > img_length:
-        delt = cmax - img_length
-        cmax = img_length
-        cmin -= delt
-    return rmin, rmax, cmin, cmax
--- a/datasets/ycb/dataset_config/classes.txt
+++ b/datasets/ycb/dataset_config/classes.txt
-002_master_chef_can
-003_cracker_box
-004_sugar_box
-005_tomato_soup_can
-006_mustard_bottle
-007_tuna_fish_can
-008_pudding_box
-009_gelatin_box
-010_potted_meat_can
-011_banana
-019_pitcher_base
-021_bleach_cleanser
-024_bowl
-025_mug
-035_power_drill
-036_wood_block
-037_scissors
-040_large_marker
-051_large_clamp
-052_extra_large_clamp
-061_foam_brick
--- a/datasets/ycb/dataset_config/test_data_list.txt
+++ b/datasets/ycb/dataset_config/test_data_list.txt
--- a/datasets/ycb/dataset_config/train_data_list.txt
+++ b/datasets/ycb/dataset_config/train_data_list.txt