Commit 7e21c51d authored by Guillaume Duret

origin Densefusion

parent 56aa04a0
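# ---------------------------------------------------------------------------
# datasets/linemod/dataset.py -- LineMOD data loader (file path presumed from
# the standard DenseFusion repository layout).
# ---------------------------------------------------------------------------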
import torch.utils.data as data
from PIL import Image
import os
import os.path
import errno
import torch
import json
import codecs
import numpy as np
import sys
import torchvision.transforms as transforms
import argparse
import time
import random
import numpy.ma as ma
import copy
import scipy.misc
import scipy.io as scio
import yaml
import cv2
class PoseDataset(data.Dataset):
    def __init__(self, mode, num, add_noise, root, noise_trans, refine):
        self.objlist = [1, 2, 4, 5, 6, 8, 9, 10, 11, 12, 13, 14, 15]
        self.mode = mode

        self.list_rgb = []
        self.list_depth = []
        self.list_label = []
        self.list_obj = []
        self.list_rank = []
        self.meta = {}
        self.pt = {}
        self.root = root
        self.noise_trans = noise_trans
        self.refine = refine

        item_count = 0
        for item in self.objlist:
            if self.mode == 'train':
                input_file = open('{0}/data/{1}/train.txt'.format(self.root, '%02d' % item))
            else:
                input_file = open('{0}/data/{1}/test.txt'.format(self.root, '%02d' % item))
            while 1:
                item_count += 1
                input_line = input_file.readline()
                # in test mode, keep only every 10th frame
                if self.mode == 'test' and item_count % 10 != 0:
                    continue
                if not input_line:
                    break
                if input_line[-1:] == '\n':
                    input_line = input_line[:-1]
                self.list_rgb.append('{0}/data/{1}/rgb/{2}.png'.format(self.root, '%02d' % item, input_line))
                self.list_depth.append('{0}/data/{1}/depth/{2}.png'.format(self.root, '%02d' % item, input_line))
                if self.mode == 'eval':
                    self.list_label.append('{0}/segnet_results/{1}_label/{2}_label.png'.format(self.root, '%02d' % item, input_line))
                else:
                    self.list_label.append('{0}/data/{1}/mask/{2}.png'.format(self.root, '%02d' % item, input_line))
                self.list_obj.append(item)
                self.list_rank.append(int(input_line))

            meta_file = open('{0}/data/{1}/gt.yml'.format(self.root, '%02d' % item), 'r')
            self.meta[item] = yaml.load(meta_file, Loader=yaml.SafeLoader)  # explicit Loader; recent PyYAML requires it
            self.pt[item] = ply_vtx('{0}/models/obj_{1}.ply'.format(self.root, '%02d' % item))
            print("Object {0} buffer loaded".format(item))

        self.length = len(self.list_rgb)

        # LineMOD camera intrinsics, in pixels
        self.cam_cx = 325.26110
        self.cam_cy = 242.04899
        self.cam_fx = 572.41140
        self.cam_fy = 573.57043

        # per-pixel row/column index maps used for depth back-projection
        self.xmap = np.array([[j for i in range(640)] for j in range(480)])
        self.ymap = np.array([[i for i in range(640)] for j in range(480)])

        self.num = num
        self.add_noise = add_noise
        self.trancolor = transforms.ColorJitter(0.2, 0.2, 0.2, 0.05)
        self.norm = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        self.border_list = [-1, 40, 80, 120, 160, 200, 240, 280, 320, 360, 400, 440, 480, 520, 560, 600, 640, 680]
        self.num_pt_mesh_large = 500
        self.num_pt_mesh_small = 500
        self.symmetry_obj_idx = [7, 8]
    def __getitem__(self, index):
        img = Image.open(self.list_rgb[index])
        ori_img = np.array(img)
        depth = np.array(Image.open(self.list_depth[index]))
        label = np.array(Image.open(self.list_label[index]))
        obj = self.list_obj[index]
        rank = self.list_rank[index]

        # frames of object 2 carry several annotations; pick the one for obj_id 2
        if obj == 2:
            for i in range(0, len(self.meta[obj][rank])):
                if self.meta[obj][rank][i]['obj_id'] == 2:
                    meta = self.meta[obj][rank][i]
                    break
        else:
            meta = self.meta[obj][rank][0]

        mask_depth = ma.getmaskarray(ma.masked_not_equal(depth, 0))
        if self.mode == 'eval':
            mask_label = ma.getmaskarray(ma.masked_equal(label, np.array(255)))
        else:
            mask_label = ma.getmaskarray(ma.masked_equal(label, np.array([255, 255, 255])))[:, :, 0]
        mask = mask_label * mask_depth

        if self.add_noise:
            img = self.trancolor(img)

        img = np.array(img)[:, :, :3]
        img = np.transpose(img, (2, 0, 1))
        img_masked = img

        if self.mode == 'eval':
            rmin, rmax, cmin, cmax = get_bbox(mask_to_bbox(mask_label))
        else:
            rmin, rmax, cmin, cmax = get_bbox(meta['obj_bb'])
        img_masked = img_masked[:, rmin:rmax, cmin:cmax]
        #p_img = np.transpose(img_masked, (1, 2, 0))
        #scipy.misc.imsave('evaluation_result/{0}_input.png'.format(index), p_img)

        target_r = np.resize(np.array(meta['cam_R_m2c']), (3, 3))
        target_t = np.array(meta['cam_t_m2c'])
        add_t = np.array([random.uniform(-self.noise_trans, self.noise_trans) for i in range(3)])

        choose = mask[rmin:rmax, cmin:cmax].flatten().nonzero()[0]
        if len(choose) == 0:
            cc = torch.LongTensor([0])
            return (cc, cc, cc, cc, cc, cc)

        # sample exactly self.num foreground pixels (subsample or wrap-pad)
        if len(choose) > self.num:
            c_mask = np.zeros(len(choose), dtype=int)
            c_mask[:self.num] = 1
            np.random.shuffle(c_mask)
            choose = choose[c_mask.nonzero()]
        else:
            choose = np.pad(choose, (0, self.num - len(choose)), 'wrap')

        depth_masked = depth[rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32)
        xmap_masked = self.xmap[rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32)
        ymap_masked = self.ymap[rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32)
        choose = np.array([choose])

        # back-project the sampled depth pixels through the pinhole model
        cam_scale = 1.0
        pt2 = depth_masked / cam_scale
        pt0 = (ymap_masked - self.cam_cx) * pt2 / self.cam_fx
        pt1 = (xmap_masked - self.cam_cy) * pt2 / self.cam_fy
        cloud = np.concatenate((pt0, pt1, pt2), axis=1)
        cloud = cloud / 1000.0   # mm -> m
        if self.add_noise:
            cloud = np.add(cloud, add_t)

        #fw = open('evaluation_result/{0}_cld.xyz'.format(index), 'w')
        #for it in cloud:
        #    fw.write('{0} {1} {2}\n'.format(it[0], it[1], it[2]))
        #fw.close()

        model_points = self.pt[obj] / 1000.0   # mm -> m
        dellist = [j for j in range(0, len(model_points))]
        dellist = random.sample(dellist, len(model_points) - self.num_pt_mesh_small)
        model_points = np.delete(model_points, dellist, axis=0)

        #fw = open('evaluation_result/{0}_model_points.xyz'.format(index), 'w')
        #for it in model_points:
        #    fw.write('{0} {1} {2}\n'.format(it[0], it[1], it[2]))
        #fw.close()

        # ground-truth model points transformed into the camera frame
        target = np.dot(model_points, target_r.T)
        if self.add_noise:
            target = np.add(target, target_t / 1000.0 + add_t)
            out_t = target_t / 1000.0 + add_t
        else:
            target = np.add(target, target_t / 1000.0)
            out_t = target_t / 1000.0

        #fw = open('evaluation_result/{0}_tar.xyz'.format(index), 'w')
        #for it in target:
        #    fw.write('{0} {1} {2}\n'.format(it[0], it[1], it[2]))
        #fw.close()

        return torch.from_numpy(cloud.astype(np.float32)), \
               torch.LongTensor(choose.astype(np.int32)), \
               self.norm(torch.from_numpy(img_masked.astype(np.float32))), \
               torch.from_numpy(target.astype(np.float32)), \
               torch.from_numpy(model_points.astype(np.float32)), \
               torch.LongTensor([self.objlist.index(obj)])
    def __len__(self):
        return self.length

    def get_sym_list(self):
        return self.symmetry_obj_idx

    def get_num_points_mesh(self):
        if self.refine:
            return self.num_pt_mesh_large
        else:
            return self.num_pt_mesh_small
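# The back-projection in __getitem__ above follows the standard pinhole
# model: for a pixel at column u, row v with depth d,
#   X = (u - cx) * d / fx,   Y = (v - cy) * d / fy,   Z = d.
# A tiny numeric check with the intrinsics above (the depth value is made up
# for illustration):
#
# d = 1000.0                              # raw depth units
# u, v = 400.0, 300.0                     # column, row
# x = (u - 325.26110) * d / 572.41140     # -> ~130.6
# y = (v - 242.04899) * d / 573.57043     # -> ~101.0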
border_list = [-1, 40, 80, 120, 160, 200, 240, 280, 320, 360, 400, 440, 480, 520, 560, 600, 640, 680]
img_width = 480    # number of rows (despite the name, this is the image height)
img_length = 640   # number of columns
def mask_to_bbox(mask):
    """Return the (x, y, w, h) bounding box of the largest contour in a mask."""
    mask = mask.astype(np.uint8)
    # two-value unpacking matches OpenCV 4.x; OpenCV 3.x returns (image, contours, hierarchy)
    contours, _ = cv2.findContours(mask, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

    x = 0
    y = 0
    w = 0
    h = 0
    for contour in contours:
        tmp_x, tmp_y, tmp_w, tmp_h = cv2.boundingRect(contour)
        if tmp_w * tmp_h > w * h:
            x = tmp_x
            y = tmp_y
            w = tmp_w
            h = tmp_h
    return [x, y, w, h]
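# A quick illustration of mask_to_bbox (the synthetic mask below is made up
# for illustration, kept commented out like the debug snippets in this file):
# the largest contour's bounding box comes back in (x, y, w, h) order.
#
# demo_mask = np.zeros((480, 640), dtype=np.uint8)
# demo_mask[100:120, 200:230] = 1
# print(mask_to_bbox(demo_mask))   # -> [200, 100, 30, 20]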
def get_bbox(bbox):
    bbx = [bbox[1], bbox[1] + bbox[3], bbox[0], bbox[0] + bbox[2]]
    if bbx[0] < 0:
        bbx[0] = 0
    if bbx[1] >= 480:
        bbx[1] = 479
    if bbx[2] < 0:
        bbx[2] = 0
    if bbx[3] >= 640:
        bbx[3] = 639
    rmin, rmax, cmin, cmax = bbx[0], bbx[1], bbx[2], bbx[3]
    r_b = rmax - rmin
    for tt in range(len(border_list)):
        if r_b > border_list[tt] and r_b < border_list[tt + 1]:
            r_b = border_list[tt + 1]
            break
    c_b = cmax - cmin
    for tt in range(len(border_list)):
        if c_b > border_list[tt] and c_b < border_list[tt + 1]:
            c_b = border_list[tt + 1]
            break
    center = [int((rmin + rmax) / 2), int((cmin + cmax) / 2)]
    rmin = center[0] - int(r_b / 2)
    rmax = center[0] + int(r_b / 2)
    cmin = center[1] - int(c_b / 2)
    cmax = center[1] + int(c_b / 2)
    if rmin < 0:
        delt = -rmin
        rmin = 0
        rmax += delt
    if cmin < 0:
        delt = -cmin
        cmin = 0
        cmax += delt
    if rmax > 480:
        delt = rmax - 480
        rmax = 480
        rmin -= delt
    if cmax > 640:
        delt = cmax - 640
        cmax = 640
        cmin -= delt
    return rmin, rmax, cmin, cmax
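# Sketch of get_bbox's effect (illustrative numbers): the (x, y, w, h) box is
# converted to row/column bounds, each side is snapped up to the next size in
# border_list, and the result is re-centred and clamped to the 480x640 image,
# so every crop has one of a fixed set of side lengths.
#
# print(get_bbox([200, 100, 30, 20]))   # -> (90, 130, 195, 235): a 40x40 crop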
def ply_vtx(path):
    """Read the vertex positions from an ASCII PLY file."""
    f = open(path)
    assert f.readline().strip() == "ply"
    f.readline()
    f.readline()
    N = int(f.readline().split()[-1])   # assumes the 4th header line is 'element vertex N'
    while f.readline().strip() != "end_header":
        continue
    pts = []
    for _ in range(N):
        pts.append(np.float32(f.readline().split()[:3]))
    return np.array(pts)
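# ---------------------------------------------------------------------------
# A minimal usage sketch, not part of the original file. Assumptions: the
# preprocessed LineMOD data sits under './Linemod_preprocessed', and num=500
# points with noise_trans=0.03 roughly mirror what DenseFusion's train script
# passes.
# ---------------------------------------------------------------------------
if __name__ == '__main__':
    dataset = PoseDataset('train', 500, True, './Linemod_preprocessed', 0.03, False)
    loader = torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=True, num_workers=4)
    cloud, choose, img, target, model_points, idx = next(iter(loader))
    print(cloud.shape, choose.shape, img.shape, target.shape, model_points.shape, idx)

# ---------------------------------------------------------------------------
# The block below is a fragment of LineMOD's models_info.yml: per-object
# model diameter and axis-aligned extents, all in millimetres.
# ---------------------------------------------------------------------------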
1: {diameter: 102.09865663, min_x: -37.93430000, min_y: -38.79960000, min_z: -45.88450000, size_x: 75.86860000, size_y: 77.59920000, size_z: 91.76900000}
2: {diameter: 247.50624233, min_x: -107.83500000, min_y: -60.92790000, min_z: -109.70500000, size_x: 215.67000000, size_y: 121.85570000, size_z: 219.41000000}
3: {diameter: 167.35486092, min_x: -83.21620000, min_y: -82.65910000, min_z: -37.23640000, size_x: 166.43240000, size_y: 165.31820000, size_z: 74.47280000}
4: {diameter: 172.49224865, min_x: -68.32970000, min_y: -71.51510000, min_z: -50.24850000, size_x: 136.65940000, size_y: 143.03020000, size_z: 100.49700000}
5: {diameter: 201.40358597, min_x: -50.39580000, min_y: -90.89790000, min_z: -96.86700000, size_x: 100.79160000, size_y: 181.79580000, size_z: 193.73400000}
6: {diameter: 154.54551808, min_x: -33.50540000, min_y: -63.81650000, min_z: -58.72830000, size_x: 67.01070000, size_y: 127.63300000, size_z: 117.45660000}
7: {diameter: 124.26430816, min_x: -58.78990000, min_y: -45.75560000, min_z: -47.31120000, size_x: 117.57980000, size_y: 91.51120000, size_z: 94.62240000}
8: {diameter: 261.47178102, min_x: -114.73800000, min_y: -37.73570000, min_z: -104.00100000, size_x: 229.47600000, size_y: 75.47140000, size_z: 208.00200000}
9: {diameter: 108.99920102, min_x: -52.21460000, min_y: -38.70380000, min_z: -42.84850000, size_x: 104.42920000, size_y: 77.40760000, size_z: 85.69700000}
10: {diameter: 164.62758848, min_x: -75.09230000, min_y: -53.53750000, min_z: -34.62070000, size_x: 150.18460000, size_y: 107.07500000, size_z: 69.24140000}
11: {diameter: 175.88933422, min_x: -18.36050000, min_y: -38.93300000, min_z: -86.40790000, size_x: 36.72110000, size_y: 77.86600000, size_z: 172.81580000}
12: {diameter: 145.54287471, min_x: -50.44390000, min_y: -54.24850000, min_z: -45.40000000, size_x: 100.88780000, size_y: 108.49700000, size_z: 90.80000000}
13: {diameter: 278.07811733, min_x: -129.11300000, min_y: -59.24100000, min_z: -70.56620000, size_x: 258.22600000, size_y: 118.48210000, size_z: 141.13240000}
14: {diameter: 282.60129399, min_x: -101.57300000, min_y: -58.87630000, min_z: -106.55800000, size_x: 203.14600000, size_y: 117.75250000, size_z: 213.11600000}
15: {diameter: 212.35825148, min_x: -46.95910000, min_y: -73.71670000, min_z: -92.37370000, size_x: 93.91810000, size_y: 147.43340000, size_z: 184.74740000}
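# A minimal sketch of how this table is typically consumed (the file path and
# the 0.1 x diameter ADD threshold follow common LineMOD evaluation practice
# and are assumptions here, not part of this commit):
#
# import yaml
# info = yaml.safe_load(open('Linemod_preprocessed/models/models_info.yml'))
# diameter_m = info[1]['diameter'] / 1000.0    # mm -> m, object 01
# add_threshold = 0.1 * diameter_m             # pose counted correct below this

# ---------------------------------------------------------------------------
# datasets/ycb/dataset.py -- YCB-Video data loader (file path presumed from
# the standard DenseFusion repository layout).
# ---------------------------------------------------------------------------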
import torch.utils.data as data
from PIL import Image
import os
import os.path
import torch
import numpy as np
import torchvision.transforms as transforms
import argparse
import time
import random
from lib.transformations import quaternion_from_euler, euler_matrix, random_quaternion, quaternion_matrix
import numpy.ma as ma
import copy
import scipy.misc
import scipy.io as scio
class PoseDataset(data.Dataset):
    def __init__(self, mode, num_pt, add_noise, root, noise_trans, refine):
        if mode == 'train':
            self.path = 'datasets/ycb/dataset_config/train_data_list.txt'
        elif mode == 'test':
            self.path = 'datasets/ycb/dataset_config/test_data_list.txt'
        self.num_pt = num_pt
        self.root = root
        self.add_noise = add_noise
        self.noise_trans = noise_trans

        # split frame list into real captures ('data/...') and synthetic frames
        self.list = []
        self.real = []
        self.syn = []
        input_file = open(self.path)
        while 1:
            input_line = input_file.readline()
            if not input_line:
                break
            if input_line[-1:] == '\n':
                input_line = input_line[:-1]
            if input_line[:5] == 'data/':
                self.real.append(input_line)
            else:
                self.syn.append(input_line)
            self.list.append(input_line)
        input_file.close()

        self.length = len(self.list)
        self.len_real = len(self.real)
        self.len_syn = len(self.syn)

        # load one model point cloud per class, in classes.txt order
        class_file = open('datasets/ycb/dataset_config/classes.txt')
        class_id = 1
        self.cld = {}
        while 1:
            class_input = class_file.readline()
            if not class_input:
                break

            input_file = open('{0}/models/{1}/points.xyz'.format(self.root, class_input[:-1]))
            self.cld[class_id] = []
            while 1:
                input_line = input_file.readline()
                if not input_line:
                    break
                input_line = input_line[:-1].split(' ')
                self.cld[class_id].append([float(input_line[0]), float(input_line[1]), float(input_line[2])])
            self.cld[class_id] = np.array(self.cld[class_id])
            input_file.close()
            class_id += 1

        # intrinsics of the two cameras used to record YCB-Video
        self.cam_cx_1 = 312.9869
        self.cam_cy_1 = 241.3109
        self.cam_fx_1 = 1066.778
        self.cam_fy_1 = 1067.487
        self.cam_cx_2 = 323.7872
        self.cam_cy_2 = 279.6921
        self.cam_fx_2 = 1077.836
        self.cam_fy_2 = 1078.189

        self.xmap = np.array([[j for i in range(640)] for j in range(480)])
        self.ymap = np.array([[i for i in range(640)] for j in range(480)])

        self.trancolor = transforms.ColorJitter(0.2, 0.2, 0.2, 0.05)
        self.noise_img_loc = 0.0
        self.noise_img_scale = 7.0
        self.minimum_num_pt = 50
        self.norm = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        self.symmetry_obj_idx = [12, 15, 18, 19, 20]
        self.num_pt_mesh_small = 500
        self.num_pt_mesh_large = 2600
        self.refine = refine
        self.front_num = 2

        print(len(self.list))
    def __getitem__(self, index):
        img = Image.open('{0}/{1}-color.png'.format(self.root, self.list[index]))
        depth = np.array(Image.open('{0}/{1}-depth.png'.format(self.root, self.list[index])))
        label = np.array(Image.open('{0}/{1}-label.png'.format(self.root, self.list[index])))
        meta = scio.loadmat('{0}/{1}-meta.mat'.format(self.root, self.list[index]))

        # real videos numbered 0060 and later use the second camera's intrinsics
        if self.list[index][:8] != 'data_syn' and int(self.list[index][5:9]) >= 60:
            cam_cx = self.cam_cx_2
            cam_cy = self.cam_cy_2
            cam_fx = self.cam_fx_2
            cam_fy = self.cam_fy_2
        else:
            cam_cx = self.cam_cx_1
            cam_cy = self.cam_cy_1
            cam_fx = self.cam_fx_1
            cam_fy = self.cam_fy_1

        mask_back = ma.getmaskarray(ma.masked_equal(label, 0))

        # augmentation: try up to 5 times to paste objects from a random
        # synthetic frame in front of the current one
        add_front = False
        if self.add_noise:
            for k in range(5):
                seed = random.choice(self.syn)
                front = np.array(self.trancolor(Image.open('{0}/{1}-color.png'.format(self.root, seed)).convert("RGB")))
                front = np.transpose(front, (2, 0, 1))
                f_label = np.array(Image.open('{0}/{1}-label.png'.format(self.root, seed)))
                front_label = np.unique(f_label).tolist()[1:]
                if len(front_label) < self.front_num:
                    continue
                front_label = random.sample(front_label, self.front_num)
                for f_i in front_label:
                    mk = ma.getmaskarray(ma.masked_not_equal(f_label, f_i))
                    if f_i == front_label[0]:
                        mask_front = mk
                    else:
                        mask_front = mask_front * mk
                t_label = label * mask_front
                if len(t_label.nonzero()[0]) > 1000:
                    label = t_label
                    add_front = True
                    break

        obj = meta['cls_indexes'].flatten().astype(np.int32)

        # pick a visible object with enough valid depth pixels
        while 1:
            idx = np.random.randint(0, len(obj))
            mask_depth = ma.getmaskarray(ma.masked_not_equal(depth, 0))
            mask_label = ma.getmaskarray(ma.masked_equal(label, obj[idx]))
            mask = mask_label * mask_depth
            if len(mask.nonzero()[0]) > self.minimum_num_pt:
                break

        if self.add_noise:
            img = self.trancolor(img)

        rmin, rmax, cmin, cmax = get_bbox(mask_label)
        img = np.transpose(np.array(img)[:, :, :3], (2, 0, 1))[:, rmin:rmax, cmin:cmax]

        # synthetic frames get a random real frame pasted behind them
        if self.list[index][:8] == 'data_syn':
            seed = random.choice(self.real)
            back = np.array(self.trancolor(Image.open('{0}/{1}-color.png'.format(self.root, seed)).convert("RGB")))
            back = np.transpose(back, (2, 0, 1))[:, rmin:rmax, cmin:cmax]
            img_masked = back * mask_back[rmin:rmax, cmin:cmax] + img
        else:
            img_masked = img

        if self.add_noise and add_front:
            img_masked = img_masked * mask_front[rmin:rmax, cmin:cmax] + front[:, rmin:rmax, cmin:cmax] * ~(mask_front[rmin:rmax, cmin:cmax])

        if self.list[index][:8] == 'data_syn':
            img_masked = img_masked + np.random.normal(loc=0.0, scale=7.0, size=img_masked.shape)

        # p_img = np.transpose(img_masked, (1, 2, 0))
        # scipy.misc.imsave('temp/{0}_input.png'.format(index), p_img)
        # scipy.misc.imsave('temp/{0}_label.png'.format(index), mask[rmin:rmax, cmin:cmax].astype(np.int32))

        target_r = meta['poses'][:, :, idx][:, 0:3]
        target_t = np.array([meta['poses'][:, :, idx][:, 3:4].flatten()])
        add_t = np.array([random.uniform(-self.noise_trans, self.noise_trans) for i in range(3)])

        # sample exactly self.num_pt foreground pixels (subsample or wrap-pad)
        choose = mask[rmin:rmax, cmin:cmax].flatten().nonzero()[0]
        if len(choose) > self.num_pt:
            c_mask = np.zeros(len(choose), dtype=int)
            c_mask[:self.num_pt] = 1
            np.random.shuffle(c_mask)
            choose = choose[c_mask.nonzero()]
        else:
            choose = np.pad(choose, (0, self.num_pt - len(choose)), 'wrap')

        depth_masked = depth[rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32)
        xmap_masked = self.xmap[rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32)
        ymap_masked = self.ymap[rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32)
        choose = np.array([choose])

        # back-project sampled depth pixels through the pinhole model
        cam_scale = meta['factor_depth'][0][0]
        pt2 = depth_masked / cam_scale
        pt0 = (ymap_masked - cam_cx) * pt2 / cam_fx
        pt1 = (xmap_masked - cam_cy) * pt2 / cam_fy
        cloud = np.concatenate((pt0, pt1, pt2), axis=1)
        if self.add_noise:
            cloud = np.add(cloud, add_t)

        # fw = open('temp/{0}_cld.xyz'.format(index), 'w')
        # for it in cloud:
        #     fw.write('{0} {1} {2}\n'.format(it[0], it[1], it[2]))
        # fw.close()

        dellist = [j for j in range(0, len(self.cld[obj[idx]]))]
        if self.refine:
            dellist = random.sample(dellist, len(self.cld[obj[idx]]) - self.num_pt_mesh_large)
        else:
            dellist = random.sample(dellist, len(self.cld[obj[idx]]) - self.num_pt_mesh_small)
        model_points = np.delete(self.cld[obj[idx]], dellist, axis=0)

        # fw = open('temp/{0}_model_points.xyz'.format(index), 'w')
        # for it in model_points:
        #     fw.write('{0} {1} {2}\n'.format(it[0], it[1], it[2]))
        # fw.close()

        # ground-truth model points transformed into the camera frame
        target = np.dot(model_points, target_r.T)
        if self.add_noise:
            target = np.add(target, target_t + add_t)
        else:
            target = np.add(target, target_t)

        # fw = open('temp/{0}_tar.xyz'.format(index), 'w')
        # for it in target:
        #     fw.write('{0} {1} {2}\n'.format(it[0], it[1], it[2]))
        # fw.close()

        return torch.from_numpy(cloud.astype(np.float32)), \
               torch.LongTensor(choose.astype(np.int32)), \
               self.norm(torch.from_numpy(img_masked.astype(np.float32))), \
               torch.from_numpy(target.astype(np.float32)), \
               torch.from_numpy(model_points.astype(np.float32)), \
               torch.LongTensor([int(obj[idx]) - 1])
    def __len__(self):
        return self.length

    def get_sym_list(self):
        return self.symmetry_obj_idx

    def get_num_points_mesh(self):
        if self.refine:
            return self.num_pt_mesh_large
        else:
            return self.num_pt_mesh_small
border_list = [-1, 40, 80, 120, 160, 200, 240, 280, 320, 360, 400, 440, 480, 520, 560, 600, 640, 680]
img_width = 480    # number of rows (despite the name, this is the image height)
img_length = 640   # number of columns
def get_bbox(label):
    # tight bounds of the nonzero region of the label mask
    rows = np.any(label, axis=1)
    cols = np.any(label, axis=0)
    rmin, rmax = np.where(rows)[0][[0, -1]]
    cmin, cmax = np.where(cols)[0][[0, -1]]
    rmax += 1
    cmax += 1
    # snap each side up to the next size in border_list
    r_b = rmax - rmin
    for tt in range(len(border_list)):
        if r_b > border_list[tt] and r_b < border_list[tt + 1]:
            r_b = border_list[tt + 1]
            break
    c_b = cmax - cmin
    for tt in range(len(border_list)):
        if c_b > border_list[tt] and c_b < border_list[tt + 1]:
            c_b = border_list[tt + 1]
            break
    # re-centre the box, then shift it back inside the image if needed
    center = [int((rmin + rmax) / 2), int((cmin + cmax) / 2)]
    rmin = center[0] - int(r_b / 2)
    rmax = center[0] + int(r_b / 2)
    cmin = center[1] - int(c_b / 2)
    cmax = center[1] + int(c_b / 2)
    if rmin < 0:
        delt = -rmin
        rmin = 0
        rmax += delt
    if cmin < 0:
        delt = -cmin
        cmin = 0
        cmax += delt
    if rmax > img_width:
        delt = rmax - img_width
        rmax = img_width
        rmin -= delt
    if cmax > img_length:
        delt = cmax - img_length
        cmax = img_length
        cmin -= delt
    return rmin, rmax, cmin, cmax
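# ---------------------------------------------------------------------------
# A minimal usage sketch, not part of the original file. Assumptions: the
# YCB-Video data sits under './YCB_Video_Dataset', the script runs from the
# repository root (the config paths above are relative), and num_pt=1000
# roughly mirrors what DenseFusion's YCB train script passes.
# ---------------------------------------------------------------------------
if __name__ == '__main__':
    dataset = PoseDataset('train', 1000, True, './YCB_Video_Dataset', 0.03, False)
    cloud, choose, img, target, model_points, idx = dataset[0]
    print(cloud.shape, choose.shape, img.shape, target.shape, model_points.shape, idx)

# ---------------------------------------------------------------------------
# classes.txt -- the 21 YCB object classes read by the loader above; line
# order defines the 1-based class ids.
# ---------------------------------------------------------------------------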
002_master_chef_can
003_cracker_box
004_sugar_box
005_tomato_soup_can
006_mustard_bottle
007_tuna_fish_can
008_pudding_box
009_gelatin_box
010_potted_meat_can
011_banana
019_pitcher_base
021_bleach_cleanser
024_bowl
025_mug
035_power_drill
036_wood_block
037_scissors
040_large_marker
051_large_clamp
052_extra_large_clamp
061_foam_brick