From 25ceb527c2f3b4f5ba26a75fea03cdbf553b5a4f Mon Sep 17 00:00:00 2001 From: liuxingyu <lxy17@foxmail.com> Date: Wed, 2 Nov 2022 13:52:46 +0800 Subject: [PATCH] remove useless files --- .../gdrn_modeling/datasets/dataset_factory.py | 5 - core/gdrn_modeling/datasets/lm_blender.py | 549 ----------- .../datasets/lm_dataset_crop_d2.py | 555 ----------- .../gdrn_modeling/datasets/lm_new_duck_pbr.py | 491 ---------- core/gdrn_modeling/datasets/lm_syn_egl.py | 542 ----------- core/gdrn_modeling/datasets/lm_syn_imgn.py | 490 ---------- det/yolox/data/datasets/dataset_factory.py | 7 - det/yolox/data/datasets/lm_blender.py | 512 ---------- det/yolox/data/datasets/lm_dataset_d2.py | 886 ------------------ det/yolox/data/datasets/lm_syn_imgn.py | 473 ---------- 10 files changed, 4510 deletions(-) delete mode 100644 core/gdrn_modeling/datasets/lm_blender.py delete mode 100644 core/gdrn_modeling/datasets/lm_dataset_crop_d2.py delete mode 100644 core/gdrn_modeling/datasets/lm_new_duck_pbr.py delete mode 100644 core/gdrn_modeling/datasets/lm_syn_egl.py delete mode 100644 core/gdrn_modeling/datasets/lm_syn_imgn.py delete mode 100644 det/yolox/data/datasets/lm_blender.py delete mode 100644 det/yolox/data/datasets/lm_dataset_d2.py delete mode 100644 det/yolox/data/datasets/lm_syn_imgn.py diff --git a/core/gdrn_modeling/datasets/dataset_factory.py b/core/gdrn_modeling/datasets/dataset_factory.py index c12dd96..a14b6fa 100644 --- a/core/gdrn_modeling/datasets/dataset_factory.py +++ b/core/gdrn_modeling/datasets/dataset_factory.py @@ -8,12 +8,8 @@ import detectron2.utils.comm as comm import ref from detectron2.data import DatasetCatalog, MetadataCatalog from core.gdrn_modeling.datasets import ( - lm_syn_imgn, lm_dataset_d2, - lm_syn_egl, lm_pbr, - lm_blender, - lm_dataset_crop_d2, ycbv_pbr, ycbv_d2, ycbv_bop_test, @@ -22,7 +18,6 @@ from core.gdrn_modeling.datasets import ( hb_bop_test, hb_bench_driller_phone_d2, duck_frames, - lm_new_duck_pbr, tudl_pbr, tudl_d2, tudl_bop_test, diff --git a/core/gdrn_modeling/datasets/lm_blender.py b/core/gdrn_modeling/datasets/lm_blender.py deleted file mode 100644 index b19f97d..0000000 --- a/core/gdrn_modeling/datasets/lm_blender.py +++ /dev/null @@ -1,549 +0,0 @@ -import hashlib -import logging -import os -import os.path as osp -import sys - -cur_dir = osp.dirname(osp.abspath(__file__)) -PROJ_ROOT = osp.normpath(osp.join(cur_dir, "../../..")) -sys.path.insert(0, PROJ_ROOT) -import time -from collections import OrderedDict -import mmcv -import numpy as np -from tqdm import tqdm -from transforms3d.quaternions import mat2quat, quat2mat -import random -import ref -from detectron2.data import DatasetCatalog, MetadataCatalog -from detectron2.structures import BoxMode -from lib.pysixd import inout, misc -from lib.utils.mask_utils import ( - binary_mask_to_rle, - cocosegm2mask, - mask2bbox_xywh, -) -from lib.utils.utils import dprint, iprint, lazy_property - - -logger = logging.getLogger(__name__) -DATASETS_ROOT = osp.normpath(osp.join(PROJ_ROOT, "datasets")) - - -class LM_BLENDER_Dataset(object): - """lm blender data, from pvnet-rendering.""" - - def __init__(self, data_cfg): - """ - Set with_depth and with_masks default to True, - and decide whether to load them into dataloader/network later - with_masks: - """ - self.name = data_cfg["name"] - self.data_cfg = data_cfg - - self.objs = data_cfg["objs"] # selected objects - - self.ann_files = data_cfg["ann_files"] # json files with image ids and pose/bbox - self.image_prefixes = data_cfg["image_prefixes"] - - self.dataset_root = data_cfg["dataset_root"] # lm_renders_blender/ - self.models_root = data_cfg["models_root"] # BOP_DATASETS/lm/models - self.scale_to_meter = data_cfg["scale_to_meter"] # 0.001 - - self.with_masks = data_cfg["with_masks"] # True (load masks but may not use it) - self.with_depth = data_cfg["with_depth"] # True (load depth path here, but may not use it) - self.with_xyz = data_cfg["with_xyz"] - self.depth_factor = data_cfg["depth_factor"] # 1000.0 - - self.cam = data_cfg["cam"] # - self.height = data_cfg["height"] # 480 - self.width = data_cfg["width"] # 640 - - self.cache_dir = data_cfg["cache_dir"] # .cache - self.use_cache = data_cfg["use_cache"] # True - # sample uniformly to get n items - self.n_per_obj = data_cfg.get("n_per_obj", 10000) - self.filter_invalid = data_cfg["filter_invalid"] - ################################################## - if self.cam is None: - self.cam = np.array([[572.4114, 0, 325.2611], [0, 573.57043, 242.04899], [0, 0, 1]]) - - # NOTE: careful! Only the selected objects - self.cat_ids = [cat_id for cat_id, obj_name in ref.lm_full.id2obj.items() if obj_name in self.objs] - # map selected objs to [0, num_objs-1] - self.cat2label = {v: i for i, v in enumerate(self.cat_ids)} # id_map - self.label2cat = {label: cat for cat, label in self.cat2label.items()} - self.obj2label = OrderedDict((obj, obj_id) for obj_id, obj in enumerate(self.objs)) - ########################################################## - - def __call__(self): # LM_BLENDER - """Load light-weight instance annotations of all images into a list of - dicts in Detectron2 format. - - Do not load heavy data into memory in this file, since we will - load the annotations of all images into memory. - """ - # cache the dataset_dicts to avoid loading masks from files - hashed_file_name = hashlib.md5( - ( - "".join([str(fn) for fn in self.objs]) - + "dataset_dicts_{}_{}_{}_{}_{}_{}_{}".format( - self.name, - self.dataset_root, - self.with_masks, - self.with_depth, - self.with_xyz, - self.n_per_obj, - __name__, - ) - ).encode("utf-8") - ).hexdigest() - cache_path = osp.join( - self.cache_dir, - "dataset_dicts_{}_{}.pkl".format(self.name, hashed_file_name), - ) - - if osp.exists(cache_path) and self.use_cache: - logger.info("load cached dataset dicts from {}".format(cache_path)) - return mmcv.load(cache_path) - - t_start = time.perf_counter() - - logger.info("loading dataset dicts: {}".format(self.name)) - self.num_instances_without_valid_segmentation = 0 - self.num_instances_without_valid_box = 0 - dataset_dicts = [] ####################################################### - assert len(self.ann_files) == len(self.image_prefixes), f"{len(self.ann_files)} != {len(self.image_prefixes)}" - - for ann_file, scene_root in zip(tqdm(self.ann_files), self.image_prefixes): - # each scene is an object - assert osp.exists(ann_file), ann_file - scene_gt_dict = mmcv.load(ann_file) - # sample uniformly (equal space) - indices = list(scene_gt_dict.keys()) - if self.n_per_obj > 0: - sample_num = min(self.n_per_obj, len(scene_gt_dict)) - sel_indices_idx = np.linspace(0, len(scene_gt_dict) - 1, sample_num, dtype=np.int32) - sel_indices = [indices[int(_i)] for _i in sel_indices_idx] - else: - sel_indices = indices - - for str_im_id in tqdm(sel_indices): - int_im_id = int(str_im_id) - rgb_path = osp.join(scene_root, "{}.jpg").format(str_im_id) - assert osp.exists(rgb_path), rgb_path - - depth_path = osp.join(scene_root, "{}_depth_opengl.png".format(str_im_id)) - - obj_name = osp.basename(ann_file).split("_")[0] # obj_gt.json - obj_id = ref.lm_full.obj2id[obj_name] - if obj_name not in self.objs: - continue - - record = { - "dataset_name": self.name, - "file_name": osp.relpath(rgb_path, PROJ_ROOT), - "depth_file": osp.relpath(depth_path, PROJ_ROOT), - "height": self.height, - "width": self.width, - "image_id": int_im_id, - "scene_im_id": f"{obj_id}/{int_im_id}", - "cam": self.cam, - "img_type": "syn_blender", # has bg - } - - cur_label = self.obj2label[obj_name] # 0-based label - anno = scene_gt_dict[str_im_id][0] # only one object - R = np.array(anno["cam_R_m2c"]).reshape(3, 3) - t = np.array(anno["cam_t_m2c"]).reshape(-1) / 1000 - pose = np.hstack([R, t.reshape(3, 1)]) - quat = mat2quat(R).astype("float32") - proj = (record["cam"] @ t.T).T - proj = proj[:2] / proj[2] - - bbox_visib = anno["bbox_visib"] - x1, y1, w, h = bbox_visib - - cx, cy = proj - crop_x1 = round(np.clip(cx - 64, 0, self.width - 1)) - crop_x2 = round(np.clip(cx + 64, 0, self.width - 1)) - crop_y1 = round(np.clip(cy - 64, 0, self.height - 1)) - crop_y2 = round(np.clip(cy + 64, 0, self.height - 1)) - - # convert to xywh - crop_w = crop_x2 - crop_x1 - crop_h = crop_y2 - crop_y1 - bbox_128 = [crop_x1, crop_y1, crop_w, crop_h] - - if self.filter_invalid: - if h <= 1 or w <= 1: - self.num_instances_without_valid_box += 1 - continue - - mask_path = osp.join(scene_root, "{}_mask_opengl.png".format(str_im_id)) - mask = mmcv.imread(mask_path, "unchanged") - mask = (mask > 0).astype(np.uint8) - - area = mask.sum() - if area < 3: # filter out too small or nearly invisible instances - self.num_instances_without_valid_segmentation += 1 - continue - mask_rle = binary_mask_to_rle(mask, compressed=True) - - xyz_path = osp.join(scene_root, "{}_xyz_bop.pkl".format(str_im_id)) - assert osp.exists(xyz_path), xyz_path - - visib_fract = anno.get("visib_fract", 1.0) - inst = { - "category_id": cur_label, # 0-based label - "bbox": bbox_visib, # TODO: load both bbox_obj and bbox_visib - "bbox_mode": BoxMode.XYWH_ABS, - "bbox_crop": bbox_128, - "pose": pose, - "quat": quat, - "trans": t, - "centroid_2d": proj, # absolute (cx, cy) - "segmentation": mask_rle, - "xyz_path": xyz_path, - "visib_fract": visib_fract, - "mask_full": mask_rle, # NOTE! - } - - model_info = self.models_info[str(obj_id)] - inst["model_info"] = model_info - for key in ["bbox3d_and_center"]: - inst[key] = self.models[cur_label][key] - record["annotations"] = [inst] - dataset_dicts.append(record) - - if self.num_instances_without_valid_segmentation > 0: - logger.warning( - "Filtered out {} instances without valid segmentation. " - "There might be issues in your dataset generation process.".format( - self.num_instances_without_valid_segmentation - ) - ) - if self.num_instances_without_valid_box > 0: - logger.warning( - "Filtered out {} instances without valid box. " - "There might be issues in your dataset generation process.".format(self.num_instances_without_valid_box) - ) - ########################################################################## - # if self.num_to_load > 0: - # self.num_to_load = min(int(self.num_to_load), len(dataset_dicts)) - # random.shuffle(dataset_dicts) - # dataset_dicts = dataset_dicts[: self.num_to_load] - logger.info( - "loaded dataset dicts, num_images: {}, using {}s".format(len(dataset_dicts), time.perf_counter() - t_start) - ) - - mmcv.dump(dataset_dicts, cache_path, protocol=4) - logger.info("Dumped dataset_dicts to {}".format(cache_path)) - return dataset_dicts - - @lazy_property - def models_info(self): - models_info_path = osp.join(self.models_root, "models_info.json") - assert osp.exists(models_info_path), models_info_path - models_info = mmcv.load(models_info_path) # key is str(obj_id) - return models_info - - @lazy_property - def models(self): - """Load models into a list.""" - cache_path = osp.join(self.models_root, "models_{}.pkl".format("_".join(self.objs))) - if osp.exists(cache_path) and self.use_cache: - # dprint("{}: load cached object models from {}".format(self.name, cache_path)) - return mmcv.load(cache_path) - - models = [] - for obj_name in self.objs: - model = inout.load_ply( - osp.join( - self.models_root, - f"obj_{ref.lm_full.obj2id[obj_name]:06d}.ply", - ), - vertex_scale=self.scale_to_meter, - ) - # NOTE: the bbox3d_and_center is not obtained from centered vertices - # for BOP models, not a big problem since they had been centered - model["bbox3d_and_center"] = misc.get_bbox3d_and_center(model["pts"]) - - models.append(model) - logger.info("cache models to {}".format(cache_path)) - mmcv.dump(models, cache_path, protocol=4) - return models - - def image_aspect_ratio(self): - return self.width / self.height # 4/3 - - -########### register datasets ############################################################ - - -def get_lm_metadata(obj_names, ref_key): - """task specific metadata.""" - - data_ref = ref.__dict__[ref_key] - - cur_sym_infos = {} # label based key - loaded_models_info = data_ref.get_models_info() - - for i, obj_name in enumerate(obj_names): - obj_id = data_ref.obj2id[obj_name] - model_info = loaded_models_info[str(obj_id)] - if "symmetries_discrete" in model_info or "symmetries_continuous" in model_info: - sym_transforms = misc.get_symmetry_transformations(model_info, max_sym_disc_step=0.01) - sym_info = np.array([sym["R"] for sym in sym_transforms], dtype=np.float32) - else: - sym_info = None - cur_sym_infos[i] = sym_info - - meta = {"thing_classes": obj_names, "sym_infos": cur_sym_infos} - return meta - - -LM_13_OBJECTS = [ - "ape", - "benchvise", - "camera", - "can", - "cat", - "driller", - "duck", - "eggbox", - "glue", - "holepuncher", - "iron", - "lamp", - "phone", -] # no bowl, cup -LM_OCC_OBJECTS = [ - "ape", - "can", - "cat", - "driller", - "duck", - "eggbox", - "glue", - "holepuncher", -] -################################################################################ - -SPLITS_LM_BLENDER = dict( - lm_blender_13_train=dict( - name="lm_blender_13_train", # BB8 training set - dataset_root=osp.join(DATASETS_ROOT, "lm_renders_blender/"), - models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/models"), - objs=LM_13_OBJECTS, # selected objects - ann_files=[ - osp.join( - DATASETS_ROOT, - "lm_renders_blender/renders/{}_gt.json".format(_obj), - ) - for _obj in LM_13_OBJECTS - ], - image_prefixes=[ - osp.join(DATASETS_ROOT, "lm_renders_blender/renders/{}".format(_obj)) for _obj in LM_13_OBJECTS - ], - scale_to_meter=0.001, - with_masks=True, # (load masks but may not use it) - with_depth=True, # (load depth path here, but may not use it) - with_xyz=True, - depth_factor=1000.0, - cam=ref.lm_full.camera_matrix, - height=480, - width=640, - cache_dir=osp.join(PROJ_ROOT, ".cache"), - use_cache=True, - n_per_obj=-1, # num per class, -1 for all 10k - filter_invalid=False, - ref_key="lm_full", - ), - lmo_blender_train=dict( - name="lmo_blender_train", - dataset_root=osp.join(DATASETS_ROOT, "lm_renders_blender/"), - models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/models"), - objs=LM_OCC_OBJECTS, # selected objects - ann_files=[ - osp.join( - DATASETS_ROOT, - "lm_renders_blender/renders/{}_gt.json".format(_obj), - ) - for _obj in LM_OCC_OBJECTS - ], - image_prefixes=[ - osp.join(DATASETS_ROOT, "lm_renders_blender/renders/{}".format(_obj)) for _obj in LM_OCC_OBJECTS - ], - scale_to_meter=0.001, - with_masks=True, # (load masks but may not use it) - with_depth=True, # (load depth path here, but may not use it) - with_xyz=True, - depth_factor=1000.0, - cam=ref.lmo_full.camera_matrix, - height=480, - width=640, - cache_dir=osp.join(PROJ_ROOT, ".cache"), - use_cache=True, - n_per_obj=-1, # n per class, -1 for all 10k - filter_invalid=False, - ref_key="lmo_full", - ), -) - -# single obj splits -for obj in ref.lm_full.objects: - for split in ["train"]: - for name_prefix in ["lm", "lmo"]: - name = "{}_blender_{}_{}".format(name_prefix, obj, split) - ref_key = f"{name_prefix}_full" - ann_files = [ - osp.join( - DATASETS_ROOT, - "lm_renders_blender/renders/{}_gt.json".format(obj), - ) - ] - if split in ["train"]: - filter_invalid = True - else: - raise ValueError("{}".format(split)) - if name not in SPLITS_LM_BLENDER: - SPLITS_LM_BLENDER[name] = dict( - name=name, - dataset_root=osp.join(DATASETS_ROOT, "lm_renders_blender/"), - models_root=osp.join(DATASETS_ROOT, f"BOP_DATASETS/{name_prefix}/models"), - objs=[obj], # only this obj - ann_files=ann_files, - image_prefixes=[osp.join(DATASETS_ROOT, f"lm_renders_blender/renders/{obj}")], - scale_to_meter=0.001, - with_masks=True, # (load masks but may not use it) - with_depth=True, # (load depth path here, but may not use it) - with_xyz=True, - depth_factor=1000.0, - cam=ref.__dict__[ref_key].camera_matrix, - height=480, - width=640, - cache_dir=osp.join(PROJ_ROOT, ".cache"), - use_cache=True, - n_per_obj=-1, - filter_invalid=False, - ref_key=ref_key, - ) - - -def register_with_name_cfg(name, data_cfg=None): - """Assume pre-defined datasets live in `./datasets`. - - Args: - name: datasnet_name, - data_cfg: if name is in existing SPLITS, use pre-defined data_cfg - otherwise requires data_cfg - data_cfg can be set in cfg.DATA_CFG.name - """ - dprint("register dataset: {}".format(name)) - if name in SPLITS_LM_BLENDER: - used_cfg = SPLITS_LM_BLENDER[name] - else: - assert data_cfg is not None, f"dataset name {name} is not registered" - used_cfg = data_cfg - DatasetCatalog.register(name, LM_BLENDER_Dataset(used_cfg)) - # something like eval_types - MetadataCatalog.get(name).set( - ref_key=used_cfg["ref_key"], - objs=used_cfg["objs"], - eval_error_types=["ad", "rete", "proj"], - evaluator_type="bop", - **get_lm_metadata(obj_names=used_cfg["objs"], ref_key=used_cfg["ref_key"]), - ) - - -def get_available_datasets(): - return list(SPLITS_LM_BLENDER.keys()) - - -#### tests ############################################### -def test_vis(): - dset_name = sys.argv[1] - assert dset_name in DatasetCatalog.list() - - meta = MetadataCatalog.get(dset_name) - dprint("MetadataCatalog: ", meta) - objs = meta.objs - - t_start = time.perf_counter() - dicts = DatasetCatalog.get(dset_name) - logger.info("Done loading {} samples with {:.3f}s.".format(len(dicts), time.perf_counter() - t_start)) - - dirname = "output/{}-data-vis".format(dset_name) - os.makedirs(dirname, exist_ok=True) - for d in dicts: - img = read_image_mmcv(d["file_name"], format="BGR") - depth = mmcv.imread(d["depth_file"], "unchanged") / 1000.0 - - anno = d["annotations"][0] # only one instance per image - imH, imW = img.shape[:2] - mask = cocosegm2mask(anno["segmentation"], imH, imW) - bbox = anno["bbox"] - bbox_mode = anno["bbox_mode"] - bbox_xyxy = np.array(BoxMode.convert(bbox, bbox_mode, BoxMode.XYXY_ABS)) - kpt3d = anno["bbox3d_and_center"] - quat = anno["quat"] - trans = anno["trans"] - R = quat2mat(quat) - # 0-based label - cat_id = anno["category_id"] - K = d["cam"] - kpt_2d = misc.project_pts(kpt3d, K, R, trans) - # # TODO: visualize pose and keypoints - label = objs[cat_id] - # img_vis = vis_image_bboxes_cv2(img, bboxes=bboxes_xyxy, labels=labels) - img_vis = vis_image_mask_bbox_cv2(img, [mask], bboxes=[bbox_xyxy], labels=[label]) - img_vis_kpt2d = img.copy() - img_vis_kpt2d = misc.draw_projected_box3d( - img_vis_kpt2d, - kpt_2d, - middle_color=None, - bottom_color=(128, 128, 128), - ) - - xyz_info = mmcv.load(anno["xyz_path"]) - xyz = np.zeros((imH, imW, 3), dtype=np.float32) - xyz_crop = xyz_info["xyz_crop"].astype(np.float32) - x1, y1, x2, y2 = xyz_info["xyxy"] - xyz[y1 : y2 + 1, x1 : x2 + 1, :] = xyz_crop - xyz_show = get_emb_show(xyz) - - grid_show( - [ - img[:, :, [2, 1, 0]], - img_vis[:, :, [2, 1, 0]], - img_vis_kpt2d[:, :, [2, 1, 0]], - depth, - xyz_show, - ], - ["img", "vis_img", "img_vis_kpts2d", "depth", "emb_show"], - row=2, - col=3, - ) - - -if __name__ == "__main__": - """Test the dataset loader. - - Usage: - python -m core.datasets.lm_blender dataset_name - """ - from lib.vis_utils.image import grid_show - from lib.utils.setup_logger import setup_my_logger - - import detectron2.data.datasets # noqa # add pre-defined metadata - from lib.vis_utils.image import vis_image_mask_bbox_cv2 - from core.utils.utils import get_emb_show - from core.utils.data_utils import read_image_mmcv - - print("sys.argv:", sys.argv) - logger = setup_my_logger(name="core") - register_with_name_cfg(sys.argv[1]) - print("dataset catalog: ", DatasetCatalog.list()) - test_vis() diff --git a/core/gdrn_modeling/datasets/lm_dataset_crop_d2.py b/core/gdrn_modeling/datasets/lm_dataset_crop_d2.py deleted file mode 100644 index d210367..0000000 --- a/core/gdrn_modeling/datasets/lm_dataset_crop_d2.py +++ /dev/null @@ -1,555 +0,0 @@ -import hashlib -import logging -import os -import os.path as osp -import sys -import time -from collections import OrderedDict - -import mmcv -import numpy as np -from tqdm import tqdm -from transforms3d.quaternions import mat2quat, quat2mat -from detectron2.data import DatasetCatalog, MetadataCatalog -from detectron2.structures import BoxMode - -cur_dir = osp.dirname(osp.abspath(__file__)) -PROJ_ROOT = osp.normpath(osp.join(cur_dir, "../../..")) -sys.path.insert(0, PROJ_ROOT) - -import ref - -from lib.pysixd import inout, misc -from lib.utils.mask_utils import binary_mask_to_rle, cocosegm2mask -from lib.utils.utils import dprint, iprint, lazy_property - - -logger = logging.getLogger(__name__) -DATASETS_ROOT = osp.normpath(osp.join(PROJ_ROOT, "datasets")) - - -class LM_CROP_Dataset(object): - """lm crop splits.""" - - def __init__(self, data_cfg): - """ - Set with_depth and with_masks default to True, - and decide whether to load them into dataloader/network later - with_masks: - """ - self.name = data_cfg["name"] - self.data_cfg = data_cfg - - self.objs = data_cfg["objs"] # selected objects - - self.ann_files = data_cfg["ann_files"] # idx files with image ids - self.image_prefixes = data_cfg["image_prefixes"] - - self.dataset_root = data_cfg["dataset_root"] # BOP_DATASETS/lm/ - assert osp.exists(self.dataset_root), self.dataset_root - self.models_root = data_cfg["models_root"] # BOP_DATASETS/lm/models - self.scale_to_meter = data_cfg["scale_to_meter"] # 0.001 - - self.with_masks = data_cfg["with_masks"] # True (load masks but may not use it) - self.with_depth = data_cfg["with_depth"] # True (load depth path here, but may not use it) - self.depth_factor = data_cfg["depth_factor"] # 1000.0 - - self.cam_type = data_cfg["cam_type"] - self.cam = data_cfg["cam"] # - self.height = data_cfg["height"] # 480 - self.width = data_cfg["width"] # 640 - - self.cache_dir = data_cfg["cache_dir"] # .cache - self.use_cache = data_cfg["use_cache"] # True - self.num_to_load = data_cfg["num_to_load"] # -1 - self.filter_invalid = data_cfg["filter_invalid"] - self.filter_scene = data_cfg.get("filter_scene", False) - ################################################## - if self.cam is None: - assert self.cam_type in ["local", "dataset"] - if self.cam_type == "dataset": - self.cam = np.array([[572.4114, 0, 325.2611], [0, 573.57043, 242.04899], [0, 0, 1]]) - elif self.cam_type == "local": - # self.cam = np.array([[539.8100, 0, 318.2700], [0, 539.8300, 239.5600], [0, 0, 1]]) - # yapf: disable - self.cam = np.array( - [[518.81993115, 0., 320.50653699], - [0., 518.86581081, 243.5604188 ], - [0., 0., 1. ]]) - # yapf: enable - # RMS: 0.14046169348724977 - # camera matrix: - # [[518.81993115 0. 320.50653699] - # [ 0. 518.86581081 243.5604188 ] - # [ 0. 0. 1. ]] - # distortion coefficients: [ 0.04147325 -0.21469544 -0.00053707 -0.00251986 0.17406399] - - # NOTE: careful! Only the selected objects - self.cat_ids = [cat_id for cat_id, obj_name in ref.lm_full.id2obj.items() if obj_name in self.objs] - # map selected objs to [0, num_objs-1] - self.cat2label = {v: i for i, v in enumerate(self.cat_ids)} # id_map - self.label2cat = {label: cat for cat, label in self.cat2label.items()} - self.obj2label = OrderedDict((obj, obj_id) for obj_id, obj in enumerate(self.objs)) - ########################################################## - - def __call__(self): # LM_CROP_Dataset - """Load light-weight instance annotations of all images into a list of - dicts in Detectron2 format. - - Do not load heavy data into memory in this file, since we will - load the annotations of all images into memory. - """ - # cache the dataset_dicts to avoid loading masks from files - hashed_file_name = hashlib.md5( - ( - "".join([str(fn) for fn in self.objs]) - + "dataset_dicts_{}_{}_{}_{}_{}".format( - self.name, self.dataset_root, self.with_masks, self.with_depth, self.cam_type - ) - ).encode("utf-8") - ).hexdigest() - cache_path = osp.join(self.cache_dir, "dataset_dicts_{}_{}.pkl".format(self.name, hashed_file_name)) - - if osp.exists(cache_path) and self.use_cache: - logger.info("load cached dataset dicts from {}".format(cache_path)) - return mmcv.load(cache_path) - - t_start = time.perf_counter() - - logger.info("loading dataset dicts: {}".format(self.name)) - self.num_instances_without_valid_segmentation = 0 - self.num_instances_without_valid_box = 0 - dataset_dicts = [] ####################################################### - assert len(self.ann_files) == len(self.image_prefixes), f"{len(self.ann_files)} != {len(self.image_prefixes)}" - unique_im_id = 0 - for ann_file, scene_root in zip(self.ann_files, self.image_prefixes): - # linemod each scene is an object - with open(ann_file, "r") as f_ann: - indices = [line.strip("\r\n") for line in f_ann.readlines()] # string ids - gt_dict = mmcv.load(osp.join(scene_root, "scene_gt.json")) - gt_info_dict = mmcv.load(osp.join(scene_root, "scene_gt_info.json")) # bbox_obj, bbox_visib - for im_id in tqdm(indices): - int_im_id = int(im_id) - rgb_path = osp.join(scene_root, "rgb/{:06d}.png").format(int_im_id) - assert osp.exists(rgb_path), rgb_path - - depth_path = osp.join(scene_root, "depth/{:06d}.png".format(int_im_id)) - - scene_id = int(rgb_path.split("/")[-3]) - scene_im_id = "{}/{}".format(scene_id, int_im_id) - if self.filter_scene: - if scene_id not in self.cat_ids: - continue - record = { - "dataset_name": self.name, - "file_name": osp.relpath(rgb_path, PROJ_ROOT), - "depth_file": osp.relpath(depth_path, PROJ_ROOT), - "height": self.height, - "width": self.width, - "image_id": unique_im_id, - "scene_im_id": scene_im_id, # for evaluation - "cam": self.cam, - "depth_factor": self.depth_factor, - "img_type": "real", - } - unique_im_id += 1 - insts = [] - for anno_i, anno in enumerate(gt_dict[im_id]): - obj_id = anno["obj_id"] - if obj_id not in self.cat_ids: - continue - cur_label = self.cat2label[obj_id] # 0-based label - R = np.array(anno["cam_R_m2c"], dtype="float32").reshape(3, 3) - t = np.array(anno["cam_t_m2c"], dtype="float32") / 1000.0 - pose = np.hstack([R, t.reshape(3, 1)]) - quat = mat2quat(R).astype("float32") - - proj = (record["cam"] @ t.T).T - proj = proj[:2] / proj[2] - - bbox_visib = gt_info_dict[im_id][anno_i]["bbox_visib"] - bbox_obj = gt_info_dict[im_id][anno_i]["bbox_obj"] - x1, y1, w, h = bbox_visib - - cx, cy = proj - crop_x1 = round(np.clip(cx - 64, 0, self.width - 1)) - crop_x2 = round(np.clip(cx + 64, 0, self.width - 1)) - crop_y1 = round(np.clip(cy - 64, 0, self.height - 1)) - crop_y2 = round(np.clip(cy + 64, 0, self.height - 1)) - - # convert to xywh - crop_w = crop_x2 - crop_x1 - crop_h = crop_y2 - crop_y1 - bbox_128 = [crop_x1, crop_y1, crop_w, crop_h] - - if self.filter_invalid: - if h <= 1 or w <= 1: - self.num_instances_without_valid_box += 1 - continue - - mask_file = osp.join(scene_root, "mask/{:06d}_{:06d}.png".format(int_im_id, anno_i)) - mask_visib_file = osp.join(scene_root, "mask_visib/{:06d}_{:06d}.png".format(int_im_id, anno_i)) - assert osp.exists(mask_file), mask_file - assert osp.exists(mask_visib_file), mask_visib_file - # load mask visib TODO: load both mask_visib and mask_full - mask_single = mmcv.imread(mask_visib_file, "unchanged") - mask_single = mask_single.astype("bool") - area = mask_single.sum() - if area < 3: # filter out too small or nearly invisible instances - self.num_instances_without_valid_segmentation += 1 - continue - mask_rle = binary_mask_to_rle(mask_single, compressed=True) - # load mask full - mask_full = mmcv.imread(mask_file, "unchanged") - mask_full = mask_full.astype("bool") - mask_full_rle = binary_mask_to_rle(mask_full, compressed=True) - - inst = { - "category_id": cur_label, # 0-based label - "bbox": bbox_visib, # TODO: load both bbox_obj and bbox_visib - "bbox_mode": BoxMode.XYWH_ABS, - "bbox_crop": bbox_128, - "pose": pose, - "quat": quat, - "trans": t, - "centroid_2d": proj, # absolute (cx, cy) - "segmentation": mask_rle, - "mask_full": mask_full_rle, - } - model_info = self.models_info[str(obj_id)] - inst["model_info"] = model_info - for key in ["bbox3d_and_center"]: - inst[key] = self.models[cur_label][key] - insts.append(inst) - if len(insts) == 0: # filter im without anno - continue - record["annotations"] = insts - dataset_dicts.append(record) - - if self.num_instances_without_valid_segmentation > 0: - logger.warning( - "Filtered out {} instances without valid segmentation. " - "There might be issues in your dataset generation process.".format( - self.num_instances_without_valid_segmentation - ) - ) - if self.num_instances_without_valid_box > 0: - logger.warning( - "Filtered out {} instances without valid box. " - "There might be issues in your dataset generation process.".format(self.num_instances_without_valid_box) - ) - ########################################################################## - if self.num_to_load > 0: - self.num_to_load = min(int(self.num_to_load), len(dataset_dicts)) - dataset_dicts = dataset_dicts[: self.num_to_load] - logger.info("loaded {} dataset dicts, using {}s".format(len(dataset_dicts), time.perf_counter() - t_start)) - - mmcv.mkdir_or_exist(osp.dirname(cache_path)) - mmcv.dump(dataset_dicts, cache_path, protocol=4) - logger.info("Dumped dataset_dicts to {}".format(cache_path)) - return dataset_dicts - - @lazy_property - def models_info(self): - models_info_path = osp.join(self.models_root, "models_info.json") - assert osp.exists(models_info_path), models_info_path - models_info = mmcv.load(models_info_path) # key is str(obj_id) - return models_info - - @lazy_property - def models(self): - """Load models into a list.""" - cache_path = osp.join(self.cache_dir, "models_{}.pkl".format("_".join(self.objs))) - if osp.exists(cache_path) and self.use_cache: - # dprint("{}: load cached object models from {}".format(self.name, cache_path)) - return mmcv.load(cache_path) - - models = [] - for obj_name in self.objs: - model = inout.load_ply( - osp.join( - self.models_root, - f"obj_{ref.lm_full.obj2id[obj_name]:06d}.ply", - ), - vertex_scale=self.scale_to_meter, - ) - # NOTE: the bbox3d_and_center is not obtained from centered vertices - # for BOP models, not a big problem since they had been centered - model["bbox3d_and_center"] = misc.get_bbox3d_and_center(model["pts"]) - - models.append(model) - logger.info("cache models to {}".format(cache_path)) - mmcv.mkdir_or_exist(osp.dirname(cache_path)) - mmcv.dump(models, cache_path, protocol=4) - return models - - def __len__(self): - # return len(self.images) - return self.num_to_load - - def image_aspect_ratio(self): - return self.width / self.height # 4/3 - - -########### register datasets ############################################################ - - -def get_lm_metadata(obj_names, ref_key): - """task specific metadata.""" - - data_ref = ref.__dict__[ref_key] - - cur_sym_infos = {} # label based key - loaded_models_info = data_ref.get_models_info() - - for i, obj_name in enumerate(obj_names): - obj_id = data_ref.obj2id[obj_name] - model_info = loaded_models_info[str(obj_id)] - if "symmetries_discrete" in model_info or "symmetries_continuous" in model_info: - sym_transforms = misc.get_symmetry_transformations(model_info, max_sym_disc_step=0.01) - sym_info = np.array([sym["R"] for sym in sym_transforms], dtype=np.float32) - else: - sym_info = None - cur_sym_infos[i] = sym_info - - meta = {"thing_classes": obj_names, "sym_infos": cur_sym_infos} - return meta - - -LM_CROP_11_OBJECTS = [ - "ape", - "benchvise", - "camera", - "can", - "cat", - "driller", - "duck", - "holepuncher", - "iron", - "lamp", - "phone", -] # no bowl, cup, eggbox, glue -################################################################################ - -SPLITS_LM_CROP = dict( - lm_crop_11_train=dict( - name="lm_crop_11_train", - dataset_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/"), - models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/models"), - objs=LM_CROP_11_OBJECTS, # selected objects - ann_files=[ - osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/image_set_lm_crop/lm_crop_{}_{}.txt".format("train", _obj)) - for _obj in LM_CROP_11_OBJECTS - ], - image_prefixes=[ - osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/test/{:06d}".format(ref.lm_full.obj2id[_obj])) - for _obj in LM_CROP_11_OBJECTS - ], - scale_to_meter=0.001, - with_masks=True, # (load masks but may not use it) - with_depth=True, # (load depth path here, but may not use it) - depth_factor=1000.0, - cam_type="dataset", - cam=ref.lm_full.camera_matrix, - height=480, - width=640, - cache_dir=osp.join(PROJ_ROOT, ".cache"), - use_cache=True, - num_to_load=-1, - filter_scene=True, - filter_invalid=False, - ref_key="lm_full", - ), - lm_crop_11_test=dict( - name="lm_crop_11_test", - dataset_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/"), - models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/models"), - objs=LM_CROP_11_OBJECTS, - ann_files=[ - osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/image_set_lm_crop/lm_crop_{}_{}.txt".format("test", _obj)) - for _obj in LM_CROP_11_OBJECTS - ], - # NOTE: scene root - image_prefixes=[ - osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/test/{:06d}").format(ref.lm_full.obj2id[_obj]) - for _obj in LM_CROP_11_OBJECTS - ], - scale_to_meter=0.001, - with_masks=True, # (load masks but may not use it) - with_depth=True, # (load depth path here, but may not use it) - depth_factor=1000.0, - cam_type="dataset", - cam=ref.lm_full.camera_matrix, - height=480, - width=640, - cache_dir=osp.join(PROJ_ROOT, ".cache"), - use_cache=True, - num_to_load=-1, - filter_scene=True, - filter_invalid=False, - ref_key="lm_full", - ), -) - -# single obj splits -for obj in ref.lm_full.objects: - for split in ["train", "test"]: - name = "lm_crop_{}_{}".format(obj, split) - ann_files = [osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/image_set_lm_crop/lm_crop_{}_{}.txt".format(split, obj))] - if split in ["train"]: - filter_invalid = True - elif split in ["test"]: - filter_invalid = False - else: - raise ValueError("{}".format(split)) - if name not in SPLITS_LM_CROP: - SPLITS_LM_CROP[name] = dict( - name=name, - dataset_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/"), - models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/models"), - objs=[obj], # only this obj - ann_files=ann_files, - image_prefixes=[osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/test/{:06d}").format(ref.lm_full.obj2id[obj])], - scale_to_meter=0.001, - with_masks=True, # (load masks but may not use it) - with_depth=True, # (load depth path here, but may not use it) - depth_factor=1000.0, - cam_type="dataset", - cam=ref.lm_full.camera_matrix, - height=480, - width=640, - cache_dir=osp.join(PROJ_ROOT, ".cache"), - use_cache=True, - num_to_load=-1, - filter_invalid=False, - filter_scene=True, - ref_key="lm_full", - ) - - -def register_with_name_cfg(name, data_cfg=None): - """Assume pre-defined datasets live in `./datasets`. - - Args: - name: datasnet_name, - data_cfg: if name is in existing SPLITS, use pre-defined data_cfg - otherwise requires data_cfg - data_cfg can be set in cfg.DATA_CFG.name - """ - dprint("register dataset: {}".format(name)) - if name in SPLITS_LM_CROP: - used_cfg = SPLITS_LM_CROP[name] - else: - assert data_cfg is not None, f"dataset name {name} is not registered" - used_cfg = data_cfg - DatasetCatalog.register(name, LM_CROP_Dataset(used_cfg)) - # something like eval_types - MetadataCatalog.get(name).set( - ref_key=used_cfg["ref_key"], - objs=used_cfg["objs"], - eval_error_types=["ad", "rete", "proj"], - evaluator_type="bop", - **get_lm_metadata(obj_names=used_cfg["objs"], ref_key=used_cfg["ref_key"]), - ) - - -def get_available_datasets(): - return list(SPLITS_LM_CROP.keys()) - - -#### tests ############################################### -def test_vis(): - # python -m this_module lmo_test - dset_name = sys.argv[1] - assert dset_name in DatasetCatalog.list() - - meta = MetadataCatalog.get(dset_name) - dprint("MetadataCatalog: ", meta) - objs = meta.objs - - t_start = time.perf_counter() - dicts = DatasetCatalog.get(dset_name) - logger.info("Done loading {} samples with {:.3f}s.".format(len(dicts), time.perf_counter() - t_start)) - - dirname = "output/{}-data-vis".format(dset_name) - os.makedirs(dirname, exist_ok=True) - for d in dicts: - img = read_image_mmcv(d["file_name"], format="BGR") - depth = mmcv.imread(d["depth_file"], "unchanged") / 1000.0 - - imH, imW = img.shape[:2] - annos = d["annotations"] - masks = [cocosegm2mask(anno["segmentation"], imH, imW) for anno in annos] - bboxes = [anno["bbox"] for anno in annos] - bbox_modes = [anno["bbox_mode"] for anno in annos] - bboxes_xyxy = np.array( - [BoxMode.convert(box, box_mode, BoxMode.XYXY_ABS) for box, box_mode in zip(bboxes, bbox_modes)] - ) - kpts_3d_list = [anno["bbox3d_and_center"] for anno in annos] - quats = [anno["quat"] for anno in annos] - centers_2d = [anno["centroid_2d"] for anno in annos] - bboxes_128 = [] - for center_2d in centers_2d: - cx, cy = center_2d - bboxes_128.append([cx - 64, cy - 64, cx + 64, cy + 64]) - bboxes_128 = np.array(bboxes_128) - bboxes_128[:, 0] = np.clip(bboxes_128[:, 0], 0, imW - 1) - bboxes_128[:, 2] = np.clip(bboxes_128[:, 2], 0, imW - 1) - bboxes_128[:, 1] = np.clip(bboxes_128[:, 1], 0, imH - 1) - bboxes_128[:, 3] = np.clip(bboxes_128[:, 3], 0, imH - 1) - - transes = [anno["trans"] for anno in annos] - Rs = [quat2mat(quat) for quat in quats] - # 0-based label - cat_ids = [anno["category_id"] for anno in annos] - K = d["cam"] - kpts_2d = [misc.project_pts(kpt3d, K, R, t) for kpt3d, R, t in zip(kpts_3d_list, Rs, transes)] - # visualizer = Visualizer(img[:, :, [2,1,0]], metadata=meta) - # vis = visualizer.draw_dataset_dict(d) # TODO: add pose visualization and depth visualization - # # fpath = osp.join(dirname, osp.basename(d["file_name"])) - # # vis.save(fpath) - # img_vis = vis.get_image()[:, :, [2,1,0]] - # # TODO: visualize pose and keypoints - labels = [objs[cat_id] for cat_id in cat_ids] - # img_vis = vis_image_bboxes_cv2(img, bboxes=bboxes_xyxy, labels=labels) - img_vis = vis_image_mask_bbox_cv2(img, masks, bboxes=bboxes_xyxy, labels=labels) - img_vis_kpts2d = img.copy() - for anno_i in range(len(annos)): - img_vis_kpts2d = misc.draw_projected_box3d(img_vis_kpts2d, kpts_2d[anno_i]) - img_vis_bbox_128 = vis_image_mask_bbox_cv2(img, masks, bboxes=bboxes_128, labels=labels) - # grid_show( - # [img[:, :, [2, 1, 0]], img_vis[:, :, [2, 1, 0]], img_vis_bbox_64[:, :, ::-1], img_vis_kpts2d[:, :, [2, 1, 0]], depth], - # ["img", "vis_img", "img_vis_bbox_64", "img_vis_kpts2d", "depth"], - # row=2, - # col=3, - # ) - - grid_show( - [img_vis_bbox_128[:, :, ::-1], depth], - ["img_vis_bbox_128", "depth"], - row=1, - col=2, - ) - - -if __name__ == "__main__": - """Test the dataset loader. - - Usage: - python -m core.datasets.lm_dataset_d2 dataset_name - """ - from lib.vis_utils.image import grid_show - from lib.utils.setup_logger import setup_my_logger - from detectron2.utils.visualizer import Visualizer - import detectron2.data.datasets # noqa # add pre-defined metadata - from lib.pysixd import misc - from core.utils.data_utils import read_image_mmcv - from lib.vis_utils.image import vis_image_mask_bbox_cv2 - from lib.utils.mask_utils import cocosegm2mask - from lib.utils.bbox_utils import xywh_to_xyxy - - print("sys.argv:", sys.argv) - logger = setup_my_logger(name="core") - register_with_name_cfg(sys.argv[1]) - print("dataset catalog: ", DatasetCatalog.list()) - test_vis() diff --git a/core/gdrn_modeling/datasets/lm_new_duck_pbr.py b/core/gdrn_modeling/datasets/lm_new_duck_pbr.py deleted file mode 100644 index b3f8ddd..0000000 --- a/core/gdrn_modeling/datasets/lm_new_duck_pbr.py +++ /dev/null @@ -1,491 +0,0 @@ -import hashlib -import logging -import os -import os.path as osp -import sys -import time -from collections import OrderedDict -import mmcv -import numpy as np -from tqdm import tqdm -from transforms3d.quaternions import mat2quat, quat2mat -from detectron2.data import DatasetCatalog, MetadataCatalog -from detectron2.structures import BoxMode - -cur_dir = osp.dirname(osp.abspath(__file__)) -PROJ_ROOT = osp.normpath(osp.join(cur_dir, "../../..")) -sys.path.insert(0, PROJ_ROOT) - -import ref -from lib.pysixd import inout, misc -from lib.utils.mask_utils import binary_mask_to_rle, cocosegm2mask -from lib.utils.utils import dprint, lazy_property - - -logger = logging.getLogger(__name__) -DATASETS_ROOT = osp.normpath(osp.join(PROJ_ROOT, "datasets")) - - -class LM_NEW_DUCK_PBR_Dataset: - def __init__(self, data_cfg): - """ - Set with_depth and with_masks default to True, - and decide whether to load them into dataloader/network later - with_masks: - """ - self.name = data_cfg["name"] - self.data_cfg = data_cfg - - self.objs = data_cfg["objs"] # selected objects - - self.dataset_root = data_cfg.get( - "dataset_root", - osp.join(DATASETS_ROOT, "duck_fabi/train_pbr"), - ) - assert osp.exists(self.dataset_root), self.dataset_root - self.models_root = data_cfg["models_root"] # duck_fabi/models - self.scale_to_meter = data_cfg["scale_to_meter"] # 0.001 - - self.with_masks = data_cfg["with_masks"] - self.with_depth = data_cfg["with_depth"] - - self.height = data_cfg["height"] - self.width = data_cfg["width"] - - self.cache_dir = data_cfg.get("cache_dir", osp.join(PROJ_ROOT, ".cache")) # .cache - self.use_cache = data_cfg.get("use_cache", True) - self.num_to_load = data_cfg.get("num_to_load", -1) # -1 - self.filter_invalid = data_cfg.get("filter_invalid", True) - ################################################## - - # NOTE: careful! Only the selected objects - self.cat_ids = [cat_id for cat_id, obj_name in ref.lm_duck_fabi.id2obj.items() if obj_name in self.objs] - # map selected objs to [0, num_objs-1] - self.cat2label = {v: i for i, v in enumerate(self.cat_ids)} # id_map - self.label2cat = {label: cat for cat, label in self.cat2label.items()} - self.obj2label = OrderedDict((obj, obj_id) for obj_id, obj in enumerate(self.objs)) - ########################################################## - - self.scenes = [f"{i:06d}" for i in range(50)] - - def __call__(self): - """Load light-weight instance annotations of all images into a list of - dicts in Detectron2 format. - - Do not load heavy data into memory in this file, since we will - load the annotations of all images into memory. - """ - # cache the dataset_dicts to avoid loading masks from files - hashed_file_name = hashlib.md5( - ( - "".join([str(fn) for fn in self.objs]) - + "dataset_dicts_{}_{}_{}_{}_{}".format( - self.name, - self.dataset_root, - self.with_masks, - self.with_depth, - __name__, - ) - ).encode("utf-8") - ).hexdigest() - cache_path = osp.join( - self.cache_dir, - "dataset_dicts_{}_{}.pkl".format(self.name, hashed_file_name), - ) - - if osp.exists(cache_path) and self.use_cache: - logger.info("load cached dataset dicts from {}".format(cache_path)) - return mmcv.load(cache_path) - - t_start = time.perf_counter() - - logger.info("loading dataset dicts: {}".format(self.name)) - self.num_instances_without_valid_segmentation = 0 - self.num_instances_without_valid_box = 0 - dataset_dicts = [] # ###################################################### - # it is slow because of loading and converting masks to rle - for scene in tqdm(self.scenes): - scene_id = int(scene) - scene_root = osp.join(self.dataset_root, scene) - - gt_dict = mmcv.load(osp.join(scene_root, "scene_gt.json")) - gt_info_dict = mmcv.load(osp.join(scene_root, "scene_gt_info.json")) - cam_dict = mmcv.load(osp.join(scene_root, "scene_camera.json")) - - for str_im_id in tqdm(gt_dict, postfix=f"{scene_id}"): - int_im_id = int(str_im_id) - rgb_path = osp.join(scene_root, "rgb/{:06d}.jpg").format(int_im_id) - assert osp.exists(rgb_path), rgb_path - - depth_path = osp.join(scene_root, "depth/{:06d}.png".format(int_im_id)) - - scene_im_id = f"{scene_id}/{int_im_id}" - - K = np.array(cam_dict[str_im_id]["cam_K"], dtype=np.float32).reshape(3, 3) - depth_factor = 1000.0 / cam_dict[str_im_id]["depth_scale"] # 10000 - - record = { - "dataset_name": self.name, - "file_name": osp.relpath(rgb_path, PROJ_ROOT), - "depth_file": osp.relpath(depth_path, PROJ_ROOT), - "height": self.height, - "width": self.width, - "image_id": int_im_id, - "scene_im_id": scene_im_id, # for evaluation - "cam": K, - "depth_factor": depth_factor, - "img_type": "syn_pbr", # NOTE: has background - } - insts = [] - for anno_i, anno in enumerate(gt_dict[str_im_id]): - obj_id = anno["obj_id"] - if obj_id not in self.cat_ids: - continue - cur_label = self.cat2label[obj_id] # 0-based label - R = np.array(anno["cam_R_m2c"], dtype="float32").reshape(3, 3) - t = np.array(anno["cam_t_m2c"], dtype="float32") / 1000.0 - pose = np.hstack([R, t.reshape(3, 1)]) - quat = mat2quat(R).astype("float32") - - proj = (record["cam"] @ t.T).T - proj = proj[:2] / proj[2] - - bbox_visib = gt_info_dict[str_im_id][anno_i]["bbox_visib"] - bbox_obj = gt_info_dict[str_im_id][anno_i]["bbox_obj"] - x1, y1, w, h = bbox_visib - if self.filter_invalid: - if h <= 1 or w <= 1: - self.num_instances_without_valid_box += 1 - continue - - mask_file = osp.join( - scene_root, - "mask/{:06d}_{:06d}.png".format(int_im_id, anno_i), - ) - mask_visib_file = osp.join( - scene_root, - "mask_visib/{:06d}_{:06d}.png".format(int_im_id, anno_i), - ) - assert osp.exists(mask_file), mask_file - assert osp.exists(mask_visib_file), mask_visib_file - # load mask visib - mask_single = mmcv.imread(mask_visib_file, "unchanged") - mask_single = mask_single.astype("bool") - area = mask_single.sum() - if area < 30: # filter out too small or nearly invisible instances - self.num_instances_without_valid_segmentation += 1 - continue - mask_rle = binary_mask_to_rle(mask_single, compressed=True) - - # load mask full - mask_full = mmcv.imread(mask_file, "unchanged") - mask_full = mask_full.astype("bool") - mask_full_rle = binary_mask_to_rle(mask_full, compressed=True) - - visib_fract = gt_info_dict[str_im_id][anno_i].get("visib_fract", 1.0) - - inst = { - "category_id": cur_label, # 0-based label - "bbox": bbox_visib, # TODO: load both bbox_obj and bbox_visib - "bbox_mode": BoxMode.XYWH_ABS, - "pose": pose, - "quat": quat, - "trans": t, - "centroid_2d": proj, # absolute (cx, cy) - "segmentation": mask_rle, - "mask_full": mask_full_rle, - "visib_fract": visib_fract, - } - - model_info = self.models_info[str(obj_id)] - inst["model_info"] = model_info - for key in ["bbox3d_and_center"]: - inst[key] = self.models[cur_label][key] - insts.append(inst) - if len(insts) == 0: # filter im without anno - continue - record["annotations"] = insts - dataset_dicts.append(record) - - if self.num_instances_without_valid_segmentation > 0: - logger.warning( - "Filtered out {} instances without valid segmentation. " - "There might be issues in your dataset generation process.".format( - self.num_instances_without_valid_segmentation - ) - ) - if self.num_instances_without_valid_box > 0: - logger.warning( - "Filtered out {} instances without valid box. " - "There might be issues in your dataset generation process.".format(self.num_instances_without_valid_box) - ) - ########################################################################## - if self.num_to_load > 0: - self.num_to_load = min(int(self.num_to_load), len(dataset_dicts)) - dataset_dicts = dataset_dicts[: self.num_to_load] - logger.info("loaded {} dataset dicts, using {}s".format(len(dataset_dicts), time.perf_counter() - t_start)) - - mmcv.mkdir_or_exist(osp.dirname(cache_path)) - mmcv.dump(dataset_dicts, cache_path, protocol=4) - logger.info("Dumped dataset_dicts to {}".format(cache_path)) - return dataset_dicts - - @lazy_property - def models_info(self): - models_info_path = osp.join(self.models_root, "models_info.json") - assert osp.exists(models_info_path), models_info_path - models_info = mmcv.load(models_info_path) # key is str(obj_id) - return models_info - - @lazy_property - def models(self): - """Load models into a list.""" - cache_path = osp.join(self.models_root, "models_{}.pkl".format("_".join(self.objs))) - if osp.exists(cache_path) and self.use_cache: - # logger.info("load cached object models from {}".format(cache_path)) - return mmcv.load(cache_path) - - models = [] - for obj_name in self.objs: - model = inout.load_ply( - osp.join( - self.models_root, - f"obj_{ref.lm_duck_fabi.obj2id[obj_name]:06d}.ply", - ), - vertex_scale=self.scale_to_meter, - ) - # NOTE: the bbox3d_and_center is not obtained from centered vertices - # for BOP models, not a big problem since they had been centered - model["bbox3d_and_center"] = misc.get_bbox3d_and_center(model["pts"]) - models.append(model) - logger.info("cache models to {}".format(cache_path)) - mmcv.dump(models, cache_path, protocol=4) - return models - - def image_aspect_ratio(self): - return self.width / self.height - - -########### register datasets ############################################################ - - -def get_lm_metadata(obj_names, ref_key): - """task specific metadata.""" - - data_ref = ref.__dict__[ref_key] - - cur_sym_infos = {} # label based key - loaded_models_info = data_ref.get_models_info() - - for i, obj_name in enumerate(obj_names): - obj_id = data_ref.obj2id[obj_name] - model_info = loaded_models_info[str(obj_id)] - if "symmetries_discrete" in model_info or "symmetries_continuous" in model_info: - sym_transforms = misc.get_symmetry_transformations(model_info, max_sym_disc_step=0.01) - sym_info = np.array([sym["R"] for sym in sym_transforms], dtype=np.float32) - else: - sym_info = None - cur_sym_infos[i] = sym_info - - meta = {"thing_classes": obj_names, "sym_infos": cur_sym_infos} - return meta - - -LM_13_OBJECTS = [ - "ape", - "benchvise", - "camera", - "can", - "cat", - "driller", - "duck", - "eggbox", - "glue", - "holepuncher", - "iron", - "lamp", - "phone", -] # no bowl, cup -LM_OCC_OBJECTS = [ - "ape", - "can", - "cat", - "driller", - "duck", - "eggbox", - "glue", - "holepuncher", -] -################################################################################ - - -SPLITS_LM_NEW_DUCK_PBR = dict( - lm_new_duck_pbr_13_train=dict( - name="lm_new_duck_pbr_13_train", - objs=LM_13_OBJECTS, # selected objects - dataset_root=osp.join(DATASETS_ROOT, "duck_fabi/train_pbr"), - models_root=osp.join(DATASETS_ROOT, "duck_fabi/models"), - scale_to_meter=0.001, - with_masks=True, # (load masks but may not use it) - with_depth=True, # (load depth path here, but may not use it) - height=720, - width=1280, - cache_dir=osp.join(PROJ_ROOT, ".cache"), - use_cache=True, - num_to_load=-1, - filter_invalid=True, - ref_key="lm_duck_fabi", - ), - lm_new_duck_pbr_8_train=dict( - name="lm_new_duck_pbr_8_train", - objs=LM_OCC_OBJECTS, # selected objects - dataset_root=osp.join(DATASETS_ROOT, "duck_fabi/train_pbr"), - models_root=osp.join(DATASETS_ROOT, "duck_fabi/models"), - scale_to_meter=0.001, - with_masks=True, # (load masks but may not use it) - with_depth=True, # (load depth path here, but may not use it) - height=720, - width=1280, - cache_dir=osp.join(PROJ_ROOT, ".cache"), - use_cache=True, - num_to_load=-1, - filter_invalid=True, - ref_key="lm_duck_fabi", # TODO: maybe have bug - ), -) - -# single obj splits -for obj in ref.lm_duck_fabi.objects: - for split in ["train"]: - name = "lm_new_duck_pbr_{}_{}".format(obj, split) - if split in ["train"]: - filter_invalid = True - else: - raise ValueError("{}".format(split)) - if name not in SPLITS_LM_NEW_DUCK_PBR: - SPLITS_LM_NEW_DUCK_PBR[name] = dict( - name=name, - objs=[obj], # only this obj - dataset_root=osp.join(DATASETS_ROOT, "duck_fabi/train_pbr"), - models_root=osp.join(DATASETS_ROOT, "duck_fabi/models"), - scale_to_meter=0.001, - with_masks=True, # (load masks but may not use it) - with_depth=True, # (load depth path here, but may not use it) - height=720, - width=1280, - cache_dir=osp.join(PROJ_ROOT, ".cache"), - use_cache=True, - num_to_load=-1, - filter_invalid=filter_invalid, - ref_key="lm_duck_fabi", - ) - - -def register_with_name_cfg(name, data_cfg=None): - """Assume pre-defined datasets live in `./datasets`. - - Args: - name: datasnet_name, - data_cfg: if name is in existing SPLITS, use pre-defined data_cfg - otherwise requires data_cfg - data_cfg can be set in cfg.DATA_CFG.name - """ - dprint("register dataset: {}".format(name)) - if name in SPLITS_LM_NEW_DUCK_PBR: - used_cfg = SPLITS_LM_NEW_DUCK_PBR[name] - else: - assert data_cfg is not None, f"dataset name {name} is not registered" - used_cfg = data_cfg - DatasetCatalog.register(name, LM_NEW_DUCK_PBR_Dataset(used_cfg)) - # something like eval_types - MetadataCatalog.get(name).set( - ref_key=used_cfg["ref_key"], - objs=used_cfg["objs"], - eval_error_types=["ad", "rete", "proj"], - evaluator_type="bop", - **get_lm_metadata(obj_names=used_cfg["objs"], ref_key=used_cfg["ref_key"]), - ) - - -def get_available_datasets(): - return list(SPLITS_LM_NEW_DUCK_PBR.keys()) - - -#### tests ############################################### -def test_vis(): - dset_name = sys.argv[1] - assert dset_name in DatasetCatalog.list() - - meta = MetadataCatalog.get(dset_name) - dprint("MetadataCatalog: ", meta) - objs = meta.objs - - t_start = time.perf_counter() - dicts = DatasetCatalog.get(dset_name) - logger.info("Done loading {} samples with {:.3f}s.".format(len(dicts), time.perf_counter() - t_start)) - - dirname = "output/{}-data-vis".format(dset_name) - os.makedirs(dirname, exist_ok=True) - for d in dicts: - img = read_image_mmcv(d["file_name"], format="BGR") - depth = mmcv.imread(d["depth_file"], "unchanged") / 10000.0 - - imH, imW = img.shape[:2] - annos = d["annotations"] - masks = [cocosegm2mask(anno["segmentation"], imH, imW) for anno in annos] - bboxes = [anno["bbox"] for anno in annos] - bbox_modes = [anno["bbox_mode"] for anno in annos] - bboxes_xyxy = np.array( - [BoxMode.convert(box, box_mode, BoxMode.XYXY_ABS) for box, box_mode in zip(bboxes, bbox_modes)] - ) - kpts_3d_list = [anno["bbox3d_and_center"] for anno in annos] - quats = [anno["quat"] for anno in annos] - transes = [anno["trans"] for anno in annos] - Rs = [quat2mat(quat) for quat in quats] - # 0-based label - cat_ids = [anno["category_id"] for anno in annos] - K = d["cam"] - kpts_2d = [misc.project_pts(kpt3d, K, R, t) for kpt3d, R, t in zip(kpts_3d_list, Rs, transes)] - # # TODO: visualize pose and keypoints - labels = [objs[cat_id] for cat_id in cat_ids] - for _i in range(len(annos)): - img_vis = vis_image_mask_bbox_cv2( - img, - masks[_i : _i + 1], - bboxes=bboxes_xyxy[_i : _i + 1], - labels=labels[_i : _i + 1], - ) - img_vis_kpts2d = misc.draw_projected_box3d(img_vis.copy(), kpts_2d[_i]) - - grid_show( - [ - img[:, :, [2, 1, 0]], - img_vis[:, :, [2, 1, 0]], - img_vis_kpts2d[:, :, [2, 1, 0]], - depth, - ], - ["img", "vis_img", "img_vis_kpts2d", "depth"], - row=2, - col=2, - ) - - -if __name__ == "__main__": - """Test the dataset loader. - - Usage: - python -m core.gdrn_modeling.datasets.lm_new_duck_pbr dataset_name - """ - from lib.vis_utils.image import grid_show - from lib.utils.setup_logger import setup_my_logger - - import detectron2.data.datasets # noqa # add pre-defined metadata - from lib.vis_utils.image import vis_image_mask_bbox_cv2 - from core.utils.data_utils import read_image_mmcv - - print("sys.argv:", sys.argv) - logger = setup_my_logger(name="core") - register_with_name_cfg(sys.argv[1]) - print("dataset catalog: ", DatasetCatalog.list()) - - test_vis() diff --git a/core/gdrn_modeling/datasets/lm_syn_egl.py b/core/gdrn_modeling/datasets/lm_syn_egl.py deleted file mode 100644 index af5f9ad..0000000 --- a/core/gdrn_modeling/datasets/lm_syn_egl.py +++ /dev/null @@ -1,542 +0,0 @@ -import hashlib -import logging -import os -import os.path as osp -import sys - -cur_dir = osp.dirname(osp.abspath(__file__)) -PROJ_ROOT = osp.normpath(osp.join(cur_dir, "../../..")) -sys.path.insert(0, PROJ_ROOT) -import time -from collections import OrderedDict -import mmcv -import numpy as np -from tqdm import tqdm -from transforms3d.quaternions import mat2quat, quat2mat -import ref -from detectron2.data import DatasetCatalog, MetadataCatalog -from detectron2.structures import BoxMode -from lib.pysixd import inout, misc -from lib.utils.mask_utils import binary_mask_to_rle, cocosegm2mask, mask2bbox_xywh -from lib.utils.utils import dprint, iprint, lazy_property - - -logger = logging.getLogger(__name__) -DATASETS_ROOT = osp.normpath(osp.join(PROJ_ROOT, "datasets")) - - -class LM_SYN_EGL_Dataset(object): - """lm synthetic data by egl renderer.""" - - def __init__(self, data_cfg): - """ - Set with_depth and with_masks default to True, - and decide whether to load them into dataloader/network later - with_masks: - """ - self.name = data_cfg["name"] - self.data_cfg = data_cfg - - self.objs = data_cfg["objs"] # selected objects - - self.dataset_root = data_cfg.get("dataset_root", osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/train_egl")) - self.xyz_root = data_cfg.get("xyz_root", osp.join(self.dataset_root, "xyz_crop")) - assert osp.exists(self.dataset_root), self.dataset_root - - self.gt_path = osp.join(self.dataset_root, "gt.json") - assert osp.exists(self.gt_path) - - self.models_root = data_cfg["models_root"] # BOP_DATASETS/lm/models - self.scale_to_meter = data_cfg["scale_to_meter"] # 0.001 - - self.with_masks = data_cfg["with_masks"] - self.with_depth = data_cfg["with_depth"] - self.depth_factor = data_cfg.get("depth_factor", 10000.0) - - self.height = data_cfg["height"] # 480 - self.width = data_cfg["width"] # 640 - - self.cache_dir = data_cfg.get("cache_dir", osp.join(PROJ_ROOT, ".cache")) # .cache - self.use_cache = data_cfg.get("use_cache", True) - self.num_to_load = data_cfg["num_to_load"] # -1 - self.filter_invalid = data_cfg.get("filter_invalid", True) - ################################################## - self.cam = np.array([[572.4114, 0, 325.2611], [0, 573.57043, 242.04899], [0, 0, 1]]) - - # NOTE: careful! Only the selected objects - self.cat_ids = [cat_id for cat_id, obj_name in ref.lm_full.id2obj.items() if obj_name in self.objs] - # map selected objs to [0, num_objs-1] - self.cat2label = {v: i for i, v in enumerate(self.cat_ids)} # id_map - self.label2cat = {label: cat for cat, label in self.cat2label.items()} - self.obj2label = OrderedDict((obj, obj_id) for obj_id, obj in enumerate(self.objs)) - ########################################################## - - def __call__(self): # LM_SYN_EGL_Dataset - """Load light-weight instance annotations of all images into a list of - dicts in Detectron2 format. - - Do not load heavy data into memory in this file, since we will - load the annotations of all images into memory. - """ - # cache the dataset_dicts to avoid loading masks from files - hashed_file_name = hashlib.md5( - ( - "".join([str(fn) for fn in self.objs]) - + "dataset_dicts_{}_{}_{}_{}_{}_{}".format( - self.name, - self.dataset_root, - self.with_masks, - self.with_depth, - self.num_to_load, - __name__, - ) - ).encode("utf-8") - ).hexdigest() - cache_path = osp.join(self.cache_dir, "dataset_dicts_{}_{}.pkl".format(self.name, hashed_file_name)) - - if osp.exists(cache_path) and self.use_cache: - logger.info("load cached dataset dicts from {}".format(cache_path)) - return mmcv.load(cache_path) - - t_start = time.perf_counter() - - logger.info("loading dataset dicts: {}".format(self.name)) - self.num_instances_without_valid_segmentation = 0 - self.num_instances_without_valid_box = 0 - dataset_dicts = [] ####################################################### - gt_dict = mmcv.load(self.gt_path) - unique_im_id = 0 - for str_im_id, annos in tqdm(gt_dict.items()): - int_im_id = int(str_im_id) - - rgb_path = osp.join(self.dataset_root, "rgb/{:06d}.jpg".format(int_im_id)) - assert osp.exists(rgb_path), rgb_path - - depth_path = osp.join(self.dataset_root, "depth/{:06d}.png".format(int_im_id)) - record = { - "dataset_name": self.name, - "file_name": osp.relpath(rgb_path, PROJ_ROOT), - "depth_file": osp.relpath(depth_path, PROJ_ROOT), - "height": self.height, - "width": self.width, - "image_id": unique_im_id, - "scene_im_id": f"0/{int_im_id}", # for pose evaluation - "cam": self.cam, - "depth_factor": self.depth_factor, - "img_type": "syn_egl", # NOTE: has background - } - unique_im_id += 1 - insts = [] - for anno_i, anno in enumerate(annos): - obj_id = anno["obj_id"] - if obj_id not in self.cat_ids: - continue - cur_label = self.cat2label[obj_id] # 0-based label - pose = np.array(anno["pose"]) - R = pose[:3, :3] - t = pose[:3, 3] - quat = mat2quat(R).astype("float32") - proj = (record["cam"] @ t.T).T - proj = proj[:2] / proj[2] - - mask_vis_rle = anno["mask_visib"] - mask_full_rle = anno["mask_full"] - bbox = anno["bbox"] - - x1, y1, w, h = bbox - if self.filter_invalid: - if h <= 1 or w <= 1: - self.num_instances_without_valid_box += 1 - continue - mask_vis = cocosegm2mask(mask_vis_rle, self.height, self.width) - vis_area = mask_vis.sum() - if vis_area < 30: # filter out too small or nearly invisible instances - self.num_instances_without_valid_segmentation += 1 - continue - mask_full = cocosegm2mask(mask_full_rle, self.height, self.width) - full_area = mask_full.sum() - if full_area > 0: - visib_fract = vis_area / full_area - else: - visib_fract = 0 - - xyz_path = osp.join(self.xyz_root, f"{int_im_id:06d}_{anno_i:06d}-xyz.pkl") - assert osp.exists(xyz_path), xyz_path - inst = { - "category_id": cur_label, # 0-based label - "bbox": bbox, # TODO: load both bbox_obj and bbox_visib - "bbox_mode": BoxMode.XYWH_ABS, - "pose": pose, - "quat": quat, - "trans": t, - "centroid_2d": proj, # absolute (cx, cy) - "segmentation": mask_vis_rle, - "mask_full": mask_full_rle, - "visib_fract": visib_fract, - "xyz_path": xyz_path, - } - - model_info = self.models_info[str(obj_id)] - inst["model_info"] = model_info - # TODO: using full mask and full xyz - for key in ["bbox3d_and_center"]: - inst[key] = self.models[cur_label][key] - insts.append(inst) - if len(insts) == 0: # filter im without anno - continue - record["annotations"] = insts - dataset_dicts.append(record) - - if self.num_instances_without_valid_segmentation > 0: - logger.warning( - "Filtered out {} instances without valid segmentation. " - "There might be issues in your dataset generation process.".format( - self.num_instances_without_valid_segmentation - ) - ) - if self.num_instances_without_valid_box > 0: - logger.warning( - "Filtered out {} instances without valid box. " - "There might be issues in your dataset generation process.".format(self.num_instances_without_valid_box) - ) - ########################################################################## - if self.num_to_load > 0: - self.num_to_load = min(int(self.num_to_load), len(dataset_dicts)) - dataset_dicts = dataset_dicts[: self.num_to_load] - logger.info("loaded {} dataset dicts, using {}s".format(len(dataset_dicts), time.perf_counter() - t_start)) - - mmcv.mkdir_or_exist(osp.dirname(cache_path)) - mmcv.dump(dataset_dicts, cache_path, protocol=4) - logger.info("Dumped dataset_dicts to {}".format(cache_path)) - return dataset_dicts - - @lazy_property - def models_info(self): - models_info_path = osp.join(self.models_root, "models_info.json") - assert osp.exists(models_info_path), models_info_path - models_info = mmcv.load(models_info_path) # key is str(obj_id) - return models_info - - @lazy_property - def models(self): - """Load models into a list.""" - cache_path = osp.join(self.models_root, "models_{}.pkl".format("_".join(self.objs))) - if osp.exists(cache_path) and self.use_cache: - # dprint("{}: load cached object models from {}".format(self.name, cache_path)) - return mmcv.load(cache_path) - - models = [] - for obj_name in self.objs: - model = inout.load_ply( - osp.join( - self.models_root, - f"obj_{ref.lm_full.obj2id[obj_name]:06d}.ply", - ), - vertex_scale=self.scale_to_meter, - ) - # NOTE: the bbox3d_and_center is not obtained from centered vertices - # for BOP models, not a big problem since they had been centered - model["bbox3d_and_center"] = misc.get_bbox3d_and_center(model["pts"]) - - models.append(model) - logger.info("cache models to {}".format(cache_path)) - mmcv.dump(models, cache_path, protocol=4) - return models - - def image_aspect_ratio(self): - return self.width / self.height # 4/3 - - -########### register datasets ############################################################ - - -def get_lm_metadata(obj_names, ref_key): - """task specific metadata.""" - - data_ref = ref.__dict__[ref_key] - - cur_sym_infos = {} # label based key - loaded_models_info = data_ref.get_models_info() - - for i, obj_name in enumerate(obj_names): - obj_id = data_ref.obj2id[obj_name] - model_info = loaded_models_info[str(obj_id)] - if "symmetries_discrete" in model_info or "symmetries_continuous" in model_info: - sym_transforms = misc.get_symmetry_transformations(model_info, max_sym_disc_step=0.01) - sym_info = np.array([sym["R"] for sym in sym_transforms], dtype=np.float32) - else: - sym_info = None - cur_sym_infos[i] = sym_info - - meta = {"thing_classes": obj_names, "sym_infos": cur_sym_infos} - return meta - - -LM_13_OBJECTS = [ - "ape", - "benchvise", - "camera", - "can", - "cat", - "driller", - "duck", - "eggbox", - "glue", - "holepuncher", - "iron", - "lamp", - "phone", -] # no bowl, cup -LM_OCC_OBJECTS = [ - "ape", - "can", - "cat", - "driller", - "duck", - "eggbox", - "glue", - "holepuncher", -] -lm_model_root = "BOP_DATASETS/lm/models/" -lmo_model_root = "BOP_DATASETS/lmo/models/" -################################################################################ - -SPLITS_LM_EGL = dict( - lm_egl_13_train=dict( - name="lm_egl_13_train", - objs=LM_13_OBJECTS, # selected objects - dataset_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/train_egl"), - models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/models"), - xyz_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/train_egl/xyz_crop"), - scale_to_meter=0.001, - with_masks=True, # (load masks but may not use it) - with_depth=True, # (load depth path here, but may not use it) - depth_factor=10000.0, - height=480, - width=640, - cache_dir=osp.join(PROJ_ROOT, ".cache"), - use_cache=True, - num_to_load=-1, - filter_scene=True, - ref_key="lm_full", - ), - lmo_egl_train=dict( - name="lmo_egl_train", - objs=LM_OCC_OBJECTS, # selected objects - dataset_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/train_egl"), - models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/models"), - # NOTE: soft link to lm/train_egl - xyz_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/train_egl/xyz_crop"), - scale_to_meter=0.001, - with_masks=True, # (load masks but may not use it) - with_depth=True, # (load depth path here, but may not use it) - depth_factor=10000.0, - height=480, - width=640, - cache_dir=osp.join(PROJ_ROOT, ".cache"), - use_cache=True, - num_to_load=-1, - filter_invalid=True, - ref_key="lmo_full", - ), -) - -# single obj splits -for obj in ref.lm_full.objects: - for split in ["train"]: - name = "lm_egl_{}_{}".format(obj, split) - if split in ["train"]: - filter_invalid = True - else: - raise ValueError("{}".format(split)) - if name not in SPLITS_LM_EGL: - SPLITS_LM_EGL[name] = dict( - name=name, - objs=[obj], # only this obj - dataset_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/train_egl"), - models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/models"), - xyz_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/train_egl/xyz_crop"), - scale_to_meter=0.001, - with_masks=True, # (load masks but may not use it) - with_depth=True, # (load depth path here, but may not use it) - depth_factor=10000.0, - height=480, - width=640, - cache_dir=osp.join(PROJ_ROOT, ".cache"), - use_cache=True, - num_to_load=-1, - filter_invalid=filter_invalid, - ref_key="lm_full", - ) - -# lmo single objs -for obj in ref.lmo_full.objects: - for split in ["train"]: - name = "lmo_egl_{}_{}".format(obj, split) - if split in ["train"]: - filter_invalid = True - else: - raise ValueError("{}".format(split)) - if name not in SPLITS_LM_EGL: - SPLITS_LM_EGL[name] = dict( - name=name, - objs=[obj], # only this obj - dataset_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/train_egl"), - models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/models"), - # NOTE: soft link to lm/train_egl - xyz_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/train_egl/xyz_crop"), - scale_to_meter=0.001, - with_masks=True, # (load masks but may not use it) - with_depth=True, # (load depth path here, but may not use it) - depth_factor=10000.0, - height=480, - width=640, - cache_dir=osp.join(PROJ_ROOT, ".cache"), - use_cache=True, - num_to_load=-1, - filter_invalid=filter_invalid, - ref_key="lmo_full", - ) - - -def register_with_name_cfg(name, data_cfg=None): - """Assume pre-defined datasets live in `./datasets`. - - Args: - name: datasnet_name, - data_cfg: if name is in existing SPLITS, use pre-defined data_cfg - otherwise requires data_cfg - data_cfg can be set in cfg.DATA_CFG.name - """ - dprint("register dataset: {}".format(name)) - if name in SPLITS_LM_EGL: - used_cfg = SPLITS_LM_EGL[name] - else: - assert data_cfg is not None, f"dataset name {name} is not registered" - used_cfg = data_cfg - DatasetCatalog.register(name, LM_SYN_EGL_Dataset(used_cfg)) - # something like eval_types - MetadataCatalog.get(name).set( - ref_key=used_cfg["ref_key"], - objs=used_cfg["objs"], - eval_error_types=["ad", "rete", "proj"], - evaluator_type="bop", - **get_lm_metadata(obj_names=used_cfg["objs"], ref_key=used_cfg["ref_key"]), - ) - - -def get_available_datasets(): - return list(SPLITS_LM_EGL.keys()) - - -#### tests ############################################### -def test_vis(): - dset_name = sys.argv[1] - assert dset_name in DatasetCatalog.list() - - meta = MetadataCatalog.get(dset_name) - dprint("MetadataCatalog: ", meta) - objs = meta.objs - - t_start = time.perf_counter() - dicts = DatasetCatalog.get(dset_name) - logger.info("Done loading {} samples with {:.3f}s.".format(len(dicts), time.perf_counter() - t_start)) - - dirname = "output/{}-data-vis".format(dset_name) - os.makedirs(dirname, exist_ok=True) - for d in dicts: - img = read_image_mmcv(d["file_name"], format="BGR") - depth = mmcv.imread(d["depth_file"], "unchanged") / 10000.0 - - imH, imW = img.shape[:2] - annos = d["annotations"] - masks = [cocosegm2mask(anno["segmentation"], imH, imW) for anno in annos] - bboxes = [anno["bbox"] for anno in annos] - bbox_modes = [anno["bbox_mode"] for anno in annos] - bboxes_xyxy = np.array( - [BoxMode.convert(box, box_mode, BoxMode.XYXY_ABS) for box, box_mode in zip(bboxes, bbox_modes)] - ) - kpts_3d_list = [anno["bbox3d_and_center"] for anno in annos] - quats = [anno["quat"] for anno in annos] - transes = [anno["trans"] for anno in annos] - Rs = [quat2mat(quat) for quat in quats] - # 0-based label - cat_ids = [anno["category_id"] for anno in annos] - K = d["cam"] - kpts_2d = [misc.project_pts(kpt3d, K, R, t) for kpt3d, R, t in zip(kpts_3d_list, Rs, transes)] - # # TODO: visualize pose and keypoints - labels = [objs[cat_id] for cat_id in cat_ids] - for _i in range(len(annos)): - img_vis = vis_image_mask_bbox_cv2( - img, - masks[_i : _i + 1], - bboxes=bboxes_xyxy[_i : _i + 1], - labels=labels[_i : _i + 1], - ) - img_vis_kpts2d = misc.draw_projected_box3d(img_vis.copy(), kpts_2d[_i]) - xyz_path = annos[_i]["xyz_path"] - xyz_info = mmcv.load(xyz_path) - x1, y1, x2, y2 = xyz_info["xyxy"] - xyz_crop = xyz_info["xyz_crop"].astype(np.float32) - xyz = np.zeros((imH, imW, 3), dtype=np.float32) - xyz[y1 : y2 + 1, x1 : x2 + 1, :] = xyz_crop - xyz_show = get_emb_show(xyz) - xyz_crop_show = get_emb_show(xyz_crop) - img_xyz = img.copy() / 255.0 - mask_xyz = ((xyz[:, :, 0] != 0) | (xyz[:, :, 1] != 0) | (xyz[:, :, 2] != 0)).astype("uint8") - fg_idx = np.where(mask_xyz != 0) - img_xyz[fg_idx[0], fg_idx[1], :] = xyz_show[fg_idx[0], fg_idx[1], :3] - img_xyz_crop = img_xyz[y1 : y2 + 1, x1 : x2 + 1, :] - img_vis_crop = img_vis[y1 : y2 + 1, x1 : x2 + 1, :] - # diff mask - diff_mask_xyz = np.abs(masks[_i] - mask_xyz)[y1 : y2 + 1, x1 : x2 + 1] - - grid_show( - [ - img[:, :, [2, 1, 0]], - img_vis[:, :, [2, 1, 0]], - img_vis_kpts2d[:, :, [2, 1, 0]], - depth, - # xyz_show, - diff_mask_xyz, - xyz_crop_show, - img_xyz[:, :, [2, 1, 0]], - img_xyz_crop[:, :, [2, 1, 0]], - img_vis_crop, - ], - [ - "img", - "vis_img", - "img_vis_kpts2d", - "depth", - "diff_mask_xyz", - "xyz_crop_show", - "img_xyz", - "img_xyz_crop", - "img_vis_crop", - ], - row=3, - col=3, - ) - - -if __name__ == "__main__": - """Test the dataset loader. - - Usage: - python -m this_module dataset_name - """ - from lib.vis_utils.image import grid_show - from lib.utils.setup_logger import setup_my_logger - - import detectron2.data.datasets # noqa # add pre-defined metadata - from lib.vis_utils.image import vis_image_mask_bbox_cv2 - from core.utils.utils import get_emb_show - from core.utils.data_utils import read_image_mmcv - - print("sys.argv:", sys.argv) - logger = setup_my_logger(name="core") - register_with_name_cfg(sys.argv[1]) - print("dataset catalog: ", DatasetCatalog.list()) - - test_vis() diff --git a/core/gdrn_modeling/datasets/lm_syn_imgn.py b/core/gdrn_modeling/datasets/lm_syn_imgn.py deleted file mode 100644 index 3b7bf9a..0000000 --- a/core/gdrn_modeling/datasets/lm_syn_imgn.py +++ /dev/null @@ -1,490 +0,0 @@ -import hashlib -import logging -import os -import os.path as osp -import sys - -cur_dir = osp.dirname(osp.abspath(__file__)) -PROJ_ROOT = osp.normpath(osp.join(cur_dir, "../../..")) -sys.path.insert(0, PROJ_ROOT) -import time -from collections import OrderedDict -import mmcv -import numpy as np -from tqdm import tqdm -from transforms3d.quaternions import mat2quat, quat2mat -import random -import ref -from detectron2.data import DatasetCatalog, MetadataCatalog -from detectron2.structures import BoxMode -from lib.pysixd import inout, misc -from lib.utils.mask_utils import ( - binary_mask_to_rle, - cocosegm2mask, - mask2bbox_xywh, -) -from lib.utils.utils import dprint, iprint, lazy_property - - -logger = logging.getLogger(__name__) -DATASETS_ROOT = osp.normpath(osp.join(PROJ_ROOT, "datasets")) - - -class LM_SYN_IMGN_Dataset(object): - """lm synthetic data, imgn(imagine) from DeepIM.""" - - def __init__(self, data_cfg): - """ - Set with_depth and with_masks default to True, - and decide whether to load them into dataloader/network later - with_masks: - """ - self.name = data_cfg["name"] - self.data_cfg = data_cfg - - self.objs = data_cfg["objs"] # selected objects - - self.ann_files = data_cfg["ann_files"] # idx files with image ids - self.image_prefixes = data_cfg["image_prefixes"] - self.xyz_prefixes = data_cfg["xyz_prefixes"] - - self.dataset_root = data_cfg["dataset_root"] # lm_imgn - self.models_root = data_cfg["models_root"] # BOP_DATASETS/lm/models - self.scale_to_meter = data_cfg["scale_to_meter"] # 0.001 - - self.with_masks = data_cfg["with_masks"] # True (load masks but may not use it) - self.with_depth = data_cfg["with_depth"] # True (load depth path here, but may not use it) - self.depth_factor = data_cfg["depth_factor"] # 1000.0 - - self.cam = data_cfg["cam"] # - self.height = data_cfg["height"] # 480 - self.width = data_cfg["width"] # 640 - - self.cache_dir = data_cfg["cache_dir"] # .cache - self.use_cache = data_cfg["use_cache"] # True - # sample uniformly to get n items - self.n_per_obj = data_cfg.get("n_per_obj", 1000) - self.filter_invalid = data_cfg["filter_invalid"] - self.filter_scene = data_cfg.get("filter_scene", False) - ################################################## - if self.cam is None: - self.cam = np.array([[572.4114, 0, 325.2611], [0, 573.57043, 242.04899], [0, 0, 1]]) - - # NOTE: careful! Only the selected objects - self.cat_ids = [cat_id for cat_id, obj_name in ref.lm_full.id2obj.items() if obj_name in self.objs] - # map selected objs to [0, num_objs-1] - self.cat2label = {v: i for i, v in enumerate(self.cat_ids)} # id_map - self.label2cat = {label: cat for cat, label in self.cat2label.items()} - self.obj2label = OrderedDict((obj, obj_id) for obj_id, obj in enumerate(self.objs)) - ########################################################## - - def __call__(self): # LM_SYN_IMGN_Dataset - """Load light-weight instance annotations of all images into a list of - dicts in Detectron2 format. - - Do not load heavy data into memory in this file, since we will - load the annotations of all images into memory. - """ - # cache the dataset_dicts to avoid loading masks from files - hashed_file_name = hashlib.md5( - ( - "".join([str(fn) for fn in self.objs]) - + "dataset_dicts_{}_{}_{}_{}_{}_{}".format( - self.name, - self.dataset_root, - self.with_masks, - self.with_depth, - self.n_per_obj, - __name__, - ) - ).encode("utf-8") - ).hexdigest() - cache_path = osp.join( - self.dataset_root, - "dataset_dicts_{}_{}.pkl".format(self.name, hashed_file_name), - ) - - if osp.exists(cache_path) and self.use_cache: - logger.info("load cached dataset dicts from {}".format(cache_path)) - return mmcv.load(cache_path) - - t_start = time.perf_counter() - - logger.info("loading dataset dicts: {}".format(self.name)) - self.num_instances_without_valid_segmentation = 0 - self.num_instances_without_valid_box = 0 - dataset_dicts = [] ####################################################### - assert len(self.ann_files) == len(self.image_prefixes), f"{len(self.ann_files)} != {len(self.image_prefixes)}" - assert len(self.ann_files) == len(self.xyz_prefixes), f"{len(self.ann_files)} != {len(self.xyz_prefixes)}" - for ann_file, scene_root, xyz_root in zip(self.ann_files, self.image_prefixes, self.xyz_prefixes): - # linemod each scene is an object - with open(ann_file, "r") as f_ann: - indices = [line.strip("\r\n").split()[-1] for line in f_ann.readlines()] # string ids - # sample uniformly (equal space) - if self.n_per_obj > 0: - sample_num = min(self.n_per_obj, len(indices)) - sel_indices_idx = np.linspace(0, len(indices) - 1, sample_num, dtype=np.int32) - sel_indices = [indices[int(_i)] for _i in sel_indices_idx] - else: - sel_indices = indices - - for im_id in tqdm(sel_indices): - rgb_path = osp.join(scene_root, "{}-color.png").format(im_id) - assert osp.exists(rgb_path), rgb_path - - depth_path = osp.join(scene_root, "{}-depth.png".format(im_id)) - - obj_name = im_id.split("/")[0] - if obj_name == "benchviseblue": - obj_name = "benchvise" - obj_id = ref.lm_full.obj2id[obj_name] - if self.filter_scene: - if obj_name not in self.objs: - continue - record = { - "dataset_name": self.name, - "file_name": osp.relpath(rgb_path, PROJ_ROOT), - "depth_file": osp.relpath(depth_path, PROJ_ROOT), - "height": self.height, - "width": self.width, - "image_id": im_id.split("/")[-1], - "scene_im_id": im_id, - "cam": self.cam, - "img_type": "syn", - } - - cur_label = self.obj2label[obj_name] # 0-based label - pose_path = osp.join(scene_root, "{}-pose.txt".format(im_id)) - pose = np.loadtxt(pose_path, skiprows=1) - R = pose[:3, :3] - t = pose[:3, 3] - quat = mat2quat(R).astype("float32") - proj = (record["cam"] @ t.T).T - proj = proj[:2] / proj[2] - - depth = mmcv.imread(depth_path, "unchanged") / 1000.0 - mask = (depth > 0).astype(np.uint8) - - bbox_obj = mask2bbox_xywh(mask) - x1, y1, w, h = bbox_obj - if self.filter_invalid: - if h <= 1 or w <= 1: - self.num_instances_without_valid_box += 1 - continue - area = mask.sum() - if area < 3: # filter out too small or nearly invisible instances - self.num_instances_without_valid_segmentation += 1 - continue - mask_rle = binary_mask_to_rle(mask, compressed=True) - - xyz_path = osp.join(xyz_root, f"{im_id}-xyz.pkl") - assert osp.exists(xyz_path), xyz_path - inst = { - "category_id": cur_label, # 0-based label - "bbox": bbox_obj, # TODO: load both bbox_obj and bbox_visib - "bbox_mode": BoxMode.XYWH_ABS, - "pose": pose, - "quat": quat, - "trans": t, - "centroid_2d": proj, # absolute (cx, cy) - "segmentation": mask_rle, - "mask_full": mask_rle, # only one object - "visib_fract": 1.0, - "xyz_path": xyz_path, - } - - model_info = self.models_info[str(obj_id)] - inst["model_info"] = model_info - # TODO: using full mask and full xyz - for key in ["bbox3d_and_center"]: - inst[key] = self.models[cur_label][key] - record["annotations"] = [inst] - dataset_dicts.append(record) - - if self.num_instances_without_valid_segmentation > 0: - logger.warning( - "Filtered out {} instances without valid segmentation. " - "There might be issues in your dataset generation process.".format( - self.num_instances_without_valid_segmentation - ) - ) - if self.num_instances_without_valid_box > 0: - logger.warning( - "Filtered out {} instances without valid box. " - "There might be issues in your dataset generation process.".format(self.num_instances_without_valid_box) - ) - ########################################################################## - # if self.num_to_load > 0: - # self.num_to_load = min(int(self.num_to_load), len(dataset_dicts)) - # random.shuffle(dataset_dicts) - # dataset_dicts = dataset_dicts[: self.num_to_load] - logger.info( - "loaded dataset dicts, num_images: {}, using {}s".format(len(dataset_dicts), time.perf_counter() - t_start) - ) - - mmcv.dump(dataset_dicts, cache_path, protocol=4) - logger.info("Dumped dataset_dicts to {}".format(cache_path)) - return dataset_dicts - - @lazy_property - def models_info(self): - models_info_path = osp.join(self.models_root, "models_info.json") - assert osp.exists(models_info_path), models_info_path - models_info = mmcv.load(models_info_path) # key is str(obj_id) - return models_info - - @lazy_property - def models(self): - """Load models into a list.""" - cache_path = osp.join(self.models_root, "models_{}.pkl".format("_".join(self.objs))) - if osp.exists(cache_path) and self.use_cache: - # logger.info("load cached object models from {}".format(cache_path)) - return mmcv.load(cache_path) - - models = [] - for obj_name in self.objs: - model = inout.load_ply( - osp.join( - self.models_root, - f"obj_{ref.lm_full.obj2id[obj_name]:06d}.ply", - ), - vertex_scale=self.scale_to_meter, - ) - # NOTE: the bbox3d_and_center is not obtained from centered vertices - # for BOP models, not a big problem since they had been centered - model["bbox3d_and_center"] = misc.get_bbox3d_and_center(model["pts"]) - - models.append(model) - logger.info("cache models to {}".format(cache_path)) - mmcv.dump(models, cache_path, protocol=4) - return models - - def image_aspect_ratio(self): - # return 1 - return self.width / self.height # 4/3 - - -########### register datasets ############################################################ - - -def get_lm_metadata(obj_names, ref_key): - """task specific metadata.""" - - data_ref = ref.__dict__[ref_key] - - cur_sym_infos = {} # label based key - loaded_models_info = data_ref.get_models_info() - - for i, obj_name in enumerate(obj_names): - obj_id = data_ref.obj2id[obj_name] - model_info = loaded_models_info[str(obj_id)] - if "symmetries_discrete" in model_info or "symmetries_continuous" in model_info: - sym_transforms = misc.get_symmetry_transformations(model_info, max_sym_disc_step=0.01) - sym_info = np.array([sym["R"] for sym in sym_transforms], dtype=np.float32) - else: - sym_info = None - cur_sym_infos[i] = sym_info - - meta = {"thing_classes": obj_names, "sym_infos": cur_sym_infos} - return meta - - -LM_13_OBJECTS = [ - "ape", - "benchvise", - "camera", - "can", - "cat", - "driller", - "duck", - "eggbox", - "glue", - "holepuncher", - "iron", - "lamp", - "phone", -] # no bowl, cup -################################################################################ - -SPLITS_LM_IMGN_13 = dict( - lm_imgn_13_train_1k_per_obj=dict( - name="lm_imgn_13_train_1k_per_obj", - dataset_root=osp.join(DATASETS_ROOT, "lm_imgn/"), - models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/models"), - objs=LM_13_OBJECTS, # selected objects - ann_files=[ - osp.join( - DATASETS_ROOT, - "lm_imgn/image_set/{}_{}.txt".format("train", _obj), - ) - for _obj in LM_13_OBJECTS - ], - image_prefixes=[osp.join(DATASETS_ROOT, "lm_imgn/imgn") for _obj in LM_13_OBJECTS], - xyz_prefixes=[osp.join(DATASETS_ROOT, "lm_imgn/xyz_crop_imgn/") for _obj in LM_13_OBJECTS], - scale_to_meter=0.001, - with_masks=True, # (load masks but may not use it) - with_depth=True, # (load depth path here, but may not use it) - depth_factor=1000.0, - cam=ref.lm_full.camera_matrix, - height=480, - width=640, - cache_dir=osp.join(PROJ_ROOT, ".cache"), - use_cache=True, - n_per_obj=1000, # 1000 per class - filter_scene=True, - filter_invalid=False, - ref_key="lm_full", - ) -) - -# single obj splits -for obj in ref.lm_full.objects: - for split in ["train"]: - name = "lm_imgn_13_{}_{}_1k".format(obj, split) - ann_files = [osp.join(DATASETS_ROOT, "lm_imgn/image_set/{}_{}.txt".format(split, obj))] - if split in ["train"]: - filter_invalid = True - elif split in ["test"]: - filter_invalid = False - else: - raise ValueError("{}".format(split)) - if name not in SPLITS_LM_IMGN_13: - SPLITS_LM_IMGN_13[name] = dict( - name=name, - dataset_root=osp.join(DATASETS_ROOT, "lm_imgn/"), - models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/models"), - objs=[obj], # only this obj - ann_files=ann_files, - image_prefixes=[osp.join(DATASETS_ROOT, "lm_imgn/imgn/")], - xyz_prefixes=[osp.join(DATASETS_ROOT, "lm_imgn/xyz_crop_imgn/")], - scale_to_meter=0.001, - with_masks=True, # (load masks but may not use it) - with_depth=True, # (load depth path here, but may not use it) - depth_factor=1000.0, - cam=ref.lm_full.camera_matrix, - height=480, - width=640, - cache_dir=osp.join(PROJ_ROOT, ".cache"), - use_cache=True, - n_per_obj=1000, - filter_invalid=False, - filter_scene=True, - ref_key="lm_full", - ) - - -def register_with_name_cfg(name, data_cfg=None): - """Assume pre-defined datasets live in `./datasets`. - - Args: - name: datasnet_name, - data_cfg: if name is in existing SPLITS, use pre-defined data_cfg - otherwise requires data_cfg - data_cfg can be set in cfg.DATA_CFG.name - """ - dprint("register dataset: {}".format(name)) - if name in SPLITS_LM_IMGN_13: - used_cfg = SPLITS_LM_IMGN_13[name] - else: - assert data_cfg is not None, f"dataset name {name} is not registered" - used_cfg = data_cfg - DatasetCatalog.register(name, LM_SYN_IMGN_Dataset(used_cfg)) - # something like eval_types - MetadataCatalog.get(name).set( - ref_key=used_cfg["ref_key"], - objs=used_cfg["objs"], - eval_error_types=["ad", "rete", "proj"], - evaluator_type="bop", - **get_lm_metadata(obj_names=used_cfg["objs"], ref_key=used_cfg["ref_key"]), - ) - - -def get_available_datasets(): - return list(SPLITS_LM_IMGN_13.keys()) - - -#### tests ############################################### -def test_vis(): - dset_name = sys.argv[1] - assert dset_name in DatasetCatalog.list() - - meta = MetadataCatalog.get(dset_name) - dprint("MetadataCatalog: ", meta) - objs = meta.objs - - t_start = time.perf_counter() - dicts = DatasetCatalog.get(dset_name) - logger.info("Done loading {} samples with {:.3f}s.".format(len(dicts), time.perf_counter() - t_start)) - - dirname = "output/{}-data-vis".format(dset_name) - os.makedirs(dirname, exist_ok=True) - for d in dicts: - img = read_image_mmcv(d["file_name"], format="BGR") - depth = mmcv.imread(d["depth_file"], "unchanged") / 1000.0 - - anno = d["annotations"][0] # only one instance per image - imH, imW = img.shape[:2] - mask = cocosegm2mask(anno["segmentation"], imH, imW) - bbox = anno["bbox"] - bbox_mode = anno["bbox_mode"] - bbox_xyxy = np.array(BoxMode.convert(bbox, bbox_mode, BoxMode.XYXY_ABS)) - kpt3d = anno["bbox3d_and_center"] - quat = anno["quat"] - trans = anno["trans"] - R = quat2mat(quat) - # 0-based label - cat_id = anno["category_id"] - K = d["cam"] - kpt_2d = misc.project_pts(kpt3d, K, R, trans) - # # TODO: visualize pose and keypoints - label = objs[cat_id] - # img_vis = vis_image_bboxes_cv2(img, bboxes=bboxes_xyxy, labels=labels) - img_vis = vis_image_mask_bbox_cv2(img, [mask], bboxes=[bbox_xyxy], labels=[label]) - img_vis_kpt2d = img.copy() - img_vis_kpt2d = misc.draw_projected_box3d( - img_vis_kpt2d, - kpt_2d, - middle_color=None, - bottom_color=(128, 128, 128), - ) - - xyz_info = mmcv.load(anno["xyz_path"]) - xyz = np.zeros((imH, imW, 3), dtype=np.float32) - xyz_crop = xyz_info["xyz_crop"].astype(np.float32) - x1, y1, x2, y2 = xyz_info["xyxy"] - xyz[y1 : y2 + 1, x1 : x2 + 1, :] = xyz_crop - xyz_show = get_emb_show(xyz) - - grid_show( - [ - img[:, :, [2, 1, 0]], - img_vis[:, :, [2, 1, 0]], - img_vis_kpt2d[:, :, [2, 1, 0]], - depth, - xyz_show, - ], - ["img", "vis_img", "img_vis_kpts2d", "depth", "emb_show"], - row=2, - col=3, - ) - - -if __name__ == "__main__": - """Test the dataset loader. - - Usage: - python -m core.datasets.lm_syn_imgn dataset_name - """ - from lib.vis_utils.image import grid_show - from lib.utils.setup_logger import setup_my_logger - - import detectron2.data.datasets # noqa # add pre-defined metadata - from lib.vis_utils.image import vis_image_mask_bbox_cv2 - from core.utils.utils import get_emb_show - from core.utils.data_utils import read_image_mmcv - - print("sys.argv:", sys.argv) - logger = setup_my_logger(name="core") - register_with_name_cfg(sys.argv[1]) - print("dataset catalog: ", DatasetCatalog.list()) - test_vis() diff --git a/det/yolox/data/datasets/dataset_factory.py b/det/yolox/data/datasets/dataset_factory.py index b59dc64..a1f952b 100644 --- a/det/yolox/data/datasets/dataset_factory.py +++ b/det/yolox/data/datasets/dataset_factory.py @@ -3,12 +3,7 @@ import os.path as osp import mmcv from detectron2.data import DatasetCatalog from . import ( - lm_syn_imgn, - lm_dataset_d2, - # lm_syn_egl, lm_pbr, - lm_blender, - # lm_dataset_crop_d2, ycbv_pbr, ycbv_d2, ycbv_bop_test, @@ -16,8 +11,6 @@ from . import ( hb_bop_val, hb_bop_test, hb_bench_driller_phone_d2, - # duck_frames, - # lm_new_duck_pbr, tudl_train_real, tudl_pbr, tudl_bop_test, diff --git a/det/yolox/data/datasets/lm_blender.py b/det/yolox/data/datasets/lm_blender.py deleted file mode 100644 index ca3ce35..0000000 --- a/det/yolox/data/datasets/lm_blender.py +++ /dev/null @@ -1,512 +0,0 @@ -import hashlib -import logging -import os -import os.path as osp -import sys - -import time -from collections import OrderedDict -import mmcv -import numpy as np -from tqdm import tqdm -from transforms3d.quaternions import mat2quat, quat2mat -from detectron2.data import DatasetCatalog, MetadataCatalog -from detectron2.structures import BoxMode - -cur_dir = osp.dirname(osp.abspath(__file__)) -PROJ_ROOT = osp.normpath(osp.join(cur_dir, "../../../..")) -sys.path.insert(0, PROJ_ROOT) -import ref -from lib.pysixd import inout, misc -from lib.utils.mask_utils import binary_mask_to_rle, cocosegm2mask -from lib.utils.utils import dprint, lazy_property - -logger = logging.getLogger(__name__) -DATASETS_ROOT = osp.normpath(osp.join(PROJ_ROOT, "datasets")) - - -class LM_BLENDER_Dataset(object): - """lm blender data, from pvnet-rendering.""" - - def __init__(self, data_cfg): - """ - Set with_depth and with_masks default to True, - and decide whether to load them into dataloader/network later - with_masks: - """ - self.name = data_cfg["name"] - self.data_cfg = data_cfg - - self.objs = data_cfg["objs"] # selected objects - - self.ann_files = data_cfg["ann_files"] # json files with image ids and pose/bbox - self.image_prefixes = data_cfg["image_prefixes"] - - self.dataset_root = data_cfg["dataset_root"] # lm_renders_blender/ - self.models_root = data_cfg["models_root"] # BOP_DATASETS/lm/models - self.scale_to_meter = data_cfg["scale_to_meter"] # 0.001 - - self.with_masks = data_cfg["with_masks"] # True (load masks but may not use it) - self.with_depth = data_cfg["with_depth"] # True (load depth path here, but may not use it) - self.depth_factor = data_cfg["depth_factor"] # 1000.0 - - self.cam = data_cfg["cam"] # - self.height = data_cfg["height"] # 480 - self.width = data_cfg["width"] # 640 - - self.cache_dir = data_cfg["cache_dir"] # .cache - self.use_cache = data_cfg["use_cache"] # True - # sample uniformly to get n items - self.n_per_obj = data_cfg.get("n_per_obj", 10000) - self.filter_invalid = data_cfg["filter_invalid"] - ################################################## - if self.cam is None: - self.cam = np.array([[572.4114, 0, 325.2611], [0, 573.57043, 242.04899], [0, 0, 1]]) - - # NOTE: careful! Only the selected objects - self.cat_ids = [cat_id for cat_id, obj_name in ref.lm_full.id2obj.items() if obj_name in self.objs] - # map selected objs to [0, num_objs-1] - self.cat2label = {v: i for i, v in enumerate(self.cat_ids)} # id_map - self.label2cat = {label: cat for cat, label in self.cat2label.items()} - self.obj2label = OrderedDict((obj, obj_id) for obj_id, obj in enumerate(self.objs)) - ########################################################## - - def __call__(self): # LM_BLENDER - """Load light-weight instance annotations of all images into a list of - dicts in Detectron2 format. - - Do not load heavy data into memory in this file, since we will - load the annotations of all images into memory. - """ - # cache the dataset_dicts to avoid loading masks from files - hashed_file_name = hashlib.md5( - ( - "".join([str(fn) for fn in self.objs]) - + "dataset_dicts_{}_{}_{}_{}_{}_{}".format( - self.name, - self.dataset_root, - self.with_masks, - self.with_depth, - self.n_per_obj, - __name__, - ) - ).encode("utf-8") - ).hexdigest() - cache_path = osp.join( - self.cache_dir, - "dataset_dicts_{}_{}.pkl".format(self.name, hashed_file_name), - ) - - if osp.exists(cache_path) and self.use_cache: - logger.info("load cached dataset dicts from {}".format(cache_path)) - return mmcv.load(cache_path) - - t_start = time.perf_counter() - - logger.info("loading dataset dicts: {}".format(self.name)) - self.num_instances_without_valid_segmentation = 0 - self.num_instances_without_valid_box = 0 - dataset_dicts = [] ####################################################### - assert len(self.ann_files) == len(self.image_prefixes), f"{len(self.ann_files)} != {len(self.image_prefixes)}" - - for ann_file, scene_root in zip(tqdm(self.ann_files), self.image_prefixes): - # each scene is an object - assert osp.exists(ann_file), ann_file - scene_gt_dict = mmcv.load(ann_file) - # sample uniformly (equal space) - indices = list(scene_gt_dict.keys()) - if self.n_per_obj > 0: - sample_num = min(self.n_per_obj, len(scene_gt_dict)) - sel_indices_idx = np.linspace(0, len(scene_gt_dict) - 1, sample_num, dtype=np.int32) - sel_indices = [indices[int(_i)] for _i in sel_indices_idx] - else: - sel_indices = indices - - for str_im_id in tqdm(sel_indices): - int_im_id = int(str_im_id) - rgb_path = osp.join(scene_root, "{}.jpg").format(str_im_id) - assert osp.exists(rgb_path), rgb_path - - depth_path = osp.join(scene_root, "{}_depth_opengl.png".format(str_im_id)) - - obj_name = osp.basename(ann_file).split("_")[0] # obj_gt.json - obj_id = ref.lm_full.obj2id[obj_name] - if obj_name not in self.objs: - continue - - record = { - "dataset_name": self.name, - "file_name": osp.relpath(rgb_path, PROJ_ROOT), - "depth_file": osp.relpath(depth_path, PROJ_ROOT), - "height": self.height, - "width": self.width, - "image_id": int_im_id, - "scene_im_id": f"{obj_id}/{int_im_id}", - "cam": self.cam, - "img_type": "syn_blender", # has bg - } - - cur_label = self.obj2label[obj_name] # 0-based label - anno = scene_gt_dict[str_im_id][0] # only one object - R = np.array(anno["cam_R_m2c"]).reshape(3, 3) - t = np.array(anno["cam_t_m2c"]).reshape(-1) / 1000 - pose = np.hstack([R, t.reshape(3, 1)]) - quat = mat2quat(R).astype("float32") - proj = (record["cam"] @ t.T).T - proj = proj[:2] / proj[2] - - bbox_visib = anno["bbox_visib"] - x1, y1, w, h = bbox_visib - if self.filter_invalid: - if h <= 1 or w <= 1: - self.num_instances_without_valid_box += 1 - continue - - mask_path = osp.join(scene_root, "{}_mask_opengl.png".format(str_im_id)) - mask = mmcv.imread(mask_path, "unchanged") - mask = (mask > 0).astype(np.uint8) - - area = mask.sum() - if area < 3: # filter out too small or nearly invisible instances - self.num_instances_without_valid_segmentation += 1 - continue - mask_rle = binary_mask_to_rle(mask, compressed=True) - - visib_fract = anno.get("visib_fract", 1.0) - inst = { - "category_id": cur_label, # 0-based label - "bbox": bbox_visib, # TODO: load both bbox_obj and bbox_visib - "bbox_mode": BoxMode.XYWH_ABS, - "pose": pose, - "quat": quat, - "trans": t, - "centroid_2d": proj, # absolute (cx, cy) - "segmentation": mask_rle, - "visib_fract": visib_fract, - } - - model_info = self.models_info[str(obj_id)] - inst["model_info"] = model_info - for key in ["bbox3d_and_center"]: - inst[key] = self.models[cur_label][key] - record["annotations"] = [inst] - dataset_dicts.append(record) - - if self.num_instances_without_valid_segmentation > 0: - logger.warning( - "Filtered out {} instances without valid segmentation. " - "There might be issues in your dataset generation process.".format( - self.num_instances_without_valid_segmentation - ) - ) - if self.num_instances_without_valid_box > 0: - logger.warning( - "Filtered out {} instances without valid box. " - "There might be issues in your dataset generation process.".format(self.num_instances_without_valid_box) - ) - ########################################################################## - # if self.num_to_load > 0: - # self.num_to_load = min(int(self.num_to_load), len(dataset_dicts)) - # random.shuffle(dataset_dicts) - # dataset_dicts = dataset_dicts[: self.num_to_load] - logger.info( - "loaded dataset dicts, num_images: {}, using {}s".format(len(dataset_dicts), time.perf_counter() - t_start) - ) - - mmcv.dump(dataset_dicts, cache_path, protocol=4) - logger.info("Dumped dataset_dicts to {}".format(cache_path)) - return dataset_dicts - - @lazy_property - def models_info(self): - models_info_path = osp.join(self.models_root, "models_info.json") - assert osp.exists(models_info_path), models_info_path - models_info = mmcv.load(models_info_path) # key is str(obj_id) - return models_info - - @lazy_property - def models(self): - """Load models into a list.""" - cache_path = osp.join(self.models_root, "models_{}.pkl".format("_".join(self.objs))) - if osp.exists(cache_path) and self.use_cache: - # dprint("{}: load cached object models from {}".format(self.name, cache_path)) - return mmcv.load(cache_path) - - models = [] - for obj_name in self.objs: - model = inout.load_ply( - osp.join( - self.models_root, - f"obj_{ref.lm_full.obj2id[obj_name]:06d}.ply", - ), - vertex_scale=self.scale_to_meter, - ) - # NOTE: the bbox3d_and_center is not obtained from centered vertices - # for BOP models, not a big problem since they had been centered - model["bbox3d_and_center"] = misc.get_bbox3d_and_center(model["pts"]) - - models.append(model) - logger.info("cache models to {}".format(cache_path)) - mmcv.dump(models, cache_path, protocol=4) - return models - - def image_aspect_ratio(self): - return self.width / self.height # 4/3 - - -########### register datasets ############################################################ - - -def get_lm_metadata(obj_names, ref_key): - """task specific metadata.""" - - data_ref = ref.__dict__[ref_key] - - cur_sym_infos = {} # label based key - loaded_models_info = data_ref.get_models_info() - - for i, obj_name in enumerate(obj_names): - obj_id = data_ref.obj2id[obj_name] - model_info = loaded_models_info[str(obj_id)] - if "symmetries_discrete" in model_info or "symmetries_continuous" in model_info: - sym_transforms = misc.get_symmetry_transformations(model_info, max_sym_disc_step=0.01) - sym_info = np.array([sym["R"] for sym in sym_transforms], dtype=np.float32) - else: - sym_info = None - cur_sym_infos[i] = sym_info - - meta = {"thing_classes": obj_names, "sym_infos": cur_sym_infos} - return meta - - -LM_13_OBJECTS = [ - "ape", - "benchvise", - "camera", - "can", - "cat", - "driller", - "duck", - "eggbox", - "glue", - "holepuncher", - "iron", - "lamp", - "phone", -] # no bowl, cup -LM_OCC_OBJECTS = [ - "ape", - "can", - "cat", - "driller", - "duck", - "eggbox", - "glue", - "holepuncher", -] -################################################################################ - -SPLITS_LM_BLENDER = dict( - lm_blender_13_train=dict( - name="lm_blender_13_train", # BB8 training set - dataset_root=osp.join(DATASETS_ROOT, "lm_renders_blender/"), - models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/models"), - objs=LM_13_OBJECTS, # selected objects - ann_files=[ - osp.join( - DATASETS_ROOT, - "lm_renders_blender/renders/{}_gt.json".format(_obj), - ) - for _obj in LM_13_OBJECTS - ], - image_prefixes=[ - osp.join(DATASETS_ROOT, "lm_renders_blender/renders/{}".format(_obj)) for _obj in LM_13_OBJECTS - ], - scale_to_meter=0.001, - with_masks=True, # (load masks but may not use it) - with_depth=True, # (load depth path here, but may not use it) - depth_factor=1000.0, - cam=ref.lm_full.camera_matrix, - height=480, - width=640, - cache_dir=osp.join(PROJ_ROOT, ".cache"), - use_cache=True, - n_per_obj=-1, # num per class, -1 for all 10k - filter_invalid=False, - ref_key="lm_full", - ), - lmo_blender_train=dict( - name="lmo_blender_train", - dataset_root=osp.join(DATASETS_ROOT, "lm_renders_blender/"), - models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/models"), - objs=LM_OCC_OBJECTS, # selected objects - ann_files=[ - osp.join( - DATASETS_ROOT, - "lm_renders_blender/renders/{}_gt.json".format(_obj), - ) - for _obj in LM_OCC_OBJECTS - ], - image_prefixes=[ - osp.join(DATASETS_ROOT, "lm_renders_blender/renders/{}".format(_obj)) for _obj in LM_OCC_OBJECTS - ], - scale_to_meter=0.001, - with_masks=True, # (load masks but may not use it) - with_depth=True, # (load depth path here, but may not use it) - depth_factor=1000.0, - cam=ref.lmo_full.camera_matrix, - height=480, - width=640, - cache_dir=osp.join(PROJ_ROOT, ".cache"), - use_cache=True, - n_per_obj=-1, # n per class, -1 for all 10k - filter_invalid=False, - ref_key="lmo_full", - ), -) - -# single obj splits -for obj in ref.lm_full.objects: - for split in ["train"]: - for name_prefix in ["lm", "lmo"]: - name = "{}_blender_{}_{}".format(name_prefix, obj, split) - ref_key = f"{name_prefix}_full" - ann_files = [ - osp.join( - DATASETS_ROOT, - "lm_renders_blender/renders/{}_gt.json".format(obj), - ) - ] - if split in ["train"]: - filter_invalid = True - else: - raise ValueError("{}".format(split)) - if name not in SPLITS_LM_BLENDER: - SPLITS_LM_BLENDER[name] = dict( - name=name, - dataset_root=osp.join(DATASETS_ROOT, "lm_renders_blender/"), - models_root=osp.join(DATASETS_ROOT, f"BOP_DATASETS/{name_prefix}/models"), - objs=[obj], # only this obj - ann_files=ann_files, - image_prefixes=[osp.join(DATASETS_ROOT, f"lm_renders_blender/renders/{obj}")], - scale_to_meter=0.001, - with_masks=True, # (load masks but may not use it) - with_depth=True, # (load depth path here, but may not use it) - depth_factor=1000.0, - cam=ref.__dict__[ref_key].camera_matrix, - height=480, - width=640, - cache_dir=osp.join(PROJ_ROOT, ".cache"), - use_cache=True, - n_per_obj=-1, - filter_invalid=False, - ref_key=ref_key, - ) - - -def register_with_name_cfg(name, data_cfg=None): - """Assume pre-defined datasets live in `./datasets`. - - Args: - name: datasnet_name, - data_cfg: if name is in existing SPLITS, use pre-defined data_cfg - otherwise requires data_cfg - data_cfg can be set in cfg.DATA_CFG.name - """ - dprint("register dataset: {}".format(name)) - if name in SPLITS_LM_BLENDER: - used_cfg = SPLITS_LM_BLENDER[name] - else: - assert data_cfg is not None, f"dataset name {name} is not registered" - used_cfg = data_cfg - DatasetCatalog.register(name, LM_BLENDER_Dataset(used_cfg)) - # something like eval_types - MetadataCatalog.get(name).set( - ref_key=used_cfg["ref_key"], - objs=used_cfg["objs"], - eval_error_types=["ad", "rete", "proj"], - evaluator_type="coco_bop", - **get_lm_metadata(obj_names=used_cfg["objs"], ref_key=used_cfg["ref_key"]), - ) - - -def get_available_datasets(): - return list(SPLITS_LM_BLENDER.keys()) - - -#### tests ############################################### -def test_vis(): - dset_name = sys.argv[1] - assert dset_name in DatasetCatalog.list() - - meta = MetadataCatalog.get(dset_name) - dprint("MetadataCatalog: ", meta) - objs = meta.objs - - t_start = time.perf_counter() - dicts = DatasetCatalog.get(dset_name) - logger.info("Done loading {} samples with {:.3f}s.".format(len(dicts), time.perf_counter() - t_start)) - - dirname = "output/{}-data-vis".format(dset_name) - os.makedirs(dirname, exist_ok=True) - for d in dicts: - img = read_image_mmcv(d["file_name"], format="BGR") - depth = mmcv.imread(d["depth_file"], "unchanged") / 1000.0 - - anno = d["annotations"][0] # only one instance per image - imH, imW = img.shape[:2] - mask = cocosegm2mask(anno["segmentation"], imH, imW) - bbox = anno["bbox"] - bbox_mode = anno["bbox_mode"] - bbox_xyxy = np.array(BoxMode.convert(bbox, bbox_mode, BoxMode.XYXY_ABS)) - kpt3d = anno["bbox3d_and_center"] - quat = anno["quat"] - trans = anno["trans"] - R = quat2mat(quat) - # 0-based label - cat_id = anno["category_id"] - K = d["cam"] - kpt_2d = misc.project_pts(kpt3d, K, R, trans) - # # TODO: visualize pose and keypoints - label = objs[cat_id] - # img_vis = vis_image_bboxes_cv2(img, bboxes=bboxes_xyxy, labels=labels) - img_vis = vis_image_mask_bbox_cv2(img, [mask], bboxes=[bbox_xyxy], labels=[label]) - img_vis_kpt2d = img.copy() - img_vis_kpt2d = misc.draw_projected_box3d( - img_vis_kpt2d, - kpt_2d, - middle_color=None, - bottom_color=(128, 128, 128), - ) - - grid_show( - [ - img[:, :, [2, 1, 0]], - img_vis[:, :, [2, 1, 0]], - img_vis_kpt2d[:, :, [2, 1, 0]], - depth, - ], - ["img", "vis_img", "img_vis_kpts2d", "depth"], - row=2, - col=2, - ) - - -if __name__ == "__main__": - """Test the dataset loader. - - Usage: - python -m core.datasets.lm_blender dataset_name - """ - from lib.vis_utils.image import grid_show - from lib.utils.setup_logger import setup_my_logger - - import detectron2.data.datasets # noqa # add pre-defined metadata - from lib.vis_utils.image import vis_image_mask_bbox_cv2 - from core.utils.data_utils import read_image_mmcv - - print("sys.argv:", sys.argv) - logger = setup_my_logger(name="core") - register_with_name_cfg(sys.argv[1]) - print("dataset catalog: ", DatasetCatalog.list()) - test_vis() diff --git a/det/yolox/data/datasets/lm_dataset_d2.py b/det/yolox/data/datasets/lm_dataset_d2.py deleted file mode 100644 index dae9314..0000000 --- a/det/yolox/data/datasets/lm_dataset_d2.py +++ /dev/null @@ -1,886 +0,0 @@ -import hashlib -import logging -import os -import os.path as osp -import sys -import time -from collections import OrderedDict -import mmcv -import numpy as np -from tqdm import tqdm -from transforms3d.quaternions import mat2quat, quat2mat -from detectron2.data import DatasetCatalog, MetadataCatalog -from detectron2.structures import BoxMode - -cur_dir = osp.dirname(osp.abspath(__file__)) -PROJ_ROOT = osp.normpath(osp.join(cur_dir, "../../../..")) -sys.path.insert(0, PROJ_ROOT) - -import ref - -from lib.pysixd import inout, misc -from lib.utils.mask_utils import binary_mask_to_rle, cocosegm2mask -from lib.utils.utils import dprint, iprint, lazy_property - - -logger = logging.getLogger(__name__) -DATASETS_ROOT = osp.normpath(osp.join(PROJ_ROOT, "datasets")) - - -class LM_Dataset(object): - """lm splits.""" - - def __init__(self, data_cfg): - """ - Set with_depth and with_masks default to True, - and decide whether to load them into dataloader/network later - with_masks: - """ - self.name = data_cfg["name"] - self.data_cfg = data_cfg - - self.objs = data_cfg["objs"] # selected objects - - self.ann_files = data_cfg["ann_files"] # idx files with image ids - self.image_prefixes = data_cfg["image_prefixes"] - self.xyz_prefixes = data_cfg["xyz_prefixes"] - - self.dataset_root = data_cfg["dataset_root"] # BOP_DATASETS/lm/ - assert osp.exists(self.dataset_root), self.dataset_root - self.models_root = data_cfg["models_root"] # BOP_DATASETS/lm/models - self.scale_to_meter = data_cfg["scale_to_meter"] # 0.001 - - self.with_masks = data_cfg["with_masks"] # True (load masks but may not use it) - self.with_depth = data_cfg["with_depth"] # True (load depth path here, but may not use it) - - self.height = data_cfg["height"] # 480 - self.width = data_cfg["width"] # 640 - - self.cache_dir = data_cfg.get("cache_dir", osp.join(PROJ_ROOT, ".cache")) # .cache - self.use_cache = data_cfg.get("use_cache", True) - self.num_to_load = data_cfg["num_to_load"] # -1 - self.filter_invalid = data_cfg["filter_invalid"] - self.filter_scene = data_cfg.get("filter_scene", False) - self.debug_im_id = data_cfg.get("debug_im_id", None) - ################################################## - - # NOTE: careful! Only the selected objects - self.cat_ids = [cat_id for cat_id, obj_name in ref.lm_full.id2obj.items() if obj_name in self.objs] - # map selected objs to [0, num_objs-1] - self.cat2label = {v: i for i, v in enumerate(self.cat_ids)} # id_map - self.label2cat = {label: cat for cat, label in self.cat2label.items()} - self.obj2label = OrderedDict((obj, obj_id) for obj_id, obj in enumerate(self.objs)) - ########################################################## - - def __call__(self): # LM_Dataset - """Load light-weight instance annotations of all images into a list of - dicts in Detectron2 format. - - Do not load heavy data into memory in this file, since we will - load the annotations of all images into memory. - """ - # cache the dataset_dicts to avoid loading masks from files - hashed_file_name = hashlib.md5( - ( - "".join([str(fn) for fn in self.objs]) - + "dataset_dicts_{}_{}_{}_{}_{}".format( - self.name, - self.dataset_root, - self.with_masks, - self.with_depth, - __name__, - ) - ).encode("utf-8") - ).hexdigest() - cache_path = osp.join( - self.cache_dir, - "dataset_dicts_{}_{}.pkl".format(self.name, hashed_file_name), - ) - - if osp.exists(cache_path) and self.use_cache: - logger.info("load cached dataset dicts from {}".format(cache_path)) - return mmcv.load(cache_path) - - t_start = time.perf_counter() - - logger.info("loading dataset dicts: {}".format(self.name)) - self.num_instances_without_valid_segmentation = 0 - self.num_instances_without_valid_box = 0 - dataset_dicts = [] # ###################################################### - assert len(self.ann_files) == len(self.image_prefixes), f"{len(self.ann_files)} != {len(self.image_prefixes)}" - assert len(self.ann_files) == len(self.xyz_prefixes), f"{len(self.ann_files)} != {len(self.xyz_prefixes)}" - unique_im_id = 0 - for ann_file, scene_root, xyz_root in zip(tqdm(self.ann_files), self.image_prefixes, self.xyz_prefixes): - # linemod each scene is an object - with open(ann_file, "r") as f_ann: - indices = [line.strip("\r\n") for line in f_ann.readlines()] # string ids - gt_dict = mmcv.load(osp.join(scene_root, "scene_gt.json")) - gt_info_dict = mmcv.load(osp.join(scene_root, "scene_gt_info.json")) # bbox_obj, bbox_visib - cam_dict = mmcv.load(osp.join(scene_root, "scene_camera.json")) - for im_id in tqdm(indices): - int_im_id = int(im_id) - str_im_id = str(int_im_id) - rgb_path = osp.join(scene_root, "rgb/{:06d}.png").format(int_im_id) - assert osp.exists(rgb_path), rgb_path - - depth_path = osp.join(scene_root, "depth/{:06d}.png".format(int_im_id)) - - scene_id = int(rgb_path.split("/")[-3]) - scene_im_id = f"{scene_id}/{int_im_id}" - - if self.debug_im_id is not None: - if self.debug_im_id != scene_im_id: - continue - - K = np.array(cam_dict[str_im_id]["cam_K"], dtype=np.float32).reshape(3, 3) - depth_factor = 1000.0 / cam_dict[str_im_id]["depth_scale"] - if self.filter_scene: - if scene_id not in self.cat_ids: - continue - record = { - "dataset_name": self.name, - "file_name": osp.relpath(rgb_path, PROJ_ROOT), - "depth_file": osp.relpath(depth_path, PROJ_ROOT), - "height": self.height, - "width": self.width, - "image_id": unique_im_id, - "scene_im_id": scene_im_id, # for evaluation - "cam": K, - "depth_factor": depth_factor, - "img_type": "real", - } - unique_im_id += 1 - insts = [] - for anno_i, anno in enumerate(gt_dict[str_im_id]): - obj_id = anno["obj_id"] - if obj_id not in self.cat_ids: - continue - cur_label = self.cat2label[obj_id] # 0-based label - R = np.array(anno["cam_R_m2c"], dtype="float32").reshape(3, 3) - t = np.array(anno["cam_t_m2c"], dtype="float32") / 1000.0 - pose = np.hstack([R, t.reshape(3, 1)]) - quat = mat2quat(R).astype("float32") - - proj = (record["cam"] @ t.T).T - proj = proj[:2] / proj[2] - - bbox_visib = gt_info_dict[str_im_id][anno_i]["bbox_visib"] - bbox_obj = gt_info_dict[str_im_id][anno_i]["bbox_obj"] - x1, y1, w, h = bbox_visib - if self.filter_invalid: - if h <= 1 or w <= 1: - self.num_instances_without_valid_box += 1 - continue - - mask_file = osp.join( - scene_root, - "mask/{:06d}_{:06d}.png".format(int_im_id, anno_i), - ) - mask_visib_file = osp.join( - scene_root, - "mask_visib/{:06d}_{:06d}.png".format(int_im_id, anno_i), - ) - assert osp.exists(mask_file), mask_file - assert osp.exists(mask_visib_file), mask_visib_file - # load mask visib - mask_single = mmcv.imread(mask_visib_file, "unchanged") - mask_single = mask_single.astype("bool") - area = mask_single.sum() - if area < 3: # filter out too small or nearly invisible instances - self.num_instances_without_valid_segmentation += 1 - continue - mask_rle = binary_mask_to_rle(mask_single, compressed=True) - # load mask full - mask_full = mmcv.imread(mask_file, "unchanged") - mask_full = mask_full.astype("bool") - mask_full_rle = binary_mask_to_rle(mask_full, compressed=True) - - inst = { - "category_id": cur_label, # 0-based label - "bbox": bbox_obj, # TODO: load both bbox_obj and bbox_visib - "bbox_mode": BoxMode.XYWH_ABS, - "pose": pose, - "quat": quat, - "trans": t, - "centroid_2d": proj, # absolute (cx, cy) - "segmentation": mask_rle, - "mask_full": mask_full_rle, - } - - if "test" not in self.name.lower(): - # if True: - xyz_path = osp.join(xyz_root, f"{int_im_id:06d}_{anno_i:06d}.pkl") - assert osp.exists(xyz_path), xyz_path - inst["xyz_path"] = xyz_path - - model_info = self.models_info[str(obj_id)] - inst["model_info"] = model_info - # TODO: using full mask and full xyz - for key in ["bbox3d_and_center"]: - inst[key] = self.models[cur_label][key] - insts.append(inst) - if len(insts) == 0: # filter im without anno - continue - record["annotations"] = insts - dataset_dicts.append(record) - - if self.num_instances_without_valid_segmentation > 0: - logger.warning( - "Filtered out {} instances without valid segmentation. " - "There might be issues in your dataset generation process.".format( - self.num_instances_without_valid_segmentation - ) - ) - if self.num_instances_without_valid_box > 0: - logger.warning( - "Filtered out {} instances without valid box. " - "There might be issues in your dataset generation process.".format(self.num_instances_without_valid_box) - ) - ########################################################################## - if self.num_to_load > 0: - self.num_to_load = min(int(self.num_to_load), len(dataset_dicts)) - dataset_dicts = dataset_dicts[: self.num_to_load] - logger.info("loaded {} dataset dicts, using {}s".format(len(dataset_dicts), time.perf_counter() - t_start)) - - mmcv.mkdir_or_exist(osp.dirname(cache_path)) - mmcv.dump(dataset_dicts, cache_path, protocol=4) - logger.info("Dumped dataset_dicts to {}".format(cache_path)) - return dataset_dicts - - @lazy_property - def models_info(self): - models_info_path = osp.join(self.models_root, "models_info.json") - assert osp.exists(models_info_path), models_info_path - models_info = mmcv.load(models_info_path) # key is str(obj_id) - return models_info - - @lazy_property - def models(self): - """Load models into a list.""" - cache_path = osp.join(self.cache_dir, "models_{}.pkl".format("_".join(self.objs))) - if osp.exists(cache_path) and self.use_cache: - # dprint("{}: load cached object models from {}".format(self.name, cache_path)) - return mmcv.load(cache_path) - - models = [] - for obj_name in self.objs: - model = inout.load_ply( - osp.join( - self.models_root, - f"obj_{ref.lm_full.obj2id[obj_name]:06d}.ply", - ), - vertex_scale=self.scale_to_meter, - ) - # NOTE: the bbox3d_and_center is not obtained from centered vertices - # for BOP models, not a big problem since they had been centered - model["bbox3d_and_center"] = misc.get_bbox3d_and_center(model["pts"]) - - models.append(model) - logger.info("cache models to {}".format(cache_path)) - mmcv.mkdir_or_exist(osp.dirname(cache_path)) - mmcv.dump(models, cache_path, protocol=4) - return models - - def image_aspect_ratio(self): - return self.width / self.height # 4/3 - - -########### register datasets ############################################################ - - -def get_lm_metadata(obj_names, ref_key): - """task specific metadata.""" - - data_ref = ref.__dict__[ref_key] - - cur_sym_infos = {} # label based key - loaded_models_info = data_ref.get_models_info() - - for i, obj_name in enumerate(obj_names): - obj_id = data_ref.obj2id[obj_name] - model_info = loaded_models_info[str(obj_id)] - if "symmetries_discrete" in model_info or "symmetries_continuous" in model_info: - sym_transforms = misc.get_symmetry_transformations(model_info, max_sym_disc_step=0.01) - sym_info = np.array([sym["R"] for sym in sym_transforms], dtype=np.float32) - else: - sym_info = None - cur_sym_infos[i] = sym_info - - meta = {"thing_classes": obj_names, "sym_infos": cur_sym_infos} - return meta - - -LM_13_OBJECTS = [ - "ape", - "benchvise", - "camera", - "can", - "cat", - "driller", - "duck", - "eggbox", - "glue", - "holepuncher", - "iron", - "lamp", - "phone", -] # no bowl, cup -LM_OCC_OBJECTS = [ - "ape", - "can", - "cat", - "driller", - "duck", - "eggbox", - "glue", - "holepuncher", -] -################################################################################ - -SPLITS_LM = dict( - lm_13_train=dict( - name="lm_13_train", - dataset_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/"), - models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/models"), - objs=LM_13_OBJECTS, # selected objects - ann_files=[ - osp.join( - DATASETS_ROOT, - "BOP_DATASETS/lm/image_set/{}_{}.txt".format(_obj, "train"), - ) - for _obj in LM_13_OBJECTS - ], - image_prefixes=[ - osp.join( - DATASETS_ROOT, - "BOP_DATASETS/lm/test/{:06d}".format(ref.lm_full.obj2id[_obj]), - ) - for _obj in LM_13_OBJECTS - ], - xyz_prefixes=[ - osp.join( - DATASETS_ROOT, - "BOP_DATASETS/lm/test/xyz_crop/{:06d}".format(ref.lm_full.obj2id[_obj]), - ) - for _obj in LM_13_OBJECTS - ], - scale_to_meter=0.001, - with_masks=True, # (load masks but may not use it) - with_depth=True, # (load depth path here, but may not use it) - height=480, - width=640, - cache_dir=osp.join(PROJ_ROOT, ".cache"), - use_cache=True, - num_to_load=-1, - filter_scene=True, - filter_invalid=True, - ref_key="lm_full", - ), - lm_13_test=dict( - name="lm_13_test", - dataset_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/"), - models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/models"), - objs=LM_13_OBJECTS, - ann_files=[ - osp.join( - DATASETS_ROOT, - "BOP_DATASETS/lm/image_set/{}_{}.txt".format(_obj, "test"), - ) - for _obj in LM_13_OBJECTS - ], - # NOTE: scene root - image_prefixes=[ - osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/test/{:06d}").format(ref.lm_full.obj2id[_obj]) - for _obj in LM_13_OBJECTS - ], - xyz_prefixes=[ - osp.join( - DATASETS_ROOT, - "BOP_DATASETS/lm/test/xyz_crop/{:06d}".format(ref.lm_full.obj2id[_obj]), - ) - for _obj in LM_13_OBJECTS - ], - scale_to_meter=0.001, - with_masks=True, # (load masks but may not use it) - with_depth=True, # (load depth path here, but may not use it) - height=480, - width=640, - cache_dir=osp.join(PROJ_ROOT, ".cache"), - use_cache=True, - num_to_load=-1, - filter_scene=True, - filter_invalid=False, - ref_key="lm_full", - ), - lmo_train=dict( - name="lmo_train", - # use lm real all (8 objects) to train for lmo - dataset_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/"), - models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/models"), - objs=LM_OCC_OBJECTS, # selected objects - ann_files=[ - osp.join( - DATASETS_ROOT, - "BOP_DATASETS/lm/image_set/{}_{}.txt".format(_obj, "all"), - ) - for _obj in LM_OCC_OBJECTS - ], - image_prefixes=[ - osp.join( - DATASETS_ROOT, - "BOP_DATASETS/lm/test/{:06d}".format(ref.lmo_full.obj2id[_obj]), - ) - for _obj in LM_OCC_OBJECTS - ], - xyz_prefixes=[ - osp.join( - DATASETS_ROOT, - "BOP_DATASETS/lm/test/xyz_crop/{:06d}".format(ref.lmo_full.obj2id[_obj]), - ) - for _obj in LM_OCC_OBJECTS - ], - scale_to_meter=0.001, - with_masks=True, # (load masks but may not use it) - with_depth=True, # (load depth path here, but may not use it) - height=480, - width=640, - cache_dir=osp.join(PROJ_ROOT, ".cache"), - use_cache=True, - num_to_load=-1, - filter_scene=True, - filter_invalid=True, - ref_key="lmo_full", - ), - lmo_NoBopTest_train=dict( - name="lmo_NoBopTest_train", - dataset_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/"), - models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/models"), - objs=LM_OCC_OBJECTS, - ann_files=[osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/image_set/lmo_no_bop_test.txt")], - image_prefixes=[osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/test/{:06d}").format(2)], - xyz_prefixes=[ - osp.join( - DATASETS_ROOT, - "BOP_DATASETS/lmo/test/xyz_crop/{:06d}".format(2), - ) - ], - scale_to_meter=0.001, - with_masks=True, # (load masks but may not use it) - with_depth=True, # (load depth path here, but may not use it) - height=480, - width=640, - cache_dir=osp.join(PROJ_ROOT, ".cache"), - use_cache=True, - num_to_load=-1, - filter_scene=False, - filter_invalid=True, - ref_key="lmo_full", - ), - lmo_test=dict( - name="lmo_test", - dataset_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/"), - models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/models"), - objs=LM_OCC_OBJECTS, - ann_files=[osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/image_set/lmo_test.txt")], - # NOTE: scene root - image_prefixes=[osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/test/{:06d}").format(2)], - xyz_prefixes=[None], - scale_to_meter=0.001, - with_masks=True, # (load masks but may not use it) - with_depth=True, # (load depth path here, but may not use it) - height=480, - width=640, - cache_dir=osp.join(PROJ_ROOT, ".cache"), - use_cache=True, - num_to_load=-1, - filter_scene=False, - filter_invalid=False, - ref_key="lmo_full", - ), - lmo_bop_test=dict( - name="lmo_bop_test", - dataset_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/"), - models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/models"), - objs=LM_OCC_OBJECTS, - ann_files=[osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/image_set/lmo_bop_test.txt")], - # NOTE: scene root - image_prefixes=[osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/test/{:06d}").format(2)], - xyz_prefixes=[None], - scale_to_meter=0.001, - with_masks=True, # (load masks but may not use it) - with_depth=True, # (load depth path here, but may not use it) - height=480, - width=640, - cache_dir=osp.join(PROJ_ROOT, ".cache"), - use_cache=True, - num_to_load=-1, - filter_scene=False, - filter_invalid=False, - ref_key="lmo_full", - ), -) - -# single obj splits for lm real -for obj in ref.lm_full.objects: - for split in ["train", "test", "all"]: - name = "lm_real_{}_{}".format(obj, split) - ann_files = [ - osp.join( - DATASETS_ROOT, - "BOP_DATASETS/lm/image_set/{}_{}.txt".format(obj, split), - ) - ] - if split in ["train", "all"]: # all is used to train lmo - filter_invalid = True - elif split in ["test"]: - filter_invalid = False - else: - raise ValueError("{}".format(split)) - if name not in SPLITS_LM: - SPLITS_LM[name] = dict( - name=name, - dataset_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/"), - models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/models"), - objs=[obj], # only this obj - ann_files=ann_files, - image_prefixes=[osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/test/{:06d}").format(ref.lm_full.obj2id[obj])], - xyz_prefixes=[ - osp.join( - DATASETS_ROOT, - "BOP_DATASETS/lm/test/xyz_crop/{:06d}".format(ref.lm_full.obj2id[obj]), - ) - ], - scale_to_meter=0.001, - with_masks=True, # (load masks but may not use it) - with_depth=True, # (load depth path here, but may not use it) - height=480, - width=640, - cache_dir=osp.join(PROJ_ROOT, ".cache"), - use_cache=True, - num_to_load=-1, - filter_invalid=filter_invalid, - filter_scene=True, - ref_key="lm_full", - ) - -# single obj splits for lmo_NoBopTest_train -for obj in ref.lmo_full.objects: - for split in ["train"]: - name = "lmo_NoBopTest_{}_{}".format(obj, split) - if split in ["train"]: - filter_invalid = True - else: - raise ValueError("{}".format(split)) - if name not in SPLITS_LM: - SPLITS_LM[name] = dict( - name=name, - dataset_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/"), - models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/models"), - objs=[obj], - ann_files=[ - osp.join( - DATASETS_ROOT, - "BOP_DATASETS/lmo/image_set/lmo_no_bop_test.txt", - ) - ], - # NOTE: scene root - image_prefixes=[osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/test/{:06d}").format(2)], - xyz_prefixes=[ - osp.join( - DATASETS_ROOT, - "BOP_DATASETS/lmo/test/xyz_crop/{:06d}".format(2), - ) - ], - scale_to_meter=0.001, - with_masks=True, # (load masks but may not use it) - with_depth=True, # (load depth path here, but may not use it) - height=480, - width=640, - cache_dir=osp.join(PROJ_ROOT, ".cache"), - use_cache=True, - num_to_load=-1, - filter_scene=False, - filter_invalid=filter_invalid, - ref_key="lmo_full", - ) - -# single obj splits for lmo_test -for obj in ref.lmo_full.objects: - for split in ["test"]: - name = "lmo_{}_{}".format(obj, split) - if split in ["train", "all"]: # all is used to train lmo - filter_invalid = True - elif split in ["test"]: - filter_invalid = False - else: - raise ValueError("{}".format(split)) - if name not in SPLITS_LM: - SPLITS_LM[name] = dict( - name=name, - dataset_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/"), - models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/models"), - objs=[obj], - ann_files=[ - osp.join( - DATASETS_ROOT, - "BOP_DATASETS/lmo/image_set/lmo_test.txt", - ) - ], - # NOTE: scene root - image_prefixes=[osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/test/{:06d}").format(2)], - xyz_prefixes=[None], - scale_to_meter=0.001, - with_masks=True, # (load masks but may not use it) - with_depth=True, # (load depth path here, but may not use it) - height=480, - width=640, - cache_dir=osp.join(PROJ_ROOT, ".cache"), - use_cache=True, - num_to_load=-1, - filter_scene=False, - filter_invalid=False, - ref_key="lmo_full", - ) - -# single obj splits for lmo_bop_test -for obj in ref.lmo_full.objects: - for split in ["test"]: - name = "lmo_{}_bop_{}".format(obj, split) - if split in ["train", "all"]: # all is used to train lmo - filter_invalid = True - elif split in ["test"]: - filter_invalid = False - else: - raise ValueError("{}".format(split)) - if name not in SPLITS_LM: - SPLITS_LM[name] = dict( - name=name, - dataset_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/"), - models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/models"), - objs=[obj], - ann_files=[ - osp.join( - DATASETS_ROOT, - "BOP_DATASETS/lmo/image_set/lmo_bop_test.txt", - ) - ], - # NOTE: scene root - image_prefixes=[osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/test/{:06d}").format(2)], - xyz_prefixes=[None], - scale_to_meter=0.001, - with_masks=True, # (load masks but may not use it) - with_depth=True, # (load depth path here, but may not use it) - height=480, - width=640, - cache_dir=osp.join(PROJ_ROOT, ".cache"), - use_cache=True, - num_to_load=-1, - filter_scene=False, - filter_invalid=False, - ref_key="lmo_full", - ) - -# ================ add single image dataset for debug ======================================= -debug_im_ids = { - "train": {obj: [] for obj in ref.lm_full.objects}, - "test": {obj: [] for obj in ref.lm_full.objects}, -} -for obj in ref.lm_full.objects: - for split in ["train", "test"]: - cur_ann_file = osp.join(DATASETS_ROOT, f"BOP_DATASETS/lm/image_set/{obj}_{split}.txt") - ann_files = [cur_ann_file] - - im_ids = [] - with open(cur_ann_file, "r") as f: - for line in f: - # scene_id(obj_id)/im_id - im_ids.append("{}/{}".format(ref.lm_full.obj2id[obj], int(line.strip("\r\n")))) - - debug_im_ids[split][obj] = im_ids - for debug_im_id in debug_im_ids[split][obj]: - name = "lm_single_{}{}_{}".format(obj, debug_im_id.split("/")[1], split) - if name not in SPLITS_LM: - SPLITS_LM[name] = dict( - name=name, - dataset_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/"), - models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/models"), - objs=[obj], # only this obj - ann_files=ann_files, - image_prefixes=[ - osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/test/{:06d}").format(ref.lm_full.obj2id[obj]) - ], - xyz_prefixes=[ - osp.join( - DATASETS_ROOT, - "BOP_DATASETS/lm/test/xyz_crop/{:06d}".format(ref.lm_full.obj2id[obj]), - ) - ], - scale_to_meter=0.001, - with_masks=True, # (load masks but may not use it) - with_depth=True, # (load depth path here, but may not use it) - height=480, - width=640, - cache_dir=osp.join(PROJ_ROOT, ".cache"), - use_cache=True, - num_to_load=-1, - filter_invalid=False, - filter_scene=True, - ref_key="lm_full", - debug_im_id=debug_im_id, # NOTE: debug im id - ) - - -def register_with_name_cfg(name, data_cfg=None): - """Assume pre-defined datasets live in `./datasets`. - - Args: - name: datasnet_name, - data_cfg: if name is in existing SPLITS, use pre-defined data_cfg - otherwise requires data_cfg - data_cfg can be set in cfg.DATA_CFG.name - """ - dprint("register dataset: {}".format(name)) - if name in SPLITS_LM: - used_cfg = SPLITS_LM[name] - else: - assert data_cfg is not None, f"dataset name {name} is not registered" - used_cfg = data_cfg - DatasetCatalog.register(name, LM_Dataset(used_cfg)) - # something like eval_types - MetadataCatalog.get(name).set( - id="linemod", # NOTE: for pvnet to determine module - ref_key=used_cfg["ref_key"], - objs=used_cfg["objs"], - eval_error_types=["ad", "rete", "proj"], - evaluator_type="bop", - **get_lm_metadata(obj_names=used_cfg["objs"], ref_key=used_cfg["ref_key"]), - ) - - -def get_available_datasets(): - return list(SPLITS_LM.keys()) - - -#### tests ############################################### -def test_vis(): - dset_name = sys.argv[1] - assert dset_name in DatasetCatalog.list() - - meta = MetadataCatalog.get(dset_name) - dprint("MetadataCatalog: ", meta) - objs = meta.objs - - t_start = time.perf_counter() - dicts = DatasetCatalog.get(dset_name) - logger.info("Done loading {} samples with {:.3f}s.".format(len(dicts), time.perf_counter() - t_start)) - - dirname = "output/{}-data-vis".format(dset_name) - os.makedirs(dirname, exist_ok=True) - for d in dicts: - img = read_image_mmcv(d["file_name"], format="BGR") - depth = mmcv.imread(d["depth_file"], "unchanged") / 1000.0 - - imH, imW = img.shape[:2] - annos = d["annotations"] - masks = [cocosegm2mask(anno["segmentation"], imH, imW) for anno in annos] - bboxes = [anno["bbox"] for anno in annos] - bbox_modes = [anno["bbox_mode"] for anno in annos] - bboxes_xyxy = np.array( - [BoxMode.convert(box, box_mode, BoxMode.XYXY_ABS) for box, box_mode in zip(bboxes, bbox_modes)] - ) - kpts_3d_list = [anno["bbox3d_and_center"] for anno in annos] - quats = [anno["quat"] for anno in annos] - transes = [anno["trans"] for anno in annos] - Rs = [quat2mat(quat) for quat in quats] - # 0-based label - cat_ids = [anno["category_id"] for anno in annos] - K = d["cam"] - kpts_2d = [misc.project_pts(kpt3d, K, R, t) for kpt3d, R, t in zip(kpts_3d_list, Rs, transes)] - # # TODO: visualize pose and keypoints - labels = [objs[cat_id] for cat_id in cat_ids] - for _i in range(len(annos)): - img_vis = vis_image_mask_bbox_cv2( - img, - masks[_i : _i + 1], - bboxes=bboxes_xyxy[_i : _i + 1], - labels=labels[_i : _i + 1], - ) - img_vis_kpts2d = misc.draw_projected_box3d(img_vis.copy(), kpts_2d[_i]) - if "test" not in dset_name.lower(): - xyz_path = annos[_i]["xyz_path"] - xyz_info = mmcv.load(xyz_path) - x1, y1, x2, y2 = xyz_info["xyxy"] - xyz_crop = xyz_info["xyz_crop"].astype(np.float32) - xyz = np.zeros((imH, imW, 3), dtype=np.float32) - xyz[y1 : y2 + 1, x1 : x2 + 1, :] = xyz_crop - xyz_show = get_emb_show(xyz) - xyz_crop_show = get_emb_show(xyz_crop) - img_xyz = img.copy() / 255.0 - mask_xyz = ((xyz[:, :, 0] != 0) | (xyz[:, :, 1] != 0) | (xyz[:, :, 2] != 0)).astype("uint8") - fg_idx = np.where(mask_xyz != 0) - img_xyz[fg_idx[0], fg_idx[1], :] = xyz_show[fg_idx[0], fg_idx[1], :3] - img_xyz_crop = img_xyz[y1 : y2 + 1, x1 : x2 + 1, :] - img_vis_crop = img_vis[y1 : y2 + 1, x1 : x2 + 1, :] - # diff mask - diff_mask_xyz = np.abs(masks[_i] - mask_xyz)[y1 : y2 + 1, x1 : x2 + 1] - - grid_show( - [ - img[:, :, [2, 1, 0]], - img_vis[:, :, [2, 1, 0]], - img_vis_kpts2d[:, :, [2, 1, 0]], - depth, - # xyz_show, - diff_mask_xyz, - xyz_crop_show, - img_xyz[:, :, [2, 1, 0]], - img_xyz_crop[:, :, [2, 1, 0]], - img_vis_crop, - ], - [ - "img", - "vis_img", - "img_vis_kpts2d", - "depth", - "diff_mask_xyz", - "xyz_crop_show", - "img_xyz", - "img_xyz_crop", - "img_vis_crop", - ], - row=3, - col=3, - ) - else: - grid_show( - [ - img[:, :, [2, 1, 0]], - img_vis[:, :, [2, 1, 0]], - img_vis_kpts2d[:, :, [2, 1, 0]], - depth, - ], - ["img", "vis_img", "img_vis_kpts2d", "depth"], - row=2, - col=2, - ) - - -if __name__ == "__main__": - """Test the dataset loader. - - python this_file.py dataset_name - """ - from lib.vis_utils.image import grid_show - from lib.utils.setup_logger import setup_my_logger - - import detectron2.data.datasets # noqa # add pre-defined metadata - from lib.vis_utils.image import vis_image_mask_bbox_cv2 - from core.utils.utils import get_emb_show - from core.utils.data_utils import read_image_mmcv - - print("sys.argv:", sys.argv) - logger = setup_my_logger(name="core") - register_with_name_cfg(sys.argv[1]) - print("dataset catalog: ", DatasetCatalog.list()) - - test_vis() diff --git a/det/yolox/data/datasets/lm_syn_imgn.py b/det/yolox/data/datasets/lm_syn_imgn.py deleted file mode 100644 index 1732c29..0000000 --- a/det/yolox/data/datasets/lm_syn_imgn.py +++ /dev/null @@ -1,473 +0,0 @@ -import hashlib -import logging -import os -import os.path as osp -import sys - -import time -from collections import OrderedDict -import mmcv -import numpy as np -from tqdm import tqdm -from transforms3d.quaternions import mat2quat, quat2mat -from detectron2.data import DatasetCatalog, MetadataCatalog -from detectron2.structures import BoxMode - -cur_dir = osp.dirname(osp.abspath(__file__)) -PROJ_ROOT = osp.normpath(osp.join(cur_dir, "../../../..")) -sys.path.insert(0, PROJ_ROOT) -import ref - -from lib.pysixd import inout, misc -from lib.utils.mask_utils import ( - binary_mask_to_rle, - cocosegm2mask, - mask2bbox_xywh, -) -from lib.utils.utils import dprint, lazy_property - - -logger = logging.getLogger(__name__) -DATASETS_ROOT = osp.normpath(osp.join(PROJ_ROOT, "datasets")) - - -class LM_SYN_IMGN_Dataset(object): - """lm synthetic data, imgn(imagine) from DeepIM.""" - - def __init__(self, data_cfg): - """ - Set with_depth and with_masks default to True, - and decide whether to load them into dataloader/network later - with_masks: - """ - self.name = data_cfg["name"] - self.data_cfg = data_cfg - - self.objs = data_cfg["objs"] # selected objects - - self.ann_files = data_cfg["ann_files"] # idx files with image ids - self.image_prefixes = data_cfg["image_prefixes"] - - self.dataset_root = data_cfg["dataset_root"] # lm_imgn - self.models_root = data_cfg["models_root"] # BOP_DATASETS/lm/models - self.scale_to_meter = data_cfg["scale_to_meter"] # 0.001 - - self.with_masks = data_cfg["with_masks"] # True (load masks but may not use it) - self.with_depth = data_cfg["with_depth"] # True (load depth path here, but may not use it) - self.depth_factor = data_cfg["depth_factor"] # 1000.0 - - self.cam = data_cfg["cam"] # - self.height = data_cfg["height"] # 480 - self.width = data_cfg["width"] # 640 - - self.cache_dir = data_cfg["cache_dir"] # .cache - self.use_cache = data_cfg["use_cache"] # True - # sample uniformly to get n items - self.n_per_obj = data_cfg.get("n_per_obj", 1000) - self.filter_invalid = data_cfg["filter_invalid"] - self.filter_scene = data_cfg.get("filter_scene", False) - ################################################## - if self.cam is None: - self.cam = np.array([[572.4114, 0, 325.2611], [0, 573.57043, 242.04899], [0, 0, 1]]) - - # NOTE: careful! Only the selected objects - self.cat_ids = [cat_id for cat_id, obj_name in ref.lm_full.id2obj.items() if obj_name in self.objs] - # map selected objs to [0, num_objs-1] - self.cat2label = {v: i for i, v in enumerate(self.cat_ids)} # id_map - self.label2cat = {label: cat for cat, label in self.cat2label.items()} - self.obj2label = OrderedDict((obj, obj_id) for obj_id, obj in enumerate(self.objs)) - ########################################################## - - def __call__(self): # LM_SYN_IMGN_Dataset - """Load light-weight instance annotations of all images into a list of - dicts in Detectron2 format. - - Do not load heavy data into memory in this file, since we will - load the annotations of all images into memory. - """ - # cache the dataset_dicts to avoid loading masks from files - hashed_file_name = hashlib.md5( - ( - "".join([str(fn) for fn in self.objs]) - + "dataset_dicts_{}_{}_{}_{}_{}_{}".format( - self.name, - self.dataset_root, - self.with_masks, - self.with_depth, - self.n_per_obj, - __name__, - ) - ).encode("utf-8") - ).hexdigest() - cache_path = osp.join( - self.dataset_root, - "dataset_dicts_{}_{}.pkl".format(self.name, hashed_file_name), - ) - - if osp.exists(cache_path) and self.use_cache: - logger.info("load cached dataset dicts from {}".format(cache_path)) - return mmcv.load(cache_path) - - t_start = time.perf_counter() - - logger.info("loading dataset dicts: {}".format(self.name)) - self.num_instances_without_valid_segmentation = 0 - self.num_instances_without_valid_box = 0 - dataset_dicts = [] ####################################################### - assert len(self.ann_files) == len(self.image_prefixes), f"{len(self.ann_files)} != {len(self.image_prefixes)}" - for ann_file, scene_root in zip(self.ann_files, self.image_prefixes): - # linemod each scene is an object - with open(ann_file, "r") as f_ann: - indices = [line.strip("\r\n").split()[-1] for line in f_ann.readlines()] # string ids - # sample uniformly (equal space) - if self.n_per_obj > 0: - sample_num = min(self.n_per_obj, len(indices)) - sel_indices_idx = np.linspace(0, len(indices) - 1, sample_num, dtype=np.int32) - sel_indices = [indices[int(_i)] for _i in sel_indices_idx] - else: - sel_indices = indices - - for im_id in tqdm(sel_indices): - rgb_path = osp.join(scene_root, "{}-color.png").format(im_id) - assert osp.exists(rgb_path), rgb_path - - depth_path = osp.join(scene_root, "{}-depth.png".format(im_id)) - - obj_name = im_id.split("/")[0] - if obj_name == "benchviseblue": - obj_name = "benchvise" - obj_id = ref.lm_full.obj2id[obj_name] - if self.filter_scene: - if obj_name not in self.objs: - continue - record = { - "dataset_name": self.name, - "file_name": osp.relpath(rgb_path, PROJ_ROOT), - "depth_file": osp.relpath(depth_path, PROJ_ROOT), - "height": self.height, - "width": self.width, - "image_id": im_id.split("/")[-1], - "scene_im_id": im_id, - "cam": self.cam, - "img_type": "syn", - } - - cur_label = self.obj2label[obj_name] # 0-based label - pose_path = osp.join(scene_root, "{}-pose.txt".format(im_id)) - pose = np.loadtxt(pose_path, skiprows=1) - R = pose[:3, :3] - t = pose[:3, 3] - quat = mat2quat(R).astype("float32") - proj = (record["cam"] @ t.T).T - proj = proj[:2] / proj[2] - - depth = mmcv.imread(depth_path, "unchanged") / 1000.0 - mask = (depth > 0).astype(np.uint8) - - bbox_obj = mask2bbox_xywh(mask) - x1, y1, w, h = bbox_obj - if self.filter_invalid: - if h <= 1 or w <= 1: - self.num_instances_without_valid_box += 1 - continue - area = mask.sum() - if area < 3: # filter out too small or nearly invisible instances - self.num_instances_without_valid_segmentation += 1 - continue - mask_rle = binary_mask_to_rle(mask, compressed=True) - - inst = { - "category_id": cur_label, # 0-based label - "bbox": bbox_obj, # TODO: load both bbox_obj and bbox_visib - "bbox_mode": BoxMode.XYWH_ABS, - "pose": pose, - "quat": quat, - "trans": t, - "centroid_2d": proj, # absolute (cx, cy) - "segmentation": mask_rle, - } - - model_info = self.models_info[str(obj_id)] - inst["model_info"] = model_info - # TODO: using full mask - for key in ["bbox3d_and_center"]: - inst[key] = self.models[cur_label][key] - record["annotations"] = [inst] - dataset_dicts.append(record) - - if self.num_instances_without_valid_segmentation > 0: - logger.warning( - "Filtered out {} instances without valid segmentation. " - "There might be issues in your dataset generation process.".format( - self.num_instances_without_valid_segmentation - ) - ) - if self.num_instances_without_valid_box > 0: - logger.warning( - "Filtered out {} instances without valid box. " - "There might be issues in your dataset generation process.".format(self.num_instances_without_valid_box) - ) - ########################################################################## - # if self.num_to_load > 0: - # self.num_to_load = min(int(self.num_to_load), len(dataset_dicts)) - # random.shuffle(dataset_dicts) - # dataset_dicts = dataset_dicts[: self.num_to_load] - logger.info( - "loaded dataset dicts, num_images: {}, using {}s".format(len(dataset_dicts), time.perf_counter() - t_start) - ) - - mmcv.dump(dataset_dicts, cache_path, protocol=4) - logger.info("Dumped dataset_dicts to {}".format(cache_path)) - return dataset_dicts - - @lazy_property - def models_info(self): - models_info_path = osp.join(self.models_root, "models_info.json") - assert osp.exists(models_info_path), models_info_path - models_info = mmcv.load(models_info_path) # key is str(obj_id) - return models_info - - @lazy_property - def models(self): - """Load models into a list.""" - cache_path = osp.join(self.models_root, "models_{}.pkl".format("_".join(self.objs))) - if osp.exists(cache_path) and self.use_cache: - # logger.info("load cached object models from {}".format(cache_path)) - return mmcv.load(cache_path) - - models = [] - for obj_name in self.objs: - model = inout.load_ply( - osp.join( - self.models_root, - f"obj_{ref.lm_full.obj2id[obj_name]:06d}.ply", - ), - vertex_scale=self.scale_to_meter, - ) - # NOTE: the bbox3d_and_center is not obtained from centered vertices - # for BOP models, not a big problem since they had been centered - model["bbox3d_and_center"] = misc.get_bbox3d_and_center(model["pts"]) - - models.append(model) - logger.info("cache models to {}".format(cache_path)) - mmcv.dump(models, cache_path, protocol=4) - return models - - def image_aspect_ratio(self): - # return 1 - return self.width / self.height # 4/3 - - -########### register datasets ############################################################ - - -def get_lm_metadata(obj_names, ref_key): - """task specific metadata.""" - - data_ref = ref.__dict__[ref_key] - - cur_sym_infos = {} # label based key - loaded_models_info = data_ref.get_models_info() - - for i, obj_name in enumerate(obj_names): - obj_id = data_ref.obj2id[obj_name] - model_info = loaded_models_info[str(obj_id)] - if "symmetries_discrete" in model_info or "symmetries_continuous" in model_info: - sym_transforms = misc.get_symmetry_transformations(model_info, max_sym_disc_step=0.01) - sym_info = np.array([sym["R"] for sym in sym_transforms], dtype=np.float32) - else: - sym_info = None - cur_sym_infos[i] = sym_info - - meta = {"thing_classes": obj_names, "sym_infos": cur_sym_infos} - return meta - - -LM_13_OBJECTS = [ - "ape", - "benchvise", - "camera", - "can", - "cat", - "driller", - "duck", - "eggbox", - "glue", - "holepuncher", - "iron", - "lamp", - "phone", -] # no bowl, cup -################################################################################ - -SPLITS_LM_IMGN_13 = dict( - lm_imgn_13_train_1k_per_obj=dict( - name="lm_imgn_13_train_1k_per_obj", # BB8 training set - dataset_root=osp.join(DATASETS_ROOT, "lm_imgn/"), - models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/models"), - objs=LM_13_OBJECTS, # selected objects - ann_files=[ - osp.join( - DATASETS_ROOT, - "lm_imgn/image_set/{}_{}.txt".format("train", _obj), - ) - for _obj in LM_13_OBJECTS - ], - image_prefixes=[osp.join(DATASETS_ROOT, "lm_imgn/imgn") for _obj in LM_13_OBJECTS], - scale_to_meter=0.001, - with_masks=True, # (load masks but may not use it) - with_depth=True, # (load depth path here, but may not use it) - depth_factor=1000.0, - cam=ref.lm_full.camera_matrix, - height=480, - width=640, - cache_dir=osp.join(PROJ_ROOT, ".cache"), - use_cache=True, - n_per_obj=1000, # 1000 per class - filter_scene=True, - filter_invalid=False, - ref_key="lm_full", - ) -) - -# single obj splits -for obj in ref.lm_full.objects: - for split in ["train"]: - name = "lm_imgn_13_{}_{}_1k".format(obj, split) - ann_files = [osp.join(DATASETS_ROOT, "lm_imgn/image_set/{}_{}.txt".format(split, obj))] - if split in ["train"]: - filter_invalid = True - elif split in ["test"]: - filter_invalid = False - else: - raise ValueError("{}".format(split)) - if name not in SPLITS_LM_IMGN_13: - SPLITS_LM_IMGN_13[name] = dict( - name=name, - dataset_root=osp.join(DATASETS_ROOT, "lm_imgn/"), - models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/models"), - objs=[obj], # only this obj - ann_files=ann_files, - image_prefixes=[osp.join(DATASETS_ROOT, "lm_imgn/imgn/")], - scale_to_meter=0.001, - with_masks=True, # (load masks but may not use it) - with_depth=True, # (load depth path here, but may not use it) - depth_factor=1000.0, - cam=ref.lm_full.camera_matrix, - height=480, - width=640, - cache_dir=osp.join(PROJ_ROOT, ".cache"), - use_cache=True, - n_per_obj=1000, - filter_invalid=False, - filter_scene=True, - ref_key="lm_full", - ) - - -def register_with_name_cfg(name, data_cfg=None): - """Assume pre-defined datasets live in `./datasets`. - - Args: - name: datasnet_name, - data_cfg: if name is in existing SPLITS, use pre-defined data_cfg - otherwise requires data_cfg - data_cfg can be set in cfg.DATA_CFG.name - """ - dprint("register dataset: {}".format(name)) - if name in SPLITS_LM_IMGN_13: - used_cfg = SPLITS_LM_IMGN_13[name] - else: - assert data_cfg is not None, f"dataset name {name} is not registered" - used_cfg = data_cfg - DatasetCatalog.register(name, LM_SYN_IMGN_Dataset(used_cfg)) - # something like eval_types - MetadataCatalog.get(name).set( - ref_key=used_cfg["ref_key"], - objs=used_cfg["objs"], - eval_error_types=["ad", "rete", "proj"], - evaluator_type="coco_bop", - **get_lm_metadata(obj_names=used_cfg["objs"], ref_key=used_cfg["ref_key"]), - ) - - -def get_available_datasets(): - return list(SPLITS_LM_IMGN_13.keys()) - - -#### tests ############################################### -def test_vis(): - dset_name = sys.argv[1] - assert dset_name in DatasetCatalog.list() - - meta = MetadataCatalog.get(dset_name) - dprint("MetadataCatalog: ", meta) - objs = meta.objs - - t_start = time.perf_counter() - dicts = DatasetCatalog.get(dset_name) - logger.info("Done loading {} samples with {:.3f}s.".format(len(dicts), time.perf_counter() - t_start)) - - dirname = "output/{}-data-vis".format(dset_name) - os.makedirs(dirname, exist_ok=True) - for d in dicts: - img = read_image_mmcv(d["file_name"], format="BGR") - depth = mmcv.imread(d["depth_file"], "unchanged") / 1000.0 - - anno = d["annotations"][0] # only one instance per image - imH, imW = img.shape[:2] - mask = cocosegm2mask(anno["segmentation"], imH, imW) - bbox = anno["bbox"] - bbox_mode = anno["bbox_mode"] - bbox_xyxy = np.array(BoxMode.convert(bbox, bbox_mode, BoxMode.XYXY_ABS)) - kpt3d = anno["bbox3d_and_center"] - quat = anno["quat"] - trans = anno["trans"] - R = quat2mat(quat) - # 0-based label - cat_id = anno["category_id"] - K = d["cam"] - kpt_2d = misc.project_pts(kpt3d, K, R, trans) - # # TODO: visualize pose and keypoints - label = objs[cat_id] - # img_vis = vis_image_bboxes_cv2(img, bboxes=bboxes_xyxy, labels=labels) - img_vis = vis_image_mask_bbox_cv2(img, [mask], bboxes=[bbox_xyxy], labels=[label]) - img_vis_kpt2d = img.copy() - img_vis_kpt2d = misc.draw_projected_box3d( - img_vis_kpt2d, - kpt_2d, - middle_color=None, - bottom_color=(128, 128, 128), - ) - - grid_show( - [ - img[:, :, [2, 1, 0]], - img_vis[:, :, [2, 1, 0]], - img_vis_kpt2d[:, :, [2, 1, 0]], - depth, - ], - ["img", "vis_img", "img_vis_kpts2d", "depth"], - row=2, - col=2, - ) - - -if __name__ == "__main__": - """Test the dataset loader. - - Usage: - python -m det.yolov4.datasets.lm_syn_imgn dataset_name - """ - from lib.vis_utils.image import grid_show - from lib.utils.setup_logger import setup_my_logger - - import detectron2.data.datasets # noqa # add pre-defined metadata - from lib.vis_utils.image import vis_image_mask_bbox_cv2 - from core.utils.data_utils import read_image_mmcv - - print("sys.argv:", sys.argv) - logger = setup_my_logger(name="core") - register_with_name_cfg(sys.argv[1]) - print("dataset catalog: ", DatasetCatalog.list()) - test_vis() -- GitLab