From e3bda6930440433dddeed0bd34a079e3ece28cd6 Mon Sep 17 00:00:00 2001
From: liuxingyu <lxy17@foxmail.com>
Date: Wed, 2 Nov 2022 13:59:10 +0800
Subject: [PATCH] rearrange dataset files

---
 .../gdrn_modeling/datasets/dataset_factory.py |   9 -
 core/gdrn_modeling/datasets/duck_frames.py    | 278 -------
 .../datasets/hb_bench_driller_phone_d2.py     | 555 --------------
 det/yolox/data/datasets/dataset_factory.py    |   9 +-
 .../datasets/hb_bench_driller_phone_d2.py     | 615 ---------------
 det/yolox/data/datasets/lm_dataset_d2.py      | 701 ++++++++++++++++++
 6 files changed, 702 insertions(+), 1465 deletions(-)
 delete mode 100644 core/gdrn_modeling/datasets/duck_frames.py
 delete mode 100644 core/gdrn_modeling/datasets/hb_bench_driller_phone_d2.py
 delete mode 100644 det/yolox/data/datasets/hb_bench_driller_phone_d2.py
 create mode 100644 det/yolox/data/datasets/lm_dataset_d2.py

diff --git a/core/gdrn_modeling/datasets/dataset_factory.py b/core/gdrn_modeling/datasets/dataset_factory.py
index a14b6fa..3d9f58b 100644
--- a/core/gdrn_modeling/datasets/dataset_factory.py
+++ b/core/gdrn_modeling/datasets/dataset_factory.py
@@ -16,8 +16,6 @@ from core.gdrn_modeling.datasets import (
     hb_pbr,
     hb_bop_val,
     hb_bop_test,
-    hb_bench_driller_phone_d2,
-    duck_frames,
     tudl_pbr,
     tudl_d2,
     tudl_bop_test,
@@ -41,21 +39,14 @@ __all__ = [
     "get_available_datasets",
 ]
 _DSET_MOD_NAMES = [
-    "lm_syn_imgn",
     "lm_dataset_d2",
-    "lm_syn_egl",
     "lm_pbr",
-    "lm_blender",
-    "lm_dataset_crop_d2",
     "ycbv_pbr",
     "ycbv_d2",
     "ycbv_bop_test",
     "hb_pbr",
     "hb_bop_val",
     "hb_bop_test",
-    "hb_bench_driller_phone_d2",
-    "duck_frames",
-    "lm_new_duck_pbr",
     "tudl_pbr",
     "tudl_d2",
     "tudl_bop_test",
diff --git a/core/gdrn_modeling/datasets/duck_frames.py b/core/gdrn_modeling/datasets/duck_frames.py
deleted file mode 100644
index 1271b7e..0000000
--- a/core/gdrn_modeling/datasets/duck_frames.py
+++ /dev/null
@@ -1,278 +0,0 @@
-import hashlib
-import logging
-import os
-import os.path as osp
-import time
-from collections import OrderedDict
-
-import cv2
-import mmcv
-import numpy as np
-from tqdm import tqdm
-from detectron2.data import DatasetCatalog, MetadataCatalog
-
-cur_dir = osp.dirname(osp.abspath(__file__))
-PROJ_ROOT = osp.normpath(osp.join(cur_dir, "../../.."))
-
-import ref
-
-from lib.pysixd import inout, misc
-from lib.utils.utils import dprint, iprint, lazy_property
-
-
-logger = logging.getLogger(__name__)
-
-DATASETS_ROOT = osp.normpath(osp.join(PROJ_ROOT, "datasets"))
-
-
-class DUCK_FRAMES_Dataset(object):
-    def __init__(self, data_cfg):
-        """Set with_depth default to True, and decide whether to load them into
-        dataloader/network later."""
-        self.data_cfg = data_cfg
-        self.name = data_cfg["name"]
-        self.root = data_cfg.get("root", "datasets/duck_fabi")
-        self.idx_files = data_cfg["idx_files"]
-        self.models_root = data_cfg["models_root"]
-        self.objs = objs = data_cfg.get("objs", ref.lm_full.objects)
-        self.scale_to_meter = data_cfg.get("scale_to_meter", 0.001)
-        self.with_depth = data_cfg.get("with_depth", True)
-        self.height = data_cfg.get("height", 720)
-        self.width = data_cfg.get("width", 1280)
-        self.depth_factor = data_cfg.get("depth_factor", 1000)
-        self.cache_dir = data_cfg.get("cache_dir", ".cache")
-        self.use_cache = data_cfg.get("use_cache", True)
-        self.num_to_load = data_cfg.get("num_to_load", -1)
-        self.filter_invalid = data_cfg.get("filter_invalid", False)
-
-        #####################################################
-        # NOTE: careful! Only the selected objects
-        self.cat_ids = [cat_id for cat_id, obj_name in ref.lm_full.id2obj.items() if obj_name in objs]
-        # map selected objs to [0, num_objs-1]
-        self.cat2label = {v: i for i, v in enumerate(self.cat_ids)}  # id_map
-        self.label2cat = {label: cat for cat, label in self.cat2label.items()}
-        self.obj2label = OrderedDict((obj, obj_id) for obj_id, obj in enumerate(self.objs))
-
-        self.images = []
-
-        for idx_file in self.idx_files:
-            assert osp.exists(idx_file), idx_file
-            with open(idx_file, "r") as f:
-                for line in f:
-                    file_name = line.strip("\r\n")
-                    image_path = osp.join(self.root, file_name)
-                    assert osp.exists(image_path), image_path
-                    self.images.append(image_path)  # load rgb image
-
-        assert len(self.images) > 0, "wrong len of images: {}".format(len(self.images))
-
-        if self.num_to_load > 0:
-            self.num_to_load = min(self.num_to_load, len(self.images))
-        else:
-            self.num_to_load = len(self.images)
-        logger.info("Dataset has {} images".format(len(self.images)))
-        logger.info("num images to load: {}".format(self.num_to_load))
-
-    def get_sample_dict(self, index):
-        record = {}
-        img_file = self.images[index]
-        record["dataset_name"] = self.name
-        record["file_name"] = osp.relpath(img_file, PROJ_ROOT)
-        record["height"] = self.height
-        record["width"] = self.width
-        image_name = img_file.split("/")[-1]
-        scene_id = 0
-        image_id = image_name.split(".")[0].split("_")[-1]
-        record["image_id"] = self._unique_id
-        record["scene_im_id"] = "{}/{}".format(scene_id, image_id)
-        # record["cam"] = ref.lm_full.camera_matrix
-        record["cam"] = np.array([[572.4114, 0, 645.2611], [0, 573.57043, 362.04899], [0, 0, 1]], dtype=np.float32)
-        return record
-
-    def __call__(self):
-        """Load light-weight instance annotations of all images into a list of
-        dicts in Detectron2 format.
-
-        Do not load heavy data into memory in this file, since we will
-        load the annotations of all images into memory.
-        """
-        # cache the dataset_dicts to avoid loading masks from files
-        hashed_file_name = hashlib.md5(
-            (
-                "".join([str(fn) for fn in self.objs])
-                + "dataset_dicts_{}_{}_{}_{}".format(
-                    self.name,
-                    self.root,
-                    self.with_depth,
-                    __name__,
-                )
-            ).encode("utf-8")
-        ).hexdigest()
-        cache_path = osp.join(
-            self.cache_dir,
-            "dataset_dicts_{}_{}.pkl".format(self.name, hashed_file_name),
-        )
-
-        if osp.exists(cache_path) and self.use_cache:
-            logger.info("load cached dataset dicts from {}".format(cache_path))
-            return mmcv.load(cache_path)
-
-        t_start = time.perf_counter()
-        dataset_dicts = []
-        logger.info("loading dataset dicts")
-        indices = [i for i in range(self.num_to_load)]
-
-        self._unique_id = 0
-        for index in tqdm(indices):
-            sample_dict = self.get_sample_dict(index)
-            if sample_dict is not None:
-                dataset_dicts.append(sample_dict)
-            self._unique_id += 1
-        logger.info("loaded {} dataset dicts, using {}s".format(len(dataset_dicts), time.perf_counter() - t_start))
-        mmcv.dump(dataset_dicts, cache_path, protocol=4)
-        logger.info("Dumped dataset_dicts to {}".format(cache_path))
-        return dataset_dicts
-
-    @lazy_property
-    def models(self):
-        """Load models into a list."""
-        cache_path = osp.join(self.models_root, "models_{}.pkl".format("_".join(self.objs)))
-        if osp.exists(cache_path) and self.use_cache:
-            # logger.info("load cached object models from {}".format(cache_path))
-            return mmcv.load(cache_path)
-
-        models = []
-        for obj_name in self.objs:
-            model = inout.load_ply(
-                osp.join(self.models_root, f"obj_{ref.lm_full.obj2id[obj_name]:06d}.ply"),
-                vertex_scale=self.scale_to_meter,
-            )
-            # NOTE: the bbox3d_and_center is not obtained from centered vertices
-            # for BOP models, not a big problem since they had been centered
-            model["bbox3d_and_center"] = misc.get_bbox3d_and_center(model["pts"])
-            models.append(model)
-        logger.info("cache models to {}".format(cache_path))
-        mmcv.dump(models, cache_path, protocol=4)
-        return models
-
-    def __len__(self):
-        return self.num_to_load
-
-    def image_aspect_ratio(self):
-        return self.width / self.height  # 4/3
-
-
-SPLITS_DUCK_FRAMES = dict(
-    duck_frames_lm=dict(
-        name="duck_frames_lm",
-        root=osp.join(DATASETS_ROOT, "duck_fabi"),
-        idx_files=[osp.join(DATASETS_ROOT, "duck_fabi/duck_frames.txt")],
-        models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/models"),
-        objs=["duck"],
-        scale_to_meter=0.001,
-        with_depth=False,
-        depth_factor=1000.0,
-        height=720,
-        width=1280,
-        cache_dir=".cache",
-        use_cache=True,
-        num_to_load=-1,
-        filter_scene=False,
-        filter_invalid=False,
-        ref_key="lmo_full",
-    ),
-    duck_frames=dict(
-        name="duck_frames",
-        root=osp.join(DATASETS_ROOT, "duck_fabi"),
-        idx_files=[osp.join(DATASETS_ROOT, "duck_fabi/duck_frames.txt")],
-        models_root=osp.join(DATASETS_ROOT, "duck_fabi/models"),
-        objs=["duck"],
-        scale_to_meter=0.001,
-        with_depth=False,
-        depth_factor=1000.0,
-        height=720,
-        width=1280,
-        cache_dir=".cache",
-        use_cache=True,
-        num_to_load=-1,
-        filter_scene=False,
-        filter_invalid=False,
-        ref_key="lm_duck_fabi",
-    ),
-)
-
-
-def register_duck_frames():
-    for dset_name, data_cfg in SPLITS_DUCK_FRAMES.items():
-        # if comm.is_main_process():
-        #     iprint('register dataset: {}'.format(dset_name))
-        DatasetCatalog.register(dset_name, DUCK_FRAMES_Dataset(data_cfg))
-        MetadataCatalog.get(dset_name).set(
-            ref_key=data_cfg["ref_key"],
-            objs=data_cfg["objs"],
-            eval_error_types=["ad", "rete", "proj"],
-            evaluator_type="bop",
-            thing_classes=data_cfg["objs"],
-        )
-
-
-##################
-def register_with_name_cfg(name, data_cfg=None):
-    """Assume pre-defined datasets live in `./datasets`.
-
-    Args:
-        name: datasnet_name,
-        data_cfg: if name is in existing SPLITS, use pre-defined data_cfg
-            otherwise requires data_cfg
-            data_cfg can be set in cfg.DATA_CFG.name
-    """
-    dprint("register dataset: {}".format(name))
-
-    if name in SPLITS_DUCK_FRAMES:
-        used_cfg = SPLITS_DUCK_FRAMES[name]
-    else:
-        assert data_cfg is not None, f"dataset name {name} is not registered"
-        used_cfg = data_cfg
-    DatasetCatalog.register(name, DUCK_FRAMES_Dataset(used_cfg))
-    # something like eval_types
-    MetadataCatalog.get(name).set(
-        ref_key=used_cfg["ref_key"],
-        objs=used_cfg["objs"],
-        eval_error_types=["ad", "rete", "proj"],
-        evaluator_type="bop",
-        thing_classes=used_cfg["objs"],
-    )
-
-
-def get_available_datasets():
-    names = list(SPLITS_DUCK_FRAMES.keys())
-    return names
-
-
-if __name__ == "__main__":
-    from lib.vis_utils.image import grid_show
-    from detectron2.utils.logger import setup_logger
-
-    logger = setup_logger(name="core")
-    register_duck_frames()
-
-    print("dataset catalog: ", DatasetCatalog.list())
-    dset_name = "duck_frames"
-    assert dset_name in DatasetCatalog.list()
-
-    meta = MetadataCatalog.get(dset_name)
-    dprint("MetadataCatalog: ", meta)
-
-    t_start = time.perf_counter()
-    dicts = DatasetCatalog.get(dset_name)
-    logger.info("Done loading {} samples with {:.3f}s.".format(len(dicts), time.perf_counter() - t_start))
-
-    dirname = "output/{}-data-vis".format(dset_name)
-    os.makedirs(dirname, exist_ok=True)
-    for d in dicts:
-        img = cv2.imread(d["file_name"], cv2.IMREAD_COLOR).astype("float32") / 255.0
-        cv2.imshow("color", img)
-        k = cv2.waitKey()
-        if k == 27:
-            cv2.destroyAllWindows()
-            break
diff --git a/core/gdrn_modeling/datasets/hb_bench_driller_phone_d2.py b/core/gdrn_modeling/datasets/hb_bench_driller_phone_d2.py
deleted file mode 100644
index d00e06a..0000000
--- a/core/gdrn_modeling/datasets/hb_bench_driller_phone_d2.py
+++ /dev/null
@@ -1,555 +0,0 @@
-# NOTE: different from Self6D-v1 which uses hb-v1, this uses hb_bop conventions
-import hashlib
-import logging
-import os
-import os.path as osp
-import sys
-import time
-from collections import OrderedDict
-import mmcv
-import numpy as np
-from tqdm import tqdm
-from transforms3d.quaternions import mat2quat, quat2mat
-from detectron2.data import DatasetCatalog, MetadataCatalog
-from detectron2.structures import BoxMode
-
-cur_dir = osp.dirname(osp.abspath(__file__))
-PROJ_ROOT = osp.normpath(osp.join(cur_dir, "../../.."))
-sys.path.insert(0, PROJ_ROOT)
-
-import ref
-
-from lib.pysixd import inout, misc
-from lib.utils.mask_utils import binary_mask_to_rle, cocosegm2mask
-from lib.utils.utils import dprint, iprint, lazy_property
-
-
-logger = logging.getLogger(__name__)
-DATASETS_ROOT = osp.normpath(osp.join(PROJ_ROOT, "datasets"))
-
-
-class HB_BenchDrillerPhone:
-    """a test sequence (test sequence 2) of HomebrewedDB contains 3 objects in
-    linemod."""
-
-    def __init__(self, data_cfg):
-        self.name = data_cfg["name"]
-        self.data_cfg = data_cfg
-
-        self.objs = data_cfg["objs"]  # selected objects
-
-        self.dataset_root = dataset_root = data_cfg["dataset_root"]
-        self.ann_files = data_cfg["ann_files"]
-        self.models_root = data_cfg["models_root"]  # models_lm
-        self.scale_to_meter = data_cfg["scale_to_meter"]
-
-        # use the images with converted K
-        cam_type = data_cfg["cam_type"]
-        assert cam_type in ["linemod", "hb"]
-        self.cam_type = cam_type
-        if cam_type == "linemod":  # linemod K
-            self.cam = np.array(
-                [[572.4114, 0, 325.2611], [0, 573.57043, 242.04899], [0, 0, 1]],
-                dtype="float32",
-            )
-            self.rgb_root = osp.join(dataset_root, "sequence/rgb_lmK")
-            self.depth_root = osp.join(dataset_root, "sequence/depth_lmK")
-            self.mask_visib_root = osp.join(dataset_root, "sequence/mask_visib_lmK")
-        else:  # hb
-            self.cam = np.array(
-                [[537.4799, 0, 318.8965], [0, 536.1447, 238.3781], [0, 0, 1]],
-                dtype="float32",
-            )
-            self.rgb_root = osp.join(dataset_root, "sequence/rgb")
-            self.depth_root = osp.join(dataset_root, "sequence/depth")
-            self.mask_visib_root = osp.join(dataset_root, "sequence/mask_visib")
-        assert osp.exists(self.rgb_root), self.rgb_root
-
-        self.with_masks = data_cfg.get("with_masks", True)
-        self.with_depth = data_cfg.get("with_depth", True)
-
-        self.height = data_cfg["height"]
-        self.width = data_cfg["width"]
-        self.cache_dir = data_cfg.get("cache_dir", osp.join(PROJ_ROOT, ".cache"))  # .cache
-        self.use_cache = data_cfg.get("use_cache", True)
-        self.num_to_load = data_cfg["num_to_load"]  # -1
-        self.filter_invalid = data_cfg["filter_invalid"]
-        ##################################################
-
-        # NOTE: careful! Only the selected objects
-        self.cat_ids = [cat_id for cat_id, obj_name in ref.hb_bdp.id2obj.items() if obj_name in self.objs]
-        # map selected objs to [0, num_objs-1]
-        self.cat2label = {v: i for i, v in enumerate(self.cat_ids)}  # id_map
-        self.label2cat = {label: cat for cat, label in self.cat2label.items()}
-        self.obj2label = OrderedDict((obj, obj_id) for obj_id, obj in enumerate(self.objs))
-        ##########################################################
-
-    def __call__(self):
-        hashed_file_name = hashlib.md5(
-            (
-                "".join([str(fn) for fn in self.objs])
-                + "dataset_dicts_{}_{}_{}_{}_{}".format(
-                    self.name, self.dataset_root, self.with_masks, self.with_depth, __name__
-                )
-            ).encode("utf-8")
-        ).hexdigest()
-        cache_path = osp.join(
-            self.cache_dir,
-            "dataset_dicts_{}_{}.pkl".format(self.name, hashed_file_name),
-        )
-
-        if osp.exists(cache_path) and self.use_cache:
-            logger.info("load cached dataset dicts from {}".format(cache_path))
-            return mmcv.load(cache_path)
-
-        t_start = time.perf_counter()
-        dataset_dicts = []
-        im_id_global = 0
-
-        logger.info("loading dataset dicts: {}".format(self.name))
-        self.num_instances_without_valid_segmentation = 0
-        self.num_instances_without_valid_box = 0
-        # NOTE: converted from gt_v1, obj_id --> obj_id+1
-        gt_path = osp.join(self.dataset_root, "sequence/gt_v2.json")
-        gt_dict = mmcv.load(gt_path)
-
-        # determine which images to load by self.ann_files
-        sel_im_ids = []
-        for ann_file in self.ann_files:
-            with open(ann_file, "r") as f:
-                for line in f:
-                    line = line.strip("\r\n")
-                    cur_im_id = int(line)
-                    if cur_im_id not in sel_im_ids:
-                        sel_im_ids.append(cur_im_id)
-
-        for str_im_id, annos in tqdm(gt_dict.items()):  # str im ids
-            int_im_id = int(str_im_id)
-            if int_im_id not in sel_im_ids:
-                continue
-            rgb_path = osp.join(self.rgb_root, "color_{:06d}.png".format(int_im_id))
-            depth_path = osp.join(self.depth_root, "{:06d}.png".format(int_im_id))
-
-            scene_id = 2  # dummy (because in the whole test set, its scene id is 2)
-            record = {
-                "dataset_name": self.name,
-                "file_name": osp.relpath(rgb_path, PROJ_ROOT),
-                "depth_file": osp.relpath(depth_path, PROJ_ROOT),
-                "depth_factor": 1 / self.scale_to_meter,
-                "height": self.height,
-                "width": self.width,
-                "image_id": im_id_global,
-                "scene_im_id": "{}/{}".format(scene_id, int_im_id),  # for evaluation
-                "cam": self.cam,
-                "img_type": "real",
-            }
-            im_id_global += 1
-
-            inst_annos = []
-            for anno_i, anno in enumerate(annos):
-                obj_id = anno["obj_id"]
-                cls_name = ref.hb_bdp.id2obj[obj_id]
-                if cls_name not in self.objs:
-                    continue
-                if cls_name not in ref.hb_bdp.objects:  # only support 3 objects
-                    continue
-
-                cur_label = self.cat2label[obj_id]
-
-                R = np.array(anno["cam_R_m2c"], dtype="float32").reshape(3, 3)
-                t = np.array(anno["cam_t_m2c"], dtype="float32") / 1000.0
-                pose = np.hstack([R, t.reshape(3, 1)])
-                if self.cam_type == "hb":
-                    bbox = anno["obj_bb"]
-                    bbox_mode = BoxMode.XYWH_ABS
-                elif self.cam_type == "linemod":
-                    # get bbox from projected points
-                    bbox = misc.compute_2d_bbox_xyxy_from_pose_v2(
-                        self.models[cur_label]["pts"].astype("float32"),
-                        pose.astype("float32"),
-                        self.cam,
-                        width=self.width,
-                        height=self.height,
-                        clip=True,
-                    )
-                    bbox_mode = BoxMode.XYXY_ABS
-                    x1, y1, x2, y2 = bbox
-                    w = x2 - x1
-                    h = y2 - y1
-                else:
-                    raise ValueError("Wrong cam type: {}".format(self.cam_type))
-
-                if self.filter_invalid:
-                    if h <= 1 or w <= 1:
-                        self.num_instances_without_valid_box += 1
-                        continue
-
-                mask_visib_file = osp.join(
-                    self.mask_visib_root,
-                    "{:06d}_{:06d}.png".format(int_im_id, anno_i),
-                )
-                assert osp.exists(mask_visib_file), mask_visib_file
-                # load mask visib  TODO: load both mask_visib and mask_full
-                mask_single = mmcv.imread(mask_visib_file, "unchanged")
-                area = mask_single.sum()
-                if area < 3:  # filter out too small or nearly invisible instances
-                    self.num_instances_without_valid_segmentation += 1
-                    continue
-                mask_rle = binary_mask_to_rle(mask_single, compressed=True)
-
-                quat = mat2quat(R).astype("float32")
-
-                proj = (record["cam"] @ t.T).T
-                proj = proj[:2] / proj[2]
-
-                inst = {
-                    "category_id": cur_label,  # 0-based label
-                    "bbox": bbox,
-                    "bbox_mode": bbox_mode,
-                    "pose": pose,
-                    "quat": quat,
-                    "trans": t,
-                    "centroid_2d": proj,  # absolute (cx, cy)
-                    "segmentation": mask_rle,
-                }
-
-                # NOTE: currently no xyz
-                # if "test" not in self.name:
-                #     xyz_path = osp.join(xyz_root, f"{int_im_id:06d}_{anno_i:06d}.pkl")
-                #     assert osp.exists(xyz_path), xyz_path
-                #     inst["xyz_path"] = xyz_path
-
-                model_info = self.models_info[str(obj_id)]
-                inst["model_info"] = model_info
-                for key in ["bbox3d_and_center"]:
-                    inst[key] = self.models[cur_label][key]
-                inst_annos.append(inst)
-            if len(inst_annos) == 0 and self.filter_invalid:  # filter im without anno
-                continue
-            record["annotations"] = inst_annos
-            dataset_dicts.append(record)
-
-        if self.num_instances_without_valid_segmentation > 0:
-            logger.warning(
-                "Filtered out {} instances without valid segmentation. "
-                "There might be issues in your dataset generation process.".format(
-                    self.num_instances_without_valid_segmentation
-                )
-            )
-        if self.num_instances_without_valid_box > 0:
-            logger.warning(
-                "Filtered out {} instances without valid box. "
-                "There might be issues in your dataset generation process.".format(self.num_instances_without_valid_box)
-            )
-        ##########################################################################
-        if self.num_to_load > 0:
-            self.num_to_load = min(int(self.num_to_load), len(dataset_dicts))
-            dataset_dicts = dataset_dicts[: self.num_to_load]
-        logger.info("loaded {} dataset dicts, using {}s".format(len(dataset_dicts), time.perf_counter() - t_start))
-
-        mmcv.mkdir_or_exist(osp.dirname(cache_path))
-        mmcv.dump(dataset_dicts, cache_path, protocol=4)
-        logger.info("Dumped dataset_dicts to {}".format(cache_path))
-        return dataset_dicts
-
-    @lazy_property
-    def models_info(self):
-        models_info_path = osp.join(self.models_root, "models_info.json")
-        assert osp.exists(models_info_path), models_info_path
-        models_info = mmcv.load(models_info_path)  # key is str(obj_id)
-        return models_info
-
-    @lazy_property
-    def models(self):
-        """Load models into a list."""
-        cache_path = osp.join(PROJ_ROOT, ".cache", "models_{}.pkl".format("_".join(self.objs)))
-        if osp.exists(cache_path) and self.use_cache:
-            # logger.info("load cached object models from {}".format(cache_path))
-            return mmcv.load(cache_path)
-
-        models = []
-        for obj_name in self.objs:
-            if obj_name not in ref.hb_bdp.objects:
-                models.append(None)
-                continue
-            model = inout.load_ply(
-                osp.join(self.models_root, "obj_{:06d}.ply".format(ref.hb_bdp.obj2id[obj_name])),
-                vertex_scale=self.scale_to_meter,
-            )
-            # NOTE: the bbox3d_and_center is not obtained from centered vertices
-            # for BOP models, not a big problem since they had been centered
-            model["bbox3d_and_center"] = misc.get_bbox3d_and_center(model["pts"])
-
-            models.append(model)
-        logger.info("cache models to {}".format(cache_path))
-        mmcv.mkdir_or_exist(osp.dirname(cache_path))
-        mmcv.dump(models, cache_path, protocol=4)
-        return models
-
-    def image_aspect_ratio(self):
-        return self.width / self.height  # 4/3
-
-
-########### register datasets ############################################################
-
-
-def get_hb_bdp_metadata(obj_names, ref_key):
-    """task specific metadata."""
-    data_ref = ref.__dict__[ref_key]
-
-    cur_sym_infos = {}  # label based key
-    loaded_models_info = data_ref.get_models_info()
-
-    for i, obj_name in enumerate(obj_names):
-        obj_id = data_ref.obj2id[obj_name]
-        model_info = loaded_models_info[str(obj_id)]
-        if "symmetries_discrete" in model_info or "symmetries_continuous" in model_info:
-            sym_transforms = misc.get_symmetry_transformations(model_info, max_sym_disc_step=0.01)
-            sym_info = np.array([sym["R"] for sym in sym_transforms], dtype=np.float32)
-        else:
-            sym_info = None
-        cur_sym_infos[i] = sym_info
-
-    meta = {"thing_classes": obj_names, "sym_infos": cur_sym_infos}
-    return meta
-
-
-SPLITS_HB_BenchviseDrillerPhone = dict(
-    # TODO: maybe add scene name
-    hb_benchvise_driller_phone_all_lmK=dict(
-        name="hb_benchvise_driller_phone_all_lmK",
-        dataset_root=osp.join(DATASETS_ROOT, "hb_bench_driller_phone"),
-        models_root=osp.join(DATASETS_ROOT, "hb_bench_driller_phone/models_lm/"),
-        ann_files=[osp.join(DATASETS_ROOT, "hb_bench_driller_phone/image_set/all.txt")],
-        objs=["benchvise", "driller", "phone"],
-        use_cache=True,
-        num_to_load=-1,
-        cam_type="linemod",
-        scale_to_meter=0.001,
-        filter_invalid=False,
-        height=480,
-        width=640,
-        ref_key="hb_bdp",
-    ),
-    hb_benchvise_driller_phone_all=dict(
-        name="hb_benchvise_driller_phone_all",
-        dataset_root=osp.join(DATASETS_ROOT, "hb_bench_driller_phone"),
-        models_root=osp.join(DATASETS_ROOT, "hb_bench_driller_phone/models_lm/"),
-        ann_files=[osp.join(DATASETS_ROOT, "hb_bench_driller_phone/image_set/all.txt")],
-        objs=["benchvise", "driller", "phone"],
-        use_cache=True,
-        num_to_load=-1,
-        cam_type="hb",  # NOTE: hb K
-        scale_to_meter=0.001,
-        filter_invalid=False,
-        height=480,
-        width=640,
-        ref_key="hb_bdp",
-    ),
-    hb_benchvise_driller_phone_test_lmK=dict(
-        name="hb_benchvise_driller_phone_test_lmK",
-        dataset_root=osp.join(DATASETS_ROOT, "hb_bench_driller_phone"),
-        models_root=osp.join(DATASETS_ROOT, "hb_bench_driller_phone/models_lm/"),
-        ann_files=[osp.join(DATASETS_ROOT, "hb_bench_driller_phone/image_set/test.txt")],
-        objs=["benchvise", "driller", "phone"],
-        use_cache=True,
-        num_to_load=-1,
-        cam_type="linemod",
-        scale_to_meter=0.001,
-        filter_invalid=False,
-        height=480,
-        width=640,
-        ref_key="hb_bdp",
-    ),
-    hb_benchvise_driller_phone_test=dict(
-        name="hb_benchvise_driller_phone_test",
-        dataset_root=osp.join(DATASETS_ROOT, "hb_bench_driller_phone"),
-        models_root=osp.join(DATASETS_ROOT, "hb_bench_driller_phone/models_lm/"),
-        ann_files=[osp.join(DATASETS_ROOT, "hb_bench_driller_phone/image_set/test.txt")],
-        objs=["benchvise", "driller", "phone"],
-        use_cache=True,
-        num_to_load=-1,
-        cam_type="hb",
-        scale_to_meter=0.001,
-        filter_invalid=False,
-        height=480,
-        width=640,
-        ref_key="hb_bdp",
-    ),
-)
-
-
-# add varying percent splits
-VARY_PERCENT_SPLITS = [
-    "test100",
-    "train090",
-    "train180",
-    "train270",
-    "train360",
-    "train450",
-    "train540",
-    "train630",
-    "train720",
-    "train810",
-    "train900",
-]
-
-# all objects
-for _split in VARY_PERCENT_SPLITS:
-    for cam_type in ["linemod", "hb"]:
-        K_str = "_lmK" if cam_type == "linemod" else ""
-        name = "hb_benchvise_driller_phone_{}{}".format(_split, K_str)
-        if name not in SPLITS_HB_BenchviseDrillerPhone:
-            SPLITS_HB_BenchviseDrillerPhone[name] = dict(
-                name=name,
-                dataset_root=osp.join(DATASETS_ROOT, "hb_bench_driller_phone"),
-                models_root=osp.join(DATASETS_ROOT, "hb_bench_driller_phone/models_lm/"),
-                ann_files=[osp.join(DATASETS_ROOT, f"hb_bench_driller_phone/image_set/{_split}.txt")],
-                objs=["benchvise", "driller", "phone"],
-                use_cache=True,
-                num_to_load=-1,
-                cam_type=cam_type,
-                scale_to_meter=0.001,
-                filter_invalid=False,
-                height=480,
-                width=640,
-                ref_key="hb_bdp",
-            )
-
-# single obj splits
-for obj in ref.hb_bdp.objects:
-    for split in ["test", "train", "all"] + VARY_PERCENT_SPLITS:
-        for cam_type in ["linemod", "hb"]:
-            K_str = "_lmK" if cam_type == "linemod" else ""
-            name = "hb_bdp_{}_{}{}".format(obj, split, K_str)
-            if name not in SPLITS_HB_BenchviseDrillerPhone:
-                SPLITS_HB_BenchviseDrillerPhone[name] = dict(
-                    name=name,
-                    dataset_root=osp.join(DATASETS_ROOT, "hb_bench_driller_phone"),
-                    models_root=osp.join(DATASETS_ROOT, "hb_bench_driller_phone/models_lm/"),
-                    ann_files=[osp.join(DATASETS_ROOT, f"hb_bench_driller_phone/image_set/{split}.txt")],
-                    objs=[obj],
-                    use_cache=True,
-                    num_to_load=-1,
-                    cam_type=cam_type,
-                    scale_to_meter=0.001,
-                    filter_invalid=False,
-                    height=480,
-                    width=640,
-                    ref_key="hb_bdp",
-                )
-
-
-def register_with_name_cfg(name, data_cfg=None):
-    """Assume pre-defined datasets live in `./datasets`.
-
-    Args:
-        name: datasnet_name,
-        data_cfg: if name is in existing SPLITS, use pre-defined data_cfg
-            otherwise requires data_cfg
-            data_cfg can be set in cfg.DATA_CFG.name
-    """
-    dprint("register dataset: {}".format(name))
-    if name in SPLITS_HB_BenchviseDrillerPhone:
-        used_cfg = SPLITS_HB_BenchviseDrillerPhone[name]
-    else:
-        assert data_cfg is not None, f"dataset name {name} is not registered"
-        used_cfg = data_cfg
-    DatasetCatalog.register(name, HB_BenchDrillerPhone(used_cfg))
-    # something like eval_types
-    MetadataCatalog.get(name).set(
-        ref_key=used_cfg["ref_key"],
-        objs=used_cfg["objs"],
-        eval_error_types=["ad", "rete", "proj"],
-        evaluator_type="bop",  # NOTE: should not be bop
-        **get_hb_bdp_metadata(obj_names=used_cfg["objs"], ref_key=used_cfg["ref_key"]),
-    )
-
-
-def get_available_datasets():
-    return list(SPLITS_HB_BenchviseDrillerPhone.keys())
-
-
-#### tests ###############################################
-def test_vis():
-    # python -m core.datasets.lm_dataset_d2 lmo_syn_vispy_train
-    dset_name = sys.argv[1]
-    assert dset_name in DatasetCatalog.list()
-
-    meta = MetadataCatalog.get(dset_name)
-    dprint("MetadataCatalog: ", meta)
-    objs = meta.objs
-
-    t_start = time.perf_counter()
-    dicts = DatasetCatalog.get(dset_name)
-    logger.info("Done loading {} samples with {:.3f}s.".format(len(dicts), time.perf_counter() - t_start))
-
-    dirname = "output/{}-data-vis".format(dset_name)
-    os.makedirs(dirname, exist_ok=True)
-    for d in dicts:
-        img = read_image_mmcv(d["file_name"], format="BGR")
-        depth = mmcv.imread(d["depth_file"], "unchanged") / 1000.0
-
-        imH, imW = img.shape[:2]
-        annos = d["annotations"]
-        masks = [cocosegm2mask(anno["segmentation"], imH, imW) for anno in annos]
-        bboxes = [anno["bbox"] for anno in annos]
-        bbox_modes = [anno["bbox_mode"] for anno in annos]
-        bboxes_xyxy = np.array(
-            [BoxMode.convert(box, box_mode, BoxMode.XYXY_ABS) for box, box_mode in zip(bboxes, bbox_modes)]
-        )
-        kpts_3d_list = [anno["bbox3d_and_center"] for anno in annos]
-        quats = [anno["quat"] for anno in annos]
-        transes = [anno["trans"] for anno in annos]
-        Rs = [quat2mat(quat) for quat in quats]
-        # 0-based label
-        cat_ids = [anno["category_id"] for anno in annos]
-        K = d["cam"]
-        kpts_2d = [misc.project_pts(kpt3d, K, R, t) for kpt3d, R, t in zip(kpts_3d_list, Rs, transes)]
-        # # TODO: visualize pose and keypoints
-        labels = [objs[cat_id] for cat_id in cat_ids]
-        for _i in range(len(annos)):
-            img_vis = vis_image_mask_bbox_cv2(
-                img,
-                masks[_i : _i + 1],
-                bboxes=bboxes_xyxy[_i : _i + 1],
-                labels=labels[_i : _i + 1],
-            )
-            img_vis_kpts2d = misc.draw_projected_box3d(img_vis.copy(), kpts_2d[_i])
-
-            grid_show(
-                [
-                    img[:, :, [2, 1, 0]],
-                    img_vis[:, :, [2, 1, 0]],
-                    img_vis_kpts2d[:, :, [2, 1, 0]],
-                    depth,
-                ],
-                ["img", "vis_img", "img_vis_kpts2d", "depth"],
-                row=2,
-                col=2,
-            )
-
-
-if __name__ == "__main__":
-    """Test the  dataset loader.
-
-    Usage:
-        python -m this_module dataset_name
-    """
-    from lib.vis_utils.image import grid_show
-    from lib.utils.setup_logger import setup_my_logger
-    import detectron2.data.datasets  # noqa # add pre-defined metadata
-    from lib.vis_utils.image import (
-        vis_image_mask_bbox_cv2,
-        vis_image_bboxes_cv2,
-    )
-    from lib.utils.mask_utils import cocosegm2mask
-    from lib.utils.bbox_utils import xywh_to_xyxy
-    from core.utils.data_utils import read_image_mmcv
-
-    print("sys.argv:", sys.argv)
-    logger = setup_my_logger(name="core")
-
-    register_with_name_cfg(sys.argv[1])
-    print("dataset catalog: ", DatasetCatalog.list())
-    test_vis()
diff --git a/det/yolox/data/datasets/dataset_factory.py b/det/yolox/data/datasets/dataset_factory.py
index a1f952b..acb2abb 100644
--- a/det/yolox/data/datasets/dataset_factory.py
+++ b/det/yolox/data/datasets/dataset_factory.py
@@ -3,6 +3,7 @@ import os.path as osp
 import mmcv
 from detectron2.data import DatasetCatalog
 from . import (
+    lm_dataset_d2,
     lm_pbr,
     ycbv_pbr,
     ycbv_d2,
@@ -10,7 +11,6 @@ from . import (
     hb_pbr,
     hb_bop_val,
     hb_bop_test,
-    hb_bench_driller_phone_d2,
     tudl_train_real,
     tudl_pbr,
     tudl_bop_test,
@@ -34,21 +34,14 @@ __all__ = [
     "get_available_datasets",
 ]
 _DSET_MOD_NAMES = [
-    "lm_syn_imgn",
     "lm_dataset_d2",
-    # "lm_syn_egl",
     "lm_pbr",
-    "lm_blender",
-    # "lm_dataset_crop_d2",
     "ycbv_pbr",
     "ycbv_d2",
     "ycbv_bop_test",
     "hb_pbr",
     "hb_bop_val",
     "hb_bop_test",
-    "hb_bench_driller_phone_d2",
-    # "duck_frames",
-    # "lm_new_duck_pbr",
     "tudl_train_real",
     "tudl_pbr",
     "tudl_bop_test",
diff --git a/det/yolox/data/datasets/hb_bench_driller_phone_d2.py b/det/yolox/data/datasets/hb_bench_driller_phone_d2.py
deleted file mode 100644
index 5681192..0000000
--- a/det/yolox/data/datasets/hb_bench_driller_phone_d2.py
+++ /dev/null
@@ -1,615 +0,0 @@
-# NOTE: different from Self6D-v1 which uses hb-v1, this uses hb_bop conventions
-import hashlib
-import logging
-import os
-import os.path as osp
-import sys
-import time
-from collections import OrderedDict
-import mmcv
-import numpy as np
-from tqdm import tqdm
-from transforms3d.quaternions import mat2quat, quat2mat
-from detectron2.data import DatasetCatalog, MetadataCatalog
-from detectron2.structures import BoxMode
-
-cur_dir = osp.dirname(osp.abspath(__file__))
-PROJ_ROOT = osp.normpath(osp.join(cur_dir, "../../../.."))
-sys.path.insert(0, PROJ_ROOT)
-
-import ref
-
-from lib.pysixd import inout, misc
-from lib.utils.mask_utils import binary_mask_to_rle, cocosegm2mask
-from lib.utils.utils import dprint, iprint, lazy_property
-
-
-logger = logging.getLogger(__name__)
-DATASETS_ROOT = osp.normpath(osp.join(PROJ_ROOT, "datasets"))
-
-
-class HB_BenchDrillerPhone:
-    """a test sequence (test sequence 2) of HomebrewedDB contains 3 objects in
-    linemod."""
-
-    def __init__(self, data_cfg):
-        self.name = data_cfg["name"]
-        self.data_cfg = data_cfg
-
-        self.objs = data_cfg["objs"]  # selected objects
-
-        self.dataset_root = dataset_root = data_cfg["dataset_root"]
-        self.ann_files = data_cfg["ann_files"]
-        self.models_root = data_cfg["models_root"]  # models_lm
-        self.scale_to_meter = data_cfg["scale_to_meter"]
-
-        # use the images with converted K
-        assert cam_type in ["linemod", "hb"]
-        self.cam_type = cam_type
-        if cam_type == "linemod":  # linemod K
-            self.cam = np.array(
-                [
-                    [572.4114, 0, 325.2611],
-                    [0, 573.57043, 242.04899],
-                    [0, 0, 1],
-                ],
-                dtype="float32",
-            )
-            self.rgb_root = osp.join(dataset_root, "sequence/rgb_lmK")
-            self.depth_root = osp.join(dataset_root, "sequence/depth_lmK")
-            self.mask_visib_root = osp.join(dataset_root, "sequence/mask_visib_lmK")
-        else:  # hb
-            self.cam = np.array(
-                [[537.4799, 0, 318.8965], [0, 536.1447, 238.3781], [0, 0, 1]],
-                dtype="float32",
-            )
-            self.rgb_root = osp.join(dataset_root, "sequence/rgb")
-            self.depth_root = osp.join(dataset_root, "sequence/depth")
-            self.mask_visib_root = osp.join(dataset_root, "sequence/mask_visib")
-        assert osp.exists(self.rgb_root), self.rgb_root
-
-        self.with_masks = data_cfg.get("with_masks", True)
-        self.with_depth = data_cfg.get("with_depth", True)
-
-        self.height = data_cfg["height"]
-        self.width = data_cfg["width"]
-        self.cache_dir = data_cfg.get("cache_dir", osp.join(PROJ_ROOT, ".cache"))  # .cache
-        self.use_cache = data_cfg.get("use_cache", True)
-        self.num_to_load = data_cfg["num_to_load"]  # -1
-        self.filter_invalid = data_cfg["filter_invalid"]
-        ##################################################
-
-        # NOTE: careful! Only the selected objects
-        self.cat_ids = [cat_id for cat_id, obj_name in ref.hb_bdp.id2obj.items() if obj_name in ref.hb_bdp.objects]
-        self.lm_cat_ids = [cat_id for cat_id, obj_name in lm13_id2obj.items() if obj_name in ref.hb_bdp.objects]
-        # map selected objs to [0, num_objs-1]
-        self.cat2label = {v: i for i, v in enumerate(self.cat_ids)}  # id_map
-        self.lm_map_id = {k: v for k, v in zip(self.cat_ids, self.lm_cat_ids)}  # from hb label to lm label
-        self.label2cat = {label: cat for cat, label in self.cat2label.items()}
-        self.obj2label = OrderedDict((obj, obj_id) for obj_id, obj in enumerate(self.objs))
-        ##########################################################
-
-    def __call__(self):
-        hashed_file_name = hashlib.md5(
-            (
-                "".join([str(fn) for fn in self.objs])
-                + "dataset_dicts_{}_{}_{}_{}_{}".format(
-                    self.name,
-                    self.dataset_root,
-                    self.with_masks,
-                    self.with_depth,
-                    __name__,
-                )
-            ).encode("utf-8")
-        ).hexdigest()
-        cache_path = osp.join(
-            self.cache_dir,
-            "dataset_dicts_{}_{}.pkl".format(self.name, hashed_file_name),
-        )
-
-        if osp.exists(cache_path) and self.use_cache:
-            logger.info("load cached dataset dicts from {}".format(cache_path))
-            return mmcv.load(cache_path)
-
-        t_start = time.perf_counter()
-        dataset_dicts = []
-        im_id_global = 0
-
-        logger.info("loading dataset dicts: {}".format(self.name))
-        self.num_instances_without_valid_segmentation = 0
-        self.num_instances_without_valid_box = 0
-        # NOTE: converted from gt_v1, obj_id --> obj_id+1
-        gt_path = osp.join(self.dataset_root, "sequence/gt_v2.json")
-        gt_dict = mmcv.load(gt_path)
-
-        # determine which images to load by self.ann_files
-        sel_im_ids = []
-        for ann_file in self.ann_files:
-            with open(ann_file, "r") as f:
-                for line in f:
-                    line = line.strip("\r\n")
-                    cur_im_id = int(line)
-                    if cur_im_id not in sel_im_ids:
-                        sel_im_ids.append(cur_im_id)
-
-        for str_im_id, annos in tqdm(gt_dict.items()):  # str im ids
-            int_im_id = int(str_im_id)
-            if int_im_id not in sel_im_ids:
-                continue
-            rgb_path = osp.join(self.rgb_root, "color_{:06d}.png".format(int_im_id))
-            depth_path = osp.join(self.depth_root, "{:06d}.png".format(int_im_id))
-
-            scene_id = 2  # dummy (because in the whole test set, its scene id is 2)
-            record = {
-                "dataset_name": self.name,
-                "file_name": osp.relpath(rgb_path, PROJ_ROOT),
-                "depth_file": osp.relpath(depth_path, PROJ_ROOT),
-                "depth_factor": 1 / self.scale_to_meter,
-                "height": self.height,
-                "width": self.width,
-                "image_id": im_id_global,
-                "scene_im_id": "{}/{}".format(scene_id, int_im_id),  # for evaluation
-                "cam": self.cam,
-                "img_type": "real",
-            }
-            im_id_global += 1
-
-            inst_annos = []
-            for anno_i, anno in enumerate(annos):
-                obj_id = anno["obj_id"]
-                cls_name = ref.hb_bdp.id2obj[obj_id]
-                if cls_name not in self.objs:
-                    continue
-                if cls_name not in ref.hb_bdp.objects:  # only support 3 objects
-                    continue
-
-                cur_label = self.cat2label[obj_id]
-                lm_cur_label = self.lm_map_id[obj_id] - 1  # 0-based label
-
-                R = np.array(anno["cam_R_m2c"], dtype="float32").reshape(3, 3)
-                t = np.array(anno["cam_t_m2c"], dtype="float32") / 1000.0
-                pose = np.hstack([R, t.reshape(3, 1)])
-                if self.cam_type == "hb":
-                    bbox = anno["obj_bb"]
-                    bbox_mode = BoxMode.XYWH_ABS
-                elif self.cam_type == "linemod":
-                    # get bbox from projected points
-                    bbox = misc.compute_2d_bbox_xyxy_from_pose_v2(
-                        self.models[cur_label]["pts"].astype("float32"),
-                        pose.astype("float32"),
-                        self.cam,
-                        width=self.width,
-                        height=self.height,
-                        clip=True,
-                    )
-                    bbox_mode = BoxMode.XYXY_ABS
-                    x1, y1, x2, y2 = bbox
-                    w = x2 - x1
-                    h = y2 - y1
-                else:
-                    raise ValueError("Wrong cam type: {}".format(self.cam_type))
-
-                if self.filter_invalid:
-                    if h <= 1 or w <= 1:
-                        self.num_instances_without_valid_box += 1
-                        continue
-
-                mask_visib_file = osp.join(
-                    self.mask_visib_root,
-                    "{:06d}_{:06d}.png".format(int_im_id, anno_i),
-                )
-                assert osp.exists(mask_visib_file), mask_visib_file
-                # load mask visib  TODO: load both mask_visib and mask_full
-                mask_single = mmcv.imread(mask_visib_file, "unchanged")
-                area = mask_single.sum()
-                if area < 3:  # filter out too small or nearly invisible instances
-                    self.num_instances_without_valid_segmentation += 1
-                    continue
-                mask_rle = binary_mask_to_rle(mask_single, compressed=True)
-
-                quat = mat2quat(R).astype("float32")
-
-                proj = (record["cam"] @ t.T).T
-                proj = proj[:2] / proj[2]
-
-                inst = {
-                    "category_id": lm_cur_label,  # 0-based label
-                    "bbox": bbox,
-                    "bbox_mode": bbox_mode,
-                    "pose": pose,
-                    "quat": quat,
-                    "trans": t,
-                    "centroid_2d": proj,  # absolute (cx, cy)
-                    "segmentation": mask_rle,
-                }
-
-                # NOTE: currently no xyz
-                # if "test" not in self.name:
-                #     xyz_path = osp.join(xyz_root, f"{int_im_id:06d}_{anno_i:06d}.pkl")
-                #     assert osp.exists(xyz_path), xyz_path
-                #     inst["xyz_path"] = xyz_path
-
-                model_info = self.models_info[str(obj_id)]
-                inst["model_info"] = model_info
-                for key in ["bbox3d_and_center"]:
-                    inst[key] = self.models[lm_cur_label][key]
-
-                inst_annos.append(inst)
-            if len(inst_annos) == 0 and self.filter_invalid:  # filter im without anno
-                continue
-            record["annotations"] = inst_annos
-            dataset_dicts.append(record)
-
-        if self.num_instances_without_valid_segmentation > 0:
-            logger.warning(
-                "Filtered out {} instances without valid segmentation. "
-                "There might be issues in your dataset generation process.".format(
-                    self.num_instances_without_valid_segmentation
-                )
-            )
-        if self.num_instances_without_valid_box > 0:
-            logger.warning(
-                "Filtered out {} instances without valid box. "
-                "There might be issues in your dataset generation process.".format(self.num_instances_without_valid_box)
-            )
-        ##########################################################################
-        if self.num_to_load > 0:
-            self.num_to_load = min(int(self.num_to_load), len(dataset_dicts))
-            dataset_dicts = dataset_dicts[: self.num_to_load]
-        logger.info(
-            "loaded dataset dicts, num_images: {}, using {}s".format(len(dataset_dicts), time.perf_counter() - t_start)
-        )
-
-        mmcv.mkdir_or_exist(osp.dirname(cache_path))
-        mmcv.dump(dataset_dicts, cache_path, protocol=4)
-        logger.info("Dumped dataset_dicts to {}".format(cache_path))
-        return dataset_dicts
-
-    @lazy_property
-    def models_info(self):
-        models_info_path = osp.join(self.models_root, "models_info.json")
-        assert osp.exists(models_info_path), models_info_path
-        models_info = mmcv.load(models_info_path)  # key is str(obj_id)
-        return models_info
-
-    @lazy_property
-    def models(self):
-        """Load models into a list."""
-        cache_path = osp.join(PROJ_ROOT, ".cache", "models_{}.pkl".format("_".join(self.objs)))
-        if osp.exists(cache_path) and self.use_cache:
-            # logger.info("load cached object models from {}".format(cache_path))
-            return mmcv.load(cache_path)
-
-        models = []
-        for obj_name in self.objs:
-            if obj_name not in ref.hb_bdp.objects:
-                models.append(None)
-                continue
-            model = inout.load_ply(
-                osp.join(
-                    self.models_root,
-                    "obj_{:06d}.ply".format(ref.hb_bdp.obj2id[obj_name]),
-                ),
-                vertex_scale=self.scale_to_meter,
-            )
-            # NOTE: the bbox3d_and_center is not obtained from centered vertices
-            # for BOP models, not a big problem since they had been centered
-            model["bbox3d_and_center"] = misc.get_bbox3d_and_center(model["pts"])
-
-            models.append(model)
-        logger.info("cache models to {}".format(cache_path))
-        mmcv.mkdir_or_exist(osp.dirname(cache_path))
-        mmcv.dump(models, cache_path, protocol=4)
-        return models
-
-    def image_aspect_ratio(self):
-        return self.width / self.height  # 4/3
-
-
-########### register datasets ############################################################
-
-
-def get_hb_bdp_metadata(obj_names, ref_key):
-    """task specific metadata."""
-    data_ref = ref.__dict__[ref_key]
-
-    cur_sym_infos = {}  # label based key
-    loaded_models_info = data_ref.get_models_info()
-
-    for i, obj_name in enumerate(obj_names):
-        if obj_name not in data_ref.objects:
-            sym_info = None
-            continue
-        obj_id = data_ref.obj2id[obj_name]
-        model_info = loaded_models_info[str(obj_id)]
-        if "symmetries_discrete" in model_info or "symmetries_continuous" in model_info:
-            sym_transforms = misc.get_symmetry_transformations(model_info, max_sym_disc_step=0.01)
-            sym_info = np.array([sym["R"] for sym in sym_transforms], dtype=np.float32)
-        else:
-            sym_info = None
-        cur_sym_infos[i] = sym_info
-
-    meta = {"thing_classes": obj_names, "sym_infos": cur_sym_infos}
-    return meta
-
-
-lm13_id2obj = {
-    1: "ape",
-    2: "benchvise",
-    3: "camera",
-    4: "can",
-    5: "cat",
-    6: "driller",
-    7: "duck",
-    8: "eggbox",
-    9: "glue",
-    10: "holepuncher",
-    11: "iron",
-    12: "lamp",
-    13: "phone",
-}  # no bowl, cup
-
-SPLITS_HB_BenchviseDrillerPhone = dict(
-    # TODO: maybe add scene name
-    hb_benchvise_driller_phone_all_lmK=dict(
-        name="hb_benchvise_driller_phone_all_lmK",
-        dataset_root=osp.join(DATASETS_ROOT, "hb_bench_driller_phone"),
-        models_root=osp.join(DATASETS_ROOT, "hb_bench_driller_phone/models_lm/"),
-        ann_files=[osp.join(DATASETS_ROOT, "hb_bench_driller_phone/image_set/all.txt")],
-        objs=["benchvise", "driller", "phone"],
-        use_cache=True,
-        num_to_load=-1,
-        cam_type="linemod",
-        scale_to_meter=0.001,
-        filter_invalid=False,
-        height=480,
-        width=640,
-        ref_key="hb_bdp",
-    ),
-    hb_benchvise_driller_phone_all=dict(
-        name="hb_benchvise_driller_phone_all",
-        dataset_root=osp.join(DATASETS_ROOT, "hb_bench_driller_phone"),
-        models_root=osp.join(DATASETS_ROOT, "hb_bench_driller_phone/models_lm/"),
-        ann_files=[osp.join(DATASETS_ROOT, "hb_bench_driller_phone/image_set/all.txt")],
-        objs=["benchvise", "driller", "phone"],
-        use_cache=True,
-        num_to_load=-1,
-        cam_type="hb",  # NOTE: hb K
-        scale_to_meter=0.001,
-        filter_invalid=False,
-        height=480,
-        width=640,
-        ref_key="hb_bdp",
-    ),
-    hb_benchvise_driller_phone_test_lmK=dict(
-        name="hb_benchvise_driller_phone_test_lmK",
-        dataset_root=osp.join(DATASETS_ROOT, "hb_bench_driller_phone"),
-        models_root=osp.join(DATASETS_ROOT, "hb_bench_driller_phone/models_lm/"),
-        ann_files=[osp.join(DATASETS_ROOT, "hb_bench_driller_phone/image_set/test.txt")],
-        objs=["benchvise", "driller", "phone"],
-        use_cache=True,
-        num_to_load=-1,
-        cam_type="linemod",
-        scale_to_meter=0.001,
-        filter_invalid=False,
-        height=480,
-        width=640,
-        ref_key="hb_bdp",
-    ),
-    hb2lm_benchvise_driller_phone_test_lmK=dict(
-        name="hb2lm_benchvise_driller_phone_test_lmK",
-        dataset_root=osp.join(DATASETS_ROOT, "hb_bench_driller_phone"),
-        models_root=osp.join(DATASETS_ROOT, "hb_bench_driller_phone/models_lm/"),
-        ann_files=[osp.join(DATASETS_ROOT, "hb_bench_driller_phone/image_set/test.txt")],
-        objs=[v for v in lm13_id2obj.values()],  # pretend to have 13 classed of objs
-        use_cache=True,
-        num_to_load=-1,
-        cam_type="linemod",
-        scale_to_meter=0.001,
-        filter_invalid=False,
-        height=480,
-        width=640,
-        ref_key="hb_bdp",
-    ),
-    hb_benchvise_driller_phone_test=dict(
-        name="hb_benchvise_driller_phone_test",
-        dataset_root=osp.join(DATASETS_ROOT, "hb_bench_driller_phone"),
-        models_root=osp.join(DATASETS_ROOT, "hb_bench_driller_phone/models_lm/"),
-        ann_files=[osp.join(DATASETS_ROOT, "hb_bench_driller_phone/image_set/test.txt")],
-        objs=["benchvise", "driller", "phone"],
-        use_cache=True,
-        num_to_load=-1,
-        cam_type="hb",
-        scale_to_meter=0.001,
-        filter_invalid=False,
-        height=480,
-        width=640,
-        ref_key="hb_bdp",
-    ),
-)
-
-
-# add varying percent splits
-VARY_PERCENT_SPLITS = [
-    "test100",
-    "train090",
-    "train180",
-    "train270",
-    "train360",
-    "train450",
-    "train540",
-    "train630",
-    "train720",
-    "train810",
-    "train900",
-]
-
-# all objects
-for _split in VARY_PERCENT_SPLITS:
-    for cam_type in ["linemod", "hb"]:
-        K_str = "_lmK" if cam_type == "linemod" else ""
-        name = "hb_benchvise_driller_phone_{}{}".format(_split, K_str)
-        if name not in SPLITS_HB_BenchviseDrillerPhone:
-            SPLITS_HB_BenchviseDrillerPhone[name] = dict(
-                name=name,
-                dataset_root=osp.join(DATASETS_ROOT, "hb_bench_driller_phone"),
-                models_root=osp.join(DATASETS_ROOT, "hb_bench_driller_phone/models_lm/"),
-                ann_files=[
-                    osp.join(
-                        DATASETS_ROOT,
-                        f"hb_bench_driller_phone/image_set/{_split}.txt",
-                    )
-                ],
-                objs=["benchvise", "driller", "phone"],
-                use_cache=True,
-                num_to_load=-1,
-                cam_type=cam_type,
-                scale_to_meter=0.001,
-                filter_invalid=False,
-                height=480,
-                width=640,
-                ref_key="hb_bdp",
-            )
-
-# single obj splits
-for obj in ref.hb_bdp.objects:
-    for split in ["test", "train", "all"] + VARY_PERCENT_SPLITS:
-        for cam_type in ["linemod", "hb"]:
-            K_str = "_lmK" if cam_type == "linemod" else ""
-            name = "hb_bdp_{}_{}{}".format(obj, split, K_str)
-            if name not in SPLITS_HB_BenchviseDrillerPhone:
-                SPLITS_HB_BenchviseDrillerPhone[name] = dict(
-                    name=name,
-                    dataset_root=osp.join(DATASETS_ROOT, "hb_bench_driller_phone"),
-                    models_root=osp.join(DATASETS_ROOT, "hb_bench_driller_phone/models_lm/"),
-                    ann_files=[
-                        osp.join(
-                            DATASETS_ROOT,
-                            f"hb_bench_driller_phone/image_set/{split}.txt",
-                        )
-                    ],
-                    objs=[obj],
-                    use_cache=True,
-                    num_to_load=-1,
-                    cam_type=cam_type,
-                    scale_to_meter=0.001,
-                    filter_invalid=False,
-                    height=480,
-                    width=640,
-                    ref_key="hb_bdp",
-                )
-
-
-def register_with_name_cfg(name, data_cfg=None):
-    """Assume pre-defined datasets live in `./datasets`.
-
-    Args:
-        name: datasnet_name,
-        data_cfg: if name is in existing SPLITS, use pre-defined data_cfg
-            otherwise requires data_cfg
-            data_cfg can be set in cfg.DATA_CFG.name
-    """
-    dprint("register dataset: {}".format(name))
-    if name in SPLITS_HB_BenchviseDrillerPhone:
-        used_cfg = SPLITS_HB_BenchviseDrillerPhone[name]
-    else:
-        assert data_cfg is not None, f"dataset name {name} is not registered"
-        used_cfg = data_cfg
-    DatasetCatalog.register(name, HB_BenchDrillerPhone(used_cfg))
-    # something like eval_types
-    MetadataCatalog.get(name).set(
-        ref_key=used_cfg["ref_key"],
-        objs=used_cfg["objs"],
-        eval_error_types=["ad", "rete", "proj"],
-        evaluator_type="coco_bop",  # NOTE: should not be bop
-        **get_hb_bdp_metadata(obj_names=used_cfg["objs"], ref_key=used_cfg["ref_key"]),
-    )
-
-
-def get_available_datasets():
-    return list(SPLITS_HB_BenchviseDrillerPhone.keys())
-
-
-#### tests ###############################################
-def test_vis():
-    # python -m core.datasets.lm_dataset_d2 lmo_syn_vispy_train
-    dset_name = sys.argv[1]
-    assert dset_name in DatasetCatalog.list()
-
-    meta = MetadataCatalog.get(dset_name)
-    dprint("MetadataCatalog: ", meta)
-    objs = meta.objs
-
-    t_start = time.perf_counter()
-    dicts = DatasetCatalog.get(dset_name)
-    logger.info("Done loading {} samples with {:.3f}s.".format(len(dicts), time.perf_counter() - t_start))
-
-    dirname = "output/{}-data-vis".format(dset_name)
-    os.makedirs(dirname, exist_ok=True)
-    for d in dicts:
-        img = read_image_mmcv(d["file_name"], format="BGR")
-        depth = mmcv.imread(d["depth_file"], "unchanged") / 1000.0
-
-        imH, imW = img.shape[:2]
-        annos = d["annotations"]
-        masks = [cocosegm2mask(anno["segmentation"], imH, imW) for anno in annos]
-        bboxes = [anno["bbox"] for anno in annos]
-        bbox_modes = [anno["bbox_mode"] for anno in annos]
-        bboxes_xyxy = np.array(
-            [BoxMode.convert(box, box_mode, BoxMode.XYXY_ABS) for box, box_mode in zip(bboxes, bbox_modes)]
-        )
-        kpts_3d_list = [anno["bbox3d_and_center"] for anno in annos]
-        quats = [anno["quat"] for anno in annos]
-        transes = [anno["trans"] for anno in annos]
-        Rs = [quat2mat(quat) for quat in quats]
-        # 0-based label
-        cat_ids = [anno["category_id"] for anno in annos]
-        K = d["cam"]
-        kpts_2d = [misc.project_pts(kpt3d, K, R, t) for kpt3d, R, t in zip(kpts_3d_list, Rs, transes)]
-        # # TODO: visualize pose and keypoints
-        labels = [objs[cat_id] for cat_id in cat_ids]
-        for _i in range(len(annos)):
-            img_vis = vis_image_mask_bbox_cv2(
-                img,
-                masks[_i : _i + 1],
-                bboxes=bboxes_xyxy[_i : _i + 1],
-                labels=labels[_i : _i + 1],
-            )
-            img_vis_kpts2d = misc.draw_projected_box3d(img_vis.copy(), kpts_2d[_i])
-
-            grid_show(
-                [
-                    img[:, :, [2, 1, 0]],
-                    img_vis[:, :, [2, 1, 0]],
-                    img_vis_kpts2d[:, :, [2, 1, 0]],
-                    depth,
-                ],
-                ["img", "vis_img", "img_vis_kpts2d", "depth"],
-                row=2,
-                col=2,
-            )
-
-
-if __name__ == "__main__":
-    """Test the  dataset loader.
-
-    Usage:
-        python -m this_module dataset_name
-    """
-    from lib.vis_utils.image import grid_show
-    from lib.utils.setup_logger import setup_my_logger
-    import detectron2.data.datasets  # noqa # add pre-defined metadata
-    from lib.vis_utils.image import (
-        vis_image_mask_bbox_cv2,
-        vis_image_bboxes_cv2,
-    )
-    from lib.utils.mask_utils import cocosegm2mask
-    from lib.utils.bbox_utils import xywh_to_xyxy
-    from core.utils.data_utils import read_image_mmcv
-
-    print("sys.argv:", sys.argv)
-    logger = setup_my_logger(name="core")
-
-    register_with_name_cfg(sys.argv[1])
-    print("dataset catalog: ", DatasetCatalog.list())
-    test_vis()
diff --git a/det/yolox/data/datasets/lm_dataset_d2.py b/det/yolox/data/datasets/lm_dataset_d2.py
new file mode 100644
index 0000000..269c1e3
--- /dev/null
+++ b/det/yolox/data/datasets/lm_dataset_d2.py
@@ -0,0 +1,701 @@
+import hashlib
+import logging
+import os
+import os.path as osp
+import sys
+import time
+from collections import OrderedDict
+import mmcv
+import numpy as np
+from tqdm import tqdm
+from transforms3d.quaternions import mat2quat, quat2mat
+from detectron2.data import DatasetCatalog, MetadataCatalog
+from detectron2.structures import BoxMode
+
+cur_dir = osp.dirname(osp.abspath(__file__))
+PROJ_ROOT = osp.normpath(osp.join(cur_dir, "../../../.."))
+sys.path.insert(0, PROJ_ROOT)
+
+import ref
+
+from lib.pysixd import inout, misc
+from lib.utils.mask_utils import binary_mask_to_rle, cocosegm2mask
+from lib.utils.utils import dprint, lazy_property
+
+
+logger = logging.getLogger(__name__)
+DATASETS_ROOT = osp.normpath(osp.join(PROJ_ROOT, "datasets"))
+
+
+class LM_Dataset(object):
+    """lm splits."""
+
+    def __init__(self, data_cfg):
+        """
+        Set with_depth and with_masks default to True,
+        and decide whether to load them into dataloader/network later
+        with_masks:
+        """
+        self.name = data_cfg["name"]
+        self.data_cfg = data_cfg
+
+        self.objs = data_cfg["objs"]  # selected objects
+
+        self.ann_files = data_cfg["ann_files"]  # idx files with image ids
+        self.image_prefixes = data_cfg["image_prefixes"]
+
+        self.dataset_root = data_cfg["dataset_root"]  # BOP_DATASETS/lm/
+        self.models_root = data_cfg["models_root"]  # BOP_DATASETS/lm/models
+        self.scale_to_meter = data_cfg["scale_to_meter"]  # 0.001
+
+        self.with_masks = data_cfg["with_masks"]  # True (load masks but may not use it)
+        self.with_depth = data_cfg["with_depth"]  # True (load depth path here, but may not use it)
+        self.depth_factor = data_cfg["depth_factor"]  # 1000.0
+
+        self.cam_type = data_cfg["cam_type"]
+        self.cam = data_cfg["cam"]  #
+        self.height = data_cfg["height"]  # 480
+        self.width = data_cfg["width"]  # 640
+
+        self.cache_dir = data_cfg.get("cache_dir", osp.join(PROJ_ROOT, ".cache"))  # .cache
+        self.use_cache = data_cfg.get("use_cache", True)
+        self.num_to_load = data_cfg["num_to_load"]  # -1
+        self.filter_invalid = data_cfg["filter_invalid"]
+        self.filter_scene = data_cfg.get("filter_scene", False)
+        ##################################################
+        if self.cam is None:
+            assert self.cam_type in ["local", "dataset"]
+            if self.cam_type == "dataset":
+                self.cam = np.array(
+                    [
+                        [572.4114, 0, 325.2611],
+                        [0, 573.57043, 242.04899],
+                        [0, 0, 1],
+                    ]
+                )
+            elif self.cam_type == "local":
+                # self.cam = np.array([[539.8100, 0, 318.2700], [0, 539.8300, 239.5600], [0, 0, 1]])
+                # yapf: disable
+                self.cam = np.array(
+                    [[518.81993115, 0.,           320.50653699],
+                     [0.,           518.86581081, 243.5604188 ],
+                     [0.,           0.,           1.          ]])
+                # yapf: enable
+                # RMS: 0.14046169348724977
+                # camera matrix:
+                # [[518.81993115   0.         320.50653699]
+                # [  0.         518.86581081 243.5604188 ]
+                # [  0.           0.           1.        ]]
+                # distortion coefficients:  [ 0.04147325 -0.21469544 -0.00053707 -0.00251986  0.17406399]
+
+        # NOTE: careful! Only the selected objects
+        self.cat_ids = [cat_id for cat_id, obj_name in ref.lm_full.id2obj.items() if obj_name in self.objs]
+        # map selected objs to [0, num_objs-1]
+        self.cat2label = {v: i for i, v in enumerate(self.cat_ids)}  # id_map
+        self.label2cat = {label: cat for cat, label in self.cat2label.items()}
+        self.obj2label = OrderedDict((obj, obj_id) for obj_id, obj in enumerate(self.objs))
+        ##########################################################
+
+    def __call__(self):  # LM_Dataset
+        """Load light-weight instance annotations of all images into a list of
+        dicts in Detectron2 format.
+
+        Do not load heavy data into memory in this file, since we will
+        load the annotations of all images into memory.
+        """
+        # cache the dataset_dicts to avoid loading masks from files
+        hashed_file_name = hashlib.md5(
+            (
+                "".join([str(fn) for fn in self.objs])
+                + "dataset_dicts_{}_{}_{}_{}_{}_{}".format(
+                    self.name,
+                    self.dataset_root,
+                    self.with_masks,
+                    self.with_depth,
+                    self.cam_type,
+                    __name__,
+                )
+            ).encode("utf-8")
+        ).hexdigest()
+        cache_path = osp.join(
+            self.cache_dir,
+            "dataset_dicts_{}_{}.pkl".format(self.name, hashed_file_name),
+        )
+
+        if osp.exists(cache_path) and self.use_cache:
+            logger.info("load cached dataset dicts from {}".format(cache_path))
+            return mmcv.load(cache_path)
+
+        t_start = time.perf_counter()
+
+        logger.info("loading dataset dicts: {}".format(self.name))
+        self.num_instances_without_valid_segmentation = 0
+        self.num_instances_without_valid_box = 0
+        dataset_dicts = []  #######################################################
+        assert len(self.ann_files) == len(self.image_prefixes), f"{len(self.ann_files)} != {len(self.image_prefixes)}"
+        unique_im_id = 0
+        for ann_file, scene_root in zip(tqdm(self.ann_files), self.image_prefixes):
+            # linemod each scene is an object
+            with open(ann_file, "r") as f_ann:
+                indices = [line.strip("\r\n") for line in f_ann.readlines()]  # string ids
+            gt_dict = mmcv.load(osp.join(scene_root, "scene_gt.json"))
+            gt_info_dict = mmcv.load(osp.join(scene_root, "scene_gt_info.json"))  # bbox_obj, bbox_visib
+            for im_id in tqdm(indices):
+                int_im_id = int(im_id)
+                str_im_id = str(int_im_id)
+                rgb_path = osp.join(scene_root, "rgb/{:06d}.png").format(int_im_id)
+                assert osp.exists(rgb_path), rgb_path
+
+                depth_path = osp.join(scene_root, "depth/{:06d}.png".format(int_im_id))
+
+                scene_id = int(rgb_path.split("/")[-3])
+                scene_im_id = f"{scene_id}/{int_im_id}"
+                if self.filter_scene:
+                    if scene_id not in self.cat_ids:
+                        continue
+                record = {
+                    "dataset_name": self.name,
+                    "file_name": osp.relpath(rgb_path, PROJ_ROOT),
+                    "depth_file": osp.relpath(depth_path, PROJ_ROOT),
+                    "height": self.height,
+                    "width": self.width,
+                    "image_id": unique_im_id,
+                    "scene_im_id": scene_im_id,  # for evaluation
+                    "cam": self.cam,
+                    "img_type": "real",
+                }
+                unique_im_id += 1
+                insts = []
+                for anno_i, anno in enumerate(gt_dict[str_im_id]):
+                    obj_id = anno["obj_id"]
+                    if obj_id not in self.cat_ids:
+                        continue
+                    cur_label = self.cat2label[obj_id]  # 0-based label
+                    R = np.array(anno["cam_R_m2c"], dtype="float32").reshape(3, 3)
+                    t = np.array(anno["cam_t_m2c"], dtype="float32") / 1000.0
+                    pose = np.hstack([R, t.reshape(3, 1)])
+                    quat = mat2quat(R).astype("float32")
+
+                    proj = (record["cam"] @ t.T).T
+                    proj = proj[:2] / proj[2]
+
+                    bbox_visib = gt_info_dict[str_im_id][anno_i]["bbox_visib"]
+                    bbox_obj = gt_info_dict[str_im_id][anno_i]["bbox_obj"]
+                    x1, y1, w, h = bbox_visib
+                    if self.filter_invalid:
+                        if h <= 1 or w <= 1:
+                            self.num_instances_without_valid_box += 1
+                            continue
+
+                    mask_file = osp.join(
+                        scene_root,
+                        "mask/{:06d}_{:06d}.png".format(int_im_id, anno_i),
+                    )
+                    mask_visib_file = osp.join(
+                        scene_root,
+                        "mask_visib/{:06d}_{:06d}.png".format(int_im_id, anno_i),
+                    )
+                    assert osp.exists(mask_file), mask_file
+                    assert osp.exists(mask_visib_file), mask_visib_file
+                    # load mask visib  TODO: load both mask_visib and mask_full
+                    mask_single = mmcv.imread(mask_visib_file, "unchanged")
+                    mask_single = mask_single.astype("bool")
+                    area = mask_single.sum()
+                    if area < 3:  # filter out too small or nearly invisible instances
+                        self.num_instances_without_valid_segmentation += 1
+                        continue
+                    mask_rle = binary_mask_to_rle(mask_single, compressed=True)
+                    inst = {
+                        "category_id": cur_label,  # 0-based label
+                        "bbox": bbox_visib,  # TODO: load both bbox_obj and bbox_visib
+                        "bbox_mode": BoxMode.XYWH_ABS,
+                        "pose": pose,
+                        "quat": quat,
+                        "trans": t,
+                        "centroid_2d": proj,  # absolute (cx, cy)
+                        "segmentation": mask_rle,
+                        "mask_full_file": mask_file,  # TODO: load as mask_full, rle
+                    }
+                    for key in ["bbox3d_and_center"]:
+                        inst[key] = self.models[cur_label][key]
+                    insts.append(inst)
+                if len(insts) == 0:  # filter im without anno
+                    continue
+                record["annotations"] = insts
+                dataset_dicts.append(record)
+
+        if self.num_instances_without_valid_segmentation > 0:
+            logger.warning(
+                "Filtered out {} instances without valid segmentation. "
+                "There might be issues in your dataset generation process.".format(
+                    self.num_instances_without_valid_segmentation
+                )
+            )
+        if self.num_instances_without_valid_box > 0:
+            logger.warning(
+                "Filtered out {} instances without valid box. "
+                "There might be issues in your dataset generation process.".format(self.num_instances_without_valid_box)
+            )
+        ##########################################################################
+        if self.num_to_load > 0:
+            self.num_to_load = min(int(self.num_to_load), len(dataset_dicts))
+            dataset_dicts = dataset_dicts[: self.num_to_load]
+        logger.info("loaded {} dataset dicts, using {}s".format(len(dataset_dicts), time.perf_counter() - t_start))
+
+        mmcv.mkdir_or_exist(osp.dirname(cache_path))
+        mmcv.dump(dataset_dicts, cache_path, protocol=4)
+        logger.info("Dumped dataset_dicts to {}".format(cache_path))
+        return dataset_dicts
+
+    @lazy_property
+    def models(self):
+        """Load models into a list."""
+        cache_path = osp.join(self.cache_dir, "models_{}.pkl".format("_".join(self.objs)))
+        if osp.exists(cache_path) and self.use_cache:
+            # dprint("{}: load cached object models from {}".format(self.name, cache_path))
+            return mmcv.load(cache_path)
+
+        models = []
+        for obj_name in self.objs:
+            model = inout.load_ply(
+                osp.join(
+                    self.models_root,
+                    f"obj_{ref.lm_full.obj2id[obj_name]:06d}.ply",
+                ),
+                vertex_scale=self.scale_to_meter,
+            )
+            # NOTE: the bbox3d_and_center is not obtained from centered vertices
+            # for BOP models, not a big problem since they had been centered
+            model["bbox3d_and_center"] = misc.get_bbox3d_and_center(model["pts"])
+
+            models.append(model)
+        logger.info("cache models to {}".format(cache_path))
+        mmcv.mkdir_or_exist(osp.dirname(cache_path))
+        mmcv.dump(models, cache_path, protocol=4)
+        return models
+
+    def image_aspect_ratio(self):
+        return self.width / self.height  # 4/3
+
+
+########### register datasets ############################################################
+
+
+def get_lm_metadata(obj_names):
+    # task specific metadata
+    meta = {"thing_classes": obj_names}
+    return meta
+
+
+LM_13_OBJECTS = [
+    "ape",
+    "benchvise",
+    "camera",
+    "can",
+    "cat",
+    "driller",
+    "duck",
+    "eggbox",
+    "glue",
+    "holepuncher",
+    "iron",
+    "lamp",
+    "phone",
+]  # no bowl, cup
+LM_OCC_OBJECTS = [
+    "ape",
+    "can",
+    "cat",
+    "driller",
+    "duck",
+    "eggbox",
+    "glue",
+    "holepuncher",
+]
+################################################################################
+
+SPLITS_LM = dict(
+    lm_13_train=dict(
+        name="lm_13_train",
+        dataset_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/"),
+        models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/models"),
+        objs=LM_13_OBJECTS,  # selected objects
+        ann_files=[
+            osp.join(
+                DATASETS_ROOT,
+                "BOP_DATASETS/lm/image_set/{}_{}.txt".format(_obj, "train"),
+            )
+            for _obj in LM_13_OBJECTS
+        ],
+        image_prefixes=[
+            osp.join(
+                DATASETS_ROOT,
+                "BOP_DATASETS/lm/test/{:06d}".format(ref.lm_full.obj2id[_obj]),
+            )
+            for _obj in LM_13_OBJECTS
+        ],
+        scale_to_meter=0.001,
+        with_masks=True,  # (load masks but may not use it)
+        with_depth=True,  # (load depth path here, but may not use it)
+        depth_factor=1000.0,
+        cam_type="dataset",
+        cam=ref.lm_full.camera_matrix,
+        height=480,
+        width=640,
+        cache_dir=osp.join(PROJ_ROOT, ".cache"),
+        use_cache=True,
+        num_to_load=-1,
+        filter_scene=True,
+        filter_invalid=True,
+        ref_key="lm_full",
+    ),
+    lm_13_test=dict(
+        name="lm_13_test",
+        dataset_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/"),
+        models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/models"),
+        objs=LM_13_OBJECTS,
+        ann_files=[
+            osp.join(
+                DATASETS_ROOT,
+                "BOP_DATASETS/lm/image_set/{}_{}.txt".format(_obj, "test"),
+            )
+            for _obj in LM_13_OBJECTS
+        ],
+        # NOTE: scene root
+        image_prefixes=[
+            osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/test/{:06d}").format(ref.lm_full.obj2id[_obj])
+            for _obj in LM_13_OBJECTS
+        ],
+        scale_to_meter=0.001,
+        with_masks=True,  # (load masks but may not use it)
+        with_depth=True,  # (load depth path here, but may not use it)
+        depth_factor=1000.0,
+        cam_type="dataset",
+        cam=ref.lm_full.camera_matrix,
+        height=480,
+        width=640,
+        cache_dir=osp.join(PROJ_ROOT, ".cache"),
+        use_cache=True,
+        num_to_load=-1,
+        filter_scene=True,
+        filter_invalid=False,
+        ref_key="lm_full",
+    ),
+    lm_13_all=dict(
+        name="lm_13_all",  # for get all real bboxes
+        dataset_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/"),
+        models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/models"),
+        objs=LM_13_OBJECTS,
+        ann_files=[
+            osp.join(
+                DATASETS_ROOT,
+                "BOP_DATASETS/lm/image_set/{}_{}.txt".format(_obj, "all"),
+            )
+            for _obj in LM_13_OBJECTS
+        ],
+        # NOTE: scene root
+        image_prefixes=[
+            osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/test/{:06d}").format(ref.lm_full.obj2id[_obj])
+            for _obj in LM_13_OBJECTS
+        ],
+        scale_to_meter=0.001,
+        with_masks=True,  # (load masks but may not use it)
+        with_depth=True,  # (load depth path here, but may not use it)
+        depth_factor=1000.0,
+        cam_type="dataset",
+        cam=ref.lm_full.camera_matrix,
+        height=480,
+        width=640,
+        cache_dir=osp.join(PROJ_ROOT, ".cache"),
+        use_cache=True,
+        num_to_load=-1,
+        filter_scene=True,
+        filter_invalid=False,
+        ref_key="lm_full",
+    ),
+    lmo_train=dict(
+        name="lmo_train",
+        # use lm real all (8 objects) to train for lmo
+        dataset_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/"),
+        models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/models"),
+        objs=LM_OCC_OBJECTS,  # selected objects
+        ann_files=[
+            osp.join(
+                DATASETS_ROOT,
+                "BOP_DATASETS/lm/image_set/{}_{}.txt".format(_obj, "all"),
+            )
+            for _obj in LM_OCC_OBJECTS
+        ],
+        image_prefixes=[
+            osp.join(
+                DATASETS_ROOT,
+                "BOP_DATASETS/lm/test/{:06d}".format(ref.lmo_full.obj2id[_obj]),
+            )
+            for _obj in LM_OCC_OBJECTS
+        ],
+        scale_to_meter=0.001,
+        with_masks=True,  # (load masks but may not use it)
+        with_depth=True,  # (load depth path here, but may not use it)
+        depth_factor=1000.0,
+        cam_type="dataset",
+        cam=ref.lmo_full.camera_matrix,
+        height=480,
+        width=640,
+        cache_dir=osp.join(PROJ_ROOT, ".cache"),
+        use_cache=True,
+        num_to_load=-1,
+        filter_scene=True,
+        filter_invalid=True,
+        ref_key="lmo_full",
+    ),
+    lmo_NoBopTest_train=dict(
+        name="lmo_NoBopTest_train",
+        dataset_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/"),
+        models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/models"),
+        objs=LM_OCC_OBJECTS,
+        ann_files=[osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/image_set/lmo_no_bop_test.txt")],
+        # NOTE: scene root
+        image_prefixes=[osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/test/{:06d}").format(2)],
+        scale_to_meter=0.001,
+        with_masks=True,  # (load masks but may not use it)
+        with_depth=True,  # (load depth path here, but may not use it)
+        depth_factor=1000.0,
+        cam_type="dataset",
+        cam=ref.lmo_full.camera_matrix,
+        height=480,
+        width=640,
+        cache_dir=osp.join(PROJ_ROOT, ".cache"),
+        use_cache=True,
+        num_to_load=-1,
+        filter_scene=False,
+        filter_invalid=True,
+        ref_key="lmo_full",
+    ),
+    lmo_test=dict(
+        name="lmo_test",
+        dataset_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/"),
+        models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/models"),
+        objs=LM_OCC_OBJECTS,
+        ann_files=[osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/image_set/lmo_test.txt")],
+        # NOTE: scene root
+        image_prefixes=[osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/test/{:06d}").format(2)],
+        scale_to_meter=0.001,
+        with_masks=True,  # (load masks but may not use it)
+        with_depth=True,  # (load depth path here, but may not use it)
+        depth_factor=1000.0,
+        cam_type="dataset",
+        cam=ref.lmo_full.camera_matrix,
+        height=480,
+        width=640,
+        cache_dir=osp.join(PROJ_ROOT, ".cache"),
+        use_cache=True,
+        num_to_load=-1,
+        filter_scene=False,
+        filter_invalid=False,
+        ref_key="lmo_full",
+    ),
+    lmo_bop_test=dict(
+        name="lmo_bop_test",
+        dataset_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/"),
+        models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/models"),
+        objs=LM_OCC_OBJECTS,
+        ann_files=[osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/image_set/lmo_bop_test.txt")],
+        # NOTE: scene root
+        image_prefixes=[osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/test/{:06d}").format(2)],
+        scale_to_meter=0.001,
+        with_masks=True,  # (load masks but may not use it)
+        with_depth=True,  # (load depth path here, but may not use it)
+        depth_factor=1000.0,
+        cam_type="dataset",
+        cam=ref.lmo_full.camera_matrix,
+        height=480,
+        width=640,
+        cache_dir=osp.join(PROJ_ROOT, ".cache"),
+        use_cache=True,
+        num_to_load=-1,
+        filter_scene=False,
+        filter_invalid=False,
+        ref_key="lmo_full",
+    ),
+)
+
+# single obj splits for lm real
+for obj in ref.lm_full.objects:
+    for split in ["train", "test", "all"]:
+        name = "lm_real_{}_{}".format(obj, split)
+        ann_files = [
+            osp.join(
+                DATASETS_ROOT,
+                "BOP_DATASETS/lm/image_set/{}_{}.txt".format(obj, split),
+            )
+        ]
+        if split in ["train", "all"]:  # all is used to train lmo
+            filter_invalid = True
+        elif split in ["test"]:
+            filter_invalid = False
+        else:
+            raise ValueError("{}".format(split))
+        if name not in SPLITS_LM:
+            SPLITS_LM[name] = dict(
+                name=name,
+                dataset_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/"),
+                models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/models"),
+                objs=[obj],  # only this obj
+                ann_files=ann_files,
+                image_prefixes=[osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/test/{:06d}").format(ref.lm_full.obj2id[obj])],
+                scale_to_meter=0.001,
+                with_masks=True,  # (load masks but may not use it)
+                with_depth=True,  # (load depth path here, but may not use it)
+                depth_factor=1000.0,
+                cam_type="dataset",
+                cam=ref.lm_full.camera_matrix,
+                height=480,
+                width=640,
+                cache_dir=osp.join(PROJ_ROOT, ".cache"),
+                use_cache=True,
+                num_to_load=-1,
+                filter_invalid=filter_invalid,
+                filter_scene=True,
+                ref_key="lm_full",
+            )
+
+# single obj splits for lmo_NoBopTest_train
+for obj in ref.lmo_full.objects:
+    for split in ["train"]:
+        name = "lmo_NoBopTest_{}_{}".format(obj, split)
+        if split in ["train"]:
+            filter_invalid = True
+        else:
+            raise ValueError("{}".format(split))
+        if name not in SPLITS_LM:
+            SPLITS_LM[name] = dict(
+                name=name,
+                dataset_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/"),
+                models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/models"),
+                objs=[obj],
+                ann_files=[
+                    osp.join(
+                        DATASETS_ROOT,
+                        "BOP_DATASETS/lmo/image_set/lmo_no_bop_test.txt",
+                    )
+                ],
+                # NOTE: scene root
+                image_prefixes=[osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/test/{:06d}").format(2)],
+                scale_to_meter=0.001,
+                with_masks=True,  # (load masks but may not use it)
+                with_depth=True,  # (load depth path here, but may not use it)
+                depth_factor=1000.0,
+                cam_type="dataset",
+                cam=ref.lmo_full.camera_matrix,
+                height=480,
+                width=640,
+                cache_dir=osp.join(PROJ_ROOT, ".cache"),
+                use_cache=True,
+                num_to_load=-1,
+                filter_scene=False,
+                filter_invalid=filter_invalid,
+                ref_key="lmo_full",
+            )
+
+
+def register_with_name_cfg(name, data_cfg=None):
+    """Assume pre-defined datasets live in `./datasets`.
+
+    Args:
+        name: dataset_name,
+        data_cfg: if name is in existing SPLITS, use pre-defined data_cfg
+            otherwise requires data_cfg
+            data_cfg can be set in cfg.DATA_CFG.name
+    """
+    dprint("register dataset: {}".format(name))
+    if name in SPLITS_LM:
+        used_cfg = SPLITS_LM[name]
+    else:
+        assert data_cfg is not None, f"dataset name {name} is not registered"
+        used_cfg = data_cfg
+    DatasetCatalog.register(name, LM_Dataset(used_cfg))
+    # something like eval_types
+    MetadataCatalog.get(name).set(
+        id="linemod",  # NOTE: for pvnet to determine module
+        ref_key=used_cfg["ref_key"],
+        objs=used_cfg["objs"],
+        eval_error_types=["ad", "rete", "proj"],
+        evaluator_type="coco_bop",
+        **get_lm_metadata(obj_names=used_cfg["objs"]),
+    )
+
+
+def get_available_datasets():
+    return list(SPLITS_LM.keys())
+
+
+#### tests ###############################################
+def test_vis():
+    dset_name = sys.argv[1]
+    assert dset_name in DatasetCatalog.list()
+
+    meta = MetadataCatalog.get(dset_name)
+    dprint("MetadataCatalog: ", meta)
+    objs = meta.objs
+
+    t_start = time.perf_counter()
+    dicts = DatasetCatalog.get(dset_name)
+    logger.info("Done loading {} samples with {:.3f}s.".format(len(dicts), time.perf_counter() - t_start))
+
+    dirname = "output/{}-data-vis".format(dset_name)
+    os.makedirs(dirname, exist_ok=True)
+    for d in dicts:
+        img = read_image_mmcv(d["file_name"], format="BGR")
+        depth = mmcv.imread(d["depth_file"], "unchanged") / 1000.0
+
+        imH, imW = img.shape[:2]
+        annos = d["annotations"]
+        masks = [cocosegm2mask(anno["segmentation"], imH, imW) for anno in annos]
+        bboxes = [anno["bbox"] for anno in annos]
+        bbox_modes = [anno["bbox_mode"] for anno in annos]
+        bboxes_xyxy = np.array(
+            [BoxMode.convert(box, box_mode, BoxMode.XYXY_ABS) for box, box_mode in zip(bboxes, bbox_modes)]
+        )
+        kpts_3d_list = [anno["bbox3d_and_center"] for anno in annos]
+        quats = [anno["quat"] for anno in annos]
+        transes = [anno["trans"] for anno in annos]
+        Rs = [quat2mat(quat) for quat in quats]
+        # 0-based label
+        cat_ids = [anno["category_id"] for anno in annos]
+        K = d["cam"]
+        kpts_2d = [misc.project_pts(kpt3d, K, R, t) for kpt3d, R, t in zip(kpts_3d_list, Rs, transes)]
+        labels = [objs[cat_id] for cat_id in cat_ids]
+        img_vis = vis_image_mask_bbox_cv2(img, masks, bboxes=bboxes_xyxy, labels=labels)
+        img_vis_kpts2d = img.copy()
+        for anno_i in range(len(annos)):
+            img_vis_kpts2d = misc.draw_projected_box3d(img_vis_kpts2d, kpts_2d[anno_i])
+        grid_show(
+            [
+                img[:, :, [2, 1, 0]],
+                img_vis[:, :, [2, 1, 0]],
+                img_vis_kpts2d[:, :, [2, 1, 0]],
+                depth,
+            ],
+            [f"img:{d['file_name']}", "vis_img", "img_vis_kpts2d", "depth"],
+            row=2,
+            col=2,
+        )
+
+
+if __name__ == "__main__":
+    """Test the dataset loader.
+
+    Usage:
+        python -m det.yolox.data.datasets.lm_dataset_d2 dataset_name
+    """
+    from lib.vis_utils.image import grid_show
+    from lib.utils.setup_logger import setup_my_logger
+    import detectron2.data.datasets  # noqa # add pre-defined metadata
+    from core.utils.data_utils import read_image_mmcv
+    from lib.vis_utils.image import vis_image_mask_bbox_cv2
+
+    print("sys.argv:", sys.argv)
+    logger = setup_my_logger(name="core")
+    register_with_name_cfg(sys.argv[1])
+    print("dataset catalog: ", DatasetCatalog.list())
+    test_vis()
-- 
GitLab