diff --git a/core/gdrn_modeling/datasets/dataset_factory.py b/core/gdrn_modeling/datasets/dataset_factory.py
index 3d9f58ba47bcf20f6f9e914df6696c1193c8b722..c60929e6314797547b2e7b707de41b21e501c6c1 100644
--- a/core/gdrn_modeling/datasets/dataset_factory.py
+++ b/core/gdrn_modeling/datasets/dataset_factory.py
@@ -8,8 +8,8 @@ import detectron2.utils.comm as comm
 import ref
 from detectron2.data import DatasetCatalog, MetadataCatalog
 from core.gdrn_modeling.datasets import (
-    lm_dataset_d2,
     lm_pbr,
+    lmo_bop_test,
     ycbv_pbr,
     ycbv_d2,
     ycbv_bop_test,
@@ -39,8 +39,8 @@ __all__ = [
     "get_available_datasets",
 ]
 _DSET_MOD_NAMES = [
-    "lm_dataset_d2",
     "lm_pbr",
+    "lmo_bop_test",
     "ycbv_pbr",
     "ycbv_d2",
     "ycbv_bop_test",
diff --git a/core/gdrn_modeling/datasets/lm_dataset_d2.py b/core/gdrn_modeling/datasets/lm_dataset_d2.py
deleted file mode 100644
index d4a473367aaa55a0fd7c1c9b860753aef81e276f..0000000000000000000000000000000000000000
--- a/core/gdrn_modeling/datasets/lm_dataset_d2.py
+++ /dev/null
@@ -1,887 +0,0 @@
-import hashlib
-import logging
-import os
-import os.path as osp
-import sys
-import time
-from collections import OrderedDict
-import mmcv
-import numpy as np
-from tqdm import tqdm
-from transforms3d.quaternions import mat2quat, quat2mat
-from detectron2.data import DatasetCatalog, MetadataCatalog
-from detectron2.structures import BoxMode
-
-cur_dir = osp.dirname(osp.abspath(__file__))
-PROJ_ROOT = osp.normpath(osp.join(cur_dir, "../../.."))
-sys.path.insert(0, PROJ_ROOT)
-
-import ref
-
-from lib.pysixd import inout, misc
-from lib.utils.mask_utils import binary_mask_to_rle, cocosegm2mask
-from lib.utils.utils import dprint, iprint, lazy_property
-
-
-logger = logging.getLogger(__name__)
-DATASETS_ROOT = osp.normpath(osp.join(PROJ_ROOT, "datasets"))
-
-
-class LM_Dataset(object):
-    """lm splits."""
-
-    def __init__(self, data_cfg):
-        """
-        Set with_depth and with_masks default to True,
-        and decide whether to load them into dataloader/network later
-        with_masks:
-        """
-        self.name = data_cfg["name"]
-        self.data_cfg = data_cfg
-
-        self.objs = data_cfg["objs"]  # selected objects
-
-        self.ann_files = data_cfg["ann_files"]  # idx files with image ids
-        self.image_prefixes = data_cfg["image_prefixes"]
-        self.xyz_prefixes = data_cfg["xyz_prefixes"]
-
-        self.dataset_root = data_cfg["dataset_root"]  # BOP_DATASETS/lm/
-        assert osp.exists(self.dataset_root), self.dataset_root
-        self.models_root = data_cfg["models_root"]  # BOP_DATASETS/lm/models
-        self.scale_to_meter = data_cfg["scale_to_meter"]  # 0.001
-
-        self.with_masks = data_cfg["with_masks"]  # True (load masks but may not use it)
-        self.with_depth = data_cfg["with_depth"]  # True (load depth path here, but may not use it)
-
-        self.height = data_cfg["height"]  # 480
-        self.width = data_cfg["width"]  # 640
-
-        self.cache_dir = data_cfg.get("cache_dir", osp.join(PROJ_ROOT, ".cache"))  # .cache
-        self.use_cache = data_cfg.get("use_cache", True)
-        self.num_to_load = data_cfg["num_to_load"]  # -1
-        self.filter_invalid = data_cfg["filter_invalid"]
-        self.filter_scene = data_cfg.get("filter_scene", False)
-        self.debug_im_id = data_cfg.get("debug_im_id", None)
-        ##################################################
-
-        # NOTE: careful! Only the selected objects
-        self.cat_ids = [cat_id for cat_id, obj_name in ref.lm_full.id2obj.items() if obj_name in self.objs]
-        # map selected objs to [0, num_objs-1]
-        self.cat2label = {v: i for i, v in enumerate(self.cat_ids)}  # id_map
-        self.label2cat = {label: cat for cat, label in self.cat2label.items()}
-        self.obj2label = OrderedDict((obj, obj_id) for obj_id, obj in enumerate(self.objs))
-        ##########################################################
-
-    def __call__(self):  # LM_Dataset
-        """Load light-weight instance annotations of all images into a list of
-        dicts in Detectron2 format.
-
-        Do not load heavy data into memory in this file, since we will
-        load the annotations of all images into memory.
-        """
-        # cache the dataset_dicts to avoid loading masks from files
-        hashed_file_name = hashlib.md5(
-            (
-                "".join([str(fn) for fn in self.objs])
-                + "dataset_dicts_{}_{}_{}_{}_{}".format(
-                    self.name,
-                    self.dataset_root,
-                    self.with_masks,
-                    self.with_depth,
-                    __name__,
-                )
-            ).encode("utf-8")
-        ).hexdigest()
-        cache_path = osp.join(
-            self.cache_dir,
-            "dataset_dicts_{}_{}.pkl".format(self.name, hashed_file_name),
-        )
-
-        if osp.exists(cache_path) and self.use_cache:
-            logger.info("load cached dataset dicts from {}".format(cache_path))
-            return mmcv.load(cache_path)
-
-        t_start = time.perf_counter()
-
-        logger.info("loading dataset dicts: {}".format(self.name))
-        self.num_instances_without_valid_segmentation = 0
-        self.num_instances_without_valid_box = 0
-        dataset_dicts = []  # ######################################################
-        assert len(self.ann_files) == len(self.image_prefixes), f"{len(self.ann_files)} != {len(self.image_prefixes)}"
-        assert len(self.ann_files) == len(self.xyz_prefixes), f"{len(self.ann_files)} != {len(self.xyz_prefixes)}"
-        unique_im_id = 0
-        for ann_file, scene_root, xyz_root in zip(tqdm(self.ann_files), self.image_prefixes, self.xyz_prefixes):
-            # linemod each scene is an object
-            with open(ann_file, "r") as f_ann:
-                indices = [line.strip("\r\n") for line in f_ann.readlines()]  # string ids
-            gt_dict = mmcv.load(osp.join(scene_root, "scene_gt.json"))
-            gt_info_dict = mmcv.load(osp.join(scene_root, "scene_gt_info.json"))  # bbox_obj, bbox_visib
-            cam_dict = mmcv.load(osp.join(scene_root, "scene_camera.json"))
-            for im_id in tqdm(indices):
-                int_im_id = int(im_id)
-                str_im_id = str(int_im_id)
-                rgb_path = osp.join(scene_root, "rgb/{:06d}.png").format(int_im_id)
-                assert osp.exists(rgb_path), rgb_path
-
-                depth_path = osp.join(scene_root, "depth/{:06d}.png".format(int_im_id))
-
-                scene_id = int(rgb_path.split("/")[-3])
-                scene_im_id = f"{scene_id}/{int_im_id}"
-
-                if self.debug_im_id is not None:
-                    if self.debug_im_id != scene_im_id:
-                        continue
-
-                K = np.array(cam_dict[str_im_id]["cam_K"], dtype=np.float32).reshape(3, 3)
-                depth_factor = 1000.0 / cam_dict[str_im_id]["depth_scale"]
-                if self.filter_scene:
-                    if scene_id not in self.cat_ids:
-                        continue
-                record = {
-                    "dataset_name": self.name,
-                    "file_name": osp.relpath(rgb_path, PROJ_ROOT),
-                    "depth_file": osp.relpath(depth_path, PROJ_ROOT),
-                    "height": self.height,
-                    "width": self.width,
-                    "image_id": unique_im_id,
-                    "scene_im_id": scene_im_id,  # for evaluation
-                    "cam": K,
-                    "depth_factor": depth_factor,
-                    "img_type": "real",
-                }
-                unique_im_id += 1
-                insts = []
-                for anno_i, anno in enumerate(gt_dict[str_im_id]):
-                    obj_id = anno["obj_id"]
-                    if obj_id not in self.cat_ids:
-                        continue
-                    cur_label = self.cat2label[obj_id]  # 0-based label
-                    R = np.array(anno["cam_R_m2c"], dtype="float32").reshape(3, 3)
-                    t = np.array(anno["cam_t_m2c"], dtype="float32") / 1000.0
-                    pose = np.hstack([R, t.reshape(3, 1)])
-                    quat = mat2quat(R).astype("float32")
-
-                    proj = (record["cam"] @ t.T).T
-                    proj = proj[:2] / proj[2]
-
-                    bbox_visib = gt_info_dict[str_im_id][anno_i]["bbox_visib"]
-                    bbox_obj = gt_info_dict[str_im_id][anno_i]["bbox_obj"]
-                    x1, y1, w, h = bbox_visib
-                    if self.filter_invalid:
-                        if h <= 1 or w <= 1:
-                            self.num_instances_without_valid_box += 1
-                            continue
-
-                    mask_file = osp.join(
-                        scene_root,
-                        "mask/{:06d}_{:06d}.png".format(int_im_id, anno_i),
-                    )
-                    mask_visib_file = osp.join(
-                        scene_root,
-                        "mask_visib/{:06d}_{:06d}.png".format(int_im_id, anno_i),
-                    )
-                    assert osp.exists(mask_file), mask_file
-                    assert osp.exists(mask_visib_file), mask_visib_file
-                    # load mask visib
-                    mask_single = mmcv.imread(mask_visib_file, "unchanged")
-                    mask_single = mask_single.astype("bool")
-                    area = mask_single.sum()
-                    if area < 3:  # filter out too small or nearly invisible instances
-                        self.num_instances_without_valid_segmentation += 1
-                        continue
-                    mask_rle = binary_mask_to_rle(mask_single, compressed=True)
-                    # load mask full
-                    mask_full = mmcv.imread(mask_file, "unchanged")
-                    mask_full = mask_full.astype("bool")
-                    mask_full_rle = binary_mask_to_rle(mask_full, compressed=True)
-
-                    inst = {
-                        "category_id": cur_label,  # 0-based label
-                        "bbox": bbox_visib,
-                        "bbox_obj": bbox_obj,
-                        "bbox_mode": BoxMode.XYWH_ABS,
-                        "pose": pose,
-                        "quat": quat,
-                        "trans": t,
-                        "centroid_2d": proj,  # absolute (cx, cy)
-                        "segmentation": mask_rle,
-                        "mask_full": mask_full_rle,
-                    }
-
-                    if "test" not in self.name.lower():
-                        # if True:
-                        xyz_path = osp.join(xyz_root, f"{int_im_id:06d}_{anno_i:06d}.pkl")
-                        assert osp.exists(xyz_path), xyz_path
-                        inst["xyz_path"] = xyz_path
-
-                    model_info = self.models_info[str(obj_id)]
-                    inst["model_info"] = model_info
-                    # TODO: using full mask and full xyz
-                    for key in ["bbox3d_and_center"]:
-                        inst[key] = self.models[cur_label][key]
-                    insts.append(inst)
-                if len(insts) == 0:  # filter im without anno
-                    continue
-                record["annotations"] = insts
-                dataset_dicts.append(record)
-
-        if self.num_instances_without_valid_segmentation > 0:
-            logger.warning(
-                "Filtered out {} instances without valid segmentation. "
-                "There might be issues in your dataset generation process.".format(
-                    self.num_instances_without_valid_segmentation
-                )
-            )
-        if self.num_instances_without_valid_box > 0:
-            logger.warning(
-                "Filtered out {} instances without valid box. "
-                "There might be issues in your dataset generation process.".format(self.num_instances_without_valid_box)
-            )
-        ##########################################################################
-        if self.num_to_load > 0:
-            self.num_to_load = min(int(self.num_to_load), len(dataset_dicts))
-            dataset_dicts = dataset_dicts[: self.num_to_load]
-        logger.info("loaded {} dataset dicts, using {}s".format(len(dataset_dicts), time.perf_counter() - t_start))
-
-        mmcv.mkdir_or_exist(osp.dirname(cache_path))
-        mmcv.dump(dataset_dicts, cache_path, protocol=4)
-        logger.info("Dumped dataset_dicts to {}".format(cache_path))
-        return dataset_dicts
-
-    @lazy_property
-    def models_info(self):
-        models_info_path = osp.join(self.models_root, "models_info.json")
-        assert osp.exists(models_info_path), models_info_path
-        models_info = mmcv.load(models_info_path)  # key is str(obj_id)
-        return models_info
-
-    @lazy_property
-    def models(self):
-        """Load models into a list."""
-        cache_path = osp.join(self.cache_dir, "models_{}.pkl".format("_".join(self.objs)))
-        if osp.exists(cache_path) and self.use_cache:
-            # dprint("{}: load cached object models from {}".format(self.name, cache_path))
-            return mmcv.load(cache_path)
-
-        models = []
-        for obj_name in self.objs:
-            model = inout.load_ply(
-                osp.join(
-                    self.models_root,
-                    f"obj_{ref.lm_full.obj2id[obj_name]:06d}.ply",
-                ),
-                vertex_scale=self.scale_to_meter,
-            )
-            # NOTE: the bbox3d_and_center is not obtained from centered vertices
-            # for BOP models, not a big problem since they had been centered
-            model["bbox3d_and_center"] = misc.get_bbox3d_and_center(model["pts"])
-
-            models.append(model)
-        logger.info("cache models to {}".format(cache_path))
-        mmcv.mkdir_or_exist(osp.dirname(cache_path))
-        mmcv.dump(models, cache_path, protocol=4)
-        return models
-
-    def image_aspect_ratio(self):
-        return self.width / self.height  # 4/3
-
-
-########### register datasets ############################################################
-
-
-def get_lm_metadata(obj_names, ref_key):
-    """task specific metadata."""
-
-    data_ref = ref.__dict__[ref_key]
-
-    cur_sym_infos = {}  # label based key
-    loaded_models_info = data_ref.get_models_info()
-
-    for i, obj_name in enumerate(obj_names):
-        obj_id = data_ref.obj2id[obj_name]
-        model_info = loaded_models_info[str(obj_id)]
-        if "symmetries_discrete" in model_info or "symmetries_continuous" in model_info:
-            sym_transforms = misc.get_symmetry_transformations(model_info, max_sym_disc_step=0.01)
-            sym_info = np.array([sym["R"] for sym in sym_transforms], dtype=np.float32)
-        else:
-            sym_info = None
-        cur_sym_infos[i] = sym_info
-
-    meta = {"thing_classes": obj_names, "sym_infos": cur_sym_infos}
-    return meta
-
-
-LM_13_OBJECTS = [
-    "ape",
-    "benchvise",
-    "camera",
-    "can",
-    "cat",
-    "driller",
-    "duck",
-    "eggbox",
-    "glue",
-    "holepuncher",
-    "iron",
-    "lamp",
-    "phone",
-]  # no bowl, cup
-LM_OCC_OBJECTS = [
-    "ape",
-    "can",
-    "cat",
-    "driller",
-    "duck",
-    "eggbox",
-    "glue",
-    "holepuncher",
-]
-################################################################################
-
-SPLITS_LM = dict(
-    lm_13_train=dict(
-        name="lm_13_train",
-        dataset_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/"),
-        models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/models"),
-        objs=LM_13_OBJECTS,  # selected objects
-        ann_files=[
-            osp.join(
-                DATASETS_ROOT,
-                "BOP_DATASETS/lm/image_set/{}_{}.txt".format(_obj, "train"),
-            )
-            for _obj in LM_13_OBJECTS
-        ],
-        image_prefixes=[
-            osp.join(
-                DATASETS_ROOT,
-                "BOP_DATASETS/lm/test/{:06d}".format(ref.lm_full.obj2id[_obj]),
-            )
-            for _obj in LM_13_OBJECTS
-        ],
-        xyz_prefixes=[
-            osp.join(
-                DATASETS_ROOT,
-                "BOP_DATASETS/lm/test/xyz_crop/{:06d}".format(ref.lm_full.obj2id[_obj]),
-            )
-            for _obj in LM_13_OBJECTS
-        ],
-        scale_to_meter=0.001,
-        with_masks=True,  # (load masks but may not use it)
-        with_depth=True,  # (load depth path here, but may not use it)
-        height=480,
-        width=640,
-        cache_dir=osp.join(PROJ_ROOT, ".cache"),
-        use_cache=True,
-        num_to_load=-1,
-        filter_scene=True,
-        filter_invalid=True,
-        ref_key="lm_full",
-    ),
-    lm_13_test=dict(
-        name="lm_13_test",
-        dataset_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/"),
-        models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/models"),
-        objs=LM_13_OBJECTS,
-        ann_files=[
-            osp.join(
-                DATASETS_ROOT,
-                "BOP_DATASETS/lm/image_set/{}_{}.txt".format(_obj, "test"),
-            )
-            for _obj in LM_13_OBJECTS
-        ],
-        # NOTE: scene root
-        image_prefixes=[
-            osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/test/{:06d}").format(ref.lm_full.obj2id[_obj])
-            for _obj in LM_13_OBJECTS
-        ],
-        xyz_prefixes=[
-            osp.join(
-                DATASETS_ROOT,
-                "BOP_DATASETS/lm/test/xyz_crop/{:06d}".format(ref.lm_full.obj2id[_obj]),
-            )
-            for _obj in LM_13_OBJECTS
-        ],
-        scale_to_meter=0.001,
-        with_masks=True,  # (load masks but may not use it)
-        with_depth=True,  # (load depth path here, but may not use it)
-        height=480,
-        width=640,
-        cache_dir=osp.join(PROJ_ROOT, ".cache"),
-        use_cache=True,
-        num_to_load=-1,
-        filter_scene=True,
-        filter_invalid=False,
-        ref_key="lm_full",
-    ),
-    lmo_train=dict(
-        name="lmo_train",
-        # use lm real all (8 objects) to train for lmo
-        dataset_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/"),
-        models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/models"),
-        objs=LM_OCC_OBJECTS,  # selected objects
-        ann_files=[
-            osp.join(
-                DATASETS_ROOT,
-                "BOP_DATASETS/lm/image_set/{}_{}.txt".format(_obj, "all"),
-            )
-            for _obj in LM_OCC_OBJECTS
-        ],
-        image_prefixes=[
-            osp.join(
-                DATASETS_ROOT,
-                "BOP_DATASETS/lm/test/{:06d}".format(ref.lmo_full.obj2id[_obj]),
-            )
-            for _obj in LM_OCC_OBJECTS
-        ],
-        xyz_prefixes=[
-            osp.join(
-                DATASETS_ROOT,
-                "BOP_DATASETS/lm/test/xyz_crop/{:06d}".format(ref.lmo_full.obj2id[_obj]),
-            )
-            for _obj in LM_OCC_OBJECTS
-        ],
-        scale_to_meter=0.001,
-        with_masks=True,  # (load masks but may not use it)
-        with_depth=True,  # (load depth path here, but may not use it)
-        height=480,
-        width=640,
-        cache_dir=osp.join(PROJ_ROOT, ".cache"),
-        use_cache=True,
-        num_to_load=-1,
-        filter_scene=True,
-        filter_invalid=True,
-        ref_key="lmo_full",
-    ),
-    lmo_NoBopTest_train=dict(
-        name="lmo_NoBopTest_train",
-        dataset_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/"),
-        models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/models"),
-        objs=LM_OCC_OBJECTS,
-        ann_files=[osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/image_set/lmo_no_bop_test.txt")],
-        image_prefixes=[osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/test/{:06d}").format(2)],
-        xyz_prefixes=[
-            osp.join(
-                DATASETS_ROOT,
-                "BOP_DATASETS/lmo/test/xyz_crop/{:06d}".format(2),
-            )
-        ],
-        scale_to_meter=0.001,
-        with_masks=True,  # (load masks but may not use it)
-        with_depth=True,  # (load depth path here, but may not use it)
-        height=480,
-        width=640,
-        cache_dir=osp.join(PROJ_ROOT, ".cache"),
-        use_cache=True,
-        num_to_load=-1,
-        filter_scene=False,
-        filter_invalid=True,
-        ref_key="lmo_full",
-    ),
-    lmo_test=dict(
-        name="lmo_test",
-        dataset_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/"),
-        models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/models"),
-        objs=LM_OCC_OBJECTS,
-        ann_files=[osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/image_set/lmo_test.txt")],
-        # NOTE: scene root
-        image_prefixes=[osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/test/{:06d}").format(2)],
-        xyz_prefixes=[None],
-        scale_to_meter=0.001,
-        with_masks=True,  # (load masks but may not use it)
-        with_depth=True,  # (load depth path here, but may not use it)
-        height=480,
-        width=640,
-        cache_dir=osp.join(PROJ_ROOT, ".cache"),
-        use_cache=True,
-        num_to_load=-1,
-        filter_scene=False,
-        filter_invalid=False,
-        ref_key="lmo_full",
-    ),
-    lmo_bop_test=dict(
-        name="lmo_bop_test",
-        dataset_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/"),
-        models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/models"),
-        objs=LM_OCC_OBJECTS,
-        ann_files=[osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/image_set/lmo_bop_test.txt")],
-        # NOTE: scene root
-        image_prefixes=[osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/test/{:06d}").format(2)],
-        xyz_prefixes=[None],
-        scale_to_meter=0.001,
-        with_masks=True,  # (load masks but may not use it)
-        with_depth=True,  # (load depth path here, but may not use it)
-        height=480,
-        width=640,
-        cache_dir=osp.join(PROJ_ROOT, ".cache"),
-        use_cache=True,
-        num_to_load=-1,
-        filter_scene=False,
-        filter_invalid=False,
-        ref_key="lmo_full",
-    ),
-)
-
-# single obj splits for lm real
-for obj in ref.lm_full.objects:
-    for split in ["train", "test", "all"]:
-        name = "lm_real_{}_{}".format(obj, split)
-        ann_files = [
-            osp.join(
-                DATASETS_ROOT,
-                "BOP_DATASETS/lm/image_set/{}_{}.txt".format(obj, split),
-            )
-        ]
-        if split in ["train", "all"]:  # all is used to train lmo
-            filter_invalid = True
-        elif split in ["test"]:
-            filter_invalid = False
-        else:
-            raise ValueError("{}".format(split))
-        if name not in SPLITS_LM:
-            SPLITS_LM[name] = dict(
-                name=name,
-                dataset_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/"),
-                models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/models"),
-                objs=[obj],  # only this obj
-                ann_files=ann_files,
-                image_prefixes=[osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/test/{:06d}").format(ref.lm_full.obj2id[obj])],
-                xyz_prefixes=[
-                    osp.join(
-                        DATASETS_ROOT,
-                        "BOP_DATASETS/lm/test/xyz_crop/{:06d}".format(ref.lm_full.obj2id[obj]),
-                    )
-                ],
-                scale_to_meter=0.001,
-                with_masks=True,  # (load masks but may not use it)
-                with_depth=True,  # (load depth path here, but may not use it)
-                height=480,
-                width=640,
-                cache_dir=osp.join(PROJ_ROOT, ".cache"),
-                use_cache=True,
-                num_to_load=-1,
-                filter_invalid=filter_invalid,
-                filter_scene=True,
-                ref_key="lm_full",
-            )
-
-# single obj splits for lmo_NoBopTest_train
-for obj in ref.lmo_full.objects:
-    for split in ["train"]:
-        name = "lmo_NoBopTest_{}_{}".format(obj, split)
-        if split in ["train"]:
-            filter_invalid = True
-        else:
-            raise ValueError("{}".format(split))
-        if name not in SPLITS_LM:
-            SPLITS_LM[name] = dict(
-                name=name,
-                dataset_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/"),
-                models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/models"),
-                objs=[obj],
-                ann_files=[
-                    osp.join(
-                        DATASETS_ROOT,
-                        "BOP_DATASETS/lmo/image_set/lmo_no_bop_test.txt",
-                    )
-                ],
-                # NOTE: scene root
-                image_prefixes=[osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/test/{:06d}").format(2)],
-                xyz_prefixes=[
-                    osp.join(
-                        DATASETS_ROOT,
-                        "BOP_DATASETS/lmo/test/xyz_crop/{:06d}".format(2),
-                    )
-                ],
-                scale_to_meter=0.001,
-                with_masks=True,  # (load masks but may not use it)
-                with_depth=True,  # (load depth path here, but may not use it)
-                height=480,
-                width=640,
-                cache_dir=osp.join(PROJ_ROOT, ".cache"),
-                use_cache=True,
-                num_to_load=-1,
-                filter_scene=False,
-                filter_invalid=filter_invalid,
-                ref_key="lmo_full",
-            )
-
-# single obj splits for lmo_test
-for obj in ref.lmo_full.objects:
-    for split in ["test"]:
-        name = "lmo_{}_{}".format(obj, split)
-        if split in ["train", "all"]:  # all is used to train lmo
-            filter_invalid = True
-        elif split in ["test"]:
-            filter_invalid = False
-        else:
-            raise ValueError("{}".format(split))
-        if name not in SPLITS_LM:
-            SPLITS_LM[name] = dict(
-                name=name,
-                dataset_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/"),
-                models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/models"),
-                objs=[obj],
-                ann_files=[
-                    osp.join(
-                        DATASETS_ROOT,
-                        "BOP_DATASETS/lmo/image_set/lmo_test.txt",
-                    )
-                ],
-                # NOTE: scene root
-                image_prefixes=[osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/test/{:06d}").format(2)],
-                xyz_prefixes=[None],
-                scale_to_meter=0.001,
-                with_masks=True,  # (load masks but may not use it)
-                with_depth=True,  # (load depth path here, but may not use it)
-                height=480,
-                width=640,
-                cache_dir=osp.join(PROJ_ROOT, ".cache"),
-                use_cache=True,
-                num_to_load=-1,
-                filter_scene=False,
-                filter_invalid=False,
-                ref_key="lmo_full",
-            )
-
-# single obj splits for lmo_bop_test
-for obj in ref.lmo_full.objects:
-    for split in ["test"]:
-        name = "lmo_{}_bop_{}".format(obj, split)
-        if split in ["train", "all"]:  # all is used to train lmo
-            filter_invalid = True
-        elif split in ["test"]:
-            filter_invalid = False
-        else:
-            raise ValueError("{}".format(split))
-        if name not in SPLITS_LM:
-            SPLITS_LM[name] = dict(
-                name=name,
-                dataset_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/"),
-                models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/models"),
-                objs=[obj],
-                ann_files=[
-                    osp.join(
-                        DATASETS_ROOT,
-                        "BOP_DATASETS/lmo/image_set/lmo_bop_test.txt",
-                    )
-                ],
-                # NOTE: scene root
-                image_prefixes=[osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/test/{:06d}").format(2)],
-                xyz_prefixes=[None],
-                scale_to_meter=0.001,
-                with_masks=True,  # (load masks but may not use it)
-                with_depth=True,  # (load depth path here, but may not use it)
-                height=480,
-                width=640,
-                cache_dir=osp.join(PROJ_ROOT, ".cache"),
-                use_cache=True,
-                num_to_load=-1,
-                filter_scene=False,
-                filter_invalid=False,
-                ref_key="lmo_full",
-            )
-
-# ================ add single image dataset for debug =======================================
-debug_im_ids = {
-    "train": {obj: [] for obj in ref.lm_full.objects},
-    "test": {obj: [] for obj in ref.lm_full.objects},
-}
-for obj in ref.lm_full.objects:
-    for split in ["train", "test"]:
-        cur_ann_file = osp.join(DATASETS_ROOT, f"BOP_DATASETS/lm/image_set/{obj}_{split}.txt")
-        ann_files = [cur_ann_file]
-
-        im_ids = []
-        with open(cur_ann_file, "r") as f:
-            for line in f:
-                # scene_id(obj_id)/im_id
-                im_ids.append("{}/{}".format(ref.lm_full.obj2id[obj], int(line.strip("\r\n"))))
-
-        debug_im_ids[split][obj] = im_ids
-        for debug_im_id in debug_im_ids[split][obj]:
-            name = "lm_single_{}{}_{}".format(obj, debug_im_id.split("/")[1], split)
-            if name not in SPLITS_LM:
-                SPLITS_LM[name] = dict(
-                    name=name,
-                    dataset_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/"),
-                    models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/models"),
-                    objs=[obj],  # only this obj
-                    ann_files=ann_files,
-                    image_prefixes=[
-                        osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/test/{:06d}").format(ref.lm_full.obj2id[obj])
-                    ],
-                    xyz_prefixes=[
-                        osp.join(
-                            DATASETS_ROOT,
-                            "BOP_DATASETS/lm/test/xyz_crop/{:06d}".format(ref.lm_full.obj2id[obj]),
-                        )
-                    ],
-                    scale_to_meter=0.001,
-                    with_masks=True,  # (load masks but may not use it)
-                    with_depth=True,  # (load depth path here, but may not use it)
-                    height=480,
-                    width=640,
-                    cache_dir=osp.join(PROJ_ROOT, ".cache"),
-                    use_cache=True,
-                    num_to_load=-1,
-                    filter_invalid=False,
-                    filter_scene=True,
-                    ref_key="lm_full",
-                    debug_im_id=debug_im_id,  # NOTE: debug im id
-                )
-
-
-def register_with_name_cfg(name, data_cfg=None):
-    """Assume pre-defined datasets live in `./datasets`.
-
-    Args:
-        name: datasnet_name,
-        data_cfg: if name is in existing SPLITS, use pre-defined data_cfg
-            otherwise requires data_cfg
-            data_cfg can be set in cfg.DATA_CFG.name
-    """
-    dprint("register dataset: {}".format(name))
-    if name in SPLITS_LM:
-        used_cfg = SPLITS_LM[name]
-    else:
-        assert data_cfg is not None, f"dataset name {name} is not registered"
-        used_cfg = data_cfg
-    DatasetCatalog.register(name, LM_Dataset(used_cfg))
-    # something like eval_types
-    MetadataCatalog.get(name).set(
-        id="linemod",  # NOTE: for pvnet to determine module
-        ref_key=used_cfg["ref_key"],
-        objs=used_cfg["objs"],
-        eval_error_types=["ad", "rete", "proj"],
-        evaluator_type="bop",
-        **get_lm_metadata(obj_names=used_cfg["objs"], ref_key=used_cfg["ref_key"]),
-    )
-
-
-def get_available_datasets():
-    return list(SPLITS_LM.keys())
-
-
-#### tests ###############################################
-def test_vis():
-    dset_name = sys.argv[1]
-    assert dset_name in DatasetCatalog.list()
-
-    meta = MetadataCatalog.get(dset_name)
-    dprint("MetadataCatalog: ", meta)
-    objs = meta.objs
-
-    t_start = time.perf_counter()
-    dicts = DatasetCatalog.get(dset_name)
-    logger.info("Done loading {} samples with {:.3f}s.".format(len(dicts), time.perf_counter() - t_start))
-
-    dirname = "output/{}-data-vis".format(dset_name)
-    os.makedirs(dirname, exist_ok=True)
-    for d in dicts:
-        img = read_image_mmcv(d["file_name"], format="BGR")
-        depth = mmcv.imread(d["depth_file"], "unchanged") / 1000.0
-
-        imH, imW = img.shape[:2]
-        annos = d["annotations"]
-        masks = [cocosegm2mask(anno["segmentation"], imH, imW) for anno in annos]
-        bboxes = [anno["bbox"] for anno in annos]
-        bbox_modes = [anno["bbox_mode"] for anno in annos]
-        bboxes_xyxy = np.array(
-            [BoxMode.convert(box, box_mode, BoxMode.XYXY_ABS) for box, box_mode in zip(bboxes, bbox_modes)]
-        )
-        kpts_3d_list = [anno["bbox3d_and_center"] for anno in annos]
-        quats = [anno["quat"] for anno in annos]
-        transes = [anno["trans"] for anno in annos]
-        Rs = [quat2mat(quat) for quat in quats]
-        # 0-based label
-        cat_ids = [anno["category_id"] for anno in annos]
-        K = d["cam"]
-        kpts_2d = [misc.project_pts(kpt3d, K, R, t) for kpt3d, R, t in zip(kpts_3d_list, Rs, transes)]
-        # # TODO: visualize pose and keypoints
-        labels = [objs[cat_id] for cat_id in cat_ids]
-        for _i in range(len(annos)):
-            img_vis = vis_image_mask_bbox_cv2(
-                img,
-                masks[_i : _i + 1],
-                bboxes=bboxes_xyxy[_i : _i + 1],
-                labels=labels[_i : _i + 1],
-            )
-            img_vis_kpts2d = misc.draw_projected_box3d(img_vis.copy(), kpts_2d[_i])
-            if "test" not in dset_name.lower():
-                xyz_path = annos[_i]["xyz_path"]
-                xyz_info = mmcv.load(xyz_path)
-                x1, y1, x2, y2 = xyz_info["xyxy"]
-                xyz_crop = xyz_info["xyz_crop"].astype(np.float32)
-                xyz = np.zeros((imH, imW, 3), dtype=np.float32)
-                xyz[y1 : y2 + 1, x1 : x2 + 1, :] = xyz_crop
-                xyz_show = get_emb_show(xyz)
-                xyz_crop_show = get_emb_show(xyz_crop)
-                img_xyz = img.copy() / 255.0
-                mask_xyz = ((xyz[:, :, 0] != 0) | (xyz[:, :, 1] != 0) | (xyz[:, :, 2] != 0)).astype("uint8")
-                fg_idx = np.where(mask_xyz != 0)
-                img_xyz[fg_idx[0], fg_idx[1], :] = xyz_show[fg_idx[0], fg_idx[1], :3]
-                img_xyz_crop = img_xyz[y1 : y2 + 1, x1 : x2 + 1, :]
-                img_vis_crop = img_vis[y1 : y2 + 1, x1 : x2 + 1, :]
-                # diff mask
-                diff_mask_xyz = np.abs(masks[_i] - mask_xyz)[y1 : y2 + 1, x1 : x2 + 1]
-
-                grid_show(
-                    [
-                        img[:, :, [2, 1, 0]],
-                        img_vis[:, :, [2, 1, 0]],
-                        img_vis_kpts2d[:, :, [2, 1, 0]],
-                        depth,
-                        # xyz_show,
-                        diff_mask_xyz,
-                        xyz_crop_show,
-                        img_xyz[:, :, [2, 1, 0]],
-                        img_xyz_crop[:, :, [2, 1, 0]],
-                        img_vis_crop,
-                    ],
-                    [
-                        "img",
-                        "vis_img",
-                        "img_vis_kpts2d",
-                        "depth",
-                        "diff_mask_xyz",
-                        "xyz_crop_show",
-                        "img_xyz",
-                        "img_xyz_crop",
-                        "img_vis_crop",
-                    ],
-                    row=3,
-                    col=3,
-                )
-            else:
-                grid_show(
-                    [
-                        img[:, :, [2, 1, 0]],
-                        img_vis[:, :, [2, 1, 0]],
-                        img_vis_kpts2d[:, :, [2, 1, 0]],
-                        depth,
-                    ],
-                    ["img", "vis_img", "img_vis_kpts2d", "depth"],
-                    row=2,
-                    col=2,
-                )
-
-
-if __name__ == "__main__":
-    """Test the  dataset loader.
-
-    python this_file.py dataset_name
-    """
-    from lib.vis_utils.image import grid_show
-    from lib.utils.setup_logger import setup_my_logger
-
-    import detectron2.data.datasets  # noqa # add pre-defined metadata
-    from lib.vis_utils.image import vis_image_mask_bbox_cv2
-    from core.utils.utils import get_emb_show
-    from core.utils.data_utils import read_image_mmcv
-
-    print("sys.argv:", sys.argv)
-    logger = setup_my_logger(name="core")
-    register_with_name_cfg(sys.argv[1])
-    print("dataset catalog: ", DatasetCatalog.list())
-
-    test_vis()
diff --git a/core/gdrn_modeling/datasets/lmo_bop_test.py b/core/gdrn_modeling/datasets/lmo_bop_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..2e64da4ff1caee086fac9e9e6d69e5efeae49c83
--- /dev/null
+++ b/core/gdrn_modeling/datasets/lmo_bop_test.py
@@ -0,0 +1,521 @@
+import hashlib
+import logging
+import os
+import os.path as osp
+import sys
+import time
+from collections import OrderedDict
+import mmcv
+import numpy as np
+from tqdm import tqdm
+from transforms3d.quaternions import mat2quat, quat2mat
+from detectron2.data import DatasetCatalog, MetadataCatalog
+from detectron2.structures import BoxMode
+
+cur_dir = osp.dirname(osp.abspath(__file__))
+PROJ_ROOT = osp.normpath(osp.join(cur_dir, "../../.."))
+sys.path.insert(0, PROJ_ROOT)
+
+import ref
+
+from lib.pysixd import inout, misc
+from lib.utils.mask_utils import binary_mask_to_rle, cocosegm2mask
+from lib.utils.utils import dprint, iprint, lazy_property
+
+
+logger = logging.getLogger(__name__)
+DATASETS_ROOT = osp.normpath(osp.join(PROJ_ROOT, "datasets"))
+
+
+class LMO_BOP_TEST_Dataset(object):
+    """lmo bop test splits."""
+
+    def __init__(self, data_cfg):
+        """
+        Set with_depth and with_masks default to True,
+        and decide whether to load them into dataloader/network later
+        with_masks:
+        """
+        self.name = data_cfg["name"]
+        self.data_cfg = data_cfg
+
+        self.objs = data_cfg["objs"]  # selected objects
+
+        self.dataset_root = data_cfg.get("dataset_root", osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/test"))
+        assert osp.exists(self.dataset_root), self.dataset_root
+
+        self.ann_file = data_cfg["ann_file"]  # json file with scene_id and im_id items
+
+        self.models_root = data_cfg["models_root"]  # BOP_DATASETS/lmo/models
+        self.scale_to_meter = data_cfg["scale_to_meter"]  # 0.001
+
+        self.with_masks = data_cfg["with_masks"]
+        self.with_depth = data_cfg["with_depth"]
+
+        self.height = data_cfg["height"]  # 480
+        self.width = data_cfg["width"]  # 640
+
+        self.cache_dir = data_cfg.get("cache_dir", osp.join(PROJ_ROOT, ".cache"))  # .cache
+        self.use_cache = data_cfg.get("use_cache", True)
+        self.num_to_load = data_cfg["num_to_load"]  # -1
+        self.filter_invalid = data_cfg.get("filter_invalid", True)
+        ##################################################
+
+        # NOTE: careful! Only the selected objects
+        self.cat_ids = [cat_id for cat_id, obj_name in ref.lmo_full.id2obj.items() if obj_name in self.objs]
+        # map selected objs to [0, num_objs-1]
+        self.cat2label = {v: i for i, v in enumerate(self.cat_ids)}  # id_map
+        self.label2cat = {label: cat for cat, label in self.cat2label.items()}
+        self.obj2label = OrderedDict((obj, obj_id) for obj, obj_id in enumerate(self.objs))
+        ##########################################################
+
+    def __call__(self):
+        """Load light-weight instance annotations of all images into a list of
+        dicts in Detectron2 format.
+
+        Do not load heavy data into memory in this file, since we will
+        load the annotations of all images into memory.
+        """
+        # cache the dataset_dicts to avoid loading masks from files
+        hashed_file_name = hashlib.md5(
+            (
+                "".join([str(fn) for fn in self.objs])
+                + "dataset_dicts_{}_{}_{}_{}_{}".format(
+                    self.name,
+                    self.dataset_root,
+                    self.with_masks,
+                    self.with_depth,
+                    __name__,
+                )
+            ).encode("utf-8")
+        ).hexdigest()
+        cache_path = osp.join(self.cache_dir, "dataset_dicts_{}_{}.pkl".format(self.name, hashed_file_name))
+
+        if osp.exists(cache_path) and self.use_cache:
+            logger.info("load cached dataset dicts from {}".format(cache_path))
+            return mmcv.load(cache_path)
+
+        t_start = time.perf_counter()
+
+        logger.info("loading dataset dicts: {}".format(self.name))
+        self.num_instances_without_valid_segmentation = 0
+        self.num_instances_without_valid_box = 0
+        dataset_dicts = []  # ######################################################
+        # it is slow because of loading and converting masks to rle
+        targets = mmcv.load(self.ann_file)
+
+        scene_im_ids = [(item["scene_id"], item["im_id"]) for item in targets]
+        scene_im_ids = sorted(list(set(scene_im_ids)))
+
+        # load infos for each scene
+        gt_dicts = {}
+        gt_info_dicts = {}
+        cam_dicts = {}
+        for scene_id, im_id in scene_im_ids:
+            scene_root = osp.join(self.dataset_root, f"{scene_id:06d}")
+            if scene_id not in gt_dicts:
+                gt_dicts[scene_id] = mmcv.load(osp.join(scene_root, "scene_gt.json"))
+            if scene_id not in gt_info_dicts:
+                gt_info_dicts[scene_id] = mmcv.load(osp.join(scene_root, "scene_gt_info.json"))  # bbox_obj, bbox_visib
+            if scene_id not in cam_dicts:
+                cam_dicts[scene_id] = mmcv.load(osp.join(scene_root, "scene_camera.json"))
+
+        for scene_id, int_im_id in tqdm(scene_im_ids):
+            str_im_id = str(int_im_id)
+            scene_root = osp.join(self.dataset_root, f"{scene_id:06d}")
+
+            gt_dict = gt_dicts[scene_id]
+            gt_info_dict = gt_info_dicts[scene_id]
+            cam_dict = cam_dicts[scene_id]
+
+            rgb_path = osp.join(scene_root, "rgb/{:06d}.png").format(int_im_id)
+            assert osp.exists(rgb_path), rgb_path
+
+            depth_path = osp.join(scene_root, "depth/{:06d}.png".format(int_im_id))
+
+            scene_im_id = f"{scene_id}/{int_im_id}"
+
+            K = np.array(cam_dict[str_im_id]["cam_K"], dtype=np.float32).reshape(3, 3)
+            depth_factor = 1000.0 / cam_dict[str_im_id]["depth_scale"]  # 10000
+
+            record = {
+                "dataset_name": self.name,
+                "file_name": osp.relpath(rgb_path, PROJ_ROOT),
+                "depth_file": osp.relpath(depth_path, PROJ_ROOT),
+                "height": self.height,
+                "width": self.width,
+                "image_id": int_im_id,
+                "scene_im_id": scene_im_id,  # for evaluation
+                "cam": K,
+                "depth_factor": depth_factor,
+                "img_type": "real",  # NOTE: has background
+            }
+            insts = []
+            for anno_i, anno in enumerate(gt_dict[str_im_id]):
+                obj_id = anno["obj_id"]
+                if obj_id not in self.cat_ids:
+                    continue
+                cur_label = self.cat2label[obj_id]  # 0-based label
+                R = np.array(anno["cam_R_m2c"], dtype="float32").reshape(3, 3)
+                t = np.array(anno["cam_t_m2c"], dtype="float32") / 1000.0
+                pose = np.hstack([R, t.reshape(3, 1)])
+                quat = mat2quat(R).astype("float32")
+
+                proj = (record["cam"] @ t.T).T
+                proj = proj[:2] / proj[2]
+
+                bbox_visib = gt_info_dict[str_im_id][anno_i]["bbox_visib"]
+                bbox_obj = gt_info_dict[str_im_id][anno_i]["bbox_obj"]
+                x1, y1, w, h = bbox_visib
+                if self.filter_invalid:
+                    if h <= 1 or w <= 1:
+                        self.num_instances_without_valid_box += 1
+                        continue
+
+                mask_file = osp.join(
+                    scene_root,
+                    "mask/{:06d}_{:06d}.png".format(int_im_id, anno_i),
+                )
+                mask_visib_file = osp.join(
+                    scene_root,
+                    "mask_visib/{:06d}_{:06d}.png".format(int_im_id, anno_i),
+                )
+                assert osp.exists(mask_file), mask_file
+                assert osp.exists(mask_visib_file), mask_visib_file
+                # load mask visib
+                mask_single = mmcv.imread(mask_visib_file, "unchanged")
+                mask_single = mask_single.astype("bool")
+                area = mask_single.sum()
+                if area < 3:  # filter out too small or nearly invisible instances
+                    self.num_instances_without_valid_segmentation += 1
+                mask_rle = binary_mask_to_rle(mask_single, compressed=True)
+
+                # load mask full
+                mask_full = mmcv.imread(mask_file, "unchanged")
+                mask_full = mask_full.astype("bool")
+                mask_full_rle = binary_mask_to_rle(mask_full, compressed=True)
+
+                visib_fract = gt_info_dict[str_im_id][anno_i].get("visib_fract", 1.0)
+
+                inst = {
+                    "category_id": cur_label,  # 0-based label
+                    "bbox": bbox_visib,
+                    "bbox_obj": bbox_obj,
+                    "bbox_mode": BoxMode.XYWH_ABS,
+                    "pose": pose,
+                    "quat": quat,
+                    "trans": t,
+                    "centroid_2d": proj,  # absolute (cx, cy)
+                    "segmentation": mask_rle,
+                    "mask_full": mask_full_rle,
+                    "visib_fract": visib_fract,
+                    "xyz_path": None,  # no need for test
+                }
+
+                model_info = self.models_info[str(obj_id)]
+                inst["model_info"] = model_info
+                for key in ["bbox3d_and_center"]:
+                    inst[key] = self.models[cur_label][key]
+                insts.append(inst)
+            if len(insts) == 0:  # filter im without anno
+                continue
+            record["annotations"] = insts
+            dataset_dicts.append(record)
+
+        if self.num_instances_without_valid_segmentation > 0:
+            logger.warning(
+                "There are {} instances without valid segmentation. "
+                "There might be issues in your dataset generation process.".format(
+                    self.num_instances_without_valid_segmentation
+                )
+            )
+        if self.num_instances_without_valid_box > 0:
+            logger.warning(
+                "There are {} instances without valid box. "
+                "There might be issues in your dataset generation process.".format(self.num_instances_without_valid_box)
+            )
+        ##########################################################################
+        if self.num_to_load > 0:
+            self.num_to_load = min(int(self.num_to_load), len(dataset_dicts))
+            dataset_dicts = dataset_dicts[: self.num_to_load]
+        logger.info("loaded {} dataset dicts, using {}s".format(len(dataset_dicts), time.perf_counter() - t_start))
+
+        mmcv.mkdir_or_exist(osp.dirname(cache_path))
+        mmcv.dump(dataset_dicts, cache_path, protocol=4)
+        logger.info("Dumped dataset_dicts to {}".format(cache_path))
+        return dataset_dicts
+
+    @lazy_property
+    def models_info(self):
+        models_info_path = osp.join(self.models_root, "models_info.json")
+        assert osp.exists(models_info_path), models_info_path
+        models_info = mmcv.load(models_info_path)  # key is str(obj_id)
+        return models_info
+
+    @lazy_property
+    def models(self):
+        """Load models into a list."""
+        cache_path = osp.join(self.cache_dir, "models_{}.pkl".format("_".join(self.objs)))
+        if osp.exists(cache_path) and self.use_cache:
+            # dprint("{}: load cached object models from {}".format(self.name, cache_path))
+            return mmcv.load(cache_path)
+
+        models = []
+        for obj_name in self.objs:
+            model = inout.load_ply(
+                osp.join(
+                    self.models_root,
+                    f"obj_{ref.lmo_full.obj2id[obj_name]:06d}.ply",
+                ),
+                vertex_scale=self.scale_to_meter,
+            )
+            # NOTE: the bbox3d_and_center is not obtained from centered vertices
+            # for BOP models, not a big problem since they had been centered
+            model["bbox3d_and_center"] = misc.get_bbox3d_and_center(model["pts"])
+
+            models.append(model)
+        logger.info("cache models to {}".format(cache_path))
+        mmcv.mkdir_or_exist(osp.dirname(cache_path))
+        mmcv.dump(models, cache_path, protocol=4)
+        return models
+
+    def __len__(self):
+        return self.num_to_load
+
+    def image_aspect_ratio(self):
+        return self.width / self.height  # 4/3
+
+
+########### register datasets ############################################################
+
+
+def get_lmo_metadata(obj_names, ref_key):
+    """task specific metadata."""
+
+    data_ref = ref.__dict__[ref_key]
+
+    cur_sym_infos = {}  # label based key
+    loaded_models_info = data_ref.get_models_info()
+
+    for i, obj_name in enumerate(obj_names):
+        obj_id = data_ref.obj2id[obj_name]
+        model_info = loaded_models_info[str(obj_id)]
+        if "symmetries_discrete" in model_info or "symmetries_continuous" in model_info:
+            sym_transforms = misc.get_symmetry_transformations(model_info, max_sym_disc_step=0.01)
+            sym_info = np.array([sym["R"] for sym in sym_transforms], dtype=np.float32)
+        else:
+            sym_info = None
+        cur_sym_infos[i] = sym_info
+
+    meta = {"thing_classes": obj_names, "sym_infos": cur_sym_infos}
+    return meta
+
+
+##########################################################################
+
+SPLITS_LMO = dict(
+    lmo_bop_test=dict(
+        name="lmo_bop_test",
+        dataset_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/test"),
+        models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/models"),
+        objs=ref.lmo_full.objects,
+        ann_file=osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/test_targets_bop19.json"),
+        scale_to_meter=0.001,
+        with_masks=True,  # (load masks but may not use it)
+        with_depth=True,  # (load depth path here, but may not use it)
+        height=480,
+        width=640,
+        cache_dir=osp.join(PROJ_ROOT, ".cache"),
+        use_cache=True,
+        num_to_load=-1,
+        filter_invalid=False,
+        ref_key="lmo_full",
+    ),
+)
+
+# single obj splits for lmo bop test
+for obj in ref.lmo_full.objects:
+    for split in [
+        "bop_test",
+    ]:
+        name = "lmo_{}_{}".format(obj, split)
+        ann_files = [
+            osp.join(
+                DATASETS_ROOT,
+                "BOP_DATASETS/lmo/image_set/{}_{}.txt".format(obj, split),
+            )
+        ]
+        if name not in SPLITS_LMO:
+            SPLITS_LMO[name] = dict(
+                name=name,
+                dataset_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/"),
+                models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/models"),
+                objs=[obj],  # only this obj
+                scale_to_meter=0.001,
+                ann_file=osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/test_targets_bop19.json"),
+                with_masks=True,  # (load masks but may not use it)
+                with_depth=True,  # (load depth path here, but may not use it)
+                height=480,
+                width=640,
+                cache_dir=osp.join(PROJ_ROOT, ".cache"),
+                use_cache=True,
+                num_to_load=-1,
+                filter_invalid=False,
+                ref_key="lmo_full",
+            )
+
+
+def register_with_name_cfg(name, data_cfg=None):
+    """Assume pre-defined datasets live in `./datasets`.
+
+    Args:
+        name: dataset_name,
+        data_cfg: if name is in existing SPLITS, use pre-defined data_cfg
+            otherwise requires data_cfg
+            data_cfg can be set in cfg.DATA_CFG.name
+    """
+    dprint("register dataset: {}".format(name))
+    if name in SPLITS_LMO:
+        used_cfg = SPLITS_LMO[name]
+    else:
+        assert data_cfg is not None, f"dataset name {name} is not registered"
+        used_cfg = data_cfg
+    DatasetCatalog.register(name, LMO_BOP_TEST_Dataset(used_cfg))
+    # something like eval_types
+    MetadataCatalog.get(name).set(
+        id="lmo",  # NOTE: for pvnet to determine module
+        ref_key=used_cfg["ref_key"],
+        objs=used_cfg["objs"],
+        eval_error_types=["ad", "rete", "proj"],
+        evaluator_type="bop",
+        **get_lmo_metadata(obj_names=used_cfg["objs"], ref_key=used_cfg["ref_key"]),
+    )
+
+
+def get_available_datasets():
+    return list(SPLITS_LMO.keys())
+
+
+#### tests ###############################################
+def test_vis():
+    dset_name = sys.argv[1]
+    assert dset_name in DatasetCatalog.list()
+
+    meta = MetadataCatalog.get(dset_name)
+    dprint("MetadataCatalog: ", meta)
+    objs = meta.objs
+
+    t_start = time.perf_counter()
+    dicts = DatasetCatalog.get(dset_name)
+    logger.info("Done loading {} samples with {:.3f}s.".format(len(dicts), time.perf_counter() - t_start))
+
+    dirname = "output/{}-data-vis".format(dset_name)
+    os.makedirs(dirname, exist_ok=True)
+    for d in dicts:
+        img = read_image_mmcv(d["file_name"], format="BGR")
+        depth = mmcv.imread(d["depth_file"], "unchanged") / 1000.0
+
+        imH, imW = img.shape[:2]
+        annos = d["annotations"]
+        masks = [cocosegm2mask(anno["segmentation"], imH, imW) for anno in annos]
+        bboxes = [anno["bbox"] for anno in annos]
+        bbox_modes = [anno["bbox_mode"] for anno in annos]
+        bboxes_xyxy = np.array(
+            [BoxMode.convert(box, box_mode, BoxMode.XYXY_ABS) for box, box_mode in zip(bboxes, bbox_modes)]
+        )
+        kpts_3d_list = [anno["bbox3d_and_center"] for anno in annos]
+        quats = [anno["quat"] for anno in annos]
+        transes = [anno["trans"] for anno in annos]
+        Rs = [quat2mat(quat) for quat in quats]
+        # 0-based label
+        cat_ids = [anno["category_id"] for anno in annos]
+        K = d["cam"]
+        kpts_2d = [misc.project_pts(kpt3d, K, R, t) for kpt3d, R, t in zip(kpts_3d_list, Rs, transes)]
+        # # TODO: visualize pose and keypoints
+        labels = [objs[cat_id] for cat_id in cat_ids]
+        for _i in range(len(annos)):
+            img_vis = vis_image_mask_bbox_cv2(
+                img,
+                masks[_i : _i + 1],
+                bboxes=bboxes_xyxy[_i : _i + 1],
+                labels=labels[_i : _i + 1],
+            )
+            img_vis_kpts2d = misc.draw_projected_box3d(img_vis.copy(), kpts_2d[_i])
+            if "test" not in dset_name.lower():
+                xyz_path = annos[_i]["xyz_path"]
+                xyz_info = mmcv.load(xyz_path)
+                x1, y1, x2, y2 = xyz_info["xyxy"]
+                xyz_crop = xyz_info["xyz_crop"].astype(np.float32)
+                xyz = np.zeros((imH, imW, 3), dtype=np.float32)
+                xyz[y1 : y2 + 1, x1 : x2 + 1, :] = xyz_crop
+                xyz_show = get_emb_show(xyz)
+                xyz_crop_show = get_emb_show(xyz_crop)
+                img_xyz = img.copy() / 255.0
+                mask_xyz = ((xyz[:, :, 0] != 0) | (xyz[:, :, 1] != 0) | (xyz[:, :, 2] != 0)).astype("uint8")
+                fg_idx = np.where(mask_xyz != 0)
+                img_xyz[fg_idx[0], fg_idx[1], :] = xyz_show[fg_idx[0], fg_idx[1], :3]
+                img_xyz_crop = img_xyz[y1 : y2 + 1, x1 : x2 + 1, :]
+                img_vis_crop = img_vis[y1 : y2 + 1, x1 : x2 + 1, :]
+                # diff mask
+                diff_mask_xyz = np.abs(masks[_i] - mask_xyz)[y1 : y2 + 1, x1 : x2 + 1]
+
+                grid_show(
+                    [
+                        img[:, :, [2, 1, 0]],
+                        img_vis[:, :, [2, 1, 0]],
+                        img_vis_kpts2d[:, :, [2, 1, 0]],
+                        depth,
+                        # xyz_show,
+                        diff_mask_xyz,
+                        xyz_crop_show,
+                        img_xyz[:, :, [2, 1, 0]],
+                        img_xyz_crop[:, :, [2, 1, 0]],
+                        img_vis_crop,
+                    ],
+                    [
+                        "img",
+                        "vis_img",
+                        "img_vis_kpts2d",
+                        "depth",
+                        "diff_mask_xyz",
+                        "xyz_crop_show",
+                        "img_xyz",
+                        "img_xyz_crop",
+                        "img_vis_crop",
+                    ],
+                    row=3,
+                    col=3,
+                )
+            else:
+                grid_show(
+                    [
+                        img[:, :, [2, 1, 0]],
+                        img_vis[:, :, [2, 1, 0]],
+                        img_vis_kpts2d[:, :, [2, 1, 0]],
+                        depth,
+                    ],
+                    ["img", "vis_img", "img_vis_kpts2d", "depth"],
+                    row=2,
+                    col=2,
+                )
+
+
+if __name__ == "__main__":
+    """Test the dataset loader.
+
+    python this_file.py dataset_name
+    """
+    from lib.vis_utils.image import grid_show
+    from lib.utils.setup_logger import setup_my_logger
+
+    import detectron2.data.datasets  # noqa # add pre-defined metadata
+    from lib.vis_utils.image import vis_image_mask_bbox_cv2
+    from core.utils.utils import get_emb_show
+    from core.utils.data_utils import read_image_mmcv
+
+    print("sys.argv:", sys.argv)
+    logger = setup_my_logger(name="core")
+    register_with_name_cfg(sys.argv[1])
+    print("dataset catalog: ", DatasetCatalog.list())
+
+    test_vis()
diff --git a/det/yolox/data/datasets/dataset_factory.py b/det/yolox/data/datasets/dataset_factory.py
index 19b268025c807f0318af43daedc01ded738bfde8..eabeef9b744c961a60821c515230a748d9f17916 100644
--- a/det/yolox/data/datasets/dataset_factory.py
+++ b/det/yolox/data/datasets/dataset_factory.py
@@ -3,8 +3,8 @@ import os.path as osp
 import mmcv
 from detectron2.data import DatasetCatalog
 from . import (
-    lm_dataset_d2,
     lm_pbr,
+    lmo_bop_test,
     ycbv_pbr,
     ycbv_d2,
     ycbv_bop_test,
@@ -34,8 +34,8 @@ __all__ = [
     "get_available_datasets",
 ]
 _DSET_MOD_NAMES = [
-    "lm_dataset_d2",
     "lm_pbr",
+    "lmo_bop_test",
     "ycbv_pbr",
     "ycbv_d2",
     "ycbv_bop_test",
diff --git a/det/yolox/data/datasets/lm_dataset_d2.py b/det/yolox/data/datasets/lm_dataset_d2.py
deleted file mode 100644
index dae93146b3afde3ce0abddcc28150ca76d21634d..0000000000000000000000000000000000000000
--- a/det/yolox/data/datasets/lm_dataset_d2.py
+++ /dev/null
@@ -1,886 +0,0 @@
-import hashlib
-import logging
-import os
-import os.path as osp
-import sys
-import time
-from collections import OrderedDict
-import mmcv
-import numpy as np
-from tqdm import tqdm
-from transforms3d.quaternions import mat2quat, quat2mat
-from detectron2.data import DatasetCatalog, MetadataCatalog
-from detectron2.structures import BoxMode
-
-cur_dir = osp.dirname(osp.abspath(__file__))
-PROJ_ROOT = osp.normpath(osp.join(cur_dir, "../../../.."))
-sys.path.insert(0, PROJ_ROOT)
-
-import ref
-
-from lib.pysixd import inout, misc
-from lib.utils.mask_utils import binary_mask_to_rle, cocosegm2mask
-from lib.utils.utils import dprint, iprint, lazy_property
-
-
-logger = logging.getLogger(__name__)
-DATASETS_ROOT = osp.normpath(osp.join(PROJ_ROOT, "datasets"))
-
-
-class LM_Dataset(object):
-    """lm splits."""
-
-    def __init__(self, data_cfg):
-        """
-        Set with_depth and with_masks default to True,
-        and decide whether to load them into dataloader/network later
-        with_masks:
-        """
-        self.name = data_cfg["name"]
-        self.data_cfg = data_cfg
-
-        self.objs = data_cfg["objs"]  # selected objects
-
-        self.ann_files = data_cfg["ann_files"]  # idx files with image ids
-        self.image_prefixes = data_cfg["image_prefixes"]
-        self.xyz_prefixes = data_cfg["xyz_prefixes"]
-
-        self.dataset_root = data_cfg["dataset_root"]  # BOP_DATASETS/lm/
-        assert osp.exists(self.dataset_root), self.dataset_root
-        self.models_root = data_cfg["models_root"]  # BOP_DATASETS/lm/models
-        self.scale_to_meter = data_cfg["scale_to_meter"]  # 0.001
-
-        self.with_masks = data_cfg["with_masks"]  # True (load masks but may not use it)
-        self.with_depth = data_cfg["with_depth"]  # True (load depth path here, but may not use it)
-
-        self.height = data_cfg["height"]  # 480
-        self.width = data_cfg["width"]  # 640
-
-        self.cache_dir = data_cfg.get("cache_dir", osp.join(PROJ_ROOT, ".cache"))  # .cache
-        self.use_cache = data_cfg.get("use_cache", True)
-        self.num_to_load = data_cfg["num_to_load"]  # -1
-        self.filter_invalid = data_cfg["filter_invalid"]
-        self.filter_scene = data_cfg.get("filter_scene", False)
-        self.debug_im_id = data_cfg.get("debug_im_id", None)
-        ##################################################
-
-        # NOTE: careful! Only the selected objects
-        self.cat_ids = [cat_id for cat_id, obj_name in ref.lm_full.id2obj.items() if obj_name in self.objs]
-        # map selected objs to [0, num_objs-1]
-        self.cat2label = {v: i for i, v in enumerate(self.cat_ids)}  # id_map
-        self.label2cat = {label: cat for cat, label in self.cat2label.items()}
-        self.obj2label = OrderedDict((obj, obj_id) for obj_id, obj in enumerate(self.objs))
-        ##########################################################
-
-    def __call__(self):  # LM_Dataset
-        """Load light-weight instance annotations of all images into a list of
-        dicts in Detectron2 format.
-
-        Do not load heavy data into memory in this file, since we will
-        load the annotations of all images into memory.
-        """
-        # cache the dataset_dicts to avoid loading masks from files
-        hashed_file_name = hashlib.md5(
-            (
-                "".join([str(fn) for fn in self.objs])
-                + "dataset_dicts_{}_{}_{}_{}_{}".format(
-                    self.name,
-                    self.dataset_root,
-                    self.with_masks,
-                    self.with_depth,
-                    __name__,
-                )
-            ).encode("utf-8")
-        ).hexdigest()
-        cache_path = osp.join(
-            self.cache_dir,
-            "dataset_dicts_{}_{}.pkl".format(self.name, hashed_file_name),
-        )
-
-        if osp.exists(cache_path) and self.use_cache:
-            logger.info("load cached dataset dicts from {}".format(cache_path))
-            return mmcv.load(cache_path)
-
-        t_start = time.perf_counter()
-
-        logger.info("loading dataset dicts: {}".format(self.name))
-        self.num_instances_without_valid_segmentation = 0
-        self.num_instances_without_valid_box = 0
-        dataset_dicts = []  # ######################################################
-        assert len(self.ann_files) == len(self.image_prefixes), f"{len(self.ann_files)} != {len(self.image_prefixes)}"
-        assert len(self.ann_files) == len(self.xyz_prefixes), f"{len(self.ann_files)} != {len(self.xyz_prefixes)}"
-        unique_im_id = 0
-        for ann_file, scene_root, xyz_root in zip(tqdm(self.ann_files), self.image_prefixes, self.xyz_prefixes):
-            # linemod each scene is an object
-            with open(ann_file, "r") as f_ann:
-                indices = [line.strip("\r\n") for line in f_ann.readlines()]  # string ids
-            gt_dict = mmcv.load(osp.join(scene_root, "scene_gt.json"))
-            gt_info_dict = mmcv.load(osp.join(scene_root, "scene_gt_info.json"))  # bbox_obj, bbox_visib
-            cam_dict = mmcv.load(osp.join(scene_root, "scene_camera.json"))
-            for im_id in tqdm(indices):
-                int_im_id = int(im_id)
-                str_im_id = str(int_im_id)
-                rgb_path = osp.join(scene_root, "rgb/{:06d}.png").format(int_im_id)
-                assert osp.exists(rgb_path), rgb_path
-
-                depth_path = osp.join(scene_root, "depth/{:06d}.png".format(int_im_id))
-
-                scene_id = int(rgb_path.split("/")[-3])
-                scene_im_id = f"{scene_id}/{int_im_id}"
-
-                if self.debug_im_id is not None:
-                    if self.debug_im_id != scene_im_id:
-                        continue
-
-                K = np.array(cam_dict[str_im_id]["cam_K"], dtype=np.float32).reshape(3, 3)
-                depth_factor = 1000.0 / cam_dict[str_im_id]["depth_scale"]
-                if self.filter_scene:
-                    if scene_id not in self.cat_ids:
-                        continue
-                record = {
-                    "dataset_name": self.name,
-                    "file_name": osp.relpath(rgb_path, PROJ_ROOT),
-                    "depth_file": osp.relpath(depth_path, PROJ_ROOT),
-                    "height": self.height,
-                    "width": self.width,
-                    "image_id": unique_im_id,
-                    "scene_im_id": scene_im_id,  # for evaluation
-                    "cam": K,
-                    "depth_factor": depth_factor,
-                    "img_type": "real",
-                }
-                unique_im_id += 1
-                insts = []
-                for anno_i, anno in enumerate(gt_dict[str_im_id]):
-                    obj_id = anno["obj_id"]
-                    if obj_id not in self.cat_ids:
-                        continue
-                    cur_label = self.cat2label[obj_id]  # 0-based label
-                    R = np.array(anno["cam_R_m2c"], dtype="float32").reshape(3, 3)
-                    t = np.array(anno["cam_t_m2c"], dtype="float32") / 1000.0
-                    pose = np.hstack([R, t.reshape(3, 1)])
-                    quat = mat2quat(R).astype("float32")
-
-                    proj = (record["cam"] @ t.T).T
-                    proj = proj[:2] / proj[2]
-
-                    bbox_visib = gt_info_dict[str_im_id][anno_i]["bbox_visib"]
-                    bbox_obj = gt_info_dict[str_im_id][anno_i]["bbox_obj"]
-                    x1, y1, w, h = bbox_visib
-                    if self.filter_invalid:
-                        if h <= 1 or w <= 1:
-                            self.num_instances_without_valid_box += 1
-                            continue
-
-                    mask_file = osp.join(
-                        scene_root,
-                        "mask/{:06d}_{:06d}.png".format(int_im_id, anno_i),
-                    )
-                    mask_visib_file = osp.join(
-                        scene_root,
-                        "mask_visib/{:06d}_{:06d}.png".format(int_im_id, anno_i),
-                    )
-                    assert osp.exists(mask_file), mask_file
-                    assert osp.exists(mask_visib_file), mask_visib_file
-                    # load mask visib
-                    mask_single = mmcv.imread(mask_visib_file, "unchanged")
-                    mask_single = mask_single.astype("bool")
-                    area = mask_single.sum()
-                    if area < 3:  # filter out too small or nearly invisible instances
-                        self.num_instances_without_valid_segmentation += 1
-                        continue
-                    mask_rle = binary_mask_to_rle(mask_single, compressed=True)
-                    # load mask full
-                    mask_full = mmcv.imread(mask_file, "unchanged")
-                    mask_full = mask_full.astype("bool")
-                    mask_full_rle = binary_mask_to_rle(mask_full, compressed=True)
-
-                    inst = {
-                        "category_id": cur_label,  # 0-based label
-                        "bbox": bbox_obj,  # TODO: load both bbox_obj and bbox_visib
-                        "bbox_mode": BoxMode.XYWH_ABS,
-                        "pose": pose,
-                        "quat": quat,
-                        "trans": t,
-                        "centroid_2d": proj,  # absolute (cx, cy)
-                        "segmentation": mask_rle,
-                        "mask_full": mask_full_rle,
-                    }
-
-                    if "test" not in self.name.lower():
-                        # if True:
-                        xyz_path = osp.join(xyz_root, f"{int_im_id:06d}_{anno_i:06d}.pkl")
-                        assert osp.exists(xyz_path), xyz_path
-                        inst["xyz_path"] = xyz_path
-
-                    model_info = self.models_info[str(obj_id)]
-                    inst["model_info"] = model_info
-                    # TODO: using full mask and full xyz
-                    for key in ["bbox3d_and_center"]:
-                        inst[key] = self.models[cur_label][key]
-                    insts.append(inst)
-                if len(insts) == 0:  # filter im without anno
-                    continue
-                record["annotations"] = insts
-                dataset_dicts.append(record)
-
-        if self.num_instances_without_valid_segmentation > 0:
-            logger.warning(
-                "Filtered out {} instances without valid segmentation. "
-                "There might be issues in your dataset generation process.".format(
-                    self.num_instances_without_valid_segmentation
-                )
-            )
-        if self.num_instances_without_valid_box > 0:
-            logger.warning(
-                "Filtered out {} instances without valid box. "
-                "There might be issues in your dataset generation process.".format(self.num_instances_without_valid_box)
-            )
-        ##########################################################################
-        if self.num_to_load > 0:
-            self.num_to_load = min(int(self.num_to_load), len(dataset_dicts))
-            dataset_dicts = dataset_dicts[: self.num_to_load]
-        logger.info("loaded {} dataset dicts, using {}s".format(len(dataset_dicts), time.perf_counter() - t_start))
-
-        mmcv.mkdir_or_exist(osp.dirname(cache_path))
-        mmcv.dump(dataset_dicts, cache_path, protocol=4)
-        logger.info("Dumped dataset_dicts to {}".format(cache_path))
-        return dataset_dicts
-
-    @lazy_property
-    def models_info(self):
-        models_info_path = osp.join(self.models_root, "models_info.json")
-        assert osp.exists(models_info_path), models_info_path
-        models_info = mmcv.load(models_info_path)  # key is str(obj_id)
-        return models_info
-
-    @lazy_property
-    def models(self):
-        """Load models into a list."""
-        cache_path = osp.join(self.cache_dir, "models_{}.pkl".format("_".join(self.objs)))
-        if osp.exists(cache_path) and self.use_cache:
-            # dprint("{}: load cached object models from {}".format(self.name, cache_path))
-            return mmcv.load(cache_path)
-
-        models = []
-        for obj_name in self.objs:
-            model = inout.load_ply(
-                osp.join(
-                    self.models_root,
-                    f"obj_{ref.lm_full.obj2id[obj_name]:06d}.ply",
-                ),
-                vertex_scale=self.scale_to_meter,
-            )
-            # NOTE: the bbox3d_and_center is not obtained from centered vertices
-            # for BOP models, not a big problem since they had been centered
-            model["bbox3d_and_center"] = misc.get_bbox3d_and_center(model["pts"])
-
-            models.append(model)
-        logger.info("cache models to {}".format(cache_path))
-        mmcv.mkdir_or_exist(osp.dirname(cache_path))
-        mmcv.dump(models, cache_path, protocol=4)
-        return models
-
-    def image_aspect_ratio(self):
-        return self.width / self.height  # 4/3
-
-
-########### register datasets ############################################################
-
-
-def get_lm_metadata(obj_names, ref_key):
-    """task specific metadata."""
-
-    data_ref = ref.__dict__[ref_key]
-
-    cur_sym_infos = {}  # label based key
-    loaded_models_info = data_ref.get_models_info()
-
-    for i, obj_name in enumerate(obj_names):
-        obj_id = data_ref.obj2id[obj_name]
-        model_info = loaded_models_info[str(obj_id)]
-        if "symmetries_discrete" in model_info or "symmetries_continuous" in model_info:
-            sym_transforms = misc.get_symmetry_transformations(model_info, max_sym_disc_step=0.01)
-            sym_info = np.array([sym["R"] for sym in sym_transforms], dtype=np.float32)
-        else:
-            sym_info = None
-        cur_sym_infos[i] = sym_info
-
-    meta = {"thing_classes": obj_names, "sym_infos": cur_sym_infos}
-    return meta
-
-
-LM_13_OBJECTS = [
-    "ape",
-    "benchvise",
-    "camera",
-    "can",
-    "cat",
-    "driller",
-    "duck",
-    "eggbox",
-    "glue",
-    "holepuncher",
-    "iron",
-    "lamp",
-    "phone",
-]  # no bowl, cup
-LM_OCC_OBJECTS = [
-    "ape",
-    "can",
-    "cat",
-    "driller",
-    "duck",
-    "eggbox",
-    "glue",
-    "holepuncher",
-]
-################################################################################
-
-SPLITS_LM = dict(
-    lm_13_train=dict(
-        name="lm_13_train",
-        dataset_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/"),
-        models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/models"),
-        objs=LM_13_OBJECTS,  # selected objects
-        ann_files=[
-            osp.join(
-                DATASETS_ROOT,
-                "BOP_DATASETS/lm/image_set/{}_{}.txt".format(_obj, "train"),
-            )
-            for _obj in LM_13_OBJECTS
-        ],
-        image_prefixes=[
-            osp.join(
-                DATASETS_ROOT,
-                "BOP_DATASETS/lm/test/{:06d}".format(ref.lm_full.obj2id[_obj]),
-            )
-            for _obj in LM_13_OBJECTS
-        ],
-        xyz_prefixes=[
-            osp.join(
-                DATASETS_ROOT,
-                "BOP_DATASETS/lm/test/xyz_crop/{:06d}".format(ref.lm_full.obj2id[_obj]),
-            )
-            for _obj in LM_13_OBJECTS
-        ],
-        scale_to_meter=0.001,
-        with_masks=True,  # (load masks but may not use it)
-        with_depth=True,  # (load depth path here, but may not use it)
-        height=480,
-        width=640,
-        cache_dir=osp.join(PROJ_ROOT, ".cache"),
-        use_cache=True,
-        num_to_load=-1,
-        filter_scene=True,
-        filter_invalid=True,
-        ref_key="lm_full",
-    ),
-    lm_13_test=dict(
-        name="lm_13_test",
-        dataset_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/"),
-        models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/models"),
-        objs=LM_13_OBJECTS,
-        ann_files=[
-            osp.join(
-                DATASETS_ROOT,
-                "BOP_DATASETS/lm/image_set/{}_{}.txt".format(_obj, "test"),
-            )
-            for _obj in LM_13_OBJECTS
-        ],
-        # NOTE: scene root
-        image_prefixes=[
-            osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/test/{:06d}").format(ref.lm_full.obj2id[_obj])
-            for _obj in LM_13_OBJECTS
-        ],
-        xyz_prefixes=[
-            osp.join(
-                DATASETS_ROOT,
-                "BOP_DATASETS/lm/test/xyz_crop/{:06d}".format(ref.lm_full.obj2id[_obj]),
-            )
-            for _obj in LM_13_OBJECTS
-        ],
-        scale_to_meter=0.001,
-        with_masks=True,  # (load masks but may not use it)
-        with_depth=True,  # (load depth path here, but may not use it)
-        height=480,
-        width=640,
-        cache_dir=osp.join(PROJ_ROOT, ".cache"),
-        use_cache=True,
-        num_to_load=-1,
-        filter_scene=True,
-        filter_invalid=False,
-        ref_key="lm_full",
-    ),
-    lmo_train=dict(
-        name="lmo_train",
-        # use lm real all (8 objects) to train for lmo
-        dataset_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/"),
-        models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/models"),
-        objs=LM_OCC_OBJECTS,  # selected objects
-        ann_files=[
-            osp.join(
-                DATASETS_ROOT,
-                "BOP_DATASETS/lm/image_set/{}_{}.txt".format(_obj, "all"),
-            )
-            for _obj in LM_OCC_OBJECTS
-        ],
-        image_prefixes=[
-            osp.join(
-                DATASETS_ROOT,
-                "BOP_DATASETS/lm/test/{:06d}".format(ref.lmo_full.obj2id[_obj]),
-            )
-            for _obj in LM_OCC_OBJECTS
-        ],
-        xyz_prefixes=[
-            osp.join(
-                DATASETS_ROOT,
-                "BOP_DATASETS/lm/test/xyz_crop/{:06d}".format(ref.lmo_full.obj2id[_obj]),
-            )
-            for _obj in LM_OCC_OBJECTS
-        ],
-        scale_to_meter=0.001,
-        with_masks=True,  # (load masks but may not use it)
-        with_depth=True,  # (load depth path here, but may not use it)
-        height=480,
-        width=640,
-        cache_dir=osp.join(PROJ_ROOT, ".cache"),
-        use_cache=True,
-        num_to_load=-1,
-        filter_scene=True,
-        filter_invalid=True,
-        ref_key="lmo_full",
-    ),
-    lmo_NoBopTest_train=dict(
-        name="lmo_NoBopTest_train",
-        dataset_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/"),
-        models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/models"),
-        objs=LM_OCC_OBJECTS,
-        ann_files=[osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/image_set/lmo_no_bop_test.txt")],
-        image_prefixes=[osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/test/{:06d}").format(2)],
-        xyz_prefixes=[
-            osp.join(
-                DATASETS_ROOT,
-                "BOP_DATASETS/lmo/test/xyz_crop/{:06d}".format(2),
-            )
-        ],
-        scale_to_meter=0.001,
-        with_masks=True,  # (load masks but may not use it)
-        with_depth=True,  # (load depth path here, but may not use it)
-        height=480,
-        width=640,
-        cache_dir=osp.join(PROJ_ROOT, ".cache"),
-        use_cache=True,
-        num_to_load=-1,
-        filter_scene=False,
-        filter_invalid=True,
-        ref_key="lmo_full",
-    ),
-    lmo_test=dict(
-        name="lmo_test",
-        dataset_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/"),
-        models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/models"),
-        objs=LM_OCC_OBJECTS,
-        ann_files=[osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/image_set/lmo_test.txt")],
-        # NOTE: scene root
-        image_prefixes=[osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/test/{:06d}").format(2)],
-        xyz_prefixes=[None],
-        scale_to_meter=0.001,
-        with_masks=True,  # (load masks but may not use it)
-        with_depth=True,  # (load depth path here, but may not use it)
-        height=480,
-        width=640,
-        cache_dir=osp.join(PROJ_ROOT, ".cache"),
-        use_cache=True,
-        num_to_load=-1,
-        filter_scene=False,
-        filter_invalid=False,
-        ref_key="lmo_full",
-    ),
-    lmo_bop_test=dict(
-        name="lmo_bop_test",
-        dataset_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/"),
-        models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/models"),
-        objs=LM_OCC_OBJECTS,
-        ann_files=[osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/image_set/lmo_bop_test.txt")],
-        # NOTE: scene root
-        image_prefixes=[osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/test/{:06d}").format(2)],
-        xyz_prefixes=[None],
-        scale_to_meter=0.001,
-        with_masks=True,  # (load masks but may not use it)
-        with_depth=True,  # (load depth path here, but may not use it)
-        height=480,
-        width=640,
-        cache_dir=osp.join(PROJ_ROOT, ".cache"),
-        use_cache=True,
-        num_to_load=-1,
-        filter_scene=False,
-        filter_invalid=False,
-        ref_key="lmo_full",
-    ),
-)
-
-# single obj splits for lm real
-for obj in ref.lm_full.objects:
-    for split in ["train", "test", "all"]:
-        name = "lm_real_{}_{}".format(obj, split)
-        ann_files = [
-            osp.join(
-                DATASETS_ROOT,
-                "BOP_DATASETS/lm/image_set/{}_{}.txt".format(obj, split),
-            )
-        ]
-        if split in ["train", "all"]:  # all is used to train lmo
-            filter_invalid = True
-        elif split in ["test"]:
-            filter_invalid = False
-        else:
-            raise ValueError("{}".format(split))
-        if name not in SPLITS_LM:
-            SPLITS_LM[name] = dict(
-                name=name,
-                dataset_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/"),
-                models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/models"),
-                objs=[obj],  # only this obj
-                ann_files=ann_files,
-                image_prefixes=[osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/test/{:06d}").format(ref.lm_full.obj2id[obj])],
-                xyz_prefixes=[
-                    osp.join(
-                        DATASETS_ROOT,
-                        "BOP_DATASETS/lm/test/xyz_crop/{:06d}".format(ref.lm_full.obj2id[obj]),
-                    )
-                ],
-                scale_to_meter=0.001,
-                with_masks=True,  # (load masks but may not use it)
-                with_depth=True,  # (load depth path here, but may not use it)
-                height=480,
-                width=640,
-                cache_dir=osp.join(PROJ_ROOT, ".cache"),
-                use_cache=True,
-                num_to_load=-1,
-                filter_invalid=filter_invalid,
-                filter_scene=True,
-                ref_key="lm_full",
-            )
-
-# single obj splits for lmo_NoBopTest_train
-for obj in ref.lmo_full.objects:
-    for split in ["train"]:
-        name = "lmo_NoBopTest_{}_{}".format(obj, split)
-        if split in ["train"]:
-            filter_invalid = True
-        else:
-            raise ValueError("{}".format(split))
-        if name not in SPLITS_LM:
-            SPLITS_LM[name] = dict(
-                name=name,
-                dataset_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/"),
-                models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/models"),
-                objs=[obj],
-                ann_files=[
-                    osp.join(
-                        DATASETS_ROOT,
-                        "BOP_DATASETS/lmo/image_set/lmo_no_bop_test.txt",
-                    )
-                ],
-                # NOTE: scene root
-                image_prefixes=[osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/test/{:06d}").format(2)],
-                xyz_prefixes=[
-                    osp.join(
-                        DATASETS_ROOT,
-                        "BOP_DATASETS/lmo/test/xyz_crop/{:06d}".format(2),
-                    )
-                ],
-                scale_to_meter=0.001,
-                with_masks=True,  # (load masks but may not use it)
-                with_depth=True,  # (load depth path here, but may not use it)
-                height=480,
-                width=640,
-                cache_dir=osp.join(PROJ_ROOT, ".cache"),
-                use_cache=True,
-                num_to_load=-1,
-                filter_scene=False,
-                filter_invalid=filter_invalid,
-                ref_key="lmo_full",
-            )
-
-# single obj splits for lmo_test
-for obj in ref.lmo_full.objects:
-    for split in ["test"]:
-        name = "lmo_{}_{}".format(obj, split)
-        if split in ["train", "all"]:  # all is used to train lmo
-            filter_invalid = True
-        elif split in ["test"]:
-            filter_invalid = False
-        else:
-            raise ValueError("{}".format(split))
-        if name not in SPLITS_LM:
-            SPLITS_LM[name] = dict(
-                name=name,
-                dataset_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/"),
-                models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/models"),
-                objs=[obj],
-                ann_files=[
-                    osp.join(
-                        DATASETS_ROOT,
-                        "BOP_DATASETS/lmo/image_set/lmo_test.txt",
-                    )
-                ],
-                # NOTE: scene root
-                image_prefixes=[osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/test/{:06d}").format(2)],
-                xyz_prefixes=[None],
-                scale_to_meter=0.001,
-                with_masks=True,  # (load masks but may not use it)
-                with_depth=True,  # (load depth path here, but may not use it)
-                height=480,
-                width=640,
-                cache_dir=osp.join(PROJ_ROOT, ".cache"),
-                use_cache=True,
-                num_to_load=-1,
-                filter_scene=False,
-                filter_invalid=False,
-                ref_key="lmo_full",
-            )
-
-# single obj splits for lmo_bop_test
-for obj in ref.lmo_full.objects:
-    for split in ["test"]:
-        name = "lmo_{}_bop_{}".format(obj, split)
-        if split in ["train", "all"]:  # all is used to train lmo
-            filter_invalid = True
-        elif split in ["test"]:
-            filter_invalid = False
-        else:
-            raise ValueError("{}".format(split))
-        if name not in SPLITS_LM:
-            SPLITS_LM[name] = dict(
-                name=name,
-                dataset_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/"),
-                models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/models"),
-                objs=[obj],
-                ann_files=[
-                    osp.join(
-                        DATASETS_ROOT,
-                        "BOP_DATASETS/lmo/image_set/lmo_bop_test.txt",
-                    )
-                ],
-                # NOTE: scene root
-                image_prefixes=[osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/test/{:06d}").format(2)],
-                xyz_prefixes=[None],
-                scale_to_meter=0.001,
-                with_masks=True,  # (load masks but may not use it)
-                with_depth=True,  # (load depth path here, but may not use it)
-                height=480,
-                width=640,
-                cache_dir=osp.join(PROJ_ROOT, ".cache"),
-                use_cache=True,
-                num_to_load=-1,
-                filter_scene=False,
-                filter_invalid=False,
-                ref_key="lmo_full",
-            )
-
-# ================ add single image dataset for debug =======================================
-debug_im_ids = {
-    "train": {obj: [] for obj in ref.lm_full.objects},
-    "test": {obj: [] for obj in ref.lm_full.objects},
-}
-for obj in ref.lm_full.objects:
-    for split in ["train", "test"]:
-        cur_ann_file = osp.join(DATASETS_ROOT, f"BOP_DATASETS/lm/image_set/{obj}_{split}.txt")
-        ann_files = [cur_ann_file]
-
-        im_ids = []
-        with open(cur_ann_file, "r") as f:
-            for line in f:
-                # scene_id(obj_id)/im_id
-                im_ids.append("{}/{}".format(ref.lm_full.obj2id[obj], int(line.strip("\r\n"))))
-
-        debug_im_ids[split][obj] = im_ids
-        for debug_im_id in debug_im_ids[split][obj]:
-            name = "lm_single_{}{}_{}".format(obj, debug_im_id.split("/")[1], split)
-            if name not in SPLITS_LM:
-                SPLITS_LM[name] = dict(
-                    name=name,
-                    dataset_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/"),
-                    models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/models"),
-                    objs=[obj],  # only this obj
-                    ann_files=ann_files,
-                    image_prefixes=[
-                        osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/test/{:06d}").format(ref.lm_full.obj2id[obj])
-                    ],
-                    xyz_prefixes=[
-                        osp.join(
-                            DATASETS_ROOT,
-                            "BOP_DATASETS/lm/test/xyz_crop/{:06d}".format(ref.lm_full.obj2id[obj]),
-                        )
-                    ],
-                    scale_to_meter=0.001,
-                    with_masks=True,  # (load masks but may not use it)
-                    with_depth=True,  # (load depth path here, but may not use it)
-                    height=480,
-                    width=640,
-                    cache_dir=osp.join(PROJ_ROOT, ".cache"),
-                    use_cache=True,
-                    num_to_load=-1,
-                    filter_invalid=False,
-                    filter_scene=True,
-                    ref_key="lm_full",
-                    debug_im_id=debug_im_id,  # NOTE: debug im id
-                )
-
-
-def register_with_name_cfg(name, data_cfg=None):
-    """Assume pre-defined datasets live in `./datasets`.
-
-    Args:
-        name: datasnet_name,
-        data_cfg: if name is in existing SPLITS, use pre-defined data_cfg
-            otherwise requires data_cfg
-            data_cfg can be set in cfg.DATA_CFG.name
-    """
-    dprint("register dataset: {}".format(name))
-    if name in SPLITS_LM:
-        used_cfg = SPLITS_LM[name]
-    else:
-        assert data_cfg is not None, f"dataset name {name} is not registered"
-        used_cfg = data_cfg
-    DatasetCatalog.register(name, LM_Dataset(used_cfg))
-    # something like eval_types
-    MetadataCatalog.get(name).set(
-        id="linemod",  # NOTE: for pvnet to determine module
-        ref_key=used_cfg["ref_key"],
-        objs=used_cfg["objs"],
-        eval_error_types=["ad", "rete", "proj"],
-        evaluator_type="bop",
-        **get_lm_metadata(obj_names=used_cfg["objs"], ref_key=used_cfg["ref_key"]),
-    )
-
-
-def get_available_datasets():
-    return list(SPLITS_LM.keys())
-
-
-#### tests ###############################################
-def test_vis():
-    dset_name = sys.argv[1]
-    assert dset_name in DatasetCatalog.list()
-
-    meta = MetadataCatalog.get(dset_name)
-    dprint("MetadataCatalog: ", meta)
-    objs = meta.objs
-
-    t_start = time.perf_counter()
-    dicts = DatasetCatalog.get(dset_name)
-    logger.info("Done loading {} samples with {:.3f}s.".format(len(dicts), time.perf_counter() - t_start))
-
-    dirname = "output/{}-data-vis".format(dset_name)
-    os.makedirs(dirname, exist_ok=True)
-    for d in dicts:
-        img = read_image_mmcv(d["file_name"], format="BGR")
-        depth = mmcv.imread(d["depth_file"], "unchanged") / 1000.0
-
-        imH, imW = img.shape[:2]
-        annos = d["annotations"]
-        masks = [cocosegm2mask(anno["segmentation"], imH, imW) for anno in annos]
-        bboxes = [anno["bbox"] for anno in annos]
-        bbox_modes = [anno["bbox_mode"] for anno in annos]
-        bboxes_xyxy = np.array(
-            [BoxMode.convert(box, box_mode, BoxMode.XYXY_ABS) for box, box_mode in zip(bboxes, bbox_modes)]
-        )
-        kpts_3d_list = [anno["bbox3d_and_center"] for anno in annos]
-        quats = [anno["quat"] for anno in annos]
-        transes = [anno["trans"] for anno in annos]
-        Rs = [quat2mat(quat) for quat in quats]
-        # 0-based label
-        cat_ids = [anno["category_id"] for anno in annos]
-        K = d["cam"]
-        kpts_2d = [misc.project_pts(kpt3d, K, R, t) for kpt3d, R, t in zip(kpts_3d_list, Rs, transes)]
-        # # TODO: visualize pose and keypoints
-        labels = [objs[cat_id] for cat_id in cat_ids]
-        for _i in range(len(annos)):
-            img_vis = vis_image_mask_bbox_cv2(
-                img,
-                masks[_i : _i + 1],
-                bboxes=bboxes_xyxy[_i : _i + 1],
-                labels=labels[_i : _i + 1],
-            )
-            img_vis_kpts2d = misc.draw_projected_box3d(img_vis.copy(), kpts_2d[_i])
-            if "test" not in dset_name.lower():
-                xyz_path = annos[_i]["xyz_path"]
-                xyz_info = mmcv.load(xyz_path)
-                x1, y1, x2, y2 = xyz_info["xyxy"]
-                xyz_crop = xyz_info["xyz_crop"].astype(np.float32)
-                xyz = np.zeros((imH, imW, 3), dtype=np.float32)
-                xyz[y1 : y2 + 1, x1 : x2 + 1, :] = xyz_crop
-                xyz_show = get_emb_show(xyz)
-                xyz_crop_show = get_emb_show(xyz_crop)
-                img_xyz = img.copy() / 255.0
-                mask_xyz = ((xyz[:, :, 0] != 0) | (xyz[:, :, 1] != 0) | (xyz[:, :, 2] != 0)).astype("uint8")
-                fg_idx = np.where(mask_xyz != 0)
-                img_xyz[fg_idx[0], fg_idx[1], :] = xyz_show[fg_idx[0], fg_idx[1], :3]
-                img_xyz_crop = img_xyz[y1 : y2 + 1, x1 : x2 + 1, :]
-                img_vis_crop = img_vis[y1 : y2 + 1, x1 : x2 + 1, :]
-                # diff mask
-                diff_mask_xyz = np.abs(masks[_i] - mask_xyz)[y1 : y2 + 1, x1 : x2 + 1]
-
-                grid_show(
-                    [
-                        img[:, :, [2, 1, 0]],
-                        img_vis[:, :, [2, 1, 0]],
-                        img_vis_kpts2d[:, :, [2, 1, 0]],
-                        depth,
-                        # xyz_show,
-                        diff_mask_xyz,
-                        xyz_crop_show,
-                        img_xyz[:, :, [2, 1, 0]],
-                        img_xyz_crop[:, :, [2, 1, 0]],
-                        img_vis_crop,
-                    ],
-                    [
-                        "img",
-                        "vis_img",
-                        "img_vis_kpts2d",
-                        "depth",
-                        "diff_mask_xyz",
-                        "xyz_crop_show",
-                        "img_xyz",
-                        "img_xyz_crop",
-                        "img_vis_crop",
-                    ],
-                    row=3,
-                    col=3,
-                )
-            else:
-                grid_show(
-                    [
-                        img[:, :, [2, 1, 0]],
-                        img_vis[:, :, [2, 1, 0]],
-                        img_vis_kpts2d[:, :, [2, 1, 0]],
-                        depth,
-                    ],
-                    ["img", "vis_img", "img_vis_kpts2d", "depth"],
-                    row=2,
-                    col=2,
-                )
-
-
-if __name__ == "__main__":
-    """Test the  dataset loader.
-
-    python this_file.py dataset_name
-    """
-    from lib.vis_utils.image import grid_show
-    from lib.utils.setup_logger import setup_my_logger
-
-    import detectron2.data.datasets  # noqa # add pre-defined metadata
-    from lib.vis_utils.image import vis_image_mask_bbox_cv2
-    from core.utils.utils import get_emb_show
-    from core.utils.data_utils import read_image_mmcv
-
-    print("sys.argv:", sys.argv)
-    logger = setup_my_logger(name="core")
-    register_with_name_cfg(sys.argv[1])
-    print("dataset catalog: ", DatasetCatalog.list())
-
-    test_vis()
diff --git a/det/yolox/data/datasets/lmo_bop_test.py b/det/yolox/data/datasets/lmo_bop_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..42695b58f47c86fa9511f8016d2db6499b00113b
--- /dev/null
+++ b/det/yolox/data/datasets/lmo_bop_test.py
@@ -0,0 +1,521 @@
+import hashlib
+import logging
+import os
+import os.path as osp
+import sys
+import time
+from collections import OrderedDict
+import mmcv
+import numpy as np
+from tqdm import tqdm
+from transforms3d.quaternions import mat2quat, quat2mat
+from detectron2.data import DatasetCatalog, MetadataCatalog
+from detectron2.structures import BoxMode
+
+cur_dir = osp.dirname(osp.abspath(__file__))
+PROJ_ROOT = osp.normpath(osp.join(cur_dir, "../../../.."))
+sys.path.insert(0, PROJ_ROOT)
+
+import ref
+
+from lib.pysixd import inout, misc
+from lib.utils.mask_utils import binary_mask_to_rle, cocosegm2mask
+from lib.utils.utils import dprint, iprint, lazy_property
+
+
+logger = logging.getLogger(__name__)
+DATASETS_ROOT = osp.normpath(osp.join(PROJ_ROOT, "datasets"))
+
+
+class LMO_BOP_TEST_Dataset(object):
+    """lmo bop test splits."""
+
+    def __init__(self, data_cfg):
+        """
+        Set with_depth and with_masks default to True,
+        and decide whether to load them into dataloader/network later
+        with_masks:
+        """
+        self.name = data_cfg["name"]
+        self.data_cfg = data_cfg
+
+        self.objs = data_cfg["objs"]  # selected objects
+
+        self.dataset_root = data_cfg.get("dataset_root", osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/test"))
+        assert osp.exists(self.dataset_root), self.dataset_root
+
+        self.ann_file = data_cfg["ann_file"]  # json file with scene_id and im_id items
+
+        self.models_root = data_cfg["models_root"]  # BOP_DATASETS/lmo/models
+        self.scale_to_meter = data_cfg["scale_to_meter"]  # 0.001
+
+        self.with_masks = data_cfg["with_masks"]
+        self.with_depth = data_cfg["with_depth"]
+
+        self.height = data_cfg["height"]  # 480
+        self.width = data_cfg["width"]  # 640
+
+        self.cache_dir = data_cfg.get("cache_dir", osp.join(PROJ_ROOT, ".cache"))  # .cache
+        self.use_cache = data_cfg.get("use_cache", True)
+        self.num_to_load = data_cfg["num_to_load"]  # -1
+        self.filter_invalid = data_cfg.get("filter_invalid", True)
+        ##################################################
+
+        # NOTE: careful! Only the selected objects
+        self.cat_ids = [cat_id for cat_id, obj_name in ref.lmo_full.id2obj.items() if obj_name in self.objs]
+        # map selected objs to [0, num_objs-1]
+        self.cat2label = {v: i for i, v in enumerate(self.cat_ids)}  # id_map
+        self.label2cat = {label: cat for cat, label in self.cat2label.items()}
+        self.obj2label = OrderedDict((obj, obj_id) for obj, obj_id in enumerate(self.objs))
+        ##########################################################
+
+    def __call__(self):
+        """Load light-weight instance annotations of all images into a list of
+        dicts in Detectron2 format.
+
+        Do not load heavy data into memory in this file, since we will
+        load the annotations of all images into memory.
+        """
+        # cache the dataset_dicts to avoid loading masks from files
+        hashed_file_name = hashlib.md5(
+            (
+                "".join([str(fn) for fn in self.objs])
+                + "dataset_dicts_{}_{}_{}_{}_{}".format(
+                    self.name,
+                    self.dataset_root,
+                    self.with_masks,
+                    self.with_depth,
+                    __name__,
+                )
+            ).encode("utf-8")
+        ).hexdigest()
+        cache_path = osp.join(self.cache_dir, "dataset_dicts_{}_{}.pkl".format(self.name, hashed_file_name))
+
+        if osp.exists(cache_path) and self.use_cache:
+            logger.info("load cached dataset dicts from {}".format(cache_path))
+            return mmcv.load(cache_path)
+
+        t_start = time.perf_counter()
+
+        logger.info("loading dataset dicts: {}".format(self.name))
+        self.num_instances_without_valid_segmentation = 0
+        self.num_instances_without_valid_box = 0
+        dataset_dicts = []  # ######################################################
+        # it is slow because of loading and converting masks to rle
+        targets = mmcv.load(self.ann_file)
+
+        scene_im_ids = [(item["scene_id"], item["im_id"]) for item in targets]
+        scene_im_ids = sorted(list(set(scene_im_ids)))
+
+        # load infos for each scene
+        gt_dicts = {}
+        gt_info_dicts = {}
+        cam_dicts = {}
+        for scene_id, im_id in scene_im_ids:
+            scene_root = osp.join(self.dataset_root, f"{scene_id:06d}")
+            if scene_id not in gt_dicts:
+                gt_dicts[scene_id] = mmcv.load(osp.join(scene_root, "scene_gt.json"))
+            if scene_id not in gt_info_dicts:
+                gt_info_dicts[scene_id] = mmcv.load(osp.join(scene_root, "scene_gt_info.json"))  # bbox_obj, bbox_visib
+            if scene_id not in cam_dicts:
+                cam_dicts[scene_id] = mmcv.load(osp.join(scene_root, "scene_camera.json"))
+
+        for scene_id, int_im_id in tqdm(scene_im_ids):
+            str_im_id = str(int_im_id)
+            scene_root = osp.join(self.dataset_root, f"{scene_id:06d}")
+
+            gt_dict = gt_dicts[scene_id]
+            gt_info_dict = gt_info_dicts[scene_id]
+            cam_dict = cam_dicts[scene_id]
+
+            rgb_path = osp.join(scene_root, "rgb/{:06d}.png").format(int_im_id)
+            assert osp.exists(rgb_path), rgb_path
+
+            depth_path = osp.join(scene_root, "depth/{:06d}.png".format(int_im_id))
+
+            scene_im_id = f"{scene_id}/{int_im_id}"
+
+            K = np.array(cam_dict[str_im_id]["cam_K"], dtype=np.float32).reshape(3, 3)
+            depth_factor = 1000.0 / cam_dict[str_im_id]["depth_scale"]  # 10000
+
+            record = {
+                "dataset_name": self.name,
+                "file_name": osp.relpath(rgb_path, PROJ_ROOT),
+                "depth_file": osp.relpath(depth_path, PROJ_ROOT),
+                "height": self.height,
+                "width": self.width,
+                "image_id": int_im_id,
+                "scene_im_id": scene_im_id,  # for evaluation
+                "cam": K,
+                "depth_factor": depth_factor,
+                "img_type": "real",  # NOTE: has background
+            }
+            insts = []
+            for anno_i, anno in enumerate(gt_dict[str_im_id]):
+                obj_id = anno["obj_id"]
+                if obj_id not in self.cat_ids:
+                    continue
+                cur_label = self.cat2label[obj_id]  # 0-based label
+                R = np.array(anno["cam_R_m2c"], dtype="float32").reshape(3, 3)
+                t = np.array(anno["cam_t_m2c"], dtype="float32") / 1000.0
+                pose = np.hstack([R, t.reshape(3, 1)])
+                quat = mat2quat(R).astype("float32")
+
+                proj = (record["cam"] @ t.T).T
+                proj = proj[:2] / proj[2]
+
+                bbox_visib = gt_info_dict[str_im_id][anno_i]["bbox_visib"]
+                bbox_obj = gt_info_dict[str_im_id][anno_i]["bbox_obj"]
+                x1, y1, w, h = bbox_visib
+                if self.filter_invalid:
+                    if h <= 1 or w <= 1:
+                        self.num_instances_without_valid_box += 1
+                        continue
+
+                mask_file = osp.join(
+                    scene_root,
+                    "mask/{:06d}_{:06d}.png".format(int_im_id, anno_i),
+                )
+                mask_visib_file = osp.join(
+                    scene_root,
+                    "mask_visib/{:06d}_{:06d}.png".format(int_im_id, anno_i),
+                )
+                assert osp.exists(mask_file), mask_file
+                assert osp.exists(mask_visib_file), mask_visib_file
+                # load mask visib
+                mask_single = mmcv.imread(mask_visib_file, "unchanged")
+                mask_single = mask_single.astype("bool")
+                area = mask_single.sum()
+                if area < 3:  # NOTE: tiny/near-invisible instances are only counted here, not skipped (no continue)
+                    self.num_instances_without_valid_segmentation += 1
+                mask_rle = binary_mask_to_rle(mask_single, compressed=True)
+
+                # load mask full
+                mask_full = mmcv.imread(mask_file, "unchanged")
+                mask_full = mask_full.astype("bool")
+                mask_full_rle = binary_mask_to_rle(mask_full, compressed=True)
+
+                visib_fract = gt_info_dict[str_im_id][anno_i].get("visib_fract", 1.0)
+
+                inst = {
+                    "category_id": cur_label,  # 0-based label
+                    "bbox": bbox_visib,
+                    "bbox_obj": bbox_obj,
+                    "bbox_mode": BoxMode.XYWH_ABS,
+                    "pose": pose,
+                    "quat": quat,
+                    "trans": t,
+                    "centroid_2d": proj,  # absolute (cx, cy)
+                    "segmentation": mask_rle,
+                    "mask_full": mask_full_rle,
+                    "visib_fract": visib_fract,
+                    "xyz_path": None,  #  no need for test
+                }
+
+                model_info = self.models_info[str(obj_id)]
+                inst["model_info"] = model_info
+                for key in ["bbox3d_and_center"]:
+                    inst[key] = self.models[cur_label][key]
+                insts.append(inst)
+            if len(insts) == 0:  # filter im without anno
+                continue
+            record["annotations"] = insts
+            dataset_dicts.append(record)
+
+        if self.num_instances_without_valid_segmentation > 0:
+            logger.warning(
+                "There are {} instances without valid segmentation. "
+                "There might be issues in your dataset generation process.".format(
+                    self.num_instances_without_valid_segmentation
+                )
+            )
+        if self.num_instances_without_valid_box > 0:
+            logger.warning(
+                "There are {} instances without valid box. "
+                "There might be issues in your dataset generation process.".format(self.num_instances_without_valid_box)
+            )
+        ##########################################################################
+        if self.num_to_load > 0:
+            self.num_to_load = min(int(self.num_to_load), len(dataset_dicts))
+            dataset_dicts = dataset_dicts[: self.num_to_load]
+        logger.info("loaded {} dataset dicts, using {}s".format(len(dataset_dicts), time.perf_counter() - t_start))
+
+        mmcv.mkdir_or_exist(osp.dirname(cache_path))
+        mmcv.dump(dataset_dicts, cache_path, protocol=4)
+        logger.info("Dumped dataset_dicts to {}".format(cache_path))
+        return dataset_dicts
+
+    @lazy_property
+    def models_info(self):
+        models_info_path = osp.join(self.models_root, "models_info.json")
+        assert osp.exists(models_info_path), models_info_path
+        models_info = mmcv.load(models_info_path)  # key is str(obj_id)
+        return models_info
+
+    @lazy_property
+    def models(self):
+        """Load models into a list."""
+        cache_path = osp.join(self.cache_dir, "models_{}.pkl".format("_".join(self.objs)))
+        if osp.exists(cache_path) and self.use_cache:
+            # dprint("{}: load cached object models from {}".format(self.name, cache_path))
+            return mmcv.load(cache_path)
+
+        models = []
+        for obj_name in self.objs:
+            model = inout.load_ply(
+                osp.join(
+                    self.models_root,
+                    f"obj_{ref.lmo_full.obj2id[obj_name]:06d}.ply",
+                ),
+                vertex_scale=self.scale_to_meter,
+            )
+            # NOTE: the bbox3d_and_center is not obtained from centered vertices
+            # for BOP models, not a big problem since they had been centered
+            model["bbox3d_and_center"] = misc.get_bbox3d_and_center(model["pts"])
+
+            models.append(model)
+        logger.info("cache models to {}".format(cache_path))
+        mmcv.mkdir_or_exist(osp.dirname(cache_path))
+        mmcv.dump(models, cache_path, protocol=4)
+        return models
+
+    def __len__(self):
+        return self.num_to_load
+
+    def image_aspect_ratio(self):
+        return self.width / self.height  # 4/3
+
+
+########### register datasets ############################################################
+
+
+def get_lmo_metadata(obj_names, ref_key):
+    """task specific metadata."""
+
+    data_ref = ref.__dict__[ref_key]
+
+    cur_sym_infos = {}  # label based key
+    loaded_models_info = data_ref.get_models_info()
+
+    for i, obj_name in enumerate(obj_names):
+        obj_id = data_ref.obj2id[obj_name]
+        model_info = loaded_models_info[str(obj_id)]
+        if "symmetries_discrete" in model_info or "symmetries_continuous" in model_info:
+            sym_transforms = misc.get_symmetry_transformations(model_info, max_sym_disc_step=0.01)
+            sym_info = np.array([sym["R"] for sym in sym_transforms], dtype=np.float32)
+        else:
+            sym_info = None
+        cur_sym_infos[i] = sym_info
+
+    meta = {"thing_classes": obj_names, "sym_infos": cur_sym_infos}
+    return meta
+
+
+##########################################################################
+
+SPLITS_LMO = dict(
+    lmo_bop_test=dict(
+        name="lmo_bop_test",
+        dataset_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/test"),
+        models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/models"),
+        objs=ref.lmo_full.objects,
+        ann_file=osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/test_targets_bop19.json"),
+        scale_to_meter=0.001,
+        with_masks=True,  # (load masks but may not use it)
+        with_depth=True,  # (load depth path here, but may not use it)
+        height=480,
+        width=640,
+        cache_dir=osp.join(PROJ_ROOT, ".cache"),
+        use_cache=True,
+        num_to_load=-1,
+        filter_invalid=False,
+        ref_key="lmo_full",
+    ),
+)
+
+# single obj splits for lmo bop test
+for obj in ref.lmo_full.objects:
+    for split in [
+        "bop_test",
+    ]:
+        name = "lmo_{}_{}".format(obj, split)
+        ann_files = [
+            osp.join(
+                DATASETS_ROOT,
+                "BOP_DATASETS/lmo/image_set/{}_{}.txt".format(obj, split),
+            )
+        ]
+        if name not in SPLITS_LMO:
+            SPLITS_LMO[name] = dict(
+                name=name,
+                dataset_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/"),
+                models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/models"),
+                objs=[obj],  # only this obj
+                scale_to_meter=0.001,
+                ann_file=osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/test_targets_bop19.json"),
+                with_masks=True,  # (load masks but may not use it)
+                with_depth=True,  # (load depth path here, but may not use it)
+                height=480,
+                width=640,
+                cache_dir=osp.join(PROJ_ROOT, ".cache"),
+                use_cache=True,
+                num_to_load=-1,
+                filter_invalid=False,
+                ref_key="lmo_full",
+            )
+
+
+def register_with_name_cfg(name, data_cfg=None):
+    """Assume pre-defined datasets live in `./datasets`.
+
+    Args:
+        name: dataset_name,
+        data_cfg: if name is in existing SPLITS, use pre-defined data_cfg
+            otherwise requires data_cfg
+            data_cfg can be set in cfg.DATA_CFG.name
+    """
+    dprint("register dataset: {}".format(name))
+    if name in SPLITS_LMO:
+        used_cfg = SPLITS_LMO[name]
+    else:
+        assert data_cfg is not None, f"dataset name {name} is not registered"
+        used_cfg = data_cfg
+    DatasetCatalog.register(name, LMO_BOP_TEST_Dataset(used_cfg))
+    # something like eval_types
+    MetadataCatalog.get(name).set(
+        id="lmo",  # NOTE: for pvnet to determine module
+        ref_key=used_cfg["ref_key"],
+        objs=used_cfg["objs"],
+        eval_error_types=["ad", "rete", "proj"],
+        evaluator_type="bop",
+        **get_lmo_metadata(obj_names=used_cfg["objs"], ref_key=used_cfg["ref_key"]),
+    )
+
+
+def get_available_datasets():
+    return list(SPLITS_LMO.keys())
+
+
+#### tests ###############################################
+def test_vis():
+    dset_name = sys.argv[1]
+    assert dset_name in DatasetCatalog.list()
+
+    meta = MetadataCatalog.get(dset_name)
+    dprint("MetadataCatalog: ", meta)
+    objs = meta.objs
+
+    t_start = time.perf_counter()
+    dicts = DatasetCatalog.get(dset_name)
+    logger.info("Done loading {} samples with {:.3f}s.".format(len(dicts), time.perf_counter() - t_start))
+
+    dirname = "output/{}-data-vis".format(dset_name)
+    os.makedirs(dirname, exist_ok=True)
+    for d in dicts:
+        img = read_image_mmcv(d["file_name"], format="BGR")
+        depth = mmcv.imread(d["depth_file"], "unchanged") / 1000.0
+
+        imH, imW = img.shape[:2]
+        annos = d["annotations"]
+        masks = [cocosegm2mask(anno["segmentation"], imH, imW) for anno in annos]
+        bboxes = [anno["bbox"] for anno in annos]
+        bbox_modes = [anno["bbox_mode"] for anno in annos]
+        bboxes_xyxy = np.array(
+            [BoxMode.convert(box, box_mode, BoxMode.XYXY_ABS) for box, box_mode in zip(bboxes, bbox_modes)]
+        )
+        kpts_3d_list = [anno["bbox3d_and_center"] for anno in annos]
+        quats = [anno["quat"] for anno in annos]
+        transes = [anno["trans"] for anno in annos]
+        Rs = [quat2mat(quat) for quat in quats]
+        # 0-based label
+        cat_ids = [anno["category_id"] for anno in annos]
+        K = d["cam"]
+        kpts_2d = [misc.project_pts(kpt3d, K, R, t) for kpt3d, R, t in zip(kpts_3d_list, Rs, transes)]
+        # # TODO: visualize pose and keypoints
+        labels = [objs[cat_id] for cat_id in cat_ids]
+        for _i in range(len(annos)):
+            img_vis = vis_image_mask_bbox_cv2(
+                img,
+                masks[_i : _i + 1],
+                bboxes=bboxes_xyxy[_i : _i + 1],
+                labels=labels[_i : _i + 1],
+            )
+            img_vis_kpts2d = misc.draw_projected_box3d(img_vis.copy(), kpts_2d[_i])
+            if "test" not in dset_name.lower():
+                xyz_path = annos[_i]["xyz_path"]
+                xyz_info = mmcv.load(xyz_path)
+                x1, y1, x2, y2 = xyz_info["xyxy"]
+                xyz_crop = xyz_info["xyz_crop"].astype(np.float32)
+                xyz = np.zeros((imH, imW, 3), dtype=np.float32)
+                xyz[y1 : y2 + 1, x1 : x2 + 1, :] = xyz_crop
+                xyz_show = get_emb_show(xyz)
+                xyz_crop_show = get_emb_show(xyz_crop)
+                img_xyz = img.copy() / 255.0
+                mask_xyz = ((xyz[:, :, 0] != 0) | (xyz[:, :, 1] != 0) | (xyz[:, :, 2] != 0)).astype("uint8")
+                fg_idx = np.where(mask_xyz != 0)
+                img_xyz[fg_idx[0], fg_idx[1], :] = xyz_show[fg_idx[0], fg_idx[1], :3]
+                img_xyz_crop = img_xyz[y1 : y2 + 1, x1 : x2 + 1, :]
+                img_vis_crop = img_vis[y1 : y2 + 1, x1 : x2 + 1, :]
+                # diff mask
+                diff_mask_xyz = np.abs(masks[_i] - mask_xyz)[y1 : y2 + 1, x1 : x2 + 1]
+
+                grid_show(
+                    [
+                        img[:, :, [2, 1, 0]],
+                        img_vis[:, :, [2, 1, 0]],
+                        img_vis_kpts2d[:, :, [2, 1, 0]],
+                        depth,
+                        # xyz_show,
+                        diff_mask_xyz,
+                        xyz_crop_show,
+                        img_xyz[:, :, [2, 1, 0]],
+                        img_xyz_crop[:, :, [2, 1, 0]],
+                        img_vis_crop,
+                    ],
+                    [
+                        "img",
+                        "vis_img",
+                        "img_vis_kpts2d",
+                        "depth",
+                        "diff_mask_xyz",
+                        "xyz_crop_show",
+                        "img_xyz",
+                        "img_xyz_crop",
+                        "img_vis_crop",
+                    ],
+                    row=3,
+                    col=3,
+                )
+            else:
+                grid_show(
+                    [
+                        img[:, :, [2, 1, 0]],
+                        img_vis[:, :, [2, 1, 0]],
+                        img_vis_kpts2d[:, :, [2, 1, 0]],
+                        depth,
+                    ],
+                    ["img", "vis_img", "img_vis_kpts2d", "depth"],
+                    row=2,
+                    col=2,
+                )
+
+
+if __name__ == "__main__":
+    """Test the  dataset loader.
+
+    python this_file.py dataset_name
+    """
+    from lib.vis_utils.image import grid_show
+    from lib.utils.setup_logger import setup_my_logger
+
+    import detectron2.data.datasets  # noqa # add pre-defined metadata
+    from lib.vis_utils.image import vis_image_mask_bbox_cv2
+    from core.utils.utils import get_emb_show
+    from core.utils.data_utils import read_image_mmcv
+
+    print("sys.argv:", sys.argv)
+    logger = setup_my_logger(name="core")
+    register_with_name_cfg(sys.argv[1])
+    print("dataset catalog: ", DatasetCatalog.list())
+
+    test_vis()