Commit 25ceb527 authored by liuxingyu
remove useless files

parent bae23a6b
@@ -8,12 +8,8 @@ import detectron2.utils.comm as comm
 import ref
 from detectron2.data import DatasetCatalog, MetadataCatalog
 from core.gdrn_modeling.datasets import (
-    lm_syn_imgn,
     lm_dataset_d2,
-    lm_syn_egl,
     lm_pbr,
-    lm_blender,
-    lm_dataset_crop_d2,
     ycbv_pbr,
     ycbv_d2,
     ycbv_bop_test,
@@ -22,7 +18,6 @@ from core.gdrn_modeling.datasets import (
     hb_bop_test,
     hb_bench_driller_phone_d2,
     duck_frames,
-    lm_new_duck_pbr,
     tudl_pbr,
     tudl_d2,
     tudl_bop_test,

core/gdrn_modeling/datasets/lm_blender.py (deleted)
import hashlib
import logging
import os
import os.path as osp
import sys
cur_dir = osp.dirname(osp.abspath(__file__))
PROJ_ROOT = osp.normpath(osp.join(cur_dir, "../../.."))
sys.path.insert(0, PROJ_ROOT)
import time
from collections import OrderedDict
import mmcv
import numpy as np
from tqdm import tqdm
from transforms3d.quaternions import mat2quat, quat2mat
import random
import ref
from detectron2.data import DatasetCatalog, MetadataCatalog
from detectron2.structures import BoxMode
from lib.pysixd import inout, misc
from lib.utils.mask_utils import (
binary_mask_to_rle,
cocosegm2mask,
mask2bbox_xywh,
)
from lib.utils.utils import dprint, iprint, lazy_property
logger = logging.getLogger(__name__)
DATASETS_ROOT = osp.normpath(osp.join(PROJ_ROOT, "datasets"))
class LM_BLENDER_Dataset(object):
"""lm blender data, from pvnet-rendering."""
def __init__(self, data_cfg):
"""
Set with_depth and with_masks default to True,
and decide whether to load them into dataloader/network later
with_masks:
"""
self.name = data_cfg["name"]
self.data_cfg = data_cfg
self.objs = data_cfg["objs"] # selected objects
self.ann_files = data_cfg["ann_files"] # json files with image ids and pose/bbox
self.image_prefixes = data_cfg["image_prefixes"]
self.dataset_root = data_cfg["dataset_root"] # lm_renders_blender/
self.models_root = data_cfg["models_root"] # BOP_DATASETS/lm/models
self.scale_to_meter = data_cfg["scale_to_meter"] # 0.001
self.with_masks = data_cfg["with_masks"] # True (load masks but may not use it)
self.with_depth = data_cfg["with_depth"] # True (load depth path here, but may not use it)
self.with_xyz = data_cfg["with_xyz"]
self.depth_factor = data_cfg["depth_factor"] # 1000.0
self.cam = data_cfg["cam"] #
self.height = data_cfg["height"] # 480
self.width = data_cfg["width"] # 640
self.cache_dir = data_cfg["cache_dir"] # .cache
self.use_cache = data_cfg["use_cache"] # True
# sample uniformly to get n items
self.n_per_obj = data_cfg.get("n_per_obj", 10000)
self.filter_invalid = data_cfg["filter_invalid"]
##################################################
if self.cam is None:
self.cam = np.array([[572.4114, 0, 325.2611], [0, 573.57043, 242.04899], [0, 0, 1]])
# NOTE: careful! Only the selected objects
self.cat_ids = [cat_id for cat_id, obj_name in ref.lm_full.id2obj.items() if obj_name in self.objs]
# map selected objs to [0, num_objs-1]
self.cat2label = {v: i for i, v in enumerate(self.cat_ids)} # id_map
self.label2cat = {label: cat for cat, label in self.cat2label.items()}
self.obj2label = OrderedDict((obj, obj_id) for obj_id, obj in enumerate(self.objs))
##########################################################
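# Illustration (hypothetical, assuming objs=["ape", "duck"] with LM ids 1 and 9):
#   cat_ids   -> [1, 9]
#   cat2label -> {1: 0, 9: 1}  # dataset id -> contiguous 0-based label
#   label2cat -> {0: 1, 1: 9}
#   obj2label -> OrderedDict([("ape", 0), ("duck", 1)])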
def __call__(self): # LM_BLENDER
"""Load light-weight instance annotations of all images into a list of
dicts in Detectron2 format.
Do not load heavy data into memory in this file, since we will
load the annotations of all images into memory.
"""
# cache the dataset_dicts to avoid loading masks from files
hashed_file_name = hashlib.md5(
(
"".join([str(fn) for fn in self.objs])
+ "dataset_dicts_{}_{}_{}_{}_{}_{}_{}".format(
self.name,
self.dataset_root,
self.with_masks,
self.with_depth,
self.with_xyz,
self.n_per_obj,
__name__,
)
).encode("utf-8")
).hexdigest()
cache_path = osp.join(
self.cache_dir,
"dataset_dicts_{}_{}.pkl".format(self.name, hashed_file_name),
)
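# The md5 digest covers every config field that affects the dicts (objs, name,
# roots, with_* flags, n_per_obj), so changing any of them yields a fresh cache
# file instead of silently reusing a stale one.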
if osp.exists(cache_path) and self.use_cache:
logger.info("load cached dataset dicts from {}".format(cache_path))
return mmcv.load(cache_path)
t_start = time.perf_counter()
logger.info("loading dataset dicts: {}".format(self.name))
self.num_instances_without_valid_segmentation = 0
self.num_instances_without_valid_box = 0
dataset_dicts = [] #######################################################
assert len(self.ann_files) == len(self.image_prefixes), f"{len(self.ann_files)} != {len(self.image_prefixes)}"
for ann_file, scene_root in zip(tqdm(self.ann_files), self.image_prefixes):
# each scene is an object
assert osp.exists(ann_file), ann_file
scene_gt_dict = mmcv.load(ann_file)
# sample uniformly (equal space)
indices = list(scene_gt_dict.keys())
if self.n_per_obj > 0:
sample_num = min(self.n_per_obj, len(scene_gt_dict))
sel_indices_idx = np.linspace(0, len(scene_gt_dict) - 1, sample_num, dtype=np.int32)
sel_indices = [indices[int(_i)] for _i in sel_indices_idx]
else:
sel_indices = indices
for str_im_id in tqdm(sel_indices):
int_im_id = int(str_im_id)
rgb_path = osp.join(scene_root, "{}.jpg").format(str_im_id)
assert osp.exists(rgb_path), rgb_path
depth_path = osp.join(scene_root, "{}_depth_opengl.png".format(str_im_id))
obj_name = osp.basename(ann_file).split("_")[0] # obj_gt.json
obj_id = ref.lm_full.obj2id[obj_name]
if obj_name not in self.objs:
continue
record = {
"dataset_name": self.name,
"file_name": osp.relpath(rgb_path, PROJ_ROOT),
"depth_file": osp.relpath(depth_path, PROJ_ROOT),
"height": self.height,
"width": self.width,
"image_id": int_im_id,
"scene_im_id": f"{obj_id}/{int_im_id}",
"cam": self.cam,
"img_type": "syn_blender", # has bg
}
cur_label = self.obj2label[obj_name] # 0-based label
anno = scene_gt_dict[str_im_id][0] # only one object
R = np.array(anno["cam_R_m2c"]).reshape(3, 3)
t = np.array(anno["cam_t_m2c"]).reshape(-1) / 1000
pose = np.hstack([R, t.reshape(3, 1)])
quat = mat2quat(R).astype("float32")
proj = (record["cam"] @ t.T).T
proj = proj[:2] / proj[2]
bbox_visib = anno["bbox_visib"]
x1, y1, w, h = bbox_visib
cx, cy = proj
crop_x1 = round(np.clip(cx - 64, 0, self.width - 1))
crop_x2 = round(np.clip(cx + 64, 0, self.width - 1))
crop_y1 = round(np.clip(cy - 64, 0, self.height - 1))
crop_y2 = round(np.clip(cy + 64, 0, self.height - 1))
# convert to xywh
crop_w = crop_x2 - crop_x1
crop_h = crop_y2 - crop_y1
bbox_128 = [crop_x1, crop_y1, crop_w, crop_h]
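# bbox_128 is a fixed 128x128 window centered on the projected 3D centroid
# (cx, cy); after clipping to the image border, crop_w/crop_h may be < 128.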
if self.filter_invalid:
if h <= 1 or w <= 1:
self.num_instances_without_valid_box += 1
continue
mask_path = osp.join(scene_root, "{}_mask_opengl.png".format(str_im_id))
mask = mmcv.imread(mask_path, "unchanged")
mask = (mask > 0).astype(np.uint8)
area = mask.sum()
if area < 3: # filter out too small or nearly invisible instances
self.num_instances_without_valid_segmentation += 1
continue
mask_rle = binary_mask_to_rle(mask, compressed=True)
xyz_path = osp.join(scene_root, "{}_xyz_bop.pkl".format(str_im_id))
assert osp.exists(xyz_path), xyz_path
visib_fract = anno.get("visib_fract", 1.0)
inst = {
"category_id": cur_label, # 0-based label
"bbox": bbox_visib, # TODO: load both bbox_obj and bbox_visib
"bbox_mode": BoxMode.XYWH_ABS,
"bbox_crop": bbox_128,
"pose": pose,
"quat": quat,
"trans": t,
"centroid_2d": proj, # absolute (cx, cy)
"segmentation": mask_rle,
"xyz_path": xyz_path,
"visib_fract": visib_fract,
"mask_full": mask_rle, # NOTE!
}
model_info = self.models_info[str(obj_id)]
inst["model_info"] = model_info
for key in ["bbox3d_and_center"]:
inst[key] = self.models[cur_label][key]
record["annotations"] = [inst]
dataset_dicts.append(record)
if self.num_instances_without_valid_segmentation > 0:
logger.warning(
"Filtered out {} instances without valid segmentation. "
"There might be issues in your dataset generation process.".format(
self.num_instances_without_valid_segmentation
)
)
if self.num_instances_without_valid_box > 0:
logger.warning(
"Filtered out {} instances without valid box. "
"There might be issues in your dataset generation process.".format(self.num_instances_without_valid_box)
)
##########################################################################
# if self.num_to_load > 0:
# self.num_to_load = min(int(self.num_to_load), len(dataset_dicts))
# random.shuffle(dataset_dicts)
# dataset_dicts = dataset_dicts[: self.num_to_load]
logger.info(
"loaded dataset dicts, num_images: {}, using {}s".format(len(dataset_dicts), time.perf_counter() - t_start)
)
mmcv.mkdir_or_exist(osp.dirname(cache_path))
mmcv.dump(dataset_dicts, cache_path, protocol=4)
logger.info("Dumped dataset_dicts to {}".format(cache_path))
return dataset_dicts
@lazy_property
def models_info(self):
models_info_path = osp.join(self.models_root, "models_info.json")
assert osp.exists(models_info_path), models_info_path
models_info = mmcv.load(models_info_path) # key is str(obj_id)
return models_info
@lazy_property
def models(self):
"""Load models into a list."""
cache_path = osp.join(self.models_root, "models_{}.pkl".format("_".join(self.objs)))
if osp.exists(cache_path) and self.use_cache:
# dprint("{}: load cached object models from {}".format(self.name, cache_path))
return mmcv.load(cache_path)
models = []
for obj_name in self.objs:
model = inout.load_ply(
osp.join(
self.models_root,
f"obj_{ref.lm_full.obj2id[obj_name]:06d}.ply",
),
vertex_scale=self.scale_to_meter,
)
# NOTE: bbox3d_and_center is computed from the raw (not re-centered) vertices;
# fine for BOP models, whose vertices are already centered
model["bbox3d_and_center"] = misc.get_bbox3d_and_center(model["pts"])
models.append(model)
logger.info("cache models to {}".format(cache_path))
mmcv.dump(models, cache_path, protocol=4)
return models
def image_aspect_ratio(self):
return self.width / self.height # 4/3
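# Usage sketch (split names come from SPLITS_LM_BLENDER defined below):
#   dset = LM_BLENDER_Dataset(SPLITS_LM_BLENDER["lm_blender_13_train"])
#   dicts = dset()           # list of per-image dicts, cached under .cache/
#   dicts[0]["annotations"]  # single-instance list with pose/quat/trans/...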
########### register datasets ############################################################
def get_lm_metadata(obj_names, ref_key):
"""task specific metadata."""
data_ref = ref.__dict__[ref_key]
cur_sym_infos = {} # label based key
loaded_models_info = data_ref.get_models_info()
for i, obj_name in enumerate(obj_names):
obj_id = data_ref.obj2id[obj_name]
model_info = loaded_models_info[str(obj_id)]
if "symmetries_discrete" in model_info or "symmetries_continuous" in model_info:
sym_transforms = misc.get_symmetry_transformations(model_info, max_sym_disc_step=0.01)
sym_info = np.array([sym["R"] for sym in sym_transforms], dtype=np.float32)
else:
sym_info = None
cur_sym_infos[i] = sym_info
meta = {"thing_classes": obj_names, "sym_infos": cur_sym_infos}
return meta
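# sym_infos maps each 0-based label to an (N, 3, 3) float32 array of symmetry
# rotations, or None for asymmetric objects (in LM, typically only eggbox and
# glue carry symmetry annotations).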
LM_13_OBJECTS = [
"ape",
"benchvise",
"camera",
"can",
"cat",
"driller",
"duck",
"eggbox",
"glue",
"holepuncher",
"iron",
"lamp",
"phone",
] # no bowl, cup
LM_OCC_OBJECTS = [
"ape",
"can",
"cat",
"driller",
"duck",
"eggbox",
"glue",
"holepuncher",
]
################################################################################
SPLITS_LM_BLENDER = dict(
lm_blender_13_train=dict(
name="lm_blender_13_train", # BB8 training set
dataset_root=osp.join(DATASETS_ROOT, "lm_renders_blender/"),
models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/models"),
objs=LM_13_OBJECTS, # selected objects
ann_files=[
osp.join(
DATASETS_ROOT,
"lm_renders_blender/renders/{}_gt.json".format(_obj),
)
for _obj in LM_13_OBJECTS
],
image_prefixes=[
osp.join(DATASETS_ROOT, "lm_renders_blender/renders/{}".format(_obj)) for _obj in LM_13_OBJECTS
],
scale_to_meter=0.001,
with_masks=True, # (load masks but may not use it)
with_depth=True, # (load depth path here, but may not use it)
with_xyz=True,
depth_factor=1000.0,
cam=ref.lm_full.camera_matrix,
height=480,
width=640,
cache_dir=osp.join(PROJ_ROOT, ".cache"),
use_cache=True,
n_per_obj=-1, # num per class, -1 for all 10k
filter_invalid=False,
ref_key="lm_full",
),
lmo_blender_train=dict(
name="lmo_blender_train",
dataset_root=osp.join(DATASETS_ROOT, "lm_renders_blender/"),
models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/models"),
objs=LM_OCC_OBJECTS, # selected objects
ann_files=[
osp.join(
DATASETS_ROOT,
"lm_renders_blender/renders/{}_gt.json".format(_obj),
)
for _obj in LM_OCC_OBJECTS
],
image_prefixes=[
osp.join(DATASETS_ROOT, "lm_renders_blender/renders/{}".format(_obj)) for _obj in LM_OCC_OBJECTS
],
scale_to_meter=0.001,
with_masks=True, # (load masks but may not use it)
with_depth=True, # (load depth path here, but may not use it)
with_xyz=True,
depth_factor=1000.0,
cam=ref.lmo_full.camera_matrix,
height=480,
width=640,
cache_dir=osp.join(PROJ_ROOT, ".cache"),
use_cache=True,
n_per_obj=-1, # n per class, -1 for all 10k
filter_invalid=False,
ref_key="lmo_full",
),
)
# single obj splits
for obj in ref.lm_full.objects:
for split in ["train"]:
for name_prefix in ["lm", "lmo"]:
name = "{}_blender_{}_{}".format(name_prefix, obj, split)
ref_key = f"{name_prefix}_full"
ann_files = [
osp.join(
DATASETS_ROOT,
"lm_renders_blender/renders/{}_gt.json".format(obj),
)
]
if split in ["train"]:
filter_invalid = True
else:
raise ValueError("unknown split: {}".format(split))
if name not in SPLITS_LM_BLENDER:
SPLITS_LM_BLENDER[name] = dict(
name=name,
dataset_root=osp.join(DATASETS_ROOT, "lm_renders_blender/"),
models_root=osp.join(DATASETS_ROOT, f"BOP_DATASETS/{name_prefix}/models"),
objs=[obj], # only this obj
ann_files=ann_files,
image_prefixes=[osp.join(DATASETS_ROOT, f"lm_renders_blender/renders/{obj}")],
scale_to_meter=0.001,
with_masks=True, # (load masks but may not use it)
with_depth=True, # (load depth path here, but may not use it)
with_xyz=True,
depth_factor=1000.0,
cam=ref.__dict__[ref_key].camera_matrix,
height=480,
width=640,
cache_dir=osp.join(PROJ_ROOT, ".cache"),
use_cache=True,
n_per_obj=-1,
filter_invalid=False,
ref_key=ref_key,
)
def register_with_name_cfg(name, data_cfg=None):
"""Assume pre-defined datasets live in `./datasets`.
Args:
name: dataset_name
data_cfg: if name is a pre-defined split, its cfg is used and this is ignored;
otherwise data_cfg is required (it can be set via cfg.DATA_CFG.name)
"""
dprint("register dataset: {}".format(name))
if name in SPLITS_LM_BLENDER:
used_cfg = SPLITS_LM_BLENDER[name]
else:
assert data_cfg is not None, f"dataset name {name} is not registered"
used_cfg = data_cfg
DatasetCatalog.register(name, LM_BLENDER_Dataset(used_cfg))
# something like eval_types
MetadataCatalog.get(name).set(
ref_key=used_cfg["ref_key"],
objs=used_cfg["objs"],
eval_error_types=["ad", "rete", "proj"],
evaluator_type="bop",
**get_lm_metadata(obj_names=used_cfg["objs"], ref_key=used_cfg["ref_key"]),
)
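# Typical call, using a split generated above:
#   register_with_name_cfg("lm_blender_duck_train")
#   dicts = DatasetCatalog.get("lm_blender_duck_train")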
def get_available_datasets():
return list(SPLITS_LM_BLENDER.keys())
#### tests ###############################################
def test_vis():
dset_name = sys.argv[1]
assert dset_name in DatasetCatalog.list()
meta = MetadataCatalog.get(dset_name)
dprint("MetadataCatalog: ", meta)
objs = meta.objs
t_start = time.perf_counter()
dicts = DatasetCatalog.get(dset_name)
logger.info("Done loading {} samples with {:.3f}s.".format(len(dicts), time.perf_counter() - t_start))
dirname = "output/{}-data-vis".format(dset_name)
os.makedirs(dirname, exist_ok=True)
for d in dicts:
img = read_image_mmcv(d["file_name"], format="BGR")
depth = mmcv.imread(d["depth_file"], "unchanged") / 1000.0
anno = d["annotations"][0] # only one instance per image
imH, imW = img.shape[:2]
mask = cocosegm2mask(anno["segmentation"], imH, imW)
bbox = anno["bbox"]
bbox_mode = anno["bbox_mode"]
bbox_xyxy = np.array(BoxMode.convert(bbox, bbox_mode, BoxMode.XYXY_ABS))
kpt3d = anno["bbox3d_and_center"]
quat = anno["quat"]
trans = anno["trans"]
R = quat2mat(quat)
# 0-based label
cat_id = anno["category_id"]
K = d["cam"]
kpt_2d = misc.project_pts(kpt3d, K, R, trans)
# # TODO: visualize pose and keypoints
label = objs[cat_id]
# img_vis = vis_image_bboxes_cv2(img, bboxes=bboxes_xyxy, labels=labels)
img_vis = vis_image_mask_bbox_cv2(img, [mask], bboxes=[bbox_xyxy], labels=[label])
img_vis_kpt2d = img.copy()
img_vis_kpt2d = misc.draw_projected_box3d(
img_vis_kpt2d,
kpt_2d,
middle_color=None,
bottom_color=(128, 128, 128),
)
xyz_info = mmcv.load(anno["xyz_path"])
xyz = np.zeros((imH, imW, 3), dtype=np.float32)
xyz_crop = xyz_info["xyz_crop"].astype(np.float32)
x1, y1, x2, y2 = xyz_info["xyxy"]
xyz[y1 : y2 + 1, x1 : x2 + 1, :] = xyz_crop
xyz_show = get_emb_show(xyz)
grid_show(
[
img[:, :, [2, 1, 0]],
img_vis[:, :, [2, 1, 0]],
img_vis_kpt2d[:, :, [2, 1, 0]],
depth,
xyz_show,
],
["img", "vis_img", "img_vis_kpts2d", "depth", "emb_show"],
row=2,
col=3,
)
if __name__ == "__main__":
"""Test the dataset loader.
Usage:
python -m core.gdrn_modeling.datasets.lm_blender dataset_name
"""
from lib.vis_utils.image import grid_show
from lib.utils.setup_logger import setup_my_logger
import detectron2.data.datasets # noqa # add pre-defined metadata
from lib.vis_utils.image import vis_image_mask_bbox_cv2
from core.utils.utils import get_emb_show
from core.utils.data_utils import read_image_mmcv
print("sys.argv:", sys.argv)
logger = setup_my_logger(name="core")
register_with_name_cfg(sys.argv[1])
print("dataset catalog: ", DatasetCatalog.list())
test_vis()
core/gdrn_modeling/datasets/lm_dataset_crop_d2.py (deleted)

import hashlib
import logging
import os
import os.path as osp
import sys
import time
from collections import OrderedDict
import mmcv
import numpy as np
from tqdm import tqdm
from transforms3d.quaternions import mat2quat, quat2mat
from detectron2.data import DatasetCatalog, MetadataCatalog
from detectron2.structures import BoxMode
cur_dir = osp.dirname(osp.abspath(__file__))
PROJ_ROOT = osp.normpath(osp.join(cur_dir, "../../.."))
sys.path.insert(0, PROJ_ROOT)
import ref
from lib.pysixd import inout, misc
from lib.utils.mask_utils import binary_mask_to_rle, cocosegm2mask
from lib.utils.utils import dprint, iprint, lazy_property
logger = logging.getLogger(__name__)
DATASETS_ROOT = osp.normpath(osp.join(PROJ_ROOT, "datasets"))
class LM_CROP_Dataset(object):
"""lm crop splits."""
def __init__(self, data_cfg):
"""
Set with_depth and with_masks default to True,
and decide whether to load them into dataloader/network later
with_masks:
"""
self.name = data_cfg["name"]
self.data_cfg = data_cfg
self.objs = data_cfg["objs"] # selected objects
self.ann_files = data_cfg["ann_files"] # idx files with image ids
self.image_prefixes = data_cfg["image_prefixes"]
self.dataset_root = data_cfg["dataset_root"] # BOP_DATASETS/lm/
assert osp.exists(self.dataset_root), self.dataset_root
self.models_root = data_cfg["models_root"] # BOP_DATASETS/lm/models
self.scale_to_meter = data_cfg["scale_to_meter"] # 0.001
self.with_masks = data_cfg["with_masks"] # True (load masks but may not use it)
self.with_depth = data_cfg["with_depth"] # True (load depth path here, but may not use it)
self.depth_factor = data_cfg["depth_factor"] # 1000.0
self.cam_type = data_cfg["cam_type"]
self.cam = data_cfg["cam"] #
self.height = data_cfg["height"] # 480
self.width = data_cfg["width"] # 640
self.cache_dir = data_cfg["cache_dir"] # .cache
self.use_cache = data_cfg["use_cache"] # True
self.num_to_load = data_cfg["num_to_load"] # -1
self.filter_invalid = data_cfg["filter_invalid"]
self.filter_scene = data_cfg.get("filter_scene", False)
##################################################
if self.cam is None:
assert self.cam_type in ["local", "dataset"]
if self.cam_type == "dataset":
self.cam = np.array([[572.4114, 0, 325.2611], [0, 573.57043, 242.04899], [0, 0, 1]])
elif self.cam_type == "local":
# self.cam = np.array([[539.8100, 0, 318.2700], [0, 539.8300, 239.5600], [0, 0, 1]])
# yapf: disable
self.cam = np.array(
[[518.81993115, 0., 320.50653699],
[0., 518.86581081, 243.5604188 ],
[0., 0., 1. ]])
# yapf: enable
# RMS: 0.14046169348724977
# camera matrix:
# [[518.81993115 0. 320.50653699]
# [ 0. 518.86581081 243.5604188 ]
# [ 0. 0. 1. ]]
# distortion coefficients: [ 0.04147325 -0.21469544 -0.00053707 -0.00251986 0.17406399]
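# The "local" intrinsics above come from a checkerboard calibration (RMS
# reprojection error ~0.14 px); the distortion coefficients are kept for
# reference only and are not applied anywhere in this loader.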
# NOTE: careful! Only the selected objects
self.cat_ids = [cat_id for cat_id, obj_name in ref.lm_full.id2obj.items() if obj_name in self.objs]
# map selected objs to [0, num_objs-1]
self.cat2label = {v: i for i, v in enumerate(self.cat_ids)} # id_map
self.label2cat = {label: cat for cat, label in self.cat2label.items()}
self.obj2label = OrderedDict((obj, obj_id) for obj_id, obj in enumerate(self.objs))
##########################################################
def __call__(self): # LM_CROP_Dataset
"""Load light-weight instance annotations of all images into a list of
dicts in Detectron2 format.
Do not load heavy data into memory in this file, since we will
load the annotations of all images into memory.
"""
# cache the dataset_dicts to avoid loading masks from files
hashed_file_name = hashlib.md5(
(
"".join([str(fn) for fn in self.objs])
+ "dataset_dicts_{}_{}_{}_{}_{}".format(
self.name, self.dataset_root, self.with_masks, self.with_depth, self.cam_type
)
).encode("utf-8")
).hexdigest()
cache_path = osp.join(self.cache_dir, "dataset_dicts_{}_{}.pkl".format(self.name, hashed_file_name))
if osp.exists(cache_path) and self.use_cache:
logger.info("load cached dataset dicts from {}".format(cache_path))
return mmcv.load(cache_path)
t_start = time.perf_counter()
logger.info("loading dataset dicts: {}".format(self.name))
self.num_instances_without_valid_segmentation = 0
self.num_instances_without_valid_box = 0
dataset_dicts = [] #######################################################
assert len(self.ann_files) == len(self.image_prefixes), f"{len(self.ann_files)} != {len(self.image_prefixes)}"
unique_im_id = 0
for ann_file, scene_root in zip(self.ann_files, self.image_prefixes):
# linemod each scene is an object
with open(ann_file, "r") as f_ann:
indices = [line.strip("\r\n") for line in f_ann.readlines()] # string ids
gt_dict = mmcv.load(osp.join(scene_root, "scene_gt.json"))
gt_info_dict = mmcv.load(osp.join(scene_root, "scene_gt_info.json")) # bbox_obj, bbox_visib
for im_id in tqdm(indices):
int_im_id = int(im_id)
rgb_path = osp.join(scene_root, "rgb/{:06d}.png").format(int_im_id)
assert osp.exists(rgb_path), rgb_path
depth_path = osp.join(scene_root, "depth/{:06d}.png".format(int_im_id))
scene_id = int(rgb_path.split("/")[-3])
scene_im_id = "{}/{}".format(scene_id, int_im_id)
if self.filter_scene:
if scene_id not in self.cat_ids:
continue
record = {
"dataset_name": self.name,
"file_name": osp.relpath(rgb_path, PROJ_ROOT),
"depth_file": osp.relpath(depth_path, PROJ_ROOT),
"height": self.height,
"width": self.width,
"image_id": unique_im_id,
"scene_im_id": scene_im_id, # for evaluation
"cam": self.cam,
"depth_factor": self.depth_factor,
"img_type": "real",
}
unique_im_id += 1
insts = []
for anno_i, anno in enumerate(gt_dict[im_id]):
obj_id = anno["obj_id"]
if obj_id not in self.cat_ids:
continue
cur_label = self.cat2label[obj_id] # 0-based label
R = np.array(anno["cam_R_m2c"], dtype="float32").reshape(3, 3)
t = np.array(anno["cam_t_m2c"], dtype="float32") / 1000.0
pose = np.hstack([R, t.reshape(3, 1)])
quat = mat2quat(R).astype("float32")
proj = (record["cam"] @ t.T).T
proj = proj[:2] / proj[2]
bbox_visib = gt_info_dict[im_id][anno_i]["bbox_visib"]
bbox_obj = gt_info_dict[im_id][anno_i]["bbox_obj"]
x1, y1, w, h = bbox_visib
cx, cy = proj
crop_x1 = round(np.clip(cx - 64, 0, self.width - 1))
crop_x2 = round(np.clip(cx + 64, 0, self.width - 1))
crop_y1 = round(np.clip(cy - 64, 0, self.height - 1))
crop_y2 = round(np.clip(cy + 64, 0, self.height - 1))
# convert to xywh
crop_w = crop_x2 - crop_x1
crop_h = crop_y2 - crop_y1
bbox_128 = [crop_x1, crop_y1, crop_w, crop_h]
if self.filter_invalid:
if h <= 1 or w <= 1:
self.num_instances_without_valid_box += 1
continue
mask_file = osp.join(scene_root, "mask/{:06d}_{:06d}.png".format(int_im_id, anno_i))
mask_visib_file = osp.join(scene_root, "mask_visib/{:06d}_{:06d}.png".format(int_im_id, anno_i))
assert osp.exists(mask_file), mask_file
assert osp.exists(mask_visib_file), mask_visib_file
# load mask visib TODO: load both mask_visib and mask_full
mask_single = mmcv.imread(mask_visib_file, "unchanged")
mask_single = mask_single.astype("bool")
area = mask_single.sum()
if area < 3: # filter out too small or nearly invisible instances
self.num_instances_without_valid_segmentation += 1
continue
mask_rle = binary_mask_to_rle(mask_single, compressed=True)
# load mask full
mask_full = mmcv.imread(mask_file, "unchanged")
mask_full = mask_full.astype("bool")
mask_full_rle = binary_mask_to_rle(mask_full, compressed=True)
inst = {
"category_id": cur_label, # 0-based label
"bbox": bbox_visib, # TODO: load both bbox_obj and bbox_visib
"bbox_mode": BoxMode.XYWH_ABS,
"bbox_crop": bbox_128,
"pose": pose,
"quat": quat,
"trans": t,
"centroid_2d": proj, # absolute (cx, cy)
"segmentation": mask_rle,
"mask_full": mask_full_rle,
}
model_info = self.models_info[str(obj_id)]
inst["model_info"] = model_info
for key in ["bbox3d_and_center"]:
inst[key] = self.models[cur_label][key]
insts.append(inst)
if len(insts) == 0: # filter im without anno
continue
record["annotations"] = insts
dataset_dicts.append(record)
if self.num_instances_without_valid_segmentation > 0:
logger.warning(
"Filtered out {} instances without valid segmentation. "
"There might be issues in your dataset generation process.".format(
self.num_instances_without_valid_segmentation
)
)
if self.num_instances_without_valid_box > 0:
logger.warning(
"Filtered out {} instances without valid box. "
"There might be issues in your dataset generation process.".format(self.num_instances_without_valid_box)
)
##########################################################################
if self.num_to_load > 0:
self.num_to_load = min(int(self.num_to_load), len(dataset_dicts))
dataset_dicts = dataset_dicts[: self.num_to_load]
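# e.g. num_to_load=100 keeps only the first 100 records (no shuffling here).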
logger.info("loaded {} dataset dicts, using {}s".format(len(dataset_dicts), time.perf_counter() - t_start))
mmcv.mkdir_or_exist(osp.dirname(cache_path))
mmcv.dump(dataset_dicts, cache_path, protocol=4)
logger.info("Dumped dataset_dicts to {}".format(cache_path))
return dataset_dicts
@lazy_property
def models_info(self):
models_info_path = osp.join(self.models_root, "models_info.json")
assert osp.exists(models_info_path), models_info_path
models_info = mmcv.load(models_info_path) # key is str(obj_id)
return models_info
@lazy_property
def models(self):
"""Load models into a list."""
cache_path = osp.join(self.cache_dir, "models_{}.pkl".format("_".join(self.objs)))
if osp.exists(cache_path) and self.use_cache:
# dprint("{}: load cached object models from {}".format(self.name, cache_path))
return mmcv.load(cache_path)
models = []
for obj_name in self.objs:
model = inout.load_ply(
osp.join(
self.models_root,
f"obj_{ref.lm_full.obj2id[obj_name]:06d}.ply",
),
vertex_scale=self.scale_to_meter,
)
# NOTE: bbox3d_and_center is computed from the raw (not re-centered) vertices;
# fine for BOP models, whose vertices are already centered
model["bbox3d_and_center"] = misc.get_bbox3d_and_center(model["pts"])
models.append(model)
logger.info("cache models to {}".format(cache_path))
mmcv.mkdir_or_exist(osp.dirname(cache_path))
mmcv.dump(models, cache_path, protocol=4)
return models
def __len__(self):
# return len(self.images)
return self.num_to_load
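# NOTE: __len__ reflects the configured num_to_load (-1 until truncated in
# __call__), not the number of records actually loaded; call the instance to
# get the real count.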
def image_aspect_ratio(self):
return self.width / self.height # 4/3
########### register datasets ############################################################
def get_lm_metadata(obj_names, ref_key):
"""task specific metadata."""
data_ref = ref.__dict__[ref_key]
cur_sym_infos = {} # label based key
loaded_models_info = data_ref.get_models_info()
for i, obj_name in enumerate(obj_names):
obj_id = data_ref.obj2id[obj_name]
model_info = loaded_models_info[str(obj_id)]
if "symmetries_discrete" in model_info or "symmetries_continuous" in model_info:
sym_transforms = misc.get_symmetry_transformations(model_info, max_sym_disc_step=0.01)
sym_info = np.array([sym["R"] for sym in sym_transforms], dtype=np.float32)
else:
sym_info = None
cur_sym_infos[i] = sym_info
meta = {"thing_classes": obj_names, "sym_infos": cur_sym_infos}
return meta
LM_CROP_11_OBJECTS = [
"ape",
"benchvise",
"camera",
"can",
"cat",
"driller",
"duck",
"holepuncher",
"iron",
"lamp",
"phone",
] # no bowl, cup, eggbox, glue
################################################################################
SPLITS_LM_CROP = dict(
lm_crop_11_train=dict(
name="lm_crop_11_train",
dataset_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/"),
models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/models"),
objs=LM_CROP_11_OBJECTS, # selected objects
ann_files=[
osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/image_set_lm_crop/lm_crop_{}_{}.txt".format("train", _obj))
for _obj in LM_CROP_11_OBJECTS
],
image_prefixes=[
osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/test/{:06d}".format(ref.lm_full.obj2id[_obj]))
for _obj in LM_CROP_11_OBJECTS
],
scale_to_meter=0.001,
with_masks=True, # (load masks but may not use it)
with_depth=True, # (load depth path here, but may not use it)
depth_factor=1000.0,
cam_type="dataset",
cam=ref.lm_full.camera_matrix,
height=480,
width=640,
cache_dir=osp.join(PROJ_ROOT, ".cache"),
use_cache=True,
num_to_load=-1,
filter_scene=True,
filter_invalid=False,
ref_key="lm_full",
),
lm_crop_11_test=dict(
name="lm_crop_11_test",
dataset_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/"),
models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/models"),
objs=LM_CROP_11_OBJECTS,
ann_files=[
osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/image_set_lm_crop/lm_crop_{}_{}.txt".format("test", _obj))
for _obj in LM_CROP_11_OBJECTS
],
# NOTE: scene root
image_prefixes=[
osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/test/{:06d}").format(ref.lm_full.obj2id[_obj])
for _obj in LM_CROP_11_OBJECTS
],
scale_to_meter=0.001,
with_masks=True, # (load masks but may not use it)
with_depth=True, # (load depth path here, but may not use it)
depth_factor=1000.0,
cam_type="dataset",
cam=ref.lm_full.camera_matrix,
height=480,
width=640,
cache_dir=osp.join(PROJ_ROOT, ".cache"),
use_cache=True,
num_to_load=-1,
filter_scene=True,
filter_invalid=False,
ref_key="lm_full",
),
)
# single obj splits
for obj in ref.lm_full.objects:
for split in ["train", "test"]:
name = "lm_crop_{}_{}".format(obj, split)
ann_files = [osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/image_set_lm_crop/lm_crop_{}_{}.txt".format(split, obj))]
if split in ["train"]:
filter_invalid = True
elif split in ["test"]:
filter_invalid = False
else:
raise ValueError("unknown split: {}".format(split))
if name not in SPLITS_LM_CROP:
SPLITS_LM_CROP[name] = dict(
name=name,
dataset_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/"),
models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/models"),
objs=[obj], # only this obj
ann_files=ann_files,
image_prefixes=[osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/test/{:06d}").format(ref.lm_full.obj2id[obj])],
scale_to_meter=0.001,
with_masks=True, # (load masks but may not use it)
with_depth=True, # (load depth path here, but may not use it)
depth_factor=1000.0,
cam_type="dataset",
cam=ref.lm_full.camera_matrix,
height=480,
width=640,
cache_dir=osp.join(PROJ_ROOT, ".cache"),
use_cache=True,
num_to_load=-1,
filter_invalid=False,
filter_scene=True,
ref_key="lm_full",
)
def register_with_name_cfg(name, data_cfg=None):
"""Assume pre-defined datasets live in `./datasets`.
Args:
name: dataset_name
data_cfg: if name is a pre-defined split, its cfg is used and this is ignored;
otherwise data_cfg is required (it can be set via cfg.DATA_CFG.name)
"""
dprint("register dataset: {}".format(name))
if name in SPLITS_LM_CROP:
used_cfg = SPLITS_LM_CROP[name]
else:
assert data_cfg is not None, f"dataset name {name} is not registered"
used_cfg = data_cfg
DatasetCatalog.register(name, LM_CROP_Dataset(used_cfg))
# something like eval_types
MetadataCatalog.get(name).set(
ref_key=used_cfg["ref_key"],
objs=used_cfg["objs"],
eval_error_types=["ad", "rete", "proj"],
evaluator_type="bop",
**get_lm_metadata(obj_names=used_cfg["objs"], ref_key=used_cfg["ref_key"]),
)
def get_available_datasets():
return list(SPLITS_LM_CROP.keys())
#### tests ###############################################
def test_vis():
# python -m this_module lmo_test
dset_name = sys.argv[1]
assert dset_name in DatasetCatalog.list()
meta = MetadataCatalog.get(dset_name)
dprint("MetadataCatalog: ", meta)
objs = meta.objs
t_start = time.perf_counter()
dicts = DatasetCatalog.get(dset_name)
logger.info("Done loading {} samples with {:.3f}s.".format(len(dicts), time.perf_counter() - t_start))
dirname = "output/{}-data-vis".format(dset_name)
os.makedirs(dirname, exist_ok=True)
for d in dicts:
img = read_image_mmcv(d["file_name"], format="BGR")
depth = mmcv.imread(d["depth_file"], "unchanged") / 1000.0
imH, imW = img.shape[:2]
annos = d["annotations"]
masks = [cocosegm2mask(anno["segmentation"], imH, imW) for anno in annos]
bboxes = [anno["bbox"] for anno in annos]
bbox_modes = [anno["bbox_mode"] for anno in annos]
bboxes_xyxy = np.array(
[BoxMode.convert(box, box_mode, BoxMode.XYXY_ABS) for box, box_mode in zip(bboxes, bbox_modes)]
)
kpts_3d_list = [anno["bbox3d_and_center"] for anno in annos]
quats = [anno["quat"] for anno in annos]
centers_2d = [anno["centroid_2d"] for anno in annos]
bboxes_128 = []
for center_2d in centers_2d:
cx, cy = center_2d
bboxes_128.append([cx - 64, cy - 64, cx + 64, cy + 64])
bboxes_128 = np.array(bboxes_128)
bboxes_128[:, 0] = np.clip(bboxes_128[:, 0], 0, imW - 1)
bboxes_128[:, 2] = np.clip(bboxes_128[:, 2], 0, imW - 1)
bboxes_128[:, 1] = np.clip(bboxes_128[:, 1], 0, imH - 1)
bboxes_128[:, 3] = np.clip(bboxes_128[:, 3], 0, imH - 1)
transes = [anno["trans"] for anno in annos]
Rs = [quat2mat(quat) for quat in quats]
# 0-based label
cat_ids = [anno["category_id"] for anno in annos]
K = d["cam"]
kpts_2d = [misc.project_pts(kpt3d, K, R, t) for kpt3d, R, t in zip(kpts_3d_list, Rs, transes)]
# visualizer = Visualizer(img[:, :, [2,1,0]], metadata=meta)
# vis = visualizer.draw_dataset_dict(d) # TODO: add pose visualization and depth visualization
# # fpath = osp.join(dirname, osp.basename(d["file_name"]))
# # vis.save(fpath)
# img_vis = vis.get_image()[:, :, [2,1,0]]
# # TODO: visualize pose and keypoints
labels = [objs[cat_id] for cat_id in cat_ids]
# img_vis = vis_image_bboxes_cv2(img, bboxes=bboxes_xyxy, labels=labels)
img_vis = vis_image_mask_bbox_cv2(img, masks, bboxes=bboxes_xyxy, labels=labels)
img_vis_kpts2d = img.copy()
for anno_i in range(len(annos)):
img_vis_kpts2d = misc.draw_projected_box3d(img_vis_kpts2d, kpts_2d[anno_i])
img_vis_bbox_128 = vis_image_mask_bbox_cv2(img, masks, bboxes=bboxes_128, labels=labels)
# grid_show(
# [img[:, :, [2, 1, 0]], img_vis[:, :, [2, 1, 0]], img_vis_bbox_64[:, :, ::-1], img_vis_kpts2d[:, :, [2, 1, 0]], depth],
# ["img", "vis_img", "img_vis_bbox_64", "img_vis_kpts2d", "depth"],
# row=2,
# col=3,
# )
grid_show(
[img_vis_bbox_128[:, :, ::-1], depth],
["img_vis_bbox_128", "depth"],
row=1,
col=2,
)
if __name__ == "__main__":
"""Test the dataset loader.
Usage:
python -m core.gdrn_modeling.datasets.lm_dataset_crop_d2 dataset_name
"""
from lib.vis_utils.image import grid_show
from lib.utils.setup_logger import setup_my_logger
from detectron2.utils.visualizer import Visualizer
import detectron2.data.datasets # noqa # add pre-defined metadata
from lib.pysixd import misc
from core.utils.data_utils import read_image_mmcv
from lib.vis_utils.image import vis_image_mask_bbox_cv2
from lib.utils.mask_utils import cocosegm2mask
from lib.utils.bbox_utils import xywh_to_xyxy
print("sys.argv:", sys.argv)
logger = setup_my_logger(name="core")
register_with_name_cfg(sys.argv[1])
print("dataset catalog: ", DatasetCatalog.list())
test_vis()
core/gdrn_modeling/datasets/lm_new_duck_pbr.py (deleted)

import hashlib
import logging
import os
import os.path as osp
import sys
import time
from collections import OrderedDict
import mmcv
import numpy as np
from tqdm import tqdm
from transforms3d.quaternions import mat2quat, quat2mat
from detectron2.data import DatasetCatalog, MetadataCatalog
from detectron2.structures import BoxMode
cur_dir = osp.dirname(osp.abspath(__file__))
PROJ_ROOT = osp.normpath(osp.join(cur_dir, "../../.."))
sys.path.insert(0, PROJ_ROOT)
import ref
from lib.pysixd import inout, misc
from lib.utils.mask_utils import binary_mask_to_rle, cocosegm2mask
from lib.utils.utils import dprint, lazy_property
logger = logging.getLogger(__name__)
DATASETS_ROOT = osp.normpath(osp.join(PROJ_ROOT, "datasets"))
class LM_NEW_DUCK_PBR_Dataset:
def __init__(self, data_cfg):
"""
Set with_depth and with_masks default to True,
and decide whether to load them into dataloader/network later
with_masks:
"""
self.name = data_cfg["name"]
self.data_cfg = data_cfg
self.objs = data_cfg["objs"] # selected objects
self.dataset_root = data_cfg.get(
"dataset_root",
osp.join(DATASETS_ROOT, "duck_fabi/train_pbr"),
)
assert osp.exists(self.dataset_root), self.dataset_root
self.models_root = data_cfg["models_root"] # duck_fabi/models
self.scale_to_meter = data_cfg["scale_to_meter"] # 0.001
self.with_masks = data_cfg["with_masks"]
self.with_depth = data_cfg["with_depth"]
self.height = data_cfg["height"]
self.width = data_cfg["width"]
self.cache_dir = data_cfg.get("cache_dir", osp.join(PROJ_ROOT, ".cache")) # .cache
self.use_cache = data_cfg.get("use_cache", True)
self.num_to_load = data_cfg.get("num_to_load", -1) # -1
self.filter_invalid = data_cfg.get("filter_invalid", True)
##################################################
# NOTE: careful! Only the selected objects
self.cat_ids = [cat_id for cat_id, obj_name in ref.lm_duck_fabi.id2obj.items() if obj_name in self.objs]
# map selected objs to [0, num_objs-1]
self.cat2label = {v: i for i, v in enumerate(self.cat_ids)} # id_map
self.label2cat = {label: cat for cat, label in self.cat2label.items()}
self.obj2label = OrderedDict((obj, obj_id) for obj_id, obj in enumerate(self.objs))
##########################################################
self.scenes = [f"{i:06d}" for i in range(50)]
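# Assumes train_pbr contains exactly 50 BOP-style scene folders, 000000-000049.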
def __call__(self):
"""Load light-weight instance annotations of all images into a list of
dicts in Detectron2 format.
Do not load heavy data into memory in this file, since we will
load the annotations of all images into memory.
"""
# cache the dataset_dicts to avoid loading masks from files
hashed_file_name = hashlib.md5(
(
"".join([str(fn) for fn in self.objs])
+ "dataset_dicts_{}_{}_{}_{}_{}".format(
self.name,
self.dataset_root,
self.with_masks,
self.with_depth,
__name__,
)
).encode("utf-8")
).hexdigest()
cache_path = osp.join(
self.cache_dir,
"dataset_dicts_{}_{}.pkl".format(self.name, hashed_file_name),
)
if osp.exists(cache_path) and self.use_cache:
logger.info("load cached dataset dicts from {}".format(cache_path))
return mmcv.load(cache_path)
t_start = time.perf_counter()
logger.info("loading dataset dicts: {}".format(self.name))
self.num_instances_without_valid_segmentation = 0
self.num_instances_without_valid_box = 0
dataset_dicts = [] # ######################################################
# it is slow because of loading and converting masks to rle
for scene in tqdm(self.scenes):
scene_id = int(scene)
scene_root = osp.join(self.dataset_root, scene)
gt_dict = mmcv.load(osp.join(scene_root, "scene_gt.json"))
gt_info_dict = mmcv.load(osp.join(scene_root, "scene_gt_info.json"))
cam_dict = mmcv.load(osp.join(scene_root, "scene_camera.json"))
for str_im_id in tqdm(gt_dict, postfix=f"{scene_id}"):
int_im_id = int(str_im_id)
rgb_path = osp.join(scene_root, "rgb/{:06d}.jpg").format(int_im_id)
assert osp.exists(rgb_path), rgb_path
depth_path = osp.join(scene_root, "depth/{:06d}.png".format(int_im_id))
scene_im_id = f"{scene_id}/{int_im_id}"
K = np.array(cam_dict[str_im_id]["cam_K"], dtype=np.float32).reshape(3, 3)
depth_factor = 1000.0 / cam_dict[str_im_id]["depth_scale"] # 10000
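# BOP convention: depth_mm = depth_png * depth_scale, so dividing the raw
# depth PNG by depth_factor (= 1000 / depth_scale) yields meters.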
record = {
"dataset_name": self.name,
"file_name": osp.relpath(rgb_path, PROJ_ROOT),
"depth_file": osp.relpath(depth_path, PROJ_ROOT),
"height": self.height,
"width": self.width,
"image_id": int_im_id,
"scene_im_id": scene_im_id, # for evaluation
"cam": K,
"depth_factor": depth_factor,
"img_type": "syn_pbr", # NOTE: has background
}
insts = []
for anno_i, anno in enumerate(gt_dict[str_im_id]):
obj_id = anno["obj_id"]
if obj_id not in self.cat_ids:
continue
cur_label = self.cat2label[obj_id] # 0-based label
R = np.array(anno["cam_R_m2c"], dtype="float32").reshape(3, 3)
t = np.array(anno["cam_t_m2c"], dtype="float32") / 1000.0
pose = np.hstack([R, t.reshape(3, 1)])
quat = mat2quat(R).astype("float32")
proj = (record["cam"] @ t.T).T
proj = proj[:2] / proj[2]
bbox_visib = gt_info_dict[str_im_id][anno_i]["bbox_visib"]
bbox_obj = gt_info_dict[str_im_id][anno_i]["bbox_obj"]
x1, y1, w, h = bbox_visib
if self.filter_invalid:
if h <= 1 or w <= 1:
self.num_instances_without_valid_box += 1
continue
mask_file = osp.join(
scene_root,
"mask/{:06d}_{:06d}.png".format(int_im_id, anno_i),
)
mask_visib_file = osp.join(
scene_root,
"mask_visib/{:06d}_{:06d}.png".format(int_im_id, anno_i),
)
assert osp.exists(mask_file), mask_file
assert osp.exists(mask_visib_file), mask_visib_file
# load mask visib
mask_single = mmcv.imread(mask_visib_file, "unchanged")
mask_single = mask_single.astype("bool")
area = mask_single.sum()
if area < 30: # filter out too small or nearly invisible instances
self.num_instances_without_valid_segmentation += 1
continue
mask_rle = binary_mask_to_rle(mask_single, compressed=True)
# load mask full
mask_full = mmcv.imread(mask_file, "unchanged")
mask_full = mask_full.astype("bool")
mask_full_rle = binary_mask_to_rle(mask_full, compressed=True)
visib_fract = gt_info_dict[str_im_id][anno_i].get("visib_fract", 1.0)
inst = {
"category_id": cur_label, # 0-based label
"bbox": bbox_visib, # TODO: load both bbox_obj and bbox_visib
"bbox_mode": BoxMode.XYWH_ABS,
"pose": pose,
"quat": quat,
"trans": t,
"centroid_2d": proj, # absolute (cx, cy)
"segmentation": mask_rle,
"mask_full": mask_full_rle,
"visib_fract": visib_fract,
}
model_info = self.models_info[str(obj_id)]
inst["model_info"] = model_info
for key in ["bbox3d_and_center"]:
inst[key] = self.models[cur_label][key]
insts.append(inst)
if len(insts) == 0: # filter im without anno
continue
record["annotations"] = insts
dataset_dicts.append(record)
if self.num_instances_without_valid_segmentation > 0:
logger.warning(
"Filtered out {} instances without valid segmentation. "
"There might be issues in your dataset generation process.".format(
self.num_instances_without_valid_segmentation
)
)
if self.num_instances_without_valid_box > 0:
logger.warning(
"Filtered out {} instances without valid box. "
"There might be issues in your dataset generation process.".format(self.num_instances_without_valid_box)
)
##########################################################################
if self.num_to_load > 0:
self.num_to_load = min(int(self.num_to_load), len(dataset_dicts))
dataset_dicts = dataset_dicts[: self.num_to_load]
logger.info("loaded {} dataset dicts, using {}s".format(len(dataset_dicts), time.perf_counter() - t_start))
mmcv.mkdir_or_exist(osp.dirname(cache_path))
mmcv.dump(dataset_dicts, cache_path, protocol=4)
logger.info("Dumped dataset_dicts to {}".format(cache_path))
return dataset_dicts
@lazy_property
def models_info(self):
models_info_path = osp.join(self.models_root, "models_info.json")
assert osp.exists(models_info_path), models_info_path
models_info = mmcv.load(models_info_path) # key is str(obj_id)
return models_info
@lazy_property
def models(self):
"""Load models into a list."""
cache_path = osp.join(self.models_root, "models_{}.pkl".format("_".join(self.objs)))
if osp.exists(cache_path) and self.use_cache:
# logger.info("load cached object models from {}".format(cache_path))
return mmcv.load(cache_path)
models = []
for obj_name in self.objs:
model = inout.load_ply(
osp.join(
self.models_root,
f"obj_{ref.lm_duck_fabi.obj2id[obj_name]:06d}.ply",
),
vertex_scale=self.scale_to_meter,
)
# NOTE: bbox3d_and_center is computed from the raw (not re-centered) vertices;
# fine for BOP models, whose vertices are already centered
model["bbox3d_and_center"] = misc.get_bbox3d_and_center(model["pts"])
models.append(model)
logger.info("cache models to {}".format(cache_path))
mmcv.dump(models, cache_path, protocol=4)
return models
def image_aspect_ratio(self):
return self.width / self.height
########### register datasets ############################################################
def get_lm_metadata(obj_names, ref_key):
"""task specific metadata."""
data_ref = ref.__dict__[ref_key]
cur_sym_infos = {} # label based key
loaded_models_info = data_ref.get_models_info()
for i, obj_name in enumerate(obj_names):
obj_id = data_ref.obj2id[obj_name]
model_info = loaded_models_info[str(obj_id)]
if "symmetries_discrete" in model_info or "symmetries_continuous" in model_info:
sym_transforms = misc.get_symmetry_transformations(model_info, max_sym_disc_step=0.01)
sym_info = np.array([sym["R"] for sym in sym_transforms], dtype=np.float32)
else:
sym_info = None
cur_sym_infos[i] = sym_info
meta = {"thing_classes": obj_names, "sym_infos": cur_sym_infos}
return meta
LM_13_OBJECTS = [
"ape",
"benchvise",
"camera",
"can",
"cat",
"driller",
"duck",
"eggbox",
"glue",
"holepuncher",
"iron",
"lamp",
"phone",
] # no bowl, cup
LM_OCC_OBJECTS = [
"ape",
"can",
"cat",
"driller",
"duck",
"eggbox",
"glue",
"holepuncher",
]
################################################################################
SPLITS_LM_NEW_DUCK_PBR = dict(
lm_new_duck_pbr_13_train=dict(
name="lm_new_duck_pbr_13_train",
objs=LM_13_OBJECTS, # selected objects
dataset_root=osp.join(DATASETS_ROOT, "duck_fabi/train_pbr"),
models_root=osp.join(DATASETS_ROOT, "duck_fabi/models"),
scale_to_meter=0.001,
with_masks=True, # (load masks but may not use it)
with_depth=True, # (load depth path here, but may not use it)
height=720,
width=1280,
cache_dir=osp.join(PROJ_ROOT, ".cache"),
use_cache=True,
num_to_load=-1,
filter_invalid=True,
ref_key="lm_duck_fabi",
),
lm_new_duck_pbr_8_train=dict(
name="lm_new_duck_pbr_8_train",
objs=LM_OCC_OBJECTS, # selected objects
dataset_root=osp.join(DATASETS_ROOT, "duck_fabi/train_pbr"),
models_root=osp.join(DATASETS_ROOT, "duck_fabi/models"),
scale_to_meter=0.001,
with_masks=True, # (load masks but may not use it)
with_depth=True, # (load depth path here, but may not use it)
height=720,
width=1280,
cache_dir=osp.join(PROJ_ROOT, ".cache"),
use_cache=True,
num_to_load=-1,
filter_invalid=True,
ref_key="lm_duck_fabi", # TODO: maybe have bug
),
)
# single obj splits
for obj in ref.lm_duck_fabi.objects:
for split in ["train"]:
name = "lm_new_duck_pbr_{}_{}".format(obj, split)
if split in ["train"]:
filter_invalid = True
else:
raise ValueError("unknown split: {}".format(split))
if name not in SPLITS_LM_NEW_DUCK_PBR:
SPLITS_LM_NEW_DUCK_PBR[name] = dict(
name=name,
objs=[obj], # only this obj
dataset_root=osp.join(DATASETS_ROOT, "duck_fabi/train_pbr"),
models_root=osp.join(DATASETS_ROOT, "duck_fabi/models"),
scale_to_meter=0.001,
with_masks=True, # (load masks but may not use it)
with_depth=True, # (load depth path here, but may not use it)
height=720,
width=1280,
cache_dir=osp.join(PROJ_ROOT, ".cache"),
use_cache=True,
num_to_load=-1,
filter_invalid=filter_invalid,
ref_key="lm_duck_fabi",
)
def register_with_name_cfg(name, data_cfg=None):
"""Assume pre-defined datasets live in `./datasets`.
Args:
name: dataset_name
data_cfg: if name is a pre-defined split, its cfg is used and this is ignored;
otherwise data_cfg is required (it can be set via cfg.DATA_CFG.name)
"""
dprint("register dataset: {}".format(name))
if name in SPLITS_LM_NEW_DUCK_PBR:
used_cfg = SPLITS_LM_NEW_DUCK_PBR[name]
else:
assert data_cfg is not None, f"dataset name {name} is not registered"
used_cfg = data_cfg
DatasetCatalog.register(name, LM_NEW_DUCK_PBR_Dataset(used_cfg))
# something like eval_types
MetadataCatalog.get(name).set(
ref_key=used_cfg["ref_key"],
objs=used_cfg["objs"],
eval_error_types=["ad", "rete", "proj"],
evaluator_type="bop",
**get_lm_metadata(obj_names=used_cfg["objs"], ref_key=used_cfg["ref_key"]),
)
def get_available_datasets():
return list(SPLITS_LM_NEW_DUCK_PBR.keys())
#### tests ###############################################
def test_vis():
dset_name = sys.argv[1]
assert dset_name in DatasetCatalog.list()
meta = MetadataCatalog.get(dset_name)
dprint("MetadataCatalog: ", meta)
objs = meta.objs
t_start = time.perf_counter()
dicts = DatasetCatalog.get(dset_name)
logger.info("Done loading {} samples with {:.3f}s.".format(len(dicts), time.perf_counter() - t_start))
dirname = "output/{}-data-vis".format(dset_name)
os.makedirs(dirname, exist_ok=True)
for d in dicts:
img = read_image_mmcv(d["file_name"], format="BGR")
depth = mmcv.imread(d["depth_file"], "unchanged") / 10000.0
imH, imW = img.shape[:2]
annos = d["annotations"]
masks = [cocosegm2mask(anno["segmentation"], imH, imW) for anno in annos]
bboxes = [anno["bbox"] for anno in annos]
bbox_modes = [anno["bbox_mode"] for anno in annos]
bboxes_xyxy = np.array(
[BoxMode.convert(box, box_mode, BoxMode.XYXY_ABS) for box, box_mode in zip(bboxes, bbox_modes)]
)
kpts_3d_list = [anno["bbox3d_and_center"] for anno in annos]
quats = [anno["quat"] for anno in annos]
transes = [anno["trans"] for anno in annos]
Rs = [quat2mat(quat) for quat in quats]
# 0-based label
cat_ids = [anno["category_id"] for anno in annos]
K = d["cam"]
kpts_2d = [misc.project_pts(kpt3d, K, R, t) for kpt3d, R, t in zip(kpts_3d_list, Rs, transes)]
# # TODO: visualize pose and keypoints
labels = [objs[cat_id] for cat_id in cat_ids]
for _i in range(len(annos)):
img_vis = vis_image_mask_bbox_cv2(
img,
masks[_i : _i + 1],
bboxes=bboxes_xyxy[_i : _i + 1],
labels=labels[_i : _i + 1],
)
img_vis_kpts2d = misc.draw_projected_box3d(img_vis.copy(), kpts_2d[_i])
grid_show(
[
img[:, :, [2, 1, 0]],
img_vis[:, :, [2, 1, 0]],
img_vis_kpts2d[:, :, [2, 1, 0]],
depth,
],
["img", "vis_img", "img_vis_kpts2d", "depth"],
row=2,
col=2,
)
if __name__ == "__main__":
"""Test the dataset loader.
Usage:
python -m core.gdrn_modeling.datasets.lm_new_duck_pbr dataset_name
"""
from lib.vis_utils.image import grid_show
from lib.utils.setup_logger import setup_my_logger
import detectron2.data.datasets # noqa # add pre-defined metadata
from lib.vis_utils.image import vis_image_mask_bbox_cv2
from core.utils.data_utils import read_image_mmcv
print("sys.argv:", sys.argv)
logger = setup_my_logger(name="core")
register_with_name_cfg(sys.argv[1])
print("dataset catalog: ", DatasetCatalog.list())
test_vis()
core/gdrn_modeling/datasets/lm_syn_egl.py (deleted)

import hashlib
import logging
import os
import os.path as osp
import sys
cur_dir = osp.dirname(osp.abspath(__file__))
PROJ_ROOT = osp.normpath(osp.join(cur_dir, "../../.."))
sys.path.insert(0, PROJ_ROOT)
import time
from collections import OrderedDict
import mmcv
import numpy as np
from tqdm import tqdm
from transforms3d.quaternions import mat2quat, quat2mat
import ref
from detectron2.data import DatasetCatalog, MetadataCatalog
from detectron2.structures import BoxMode
from lib.pysixd import inout, misc
from lib.utils.mask_utils import binary_mask_to_rle, cocosegm2mask, mask2bbox_xywh
from lib.utils.utils import dprint, iprint, lazy_property
logger = logging.getLogger(__name__)
DATASETS_ROOT = osp.normpath(osp.join(PROJ_ROOT, "datasets"))
class LM_SYN_EGL_Dataset(object):
"""lm synthetic data by egl renderer."""
def __init__(self, data_cfg):
"""
Set with_depth and with_masks default to True,
and decide whether to load them into dataloader/network later
with_masks:
"""
self.name = data_cfg["name"]
self.data_cfg = data_cfg
self.objs = data_cfg["objs"] # selected objects
self.dataset_root = data_cfg.get("dataset_root", osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/train_egl"))
self.xyz_root = data_cfg.get("xyz_root", osp.join(self.dataset_root, "xyz_crop"))
assert osp.exists(self.dataset_root), self.dataset_root
self.gt_path = osp.join(self.dataset_root, "gt.json")
assert osp.exists(self.gt_path)
self.models_root = data_cfg["models_root"] # BOP_DATASETS/lm/models
self.scale_to_meter = data_cfg["scale_to_meter"] # 0.001
self.with_masks = data_cfg["with_masks"]
self.with_depth = data_cfg["with_depth"]
self.depth_factor = data_cfg.get("depth_factor", 10000.0)
self.height = data_cfg["height"] # 480
self.width = data_cfg["width"] # 640
self.cache_dir = data_cfg.get("cache_dir", osp.join(PROJ_ROOT, ".cache")) # .cache
self.use_cache = data_cfg.get("use_cache", True)
self.num_to_load = data_cfg["num_to_load"] # -1
self.filter_invalid = data_cfg.get("filter_invalid", True)
##################################################
self.cam = np.array([[572.4114, 0, 325.2611], [0, 573.57043, 242.04899], [0, 0, 1]])
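# Hard-coded standard Linemod intrinsics (fx=572.41, fy=573.57, cx=325.26, cy=242.05).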
# NOTE: careful! Only the selected objects
self.cat_ids = [cat_id for cat_id, obj_name in ref.lm_full.id2obj.items() if obj_name in self.objs]
# map selected objs to [0, num_objs-1]
self.cat2label = {v: i for i, v in enumerate(self.cat_ids)} # id_map
self.label2cat = {label: cat for cat, label in self.cat2label.items()}
self.obj2label = OrderedDict((obj, obj_id) for obj_id, obj in enumerate(self.objs))
##########################################################
def __call__(self): # LM_SYN_EGL_Dataset
"""Load light-weight instance annotations of all images into a list of
dicts in Detectron2 format.
Do not load heavy data into memory in this file, since we will
load the annotations of all images into memory.
"""
# cache the dataset_dicts to avoid loading masks from files
hashed_file_name = hashlib.md5(
(
"".join([str(fn) for fn in self.objs])
+ "dataset_dicts_{}_{}_{}_{}_{}_{}".format(
self.name,
self.dataset_root,
self.with_masks,
self.with_depth,
self.num_to_load,
__name__,
)
).encode("utf-8")
).hexdigest()
cache_path = osp.join(self.cache_dir, "dataset_dicts_{}_{}.pkl".format(self.name, hashed_file_name))
if osp.exists(cache_path) and self.use_cache:
logger.info("load cached dataset dicts from {}".format(cache_path))
return mmcv.load(cache_path)
t_start = time.perf_counter()
logger.info("loading dataset dicts: {}".format(self.name))
self.num_instances_without_valid_segmentation = 0
self.num_instances_without_valid_box = 0
dataset_dicts = [] #######################################################
gt_dict = mmcv.load(self.gt_path)
unique_im_id = 0
for str_im_id, annos in tqdm(gt_dict.items()):
int_im_id = int(str_im_id)
rgb_path = osp.join(self.dataset_root, "rgb/{:06d}.jpg".format(int_im_id))
assert osp.exists(rgb_path), rgb_path
depth_path = osp.join(self.dataset_root, "depth/{:06d}.png".format(int_im_id))
record = {
"dataset_name": self.name,
"file_name": osp.relpath(rgb_path, PROJ_ROOT),
"depth_file": osp.relpath(depth_path, PROJ_ROOT),
"height": self.height,
"width": self.width,
"image_id": unique_im_id,
"scene_im_id": f"0/{int_im_id}", # for pose evaluation
"cam": self.cam,
"depth_factor": self.depth_factor,
"img_type": "syn_egl", # NOTE: has background
}
unique_im_id += 1
insts = []
for anno_i, anno in enumerate(annos):
obj_id = anno["obj_id"]
if obj_id not in self.cat_ids:
continue
cur_label = self.cat2label[obj_id] # 0-based label
pose = np.array(anno["pose"])
R = pose[:3, :3]
t = pose[:3, 3]
quat = mat2quat(R).astype("float32")
proj = (record["cam"] @ t.T).T
proj = proj[:2] / proj[2]
mask_vis_rle = anno["mask_visib"]
mask_full_rle = anno["mask_full"]
bbox = anno["bbox"]
x1, y1, w, h = bbox
if self.filter_invalid:
if h <= 1 or w <= 1:
self.num_instances_without_valid_box += 1
continue
mask_vis = cocosegm2mask(mask_vis_rle, self.height, self.width)
vis_area = mask_vis.sum()
if vis_area < 30: # filter out too small or nearly invisible instances
self.num_instances_without_valid_segmentation += 1
continue
mask_full = cocosegm2mask(mask_full_rle, self.height, self.width)
full_area = mask_full.sum()
if full_area > 0:
visib_fract = vis_area / full_area
else:
visib_fract = 0
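# visib_fract is recomputed from the RLE masks (visible area / full area)
# rather than read from gt.json.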
xyz_path = osp.join(self.xyz_root, f"{int_im_id:06d}_{anno_i:06d}-xyz.pkl")
assert osp.exists(xyz_path), xyz_path
inst = {
"category_id": cur_label, # 0-based label
"bbox": bbox, # TODO: load both bbox_obj and bbox_visib
"bbox_mode": BoxMode.XYWH_ABS,
"pose": pose,
"quat": quat,
"trans": t,
"centroid_2d": proj, # absolute (cx, cy)
"segmentation": mask_vis_rle,
"mask_full": mask_full_rle,
"visib_fract": visib_fract,
"xyz_path": xyz_path,
}
model_info = self.models_info[str(obj_id)]
inst["model_info"] = model_info
# TODO: using full mask and full xyz
for key in ["bbox3d_and_center"]:
inst[key] = self.models[cur_label][key]
insts.append(inst)
if len(insts) == 0: # filter im without anno
continue
record["annotations"] = insts
dataset_dicts.append(record)
if self.num_instances_without_valid_segmentation > 0:
logger.warning(
"Filtered out {} instances without valid segmentation. "
"There might be issues in your dataset generation process.".format(
self.num_instances_without_valid_segmentation
)
)
if self.num_instances_without_valid_box > 0:
logger.warning(
"Filtered out {} instances without valid box. "
"There might be issues in your dataset generation process.".format(self.num_instances_without_valid_box)
)
##########################################################################
if self.num_to_load > 0:
self.num_to_load = min(int(self.num_to_load), len(dataset_dicts))
dataset_dicts = dataset_dicts[: self.num_to_load]
logger.info("loaded {} dataset dicts, using {}s".format(len(dataset_dicts), time.perf_counter() - t_start))
mmcv.mkdir_or_exist(osp.dirname(cache_path))
mmcv.dump(dataset_dicts, cache_path, protocol=4)
logger.info("Dumped dataset_dicts to {}".format(cache_path))
return dataset_dicts
@lazy_property
def models_info(self):
models_info_path = osp.join(self.models_root, "models_info.json")
assert osp.exists(models_info_path), models_info_path
models_info = mmcv.load(models_info_path) # key is str(obj_id)
return models_info
@lazy_property
def models(self):
"""Load models into a list."""
cache_path = osp.join(self.models_root, "models_{}.pkl".format("_".join(self.objs)))
if osp.exists(cache_path) and self.use_cache:
# dprint("{}: load cached object models from {}".format(self.name, cache_path))
return mmcv.load(cache_path)
models = []
for obj_name in self.objs:
model = inout.load_ply(
osp.join(
self.models_root,
f"obj_{ref.lm_full.obj2id[obj_name]:06d}.ply",
),
vertex_scale=self.scale_to_meter,
)
# NOTE: bbox3d_and_center is computed from the raw (non-centered) vertices;
# for BOP models this is fine since they are already centered
model["bbox3d_and_center"] = misc.get_bbox3d_and_center(model["pts"])
models.append(model)
logger.info("cache models to {}".format(cache_path))
mmcv.dump(models, cache_path, protocol=4)
return models
def image_aspect_ratio(self):
return self.width / self.height # 4/3
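# Illustrative sketch (not part of the original loader): the minimal shape of a
# single dataset dict emitted by __call__ above, with hypothetical values, to
# show what downstream Detectron2 dataloaders receive.
def _example_dataset_dict():
    import numpy as np
    from detectron2.structures import BoxMode

    return {
        "dataset_name": "lm_egl_13_train",
        "file_name": "datasets/BOP_DATASETS/lm/train_egl/rgb/000000.jpg",
        "height": 480,
        "width": 640,
        "image_id": 0,
        "scene_im_id": "0/0",
        "cam": np.array([[572.4114, 0, 325.2611], [0, 573.57043, 242.04899], [0, 0, 1]]),
        "annotations": [
            {
                "category_id": 0,  # 0-based label
                "bbox": [100, 120, 50, 60],
                "bbox_mode": BoxMode.XYWH_ABS,
            }
        ],
    }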
########### register datasets ############################################################
def get_lm_metadata(obj_names, ref_key):
"""task specific metadata."""
data_ref = ref.__dict__[ref_key]
cur_sym_infos = {} # label based key
loaded_models_info = data_ref.get_models_info()
for i, obj_name in enumerate(obj_names):
obj_id = data_ref.obj2id[obj_name]
model_info = loaded_models_info[str(obj_id)]
if "symmetries_discrete" in model_info or "symmetries_continuous" in model_info:
sym_transforms = misc.get_symmetry_transformations(model_info, max_sym_disc_step=0.01)
sym_info = np.array([sym["R"] for sym in sym_transforms], dtype=np.float32)
else:
sym_info = None
cur_sym_infos[i] = sym_info
meta = {"thing_classes": obj_names, "sym_infos": cur_sym_infos}
return meta
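# Sketch of the metadata produced above: for an object with annotated
# symmetries (e.g. "eggbox" or "glue"), sym_infos[label] is an (N, 3, 3)
# float32 array of symmetry rotations read from models_info.json; for
# asymmetric objects it is None.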
LM_13_OBJECTS = [
"ape",
"benchvise",
"camera",
"can",
"cat",
"driller",
"duck",
"eggbox",
"glue",
"holepuncher",
"iron",
"lamp",
"phone",
] # no bowl, cup
LM_OCC_OBJECTS = [
"ape",
"can",
"cat",
"driller",
"duck",
"eggbox",
"glue",
"holepuncher",
]
lm_model_root = "BOP_DATASETS/lm/models/"
lmo_model_root = "BOP_DATASETS/lmo/models/"
################################################################################
SPLITS_LM_EGL = dict(
lm_egl_13_train=dict(
name="lm_egl_13_train",
objs=LM_13_OBJECTS, # selected objects
dataset_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/train_egl"),
models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/models"),
xyz_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/train_egl/xyz_crop"),
scale_to_meter=0.001,
with_masks=True, # (load masks but may not use it)
with_depth=True, # (load depth path here, but may not use it)
depth_factor=10000.0,
height=480,
width=640,
cache_dir=osp.join(PROJ_ROOT, ".cache"),
use_cache=True,
num_to_load=-1,
filter_invalid=True,
ref_key="lm_full",
),
lmo_egl_train=dict(
name="lmo_egl_train",
objs=LM_OCC_OBJECTS, # selected objects
dataset_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/train_egl"),
models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/models"),
# NOTE: soft link to lm/train_egl
xyz_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/train_egl/xyz_crop"),
scale_to_meter=0.001,
with_masks=True, # (load masks but may not use it)
with_depth=True, # (load depth path here, but may not use it)
depth_factor=10000.0,
height=480,
width=640,
cache_dir=osp.join(PROJ_ROOT, ".cache"),
use_cache=True,
num_to_load=-1,
filter_invalid=True,
ref_key="lmo_full",
),
)
# single obj splits
for obj in ref.lm_full.objects:
for split in ["train"]:
name = "lm_egl_{}_{}".format(obj, split)
if split in ["train"]:
filter_invalid = True
else:
raise ValueError("{}".format(split))
if name not in SPLITS_LM_EGL:
SPLITS_LM_EGL[name] = dict(
name=name,
objs=[obj], # only this obj
dataset_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/train_egl"),
models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/models"),
xyz_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/train_egl/xyz_crop"),
scale_to_meter=0.001,
with_masks=True, # (load masks but may not use it)
with_depth=True, # (load depth path here, but may not use it)
depth_factor=10000.0,
height=480,
width=640,
cache_dir=osp.join(PROJ_ROOT, ".cache"),
use_cache=True,
num_to_load=-1,
filter_invalid=filter_invalid,
ref_key="lm_full",
)
# lmo single objs
for obj in ref.lmo_full.objects:
for split in ["train"]:
name = "lmo_egl_{}_{}".format(obj, split)
if split in ["train"]:
filter_invalid = True
else:
raise ValueError("{}".format(split))
if name not in SPLITS_LM_EGL:
SPLITS_LM_EGL[name] = dict(
name=name,
objs=[obj], # only this obj
dataset_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/train_egl"),
models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/models"),
# NOTE: soft link to lm/train_egl
xyz_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/train_egl/xyz_crop"),
scale_to_meter=0.001,
with_masks=True, # (load masks but may not use it)
with_depth=True, # (load depth path here, but may not use it)
depth_factor=10000.0,
height=480,
width=640,
cache_dir=osp.join(PROJ_ROOT, ".cache"),
use_cache=True,
num_to_load=-1,
filter_invalid=filter_invalid,
ref_key="lmo_full",
)
def register_with_name_cfg(name, data_cfg=None):
"""Assume pre-defined datasets live in `./datasets`.
Args:
name: dataset_name
data_cfg: if name is in the pre-defined SPLITS, its data_cfg is used;
    otherwise data_cfg is required (it can be set via cfg.DATA_CFG.name)
"""
dprint("register dataset: {}".format(name))
if name in SPLITS_LM_EGL:
used_cfg = SPLITS_LM_EGL[name]
else:
assert data_cfg is not None, f"dataset name {name} is not registered"
used_cfg = data_cfg
DatasetCatalog.register(name, LM_SYN_EGL_Dataset(used_cfg))
# something like eval_types
MetadataCatalog.get(name).set(
ref_key=used_cfg["ref_key"],
objs=used_cfg["objs"],
eval_error_types=["ad", "rete", "proj"],
evaluator_type="bop",
**get_lm_metadata(obj_names=used_cfg["objs"], ref_key=used_cfg["ref_key"]),
)
def get_available_datasets():
return list(SPLITS_LM_EGL.keys())
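# Usage sketch: registering a pre-defined split and loading it lazily through
# the Detectron2 catalogs (the dicts are only built on DatasetCatalog.get):
#   register_with_name_cfg("lm_egl_13_train")
#   dicts = DatasetCatalog.get("lm_egl_13_train")  # triggers LM_SYN_EGL_Dataset.__call__
#   meta = MetadataCatalog.get("lm_egl_13_train")  # thing_classes, sym_infos, ...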
#### tests ###############################################
def test_vis():
dset_name = sys.argv[1]
assert dset_name in DatasetCatalog.list()
meta = MetadataCatalog.get(dset_name)
dprint("MetadataCatalog: ", meta)
objs = meta.objs
t_start = time.perf_counter()
dicts = DatasetCatalog.get(dset_name)
logger.info("Done loading {} samples with {:.3f}s.".format(len(dicts), time.perf_counter() - t_start))
dirname = "output/{}-data-vis".format(dset_name)
os.makedirs(dirname, exist_ok=True)
for d in dicts:
img = read_image_mmcv(d["file_name"], format="BGR")
depth = mmcv.imread(d["depth_file"], "unchanged") / 10000.0
imH, imW = img.shape[:2]
annos = d["annotations"]
masks = [cocosegm2mask(anno["segmentation"], imH, imW) for anno in annos]
bboxes = [anno["bbox"] for anno in annos]
bbox_modes = [anno["bbox_mode"] for anno in annos]
bboxes_xyxy = np.array(
[BoxMode.convert(box, box_mode, BoxMode.XYXY_ABS) for box, box_mode in zip(bboxes, bbox_modes)]
)
kpts_3d_list = [anno["bbox3d_and_center"] for anno in annos]
quats = [anno["quat"] for anno in annos]
transes = [anno["trans"] for anno in annos]
Rs = [quat2mat(quat) for quat in quats]
# 0-based label
cat_ids = [anno["category_id"] for anno in annos]
K = d["cam"]
kpts_2d = [misc.project_pts(kpt3d, K, R, t) for kpt3d, R, t in zip(kpts_3d_list, Rs, transes)]
# # TODO: visualize pose and keypoints
labels = [objs[cat_id] for cat_id in cat_ids]
for _i in range(len(annos)):
img_vis = vis_image_mask_bbox_cv2(
img,
masks[_i : _i + 1],
bboxes=bboxes_xyxy[_i : _i + 1],
labels=labels[_i : _i + 1],
)
img_vis_kpts2d = misc.draw_projected_box3d(img_vis.copy(), kpts_2d[_i])
xyz_path = annos[_i]["xyz_path"]
xyz_info = mmcv.load(xyz_path)
x1, y1, x2, y2 = xyz_info["xyxy"]
xyz_crop = xyz_info["xyz_crop"].astype(np.float32)
xyz = np.zeros((imH, imW, 3), dtype=np.float32)
xyz[y1 : y2 + 1, x1 : x2 + 1, :] = xyz_crop
xyz_show = get_emb_show(xyz)
xyz_crop_show = get_emb_show(xyz_crop)
img_xyz = img.copy() / 255.0
mask_xyz = ((xyz[:, :, 0] != 0) | (xyz[:, :, 1] != 0) | (xyz[:, :, 2] != 0)).astype("uint8")
fg_idx = np.where(mask_xyz != 0)
img_xyz[fg_idx[0], fg_idx[1], :] = xyz_show[fg_idx[0], fg_idx[1], :3]
img_xyz_crop = img_xyz[y1 : y2 + 1, x1 : x2 + 1, :]
img_vis_crop = img_vis[y1 : y2 + 1, x1 : x2 + 1, :]
# diff mask
diff_mask_xyz = np.abs(masks[_i].astype(np.int32) - mask_xyz.astype(np.int32))[y1 : y2 + 1, x1 : x2 + 1]  # int cast avoids uint8 wrap-around
grid_show(
[
img[:, :, [2, 1, 0]],
img_vis[:, :, [2, 1, 0]],
img_vis_kpts2d[:, :, [2, 1, 0]],
depth,
# xyz_show,
diff_mask_xyz,
xyz_crop_show,
img_xyz[:, :, [2, 1, 0]],
img_xyz_crop[:, :, [2, 1, 0]],
img_vis_crop,
],
[
"img",
"vis_img",
"img_vis_kpts2d",
"depth",
"diff_mask_xyz",
"xyz_crop_show",
"img_xyz",
"img_xyz_crop",
"img_vis_crop",
],
row=3,
col=3,
)
if __name__ == "__main__":
"""Test the dataset loader.
Usage:
python -m this_module dataset_name
"""
from lib.vis_utils.image import grid_show
from lib.utils.setup_logger import setup_my_logger
import detectron2.data.datasets # noqa # add pre-defined metadata
from lib.vis_utils.image import vis_image_mask_bbox_cv2
from core.utils.utils import get_emb_show
from core.utils.data_utils import read_image_mmcv
print("sys.argv:", sys.argv)
logger = setup_my_logger(name="core")
register_with_name_cfg(sys.argv[1])
print("dataset catalog: ", DatasetCatalog.list())
test_vis()
import hashlib
import logging
import os
import os.path as osp
import sys
cur_dir = osp.dirname(osp.abspath(__file__))
PROJ_ROOT = osp.normpath(osp.join(cur_dir, "../../.."))
sys.path.insert(0, PROJ_ROOT)
import time
from collections import OrderedDict
import mmcv
import numpy as np
from tqdm import tqdm
from transforms3d.quaternions import mat2quat, quat2mat
import random
import ref
from detectron2.data import DatasetCatalog, MetadataCatalog
from detectron2.structures import BoxMode
from lib.pysixd import inout, misc
from lib.utils.mask_utils import (
binary_mask_to_rle,
cocosegm2mask,
mask2bbox_xywh,
)
from lib.utils.utils import dprint, iprint, lazy_property
logger = logging.getLogger(__name__)
DATASETS_ROOT = osp.normpath(osp.join(PROJ_ROOT, "datasets"))
class LM_SYN_IMGN_Dataset(object):
"""lm synthetic data, imgn(imagine) from DeepIM."""
def __init__(self, data_cfg):
"""
Set with_depth and with_masks default to True,
and decide whether to load them into dataloader/network later
with_masks:
"""
self.name = data_cfg["name"]
self.data_cfg = data_cfg
self.objs = data_cfg["objs"] # selected objects
self.ann_files = data_cfg["ann_files"] # idx files with image ids
self.image_prefixes = data_cfg["image_prefixes"]
self.xyz_prefixes = data_cfg["xyz_prefixes"]
self.dataset_root = data_cfg["dataset_root"] # lm_imgn
self.models_root = data_cfg["models_root"] # BOP_DATASETS/lm/models
self.scale_to_meter = data_cfg["scale_to_meter"] # 0.001
self.with_masks = data_cfg["with_masks"] # True (load masks but may not use it)
self.with_depth = data_cfg["with_depth"] # True (load depth path here, but may not use it)
self.depth_factor = data_cfg["depth_factor"] # 1000.0
self.cam = data_cfg["cam"] #
self.height = data_cfg["height"] # 480
self.width = data_cfg["width"] # 640
self.cache_dir = data_cfg["cache_dir"] # .cache
self.use_cache = data_cfg["use_cache"] # True
# sample uniformly to get n items
self.n_per_obj = data_cfg.get("n_per_obj", 1000)
self.filter_invalid = data_cfg["filter_invalid"]
self.filter_scene = data_cfg.get("filter_scene", False)
##################################################
if self.cam is None:
self.cam = np.array([[572.4114, 0, 325.2611], [0, 573.57043, 242.04899], [0, 0, 1]])
# NOTE: careful! Only the selected objects
self.cat_ids = [cat_id for cat_id, obj_name in ref.lm_full.id2obj.items() if obj_name in self.objs]
# map selected objs to [0, num_objs-1]
self.cat2label = {v: i for i, v in enumerate(self.cat_ids)} # id_map
self.label2cat = {label: cat for cat, label in self.cat2label.items()}
self.obj2label = OrderedDict((obj, obj_id) for obj_id, obj in enumerate(self.objs))
##########################################################
def __call__(self): # LM_SYN_IMGN_Dataset
"""Load light-weight instance annotations of all images into a list of
dicts in Detectron2 format.
Do not load heavy data into memory in this file, since we will
load the annotations of all images into memory.
"""
# cache the dataset_dicts to avoid loading masks from files
hashed_file_name = hashlib.md5(
(
"".join([str(fn) for fn in self.objs])
+ "dataset_dicts_{}_{}_{}_{}_{}_{}".format(
self.name,
self.dataset_root,
self.with_masks,
self.with_depth,
self.n_per_obj,
__name__,
)
).encode("utf-8")
).hexdigest()
cache_path = osp.join(
self.dataset_root,
"dataset_dicts_{}_{}.pkl".format(self.name, hashed_file_name),
)
if osp.exists(cache_path) and self.use_cache:
logger.info("load cached dataset dicts from {}".format(cache_path))
return mmcv.load(cache_path)
t_start = time.perf_counter()
logger.info("loading dataset dicts: {}".format(self.name))
self.num_instances_without_valid_segmentation = 0
self.num_instances_without_valid_box = 0
dataset_dicts = [] #######################################################
assert len(self.ann_files) == len(self.image_prefixes), f"{len(self.ann_files)} != {len(self.image_prefixes)}"
assert len(self.ann_files) == len(self.xyz_prefixes), f"{len(self.ann_files)} != {len(self.xyz_prefixes)}"
for ann_file, scene_root, xyz_root in zip(self.ann_files, self.image_prefixes, self.xyz_prefixes):
# linemod each scene is an object
with open(ann_file, "r") as f_ann:
indices = [line.strip("\r\n").split()[-1] for line in f_ann.readlines()] # string ids
# sample uniformly (equal space)
if self.n_per_obj > 0:
sample_num = min(self.n_per_obj, len(indices))
sel_indices_idx = np.linspace(0, len(indices) - 1, sample_num, dtype=np.int32)
sel_indices = [indices[int(_i)] for _i in sel_indices_idx]
else:
sel_indices = indices
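# e.g. with 10 indices and n_per_obj=3, np.linspace(0, 9, 3, dtype=np.int32)
# selects positions [0, 4, 9]: evenly spaced, always keeping the first and
# last sample.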
for im_id in tqdm(sel_indices):
rgb_path = osp.join(scene_root, "{}-color.png").format(im_id)
assert osp.exists(rgb_path), rgb_path
depth_path = osp.join(scene_root, "{}-depth.png".format(im_id))
obj_name = im_id.split("/")[0]
if obj_name == "benchviseblue":
obj_name = "benchvise"
obj_id = ref.lm_full.obj2id[obj_name]
if self.filter_scene:
if obj_name not in self.objs:
continue
record = {
"dataset_name": self.name,
"file_name": osp.relpath(rgb_path, PROJ_ROOT),
"depth_file": osp.relpath(depth_path, PROJ_ROOT),
"height": self.height,
"width": self.width,
"image_id": im_id.split("/")[-1],
"scene_im_id": im_id,
"cam": self.cam,
"img_type": "syn",
}
cur_label = self.obj2label[obj_name] # 0-based label
pose_path = osp.join(scene_root, "{}-pose.txt".format(im_id))
pose = np.loadtxt(pose_path, skiprows=1)
R = pose[:3, :3]
t = pose[:3, 3]
quat = mat2quat(R).astype("float32")
proj = (record["cam"] @ t.T).T
proj = proj[:2] / proj[2]
depth = mmcv.imread(depth_path, "unchanged") / 1000.0
mask = (depth > 0).astype(np.uint8)
bbox_obj = mask2bbox_xywh(mask)
x1, y1, w, h = bbox_obj
if self.filter_invalid:
if h <= 1 or w <= 1:
self.num_instances_without_valid_box += 1
continue
area = mask.sum()
if area < 3: # filter out too small or nearly invisible instances
self.num_instances_without_valid_segmentation += 1
continue
mask_rle = binary_mask_to_rle(mask, compressed=True)
xyz_path = osp.join(xyz_root, f"{im_id}-xyz.pkl")
assert osp.exists(xyz_path), xyz_path
inst = {
"category_id": cur_label, # 0-based label
"bbox": bbox_obj, # TODO: load both bbox_obj and bbox_visib
"bbox_mode": BoxMode.XYWH_ABS,
"pose": pose,
"quat": quat,
"trans": t,
"centroid_2d": proj, # absolute (cx, cy)
"segmentation": mask_rle,
"mask_full": mask_rle, # only one object
"visib_fract": 1.0,
"xyz_path": xyz_path,
}
model_info = self.models_info[str(obj_id)]
inst["model_info"] = model_info
# TODO: using full mask and full xyz
for key in ["bbox3d_and_center"]:
inst[key] = self.models[cur_label][key]
record["annotations"] = [inst]
dataset_dicts.append(record)
if self.num_instances_without_valid_segmentation > 0:
logger.warning(
"Filtered out {} instances without valid segmentation. "
"There might be issues in your dataset generation process.".format(
self.num_instances_without_valid_segmentation
)
)
if self.num_instances_without_valid_box > 0:
logger.warning(
"Filtered out {} instances without valid box. "
"There might be issues in your dataset generation process.".format(self.num_instances_without_valid_box)
)
##########################################################################
# if self.num_to_load > 0:
# self.num_to_load = min(int(self.num_to_load), len(dataset_dicts))
# random.shuffle(dataset_dicts)
# dataset_dicts = dataset_dicts[: self.num_to_load]
logger.info(
"loaded dataset dicts, num_images: {}, using {}s".format(len(dataset_dicts), time.perf_counter() - t_start)
)
mmcv.dump(dataset_dicts, cache_path, protocol=4)
logger.info("Dumped dataset_dicts to {}".format(cache_path))
return dataset_dicts
@lazy_property
def models_info(self):
models_info_path = osp.join(self.models_root, "models_info.json")
assert osp.exists(models_info_path), models_info_path
models_info = mmcv.load(models_info_path) # key is str(obj_id)
return models_info
@lazy_property
def models(self):
"""Load models into a list."""
cache_path = osp.join(self.models_root, "models_{}.pkl".format("_".join(self.objs)))
if osp.exists(cache_path) and self.use_cache:
# logger.info("load cached object models from {}".format(cache_path))
return mmcv.load(cache_path)
models = []
for obj_name in self.objs:
model = inout.load_ply(
osp.join(
self.models_root,
f"obj_{ref.lm_full.obj2id[obj_name]:06d}.ply",
),
vertex_scale=self.scale_to_meter,
)
# NOTE: bbox3d_and_center is computed from the raw (non-centered) vertices;
# for BOP models this is fine since they are already centered
model["bbox3d_and_center"] = misc.get_bbox3d_and_center(model["pts"])
models.append(model)
logger.info("cache models to {}".format(cache_path))
mmcv.dump(models, cache_path, protocol=4)
return models
def image_aspect_ratio(self):
# return 1
return self.width / self.height # 4/3
########### register datasets ############################################################
def get_lm_metadata(obj_names, ref_key):
"""task specific metadata."""
data_ref = ref.__dict__[ref_key]
cur_sym_infos = {} # label based key
loaded_models_info = data_ref.get_models_info()
for i, obj_name in enumerate(obj_names):
obj_id = data_ref.obj2id[obj_name]
model_info = loaded_models_info[str(obj_id)]
if "symmetries_discrete" in model_info or "symmetries_continuous" in model_info:
sym_transforms = misc.get_symmetry_transformations(model_info, max_sym_disc_step=0.01)
sym_info = np.array([sym["R"] for sym in sym_transforms], dtype=np.float32)
else:
sym_info = None
cur_sym_infos[i] = sym_info
meta = {"thing_classes": obj_names, "sym_infos": cur_sym_infos}
return meta
LM_13_OBJECTS = [
"ape",
"benchvise",
"camera",
"can",
"cat",
"driller",
"duck",
"eggbox",
"glue",
"holepuncher",
"iron",
"lamp",
"phone",
] # no bowl, cup
################################################################################
SPLITS_LM_IMGN_13 = dict(
lm_imgn_13_train_1k_per_obj=dict(
name="lm_imgn_13_train_1k_per_obj",
dataset_root=osp.join(DATASETS_ROOT, "lm_imgn/"),
models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/models"),
objs=LM_13_OBJECTS, # selected objects
ann_files=[
osp.join(
DATASETS_ROOT,
"lm_imgn/image_set/{}_{}.txt".format("train", _obj),
)
for _obj in LM_13_OBJECTS
],
image_prefixes=[osp.join(DATASETS_ROOT, "lm_imgn/imgn") for _obj in LM_13_OBJECTS],
xyz_prefixes=[osp.join(DATASETS_ROOT, "lm_imgn/xyz_crop_imgn/") for _obj in LM_13_OBJECTS],
scale_to_meter=0.001,
with_masks=True, # (load masks but may not use it)
with_depth=True, # (load depth path here, but may not use it)
depth_factor=1000.0,
cam=ref.lm_full.camera_matrix,
height=480,
width=640,
cache_dir=osp.join(PROJ_ROOT, ".cache"),
use_cache=True,
n_per_obj=1000, # 1000 per class
filter_scene=True,
filter_invalid=False,
ref_key="lm_full",
)
)
# single obj splits
for obj in ref.lm_full.objects:
for split in ["train"]:
name = "lm_imgn_13_{}_{}_1k".format(obj, split)
ann_files = [osp.join(DATASETS_ROOT, "lm_imgn/image_set/{}_{}.txt".format(split, obj))]
if split in ["train"]:
filter_invalid = True
elif split in ["test"]:
filter_invalid = False
else:
raise ValueError("{}".format(split))
if name not in SPLITS_LM_IMGN_13:
SPLITS_LM_IMGN_13[name] = dict(
name=name,
dataset_root=osp.join(DATASETS_ROOT, "lm_imgn/"),
models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/models"),
objs=[obj], # only this obj
ann_files=ann_files,
image_prefixes=[osp.join(DATASETS_ROOT, "lm_imgn/imgn/")],
xyz_prefixes=[osp.join(DATASETS_ROOT, "lm_imgn/xyz_crop_imgn/")],
scale_to_meter=0.001,
with_masks=True, # (load masks but may not use it)
with_depth=True, # (load depth path here, but may not use it)
depth_factor=1000.0,
cam=ref.lm_full.camera_matrix,
height=480,
width=640,
cache_dir=osp.join(PROJ_ROOT, ".cache"),
use_cache=True,
n_per_obj=1000,
filter_invalid=filter_invalid,  # use the split-dependent value computed above
filter_scene=True,
ref_key="lm_full",
)
def register_with_name_cfg(name, data_cfg=None):
"""Assume pre-defined datasets live in `./datasets`.
Args:
name: dataset_name
data_cfg: if name is in the pre-defined SPLITS, its data_cfg is used;
    otherwise data_cfg is required (it can be set via cfg.DATA_CFG.name)
"""
dprint("register dataset: {}".format(name))
if name in SPLITS_LM_IMGN_13:
used_cfg = SPLITS_LM_IMGN_13[name]
else:
assert data_cfg is not None, f"dataset name {name} is not registered"
used_cfg = data_cfg
DatasetCatalog.register(name, LM_SYN_IMGN_Dataset(used_cfg))
# something like eval_types
MetadataCatalog.get(name).set(
ref_key=used_cfg["ref_key"],
objs=used_cfg["objs"],
eval_error_types=["ad", "rete", "proj"],
evaluator_type="bop",
**get_lm_metadata(obj_names=used_cfg["objs"], ref_key=used_cfg["ref_key"]),
)
def get_available_datasets():
return list(SPLITS_LM_IMGN_13.keys())
#### tests ###############################################
def test_vis():
dset_name = sys.argv[1]
assert dset_name in DatasetCatalog.list()
meta = MetadataCatalog.get(dset_name)
dprint("MetadataCatalog: ", meta)
objs = meta.objs
t_start = time.perf_counter()
dicts = DatasetCatalog.get(dset_name)
logger.info("Done loading {} samples with {:.3f}s.".format(len(dicts), time.perf_counter() - t_start))
dirname = "output/{}-data-vis".format(dset_name)
os.makedirs(dirname, exist_ok=True)
for d in dicts:
img = read_image_mmcv(d["file_name"], format="BGR")
depth = mmcv.imread(d["depth_file"], "unchanged") / 1000.0
anno = d["annotations"][0] # only one instance per image
imH, imW = img.shape[:2]
mask = cocosegm2mask(anno["segmentation"], imH, imW)
bbox = anno["bbox"]
bbox_mode = anno["bbox_mode"]
bbox_xyxy = np.array(BoxMode.convert(bbox, bbox_mode, BoxMode.XYXY_ABS))
kpt3d = anno["bbox3d_and_center"]
quat = anno["quat"]
trans = anno["trans"]
R = quat2mat(quat)
# 0-based label
cat_id = anno["category_id"]
K = d["cam"]
kpt_2d = misc.project_pts(kpt3d, K, R, trans)
# # TODO: visualize pose and keypoints
label = objs[cat_id]
# img_vis = vis_image_bboxes_cv2(img, bboxes=bboxes_xyxy, labels=labels)
img_vis = vis_image_mask_bbox_cv2(img, [mask], bboxes=[bbox_xyxy], labels=[label])
img_vis_kpt2d = img.copy()
img_vis_kpt2d = misc.draw_projected_box3d(
img_vis_kpt2d,
kpt_2d,
middle_color=None,
bottom_color=(128, 128, 128),
)
xyz_info = mmcv.load(anno["xyz_path"])
xyz = np.zeros((imH, imW, 3), dtype=np.float32)
xyz_crop = xyz_info["xyz_crop"].astype(np.float32)
x1, y1, x2, y2 = xyz_info["xyxy"]
xyz[y1 : y2 + 1, x1 : x2 + 1, :] = xyz_crop
xyz_show = get_emb_show(xyz)
grid_show(
[
img[:, :, [2, 1, 0]],
img_vis[:, :, [2, 1, 0]],
img_vis_kpt2d[:, :, [2, 1, 0]],
depth,
xyz_show,
],
["img", "vis_img", "img_vis_kpts2d", "depth", "emb_show"],
row=2,
col=3,
)
if __name__ == "__main__":
"""Test the dataset loader.
Usage:
python -m core.datasets.lm_syn_imgn dataset_name
"""
from lib.vis_utils.image import grid_show
from lib.utils.setup_logger import setup_my_logger
import detectron2.data.datasets # noqa # add pre-defined metadata
from lib.vis_utils.image import vis_image_mask_bbox_cv2
from core.utils.utils import get_emb_show
from core.utils.data_utils import read_image_mmcv
print("sys.argv:", sys.argv)
logger = setup_my_logger(name="core")
register_with_name_cfg(sys.argv[1])
print("dataset catalog: ", DatasetCatalog.list())
test_vis()
......@@ -3,12 +3,7 @@ import os.path as osp
import mmcv
from detectron2.data import DatasetCatalog
from . import (
lm_syn_imgn,
lm_dataset_d2,
# lm_syn_egl,
lm_pbr,
lm_blender,
# lm_dataset_crop_d2,
ycbv_pbr,
ycbv_d2,
ycbv_bop_test,
......@@ -16,8 +11,6 @@ from . import (
hb_bop_val,
hb_bop_test,
hb_bench_driller_phone_d2,
# duck_frames,
# lm_new_duck_pbr,
tudl_train_real,
tudl_pbr,
tudl_bop_test,
......
import hashlib
import logging
import os
import os.path as osp
import sys
import time
from collections import OrderedDict
import mmcv
import numpy as np
from tqdm import tqdm
from transforms3d.quaternions import mat2quat, quat2mat
from detectron2.data import DatasetCatalog, MetadataCatalog
from detectron2.structures import BoxMode
cur_dir = osp.dirname(osp.abspath(__file__))
PROJ_ROOT = osp.normpath(osp.join(cur_dir, "../../../.."))
sys.path.insert(0, PROJ_ROOT)
import ref
from lib.pysixd import inout, misc
from lib.utils.mask_utils import binary_mask_to_rle, cocosegm2mask
from lib.utils.utils import dprint, lazy_property
logger = logging.getLogger(__name__)
DATASETS_ROOT = osp.normpath(osp.join(PROJ_ROOT, "datasets"))
class LM_BLENDER_Dataset(object):
"""lm blender data, from pvnet-rendering."""
def __init__(self, data_cfg):
"""
Set with_depth and with_masks default to True,
and decide whether to load them into dataloader/network later
with_masks:
"""
self.name = data_cfg["name"]
self.data_cfg = data_cfg
self.objs = data_cfg["objs"] # selected objects
self.ann_files = data_cfg["ann_files"] # json files with image ids and pose/bbox
self.image_prefixes = data_cfg["image_prefixes"]
self.dataset_root = data_cfg["dataset_root"] # lm_renders_blender/
self.models_root = data_cfg["models_root"] # BOP_DATASETS/lm/models
self.scale_to_meter = data_cfg["scale_to_meter"] # 0.001
self.with_masks = data_cfg["with_masks"] # True (load masks but may not use it)
self.with_depth = data_cfg["with_depth"] # True (load depth path here, but may not use it)
self.depth_factor = data_cfg["depth_factor"] # 1000.0
self.cam = data_cfg["cam"] #
self.height = data_cfg["height"] # 480
self.width = data_cfg["width"] # 640
self.cache_dir = data_cfg["cache_dir"] # .cache
self.use_cache = data_cfg["use_cache"] # True
# sample uniformly to get n items
self.n_per_obj = data_cfg.get("n_per_obj", 10000)
self.filter_invalid = data_cfg["filter_invalid"]
##################################################
if self.cam is None:
self.cam = np.array([[572.4114, 0, 325.2611], [0, 573.57043, 242.04899], [0, 0, 1]])
# NOTE: careful! Only the selected objects
self.cat_ids = [cat_id for cat_id, obj_name in ref.lm_full.id2obj.items() if obj_name in self.objs]
# map selected objs to [0, num_objs-1]
self.cat2label = {v: i for i, v in enumerate(self.cat_ids)} # id_map
self.label2cat = {label: cat for cat, label in self.cat2label.items()}
self.obj2label = OrderedDict((obj, obj_id) for obj_id, obj in enumerate(self.objs))
##########################################################
def __call__(self): # LM_BLENDER
"""Load light-weight instance annotations of all images into a list of
dicts in Detectron2 format.
Do not load heavy data into memory in this file, since we will
load the annotations of all images into memory.
"""
# cache the dataset_dicts to avoid loading masks from files
hashed_file_name = hashlib.md5(
(
"".join([str(fn) for fn in self.objs])
+ "dataset_dicts_{}_{}_{}_{}_{}_{}".format(
self.name,
self.dataset_root,
self.with_masks,
self.with_depth,
self.n_per_obj,
__name__,
)
).encode("utf-8")
).hexdigest()
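# The md5 key above covers the object list, split name, dataset root,
# mask/depth flags and per-object sample count (plus the module name), so
# changing any of them yields a fresh cache file; if the underlying
# annotations change, stale caches must be removed manually.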
cache_path = osp.join(
self.cache_dir,
"dataset_dicts_{}_{}.pkl".format(self.name, hashed_file_name),
)
if osp.exists(cache_path) and self.use_cache:
logger.info("load cached dataset dicts from {}".format(cache_path))
return mmcv.load(cache_path)
t_start = time.perf_counter()
logger.info("loading dataset dicts: {}".format(self.name))
self.num_instances_without_valid_segmentation = 0
self.num_instances_without_valid_box = 0
dataset_dicts = [] #######################################################
assert len(self.ann_files) == len(self.image_prefixes), f"{len(self.ann_files)} != {len(self.image_prefixes)}"
for ann_file, scene_root in zip(tqdm(self.ann_files), self.image_prefixes):
# each scene is an object
assert osp.exists(ann_file), ann_file
scene_gt_dict = mmcv.load(ann_file)
# sample uniformly (equal space)
indices = list(scene_gt_dict.keys())
if self.n_per_obj > 0:
sample_num = min(self.n_per_obj, len(scene_gt_dict))
sel_indices_idx = np.linspace(0, len(scene_gt_dict) - 1, sample_num, dtype=np.int32)
sel_indices = [indices[int(_i)] for _i in sel_indices_idx]
else:
sel_indices = indices
for str_im_id in tqdm(sel_indices):
int_im_id = int(str_im_id)
rgb_path = osp.join(scene_root, "{}.jpg").format(str_im_id)
assert osp.exists(rgb_path), rgb_path
depth_path = osp.join(scene_root, "{}_depth_opengl.png".format(str_im_id))
obj_name = osp.basename(ann_file).split("_")[0] # obj_gt.json
obj_id = ref.lm_full.obj2id[obj_name]
if obj_name not in self.objs:
continue
record = {
"dataset_name": self.name,
"file_name": osp.relpath(rgb_path, PROJ_ROOT),
"depth_file": osp.relpath(depth_path, PROJ_ROOT),
"height": self.height,
"width": self.width,
"image_id": int_im_id,
"scene_im_id": f"{obj_id}/{int_im_id}",
"cam": self.cam,
"img_type": "syn_blender", # has bg
}
cur_label = self.obj2label[obj_name] # 0-based label
anno = scene_gt_dict[str_im_id][0] # only one object
R = np.array(anno["cam_R_m2c"]).reshape(3, 3)
t = np.array(anno["cam_t_m2c"]).reshape(-1) / 1000
pose = np.hstack([R, t.reshape(3, 1)])
quat = mat2quat(R).astype("float32")
proj = (record["cam"] @ t.T).T
proj = proj[:2] / proj[2]
bbox_visib = anno["bbox_visib"]
x1, y1, w, h = bbox_visib
if self.filter_invalid:
if h <= 1 or w <= 1:
self.num_instances_without_valid_box += 1
continue
mask_path = osp.join(scene_root, "{}_mask_opengl.png".format(str_im_id))
mask = mmcv.imread(mask_path, "unchanged")
mask = (mask > 0).astype(np.uint8)
area = mask.sum()
if area < 3: # filter out too small or nearly invisible instances
self.num_instances_without_valid_segmentation += 1
continue
mask_rle = binary_mask_to_rle(mask, compressed=True)
visib_fract = anno.get("visib_fract", 1.0)
inst = {
"category_id": cur_label, # 0-based label
"bbox": bbox_visib, # TODO: load both bbox_obj and bbox_visib
"bbox_mode": BoxMode.XYWH_ABS,
"pose": pose,
"quat": quat,
"trans": t,
"centroid_2d": proj, # absolute (cx, cy)
"segmentation": mask_rle,
"visib_fract": visib_fract,
}
model_info = self.models_info[str(obj_id)]
inst["model_info"] = model_info
for key in ["bbox3d_and_center"]:
inst[key] = self.models[cur_label][key]
record["annotations"] = [inst]
dataset_dicts.append(record)
if self.num_instances_without_valid_segmentation > 0:
logger.warning(
"Filtered out {} instances without valid segmentation. "
"There might be issues in your dataset generation process.".format(
self.num_instances_without_valid_segmentation
)
)
if self.num_instances_without_valid_box > 0:
logger.warning(
"Filtered out {} instances without valid box. "
"There might be issues in your dataset generation process.".format(self.num_instances_without_valid_box)
)
##########################################################################
# if self.num_to_load > 0:
# self.num_to_load = min(int(self.num_to_load), len(dataset_dicts))
# random.shuffle(dataset_dicts)
# dataset_dicts = dataset_dicts[: self.num_to_load]
logger.info(
"loaded dataset dicts, num_images: {}, using {}s".format(len(dataset_dicts), time.perf_counter() - t_start)
)
mmcv.dump(dataset_dicts, cache_path, protocol=4)
logger.info("Dumped dataset_dicts to {}".format(cache_path))
return dataset_dicts
@lazy_property
def models_info(self):
models_info_path = osp.join(self.models_root, "models_info.json")
assert osp.exists(models_info_path), models_info_path
models_info = mmcv.load(models_info_path) # key is str(obj_id)
return models_info
@lazy_property
def models(self):
"""Load models into a list."""
cache_path = osp.join(self.models_root, "models_{}.pkl".format("_".join(self.objs)))
if osp.exists(cache_path) and self.use_cache:
# dprint("{}: load cached object models from {}".format(self.name, cache_path))
return mmcv.load(cache_path)
models = []
for obj_name in self.objs:
model = inout.load_ply(
osp.join(
self.models_root,
f"obj_{ref.lm_full.obj2id[obj_name]:06d}.ply",
),
vertex_scale=self.scale_to_meter,
)
# NOTE: bbox3d_and_center is computed from the raw (non-centered) vertices;
# for BOP models this is fine since they are already centered
model["bbox3d_and_center"] = misc.get_bbox3d_and_center(model["pts"])
models.append(model)
logger.info("cache models to {}".format(cache_path))
mmcv.dump(models, cache_path, protocol=4)
return models
def image_aspect_ratio(self):
return self.width / self.height # 4/3
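# Worked sketch (hypothetical values): assembling the 3x4 [R|t] pose used
# throughout these loaders from a BOP-style annotation, with the translation
# converted from millimeters to meters as in __call__ above.
def _demo_pose_assembly():
    import numpy as np
    from transforms3d.quaternions import mat2quat

    R = np.eye(3)
    t = np.array([10.0, -20.0, 500.0]) / 1000.0  # mm -> m
    pose = np.hstack([R, t.reshape(3, 1)])  # 3x4 [R|t]
    quat = mat2quat(R).astype("float32")  # (w, x, y, z) = (1, 0, 0, 0)
    return pose, quat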
########### register datasets ############################################################
def get_lm_metadata(obj_names, ref_key):
"""task specific metadata."""
data_ref = ref.__dict__[ref_key]
cur_sym_infos = {} # label based key
loaded_models_info = data_ref.get_models_info()
for i, obj_name in enumerate(obj_names):
obj_id = data_ref.obj2id[obj_name]
model_info = loaded_models_info[str(obj_id)]
if "symmetries_discrete" in model_info or "symmetries_continuous" in model_info:
sym_transforms = misc.get_symmetry_transformations(model_info, max_sym_disc_step=0.01)
sym_info = np.array([sym["R"] for sym in sym_transforms], dtype=np.float32)
else:
sym_info = None
cur_sym_infos[i] = sym_info
meta = {"thing_classes": obj_names, "sym_infos": cur_sym_infos}
return meta
LM_13_OBJECTS = [
"ape",
"benchvise",
"camera",
"can",
"cat",
"driller",
"duck",
"eggbox",
"glue",
"holepuncher",
"iron",
"lamp",
"phone",
] # no bowl, cup
LM_OCC_OBJECTS = [
"ape",
"can",
"cat",
"driller",
"duck",
"eggbox",
"glue",
"holepuncher",
]
################################################################################
SPLITS_LM_BLENDER = dict(
lm_blender_13_train=dict(
name="lm_blender_13_train", # BB8 training set
dataset_root=osp.join(DATASETS_ROOT, "lm_renders_blender/"),
models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/models"),
objs=LM_13_OBJECTS, # selected objects
ann_files=[
osp.join(
DATASETS_ROOT,
"lm_renders_blender/renders/{}_gt.json".format(_obj),
)
for _obj in LM_13_OBJECTS
],
image_prefixes=[
osp.join(DATASETS_ROOT, "lm_renders_blender/renders/{}".format(_obj)) for _obj in LM_13_OBJECTS
],
scale_to_meter=0.001,
with_masks=True, # (load masks but may not use it)
with_depth=True, # (load depth path here, but may not use it)
depth_factor=1000.0,
cam=ref.lm_full.camera_matrix,
height=480,
width=640,
cache_dir=osp.join(PROJ_ROOT, ".cache"),
use_cache=True,
n_per_obj=-1, # num per class, -1 for all 10k
filter_invalid=False,
ref_key="lm_full",
),
lmo_blender_train=dict(
name="lmo_blender_train",
dataset_root=osp.join(DATASETS_ROOT, "lm_renders_blender/"),
models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/models"),
objs=LM_OCC_OBJECTS, # selected objects
ann_files=[
osp.join(
DATASETS_ROOT,
"lm_renders_blender/renders/{}_gt.json".format(_obj),
)
for _obj in LM_OCC_OBJECTS
],
image_prefixes=[
osp.join(DATASETS_ROOT, "lm_renders_blender/renders/{}".format(_obj)) for _obj in LM_OCC_OBJECTS
],
scale_to_meter=0.001,
with_masks=True, # (load masks but may not use it)
with_depth=True, # (load depth path here, but may not use it)
depth_factor=1000.0,
cam=ref.lmo_full.camera_matrix,
height=480,
width=640,
cache_dir=osp.join(PROJ_ROOT, ".cache"),
use_cache=True,
n_per_obj=-1, # n per class, -1 for all 10k
filter_invalid=False,
ref_key="lmo_full",
),
)
# single obj splits
for obj in ref.lm_full.objects:
for split in ["train"]:
for name_prefix in ["lm", "lmo"]:
name = "{}_blender_{}_{}".format(name_prefix, obj, split)
ref_key = f"{name_prefix}_full"
ann_files = [
osp.join(
DATASETS_ROOT,
"lm_renders_blender/renders/{}_gt.json".format(obj),
)
]
if split in ["train"]:
filter_invalid = True
else:
raise ValueError("{}".format(split))
if name not in SPLITS_LM_BLENDER:
SPLITS_LM_BLENDER[name] = dict(
name=name,
dataset_root=osp.join(DATASETS_ROOT, "lm_renders_blender/"),
models_root=osp.join(DATASETS_ROOT, f"BOP_DATASETS/{name_prefix}/models"),
objs=[obj], # only this obj
ann_files=ann_files,
image_prefixes=[osp.join(DATASETS_ROOT, f"lm_renders_blender/renders/{obj}")],
scale_to_meter=0.001,
with_masks=True, # (load masks but may not use it)
with_depth=True, # (load depth path here, but may not use it)
depth_factor=1000.0,
cam=ref.__dict__[ref_key].camera_matrix,
height=480,
width=640,
cache_dir=osp.join(PROJ_ROOT, ".cache"),
use_cache=True,
n_per_obj=-1,
filter_invalid=filter_invalid,  # use the split-dependent value computed above
ref_key=ref_key,
)
def register_with_name_cfg(name, data_cfg=None):
"""Assume pre-defined datasets live in `./datasets`.
Args:
name: dataset_name
data_cfg: if name is in the pre-defined SPLITS, its data_cfg is used;
    otherwise data_cfg is required (it can be set via cfg.DATA_CFG.name)
"""
dprint("register dataset: {}".format(name))
if name in SPLITS_LM_BLENDER:
used_cfg = SPLITS_LM_BLENDER[name]
else:
assert data_cfg is not None, f"dataset name {name} is not registered"
used_cfg = data_cfg
DatasetCatalog.register(name, LM_BLENDER_Dataset(used_cfg))
# something like eval_types
MetadataCatalog.get(name).set(
ref_key=used_cfg["ref_key"],
objs=used_cfg["objs"],
eval_error_types=["ad", "rete", "proj"],
evaluator_type="coco_bop",
**get_lm_metadata(obj_names=used_cfg["objs"], ref_key=used_cfg["ref_key"]),
)
def get_available_datasets():
return list(SPLITS_LM_BLENDER.keys())
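# Naming sketch: the single-object splits generated above follow
# "{lm,lmo}_blender_{obj}_train", e.g. "lm_blender_ape_train";
# get_available_datasets() lists them together with the multi-object splits.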
#### tests ###############################################
def test_vis():
dset_name = sys.argv[1]
assert dset_name in DatasetCatalog.list()
meta = MetadataCatalog.get(dset_name)
dprint("MetadataCatalog: ", meta)
objs = meta.objs
t_start = time.perf_counter()
dicts = DatasetCatalog.get(dset_name)
logger.info("Done loading {} samples with {:.3f}s.".format(len(dicts), time.perf_counter() - t_start))
dirname = "output/{}-data-vis".format(dset_name)
os.makedirs(dirname, exist_ok=True)
for d in dicts:
img = read_image_mmcv(d["file_name"], format="BGR")
depth = mmcv.imread(d["depth_file"], "unchanged") / 1000.0
anno = d["annotations"][0] # only one instance per image
imH, imW = img.shape[:2]
mask = cocosegm2mask(anno["segmentation"], imH, imW)
bbox = anno["bbox"]
bbox_mode = anno["bbox_mode"]
bbox_xyxy = np.array(BoxMode.convert(bbox, bbox_mode, BoxMode.XYXY_ABS))
kpt3d = anno["bbox3d_and_center"]
quat = anno["quat"]
trans = anno["trans"]
R = quat2mat(quat)
# 0-based label
cat_id = anno["category_id"]
K = d["cam"]
kpt_2d = misc.project_pts(kpt3d, K, R, trans)
# # TODO: visualize pose and keypoints
label = objs[cat_id]
# img_vis = vis_image_bboxes_cv2(img, bboxes=bboxes_xyxy, labels=labels)
img_vis = vis_image_mask_bbox_cv2(img, [mask], bboxes=[bbox_xyxy], labels=[label])
img_vis_kpt2d = img.copy()
img_vis_kpt2d = misc.draw_projected_box3d(
img_vis_kpt2d,
kpt_2d,
middle_color=None,
bottom_color=(128, 128, 128),
)
grid_show(
[
img[:, :, [2, 1, 0]],
img_vis[:, :, [2, 1, 0]],
img_vis_kpt2d[:, :, [2, 1, 0]],
depth,
],
["img", "vis_img", "img_vis_kpts2d", "depth"],
row=2,
col=2,
)
if __name__ == "__main__":
"""Test the dataset loader.
Usage:
python -m core.datasets.lm_blender dataset_name
"""
from lib.vis_utils.image import grid_show
from lib.utils.setup_logger import setup_my_logger
import detectron2.data.datasets # noqa # add pre-defined metadata
from lib.vis_utils.image import vis_image_mask_bbox_cv2
from core.utils.data_utils import read_image_mmcv
print("sys.argv:", sys.argv)
logger = setup_my_logger(name="core")
register_with_name_cfg(sys.argv[1])
print("dataset catalog: ", DatasetCatalog.list())
test_vis()
import hashlib
import logging
import os
import os.path as osp
import sys
import time
from collections import OrderedDict
import mmcv
import numpy as np
from tqdm import tqdm
from transforms3d.quaternions import mat2quat, quat2mat
from detectron2.data import DatasetCatalog, MetadataCatalog
from detectron2.structures import BoxMode
cur_dir = osp.dirname(osp.abspath(__file__))
PROJ_ROOT = osp.normpath(osp.join(cur_dir, "../../../.."))
sys.path.insert(0, PROJ_ROOT)
import ref
from lib.pysixd import inout, misc
from lib.utils.mask_utils import binary_mask_to_rle, cocosegm2mask
from lib.utils.utils import dprint, iprint, lazy_property
logger = logging.getLogger(__name__)
DATASETS_ROOT = osp.normpath(osp.join(PROJ_ROOT, "datasets"))
class LM_Dataset(object):
"""lm splits."""
def __init__(self, data_cfg):
"""
Set with_depth and with_masks default to True,
and decide whether to load them into dataloader/network later
with_masks:
"""
self.name = data_cfg["name"]
self.data_cfg = data_cfg
self.objs = data_cfg["objs"] # selected objects
self.ann_files = data_cfg["ann_files"] # idx files with image ids
self.image_prefixes = data_cfg["image_prefixes"]
self.xyz_prefixes = data_cfg["xyz_prefixes"]
self.dataset_root = data_cfg["dataset_root"] # BOP_DATASETS/lm/
assert osp.exists(self.dataset_root), self.dataset_root
self.models_root = data_cfg["models_root"] # BOP_DATASETS/lm/models
self.scale_to_meter = data_cfg["scale_to_meter"] # 0.001
self.with_masks = data_cfg["with_masks"] # True (load masks but may not use it)
self.with_depth = data_cfg["with_depth"] # True (load depth path here, but may not use it)
self.height = data_cfg["height"] # 480
self.width = data_cfg["width"] # 640
self.cache_dir = data_cfg.get("cache_dir", osp.join(PROJ_ROOT, ".cache")) # .cache
self.use_cache = data_cfg.get("use_cache", True)
self.num_to_load = data_cfg["num_to_load"] # -1
self.filter_invalid = data_cfg["filter_invalid"]
self.filter_scene = data_cfg.get("filter_scene", False)
self.debug_im_id = data_cfg.get("debug_im_id", None)
##################################################
# NOTE: careful! Only the selected objects
self.cat_ids = [cat_id for cat_id, obj_name in ref.lm_full.id2obj.items() if obj_name in self.objs]
# map selected objs to [0, num_objs-1]
self.cat2label = {v: i for i, v in enumerate(self.cat_ids)} # id_map
self.label2cat = {label: cat for cat, label in self.cat2label.items()}
self.obj2label = OrderedDict((obj, obj_id) for obj_id, obj in enumerate(self.objs))
##########################################################
def __call__(self): # LM_Dataset
"""Load light-weight instance annotations of all images into a list of
dicts in Detectron2 format.
Do not load heavy data into memory in this file, since we will
load the annotations of all images into memory.
"""
# cache the dataset_dicts to avoid loading masks from files
hashed_file_name = hashlib.md5(
(
"".join([str(fn) for fn in self.objs])
+ "dataset_dicts_{}_{}_{}_{}_{}".format(
self.name,
self.dataset_root,
self.with_masks,
self.with_depth,
__name__,
)
).encode("utf-8")
).hexdigest()
cache_path = osp.join(
self.cache_dir,
"dataset_dicts_{}_{}.pkl".format(self.name, hashed_file_name),
)
if osp.exists(cache_path) and self.use_cache:
logger.info("load cached dataset dicts from {}".format(cache_path))
return mmcv.load(cache_path)
t_start = time.perf_counter()
logger.info("loading dataset dicts: {}".format(self.name))
self.num_instances_without_valid_segmentation = 0
self.num_instances_without_valid_box = 0
dataset_dicts = [] # ######################################################
assert len(self.ann_files) == len(self.image_prefixes), f"{len(self.ann_files)} != {len(self.image_prefixes)}"
assert len(self.ann_files) == len(self.xyz_prefixes), f"{len(self.ann_files)} != {len(self.xyz_prefixes)}"
unique_im_id = 0
for ann_file, scene_root, xyz_root in zip(tqdm(self.ann_files), self.image_prefixes, self.xyz_prefixes):
# linemod each scene is an object
with open(ann_file, "r") as f_ann:
indices = [line.strip("\r\n") for line in f_ann.readlines()] # string ids
gt_dict = mmcv.load(osp.join(scene_root, "scene_gt.json"))
gt_info_dict = mmcv.load(osp.join(scene_root, "scene_gt_info.json")) # bbox_obj, bbox_visib
cam_dict = mmcv.load(osp.join(scene_root, "scene_camera.json"))
for im_id in tqdm(indices):
int_im_id = int(im_id)
str_im_id = str(int_im_id)
rgb_path = osp.join(scene_root, "rgb/{:06d}.png").format(int_im_id)
assert osp.exists(rgb_path), rgb_path
depth_path = osp.join(scene_root, "depth/{:06d}.png".format(int_im_id))
scene_id = int(rgb_path.split("/")[-3])
scene_im_id = f"{scene_id}/{int_im_id}"
if self.debug_im_id is not None:
if self.debug_im_id != scene_im_id:
continue
K = np.array(cam_dict[str_im_id]["cam_K"], dtype=np.float32).reshape(3, 3)
depth_factor = 1000.0 / cam_dict[str_im_id]["depth_scale"]
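# BOP convention: raw depth PNG values multiplied by depth_scale give depth
# in millimeters, so metric depth in meters = raw / depth_factor with
# depth_factor = 1000.0 / depth_scale.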
if self.filter_scene:
if scene_id not in self.cat_ids:
continue
record = {
"dataset_name": self.name,
"file_name": osp.relpath(rgb_path, PROJ_ROOT),
"depth_file": osp.relpath(depth_path, PROJ_ROOT),
"height": self.height,
"width": self.width,
"image_id": unique_im_id,
"scene_im_id": scene_im_id, # for evaluation
"cam": K,
"depth_factor": depth_factor,
"img_type": "real",
}
unique_im_id += 1
insts = []
for anno_i, anno in enumerate(gt_dict[str_im_id]):
obj_id = anno["obj_id"]
if obj_id not in self.cat_ids:
continue
cur_label = self.cat2label[obj_id] # 0-based label
R = np.array(anno["cam_R_m2c"], dtype="float32").reshape(3, 3)
t = np.array(anno["cam_t_m2c"], dtype="float32") / 1000.0
pose = np.hstack([R, t.reshape(3, 1)])
quat = mat2quat(R).astype("float32")
proj = (record["cam"] @ t.T).T
proj = proj[:2] / proj[2]
bbox_visib = gt_info_dict[str_im_id][anno_i]["bbox_visib"]
bbox_obj = gt_info_dict[str_im_id][anno_i]["bbox_obj"]
x1, y1, w, h = bbox_visib
if self.filter_invalid:
if h <= 1 or w <= 1:
self.num_instances_without_valid_box += 1
continue
mask_file = osp.join(
scene_root,
"mask/{:06d}_{:06d}.png".format(int_im_id, anno_i),
)
mask_visib_file = osp.join(
scene_root,
"mask_visib/{:06d}_{:06d}.png".format(int_im_id, anno_i),
)
assert osp.exists(mask_file), mask_file
assert osp.exists(mask_visib_file), mask_visib_file
# load mask visib
mask_single = mmcv.imread(mask_visib_file, "unchanged")
mask_single = mask_single.astype("bool")
area = mask_single.sum()
if area < 3: # filter out too small or nearly invisible instances
self.num_instances_without_valid_segmentation += 1
continue
mask_rle = binary_mask_to_rle(mask_single, compressed=True)
# load mask full
mask_full = mmcv.imread(mask_file, "unchanged")
mask_full = mask_full.astype("bool")
mask_full_rle = binary_mask_to_rle(mask_full, compressed=True)
inst = {
"category_id": cur_label, # 0-based label
"bbox": bbox_obj, # TODO: load both bbox_obj and bbox_visib
"bbox_mode": BoxMode.XYWH_ABS,
"pose": pose,
"quat": quat,
"trans": t,
"centroid_2d": proj, # absolute (cx, cy)
"segmentation": mask_rle,
"mask_full": mask_full_rle,
}
if "test" not in self.name.lower():
# if True:
xyz_path = osp.join(xyz_root, f"{int_im_id:06d}_{anno_i:06d}.pkl")
assert osp.exists(xyz_path), xyz_path
inst["xyz_path"] = xyz_path
model_info = self.models_info[str(obj_id)]
inst["model_info"] = model_info
# TODO: using full mask and full xyz
for key in ["bbox3d_and_center"]:
inst[key] = self.models[cur_label][key]
insts.append(inst)
if len(insts) == 0: # filter im without anno
continue
record["annotations"] = insts
dataset_dicts.append(record)
if self.num_instances_without_valid_segmentation > 0:
logger.warning(
"Filtered out {} instances without valid segmentation. "
"There might be issues in your dataset generation process.".format(
self.num_instances_without_valid_segmentation
)
)
if self.num_instances_without_valid_box > 0:
logger.warning(
"Filtered out {} instances without valid box. "
"There might be issues in your dataset generation process.".format(self.num_instances_without_valid_box)
)
##########################################################################
if self.num_to_load > 0:
self.num_to_load = min(int(self.num_to_load), len(dataset_dicts))
dataset_dicts = dataset_dicts[: self.num_to_load]
logger.info("loaded {} dataset dicts, using {}s".format(len(dataset_dicts), time.perf_counter() - t_start))
mmcv.mkdir_or_exist(osp.dirname(cache_path))
mmcv.dump(dataset_dicts, cache_path, protocol=4)
logger.info("Dumped dataset_dicts to {}".format(cache_path))
return dataset_dicts
@lazy_property
def models_info(self):
models_info_path = osp.join(self.models_root, "models_info.json")
assert osp.exists(models_info_path), models_info_path
models_info = mmcv.load(models_info_path) # key is str(obj_id)
return models_info
@lazy_property
def models(self):
"""Load models into a list."""
cache_path = osp.join(self.cache_dir, "models_{}.pkl".format("_".join(self.objs)))
if osp.exists(cache_path) and self.use_cache:
# dprint("{}: load cached object models from {}".format(self.name, cache_path))
return mmcv.load(cache_path)
models = []
for obj_name in self.objs:
model = inout.load_ply(
osp.join(
self.models_root,
f"obj_{ref.lm_full.obj2id[obj_name]:06d}.ply",
),
vertex_scale=self.scale_to_meter,
)
# NOTE: bbox3d_and_center is computed from the raw (non-centered) vertices;
# for BOP models this is fine since they are already centered
model["bbox3d_and_center"] = misc.get_bbox3d_and_center(model["pts"])
models.append(model)
logger.info("cache models to {}".format(cache_path))
mmcv.mkdir_or_exist(osp.dirname(cache_path))
mmcv.dump(models, cache_path, protocol=4)
return models
def image_aspect_ratio(self):
return self.width / self.height # 4/3
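# Minimal round-trip sketch (assuming the project's lib.utils.mask_utils
# helpers wrap COCO-style RLE, as their use above suggests): encode a toy
# mask to compressed RLE and decode it back, mirroring how "segmentation"
# entries are stored in the dataset dicts.
def _demo_rle_roundtrip():
    mask = np.zeros((480, 640), dtype=np.uint8)
    mask[100:200, 150:300] = 1
    rle = binary_mask_to_rle(mask, compressed=True)
    decoded = cocosegm2mask(rle, 480, 640)
    assert (decoded == mask).all()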
########### register datasets ############################################################
def get_lm_metadata(obj_names, ref_key):
"""task specific metadata."""
data_ref = ref.__dict__[ref_key]
cur_sym_infos = {} # label based key
loaded_models_info = data_ref.get_models_info()
for i, obj_name in enumerate(obj_names):
obj_id = data_ref.obj2id[obj_name]
model_info = loaded_models_info[str(obj_id)]
if "symmetries_discrete" in model_info or "symmetries_continuous" in model_info:
sym_transforms = misc.get_symmetry_transformations(model_info, max_sym_disc_step=0.01)
sym_info = np.array([sym["R"] for sym in sym_transforms], dtype=np.float32)
else:
sym_info = None
cur_sym_infos[i] = sym_info
meta = {"thing_classes": obj_names, "sym_infos": cur_sym_infos}
return meta
LM_13_OBJECTS = [
"ape",
"benchvise",
"camera",
"can",
"cat",
"driller",
"duck",
"eggbox",
"glue",
"holepuncher",
"iron",
"lamp",
"phone",
] # no bowl, cup
LM_OCC_OBJECTS = [
"ape",
"can",
"cat",
"driller",
"duck",
"eggbox",
"glue",
"holepuncher",
]
################################################################################
SPLITS_LM = dict(
lm_13_train=dict(
name="lm_13_train",
dataset_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/"),
models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/models"),
objs=LM_13_OBJECTS, # selected objects
ann_files=[
osp.join(
DATASETS_ROOT,
"BOP_DATASETS/lm/image_set/{}_{}.txt".format(_obj, "train"),
)
for _obj in LM_13_OBJECTS
],
image_prefixes=[
osp.join(
DATASETS_ROOT,
"BOP_DATASETS/lm/test/{:06d}".format(ref.lm_full.obj2id[_obj]),
)
for _obj in LM_13_OBJECTS
],
xyz_prefixes=[
osp.join(
DATASETS_ROOT,
"BOP_DATASETS/lm/test/xyz_crop/{:06d}".format(ref.lm_full.obj2id[_obj]),
)
for _obj in LM_13_OBJECTS
],
scale_to_meter=0.001,
with_masks=True, # (load masks but may not use it)
with_depth=True, # (load depth path here, but may not use it)
height=480,
width=640,
cache_dir=osp.join(PROJ_ROOT, ".cache"),
use_cache=True,
num_to_load=-1,
filter_scene=True,
filter_invalid=True,
ref_key="lm_full",
),
lm_13_test=dict(
name="lm_13_test",
dataset_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/"),
models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/models"),
objs=LM_13_OBJECTS,
ann_files=[
osp.join(
DATASETS_ROOT,
"BOP_DATASETS/lm/image_set/{}_{}.txt".format(_obj, "test"),
)
for _obj in LM_13_OBJECTS
],
# NOTE: scene root
image_prefixes=[
osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/test/{:06d}").format(ref.lm_full.obj2id[_obj])
for _obj in LM_13_OBJECTS
],
xyz_prefixes=[
osp.join(
DATASETS_ROOT,
"BOP_DATASETS/lm/test/xyz_crop/{:06d}".format(ref.lm_full.obj2id[_obj]),
)
for _obj in LM_13_OBJECTS
],
scale_to_meter=0.001,
with_masks=True, # (load masks but may not use it)
with_depth=True, # (load depth path here, but may not use it)
height=480,
width=640,
cache_dir=osp.join(PROJ_ROOT, ".cache"),
use_cache=True,
num_to_load=-1,
filter_scene=True,
filter_invalid=False,
ref_key="lm_full",
),
lmo_train=dict(
name="lmo_train",
# use lm real all (8 objects) to train for lmo
dataset_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/"),
models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/models"),
objs=LM_OCC_OBJECTS, # selected objects
ann_files=[
osp.join(
DATASETS_ROOT,
"BOP_DATASETS/lm/image_set/{}_{}.txt".format(_obj, "all"),
)
for _obj in LM_OCC_OBJECTS
],
image_prefixes=[
osp.join(
DATASETS_ROOT,
"BOP_DATASETS/lm/test/{:06d}".format(ref.lmo_full.obj2id[_obj]),
)
for _obj in LM_OCC_OBJECTS
],
xyz_prefixes=[
osp.join(
DATASETS_ROOT,
"BOP_DATASETS/lm/test/xyz_crop/{:06d}".format(ref.lmo_full.obj2id[_obj]),
)
for _obj in LM_OCC_OBJECTS
],
scale_to_meter=0.001,
with_masks=True, # (load masks but may not use it)
with_depth=True, # (load depth path here, but may not use it)
height=480,
width=640,
cache_dir=osp.join(PROJ_ROOT, ".cache"),
use_cache=True,
num_to_load=-1,
filter_scene=True,
filter_invalid=True,
ref_key="lmo_full",
),
lmo_NoBopTest_train=dict(
name="lmo_NoBopTest_train",
dataset_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/"),
models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/models"),
objs=LM_OCC_OBJECTS,
ann_files=[osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/image_set/lmo_no_bop_test.txt")],
image_prefixes=[osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/test/{:06d}").format(2)],
xyz_prefixes=[
osp.join(
DATASETS_ROOT,
"BOP_DATASETS/lmo/test/xyz_crop/{:06d}".format(2),
)
],
scale_to_meter=0.001,
with_masks=True, # (load masks but may not use it)
with_depth=True, # (load depth path here, but may not use it)
height=480,
width=640,
cache_dir=osp.join(PROJ_ROOT, ".cache"),
use_cache=True,
num_to_load=-1,
filter_scene=False,
filter_invalid=True,
ref_key="lmo_full",
),
lmo_test=dict(
name="lmo_test",
dataset_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/"),
models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/models"),
objs=LM_OCC_OBJECTS,
ann_files=[osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/image_set/lmo_test.txt")],
# NOTE: scene root
image_prefixes=[osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/test/{:06d}").format(2)],
xyz_prefixes=[None],
scale_to_meter=0.001,
with_masks=True, # (load masks but may not use it)
with_depth=True, # (load depth path here, but may not use it)
height=480,
width=640,
cache_dir=osp.join(PROJ_ROOT, ".cache"),
use_cache=True,
num_to_load=-1,
filter_scene=False,
filter_invalid=False,
ref_key="lmo_full",
),
lmo_bop_test=dict(
name="lmo_bop_test",
dataset_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/"),
models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/models"),
objs=LM_OCC_OBJECTS,
ann_files=[osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/image_set/lmo_bop_test.txt")],
# NOTE: scene root
image_prefixes=[osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/test/{:06d}").format(2)],
xyz_prefixes=[None],
scale_to_meter=0.001,
with_masks=True, # (load masks but may not use it)
with_depth=True, # (load depth path here, but may not use it)
height=480,
width=640,
cache_dir=osp.join(PROJ_ROOT, ".cache"),
use_cache=True,
num_to_load=-1,
filter_scene=False,
filter_invalid=False,
ref_key="lmo_full",
),
)
# single obj splits for lm real
for obj in ref.lm_full.objects:
for split in ["train", "test", "all"]:
name = "lm_real_{}_{}".format(obj, split)
ann_files = [
osp.join(
DATASETS_ROOT,
"BOP_DATASETS/lm/image_set/{}_{}.txt".format(obj, split),
)
]
if split in ["train", "all"]: # all is used to train lmo
filter_invalid = True
elif split in ["test"]:
filter_invalid = False
else:
raise ValueError("{}".format(split))
if name not in SPLITS_LM:
SPLITS_LM[name] = dict(
name=name,
dataset_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/"),
models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/models"),
objs=[obj], # only this obj
ann_files=ann_files,
image_prefixes=[osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/test/{:06d}").format(ref.lm_full.obj2id[obj])],
xyz_prefixes=[
osp.join(
DATASETS_ROOT,
"BOP_DATASETS/lm/test/xyz_crop/{:06d}".format(ref.lm_full.obj2id[obj]),
)
],
scale_to_meter=0.001,
with_masks=True, # (load masks but may not use it)
with_depth=True, # (load depth path here, but may not use it)
height=480,
width=640,
cache_dir=osp.join(PROJ_ROOT, ".cache"),
use_cache=True,
num_to_load=-1,
filter_invalid=filter_invalid,
filter_scene=True,
ref_key="lm_full",
)
# single obj splits for lmo_NoBopTest_train
for obj in ref.lmo_full.objects:
for split in ["train"]:
name = "lmo_NoBopTest_{}_{}".format(obj, split)
if split in ["train"]:
filter_invalid = True
else:
raise ValueError("{}".format(split))
if name not in SPLITS_LM:
SPLITS_LM[name] = dict(
name=name,
dataset_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/"),
models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/models"),
objs=[obj],
ann_files=[
osp.join(
DATASETS_ROOT,
"BOP_DATASETS/lmo/image_set/lmo_no_bop_test.txt",
)
],
# NOTE: scene root
image_prefixes=[osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/test/{:06d}").format(2)],
xyz_prefixes=[
osp.join(
DATASETS_ROOT,
"BOP_DATASETS/lmo/test/xyz_crop/{:06d}".format(2),
)
],
scale_to_meter=0.001,
with_masks=True, # (load masks but may not use it)
with_depth=True, # (load depth path here, but may not use it)
height=480,
width=640,
cache_dir=osp.join(PROJ_ROOT, ".cache"),
use_cache=True,
num_to_load=-1,
filter_scene=False,
filter_invalid=filter_invalid,
ref_key="lmo_full",
)
# single obj splits for lmo_test
for obj in ref.lmo_full.objects:
for split in ["test"]:
name = "lmo_{}_{}".format(obj, split)
if split in ["train", "all"]: # all is used to train lmo
filter_invalid = True
elif split in ["test"]:
filter_invalid = False
else:
raise ValueError("{}".format(split))
if name not in SPLITS_LM:
SPLITS_LM[name] = dict(
name=name,
dataset_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/"),
models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/models"),
objs=[obj],
ann_files=[
osp.join(
DATASETS_ROOT,
"BOP_DATASETS/lmo/image_set/lmo_test.txt",
)
],
# NOTE: scene root
image_prefixes=[osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/test/{:06d}").format(2)],
xyz_prefixes=[None],
scale_to_meter=0.001,
with_masks=True, # (load masks but may not use it)
with_depth=True, # (load depth path here, but may not use it)
height=480,
width=640,
cache_dir=osp.join(PROJ_ROOT, ".cache"),
use_cache=True,
num_to_load=-1,
filter_scene=False,
filter_invalid=False,
ref_key="lmo_full",
)
# single obj splits for lmo_bop_test
for obj in ref.lmo_full.objects:
for split in ["test"]:
name = "lmo_{}_bop_{}".format(obj, split)
if split in ["train", "all"]: # all is used to train lmo
filter_invalid = True
elif split in ["test"]:
filter_invalid = False
else:
raise ValueError("{}".format(split))
if name not in SPLITS_LM:
SPLITS_LM[name] = dict(
name=name,
dataset_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/"),
models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/models"),
objs=[obj],
ann_files=[
osp.join(
DATASETS_ROOT,
"BOP_DATASETS/lmo/image_set/lmo_bop_test.txt",
)
],
# NOTE: scene root
image_prefixes=[osp.join(DATASETS_ROOT, "BOP_DATASETS/lmo/test/{:06d}").format(2)],
xyz_prefixes=[None],
scale_to_meter=0.001,
with_masks=True, # (load masks but may not use it)
with_depth=True, # (load depth path here, but may not use it)
height=480,
width=640,
cache_dir=osp.join(PROJ_ROOT, ".cache"),
use_cache=True,
num_to_load=-1,
filter_scene=False,
filter_invalid=False,
ref_key="lmo_full",
)
# ================ add single image dataset for debug =======================================
debug_im_ids = {
"train": {obj: [] for obj in ref.lm_full.objects},
"test": {obj: [] for obj in ref.lm_full.objects},
}
for obj in ref.lm_full.objects:
for split in ["train", "test"]:
cur_ann_file = osp.join(DATASETS_ROOT, f"BOP_DATASETS/lm/image_set/{obj}_{split}.txt")
ann_files = [cur_ann_file]
im_ids = []
with open(cur_ann_file, "r") as f:
for line in f:
# scene_id(obj_id)/im_id
im_ids.append("{}/{}".format(ref.lm_full.obj2id[obj], int(line.strip("\r\n"))))
debug_im_ids[split][obj] = im_ids
for debug_im_id in debug_im_ids[split][obj]:
name = "lm_single_{}{}_{}".format(obj, debug_im_id.split("/")[1], split)
if name not in SPLITS_LM:
SPLITS_LM[name] = dict(
name=name,
dataset_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/"),
models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/models"),
objs=[obj], # only this obj
ann_files=ann_files,
image_prefixes=[
osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/test/{:06d}").format(ref.lm_full.obj2id[obj])
],
xyz_prefixes=[
osp.join(
DATASETS_ROOT,
"BOP_DATASETS/lm/test/xyz_crop/{:06d}".format(ref.lm_full.obj2id[obj]),
)
],
scale_to_meter=0.001,
with_masks=True, # (load masks but may not use it)
with_depth=True, # (load depth path here, but may not use it)
height=480,
width=640,
cache_dir=osp.join(PROJ_ROOT, ".cache"),
use_cache=True,
num_to_load=-1,
filter_invalid=False,
filter_scene=True,
ref_key="lm_full",
debug_im_id=debug_im_id, # NOTE: debug im id
)
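# register a split with Detectron2's DatasetCatalog / MetadataCatalog so that
# downstream dataloaders and evaluators can look it up by name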
def register_with_name_cfg(name, data_cfg=None):
"""Assume pre-defined datasets live in `./datasets`.
Args:
name: dataset_name
data_cfg: if name is in the pre-defined SPLITS, its data_cfg is used;
otherwise data_cfg must be provided (it can be set via cfg.DATA_CFG.name)
"""
dprint("register dataset: {}".format(name))
if name in SPLITS_LM:
used_cfg = SPLITS_LM[name]
else:
assert data_cfg is not None, f"dataset name {name} is not registered"
used_cfg = data_cfg
DatasetCatalog.register(name, LM_Dataset(used_cfg))
# something like eval_types
MetadataCatalog.get(name).set(
id="linemod", # NOTE: for pvnet to determine module
ref_key=used_cfg["ref_key"],
objs=used_cfg["objs"],
eval_error_types=["ad", "rete", "proj"],
evaluator_type="bop",
**get_lm_metadata(obj_names=used_cfg["objs"], ref_key=used_cfg["ref_key"]),
)
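# example (a minimal sketch; "lm_real_ape_train" is one of the split names
# generated above):
#   register_with_name_cfg("lm_real_ape_train")
#   dicts = DatasetCatalog.get("lm_real_ape_train")  # list of per-image dicts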
def get_available_datasets():
return list(SPLITS_LM.keys())
#### tests ###############################################
def test_vis():
dset_name = sys.argv[1]
assert dset_name in DatasetCatalog.list()
meta = MetadataCatalog.get(dset_name)
dprint("MetadataCatalog: ", meta)
objs = meta.objs
t_start = time.perf_counter()
dicts = DatasetCatalog.get(dset_name)
logger.info("Done loading {} samples with {:.3f}s.".format(len(dicts), time.perf_counter() - t_start))
dirname = "output/{}-data-vis".format(dset_name)
os.makedirs(dirname, exist_ok=True)
for d in dicts:
img = read_image_mmcv(d["file_name"], format="BGR")
depth = mmcv.imread(d["depth_file"], "unchanged") / 1000.0
imH, imW = img.shape[:2]
annos = d["annotations"]
masks = [cocosegm2mask(anno["segmentation"], imH, imW) for anno in annos]
bboxes = [anno["bbox"] for anno in annos]
bbox_modes = [anno["bbox_mode"] for anno in annos]
bboxes_xyxy = np.array(
[BoxMode.convert(box, box_mode, BoxMode.XYXY_ABS) for box, box_mode in zip(bboxes, bbox_modes)]
)
kpts_3d_list = [anno["bbox3d_and_center"] for anno in annos]
quats = [anno["quat"] for anno in annos]
transes = [anno["trans"] for anno in annos]
Rs = [quat2mat(quat) for quat in quats]
# 0-based label
cat_ids = [anno["category_id"] for anno in annos]
K = d["cam"]
kpts_2d = [misc.project_pts(kpt3d, K, R, t) for kpt3d, R, t in zip(kpts_3d_list, Rs, transes)]
# TODO: visualize pose and keypoints
labels = [objs[cat_id] for cat_id in cat_ids]
for _i in range(len(annos)):
img_vis = vis_image_mask_bbox_cv2(
img,
masks[_i : _i + 1],
bboxes=bboxes_xyxy[_i : _i + 1],
labels=labels[_i : _i + 1],
)
img_vis_kpts2d = misc.draw_projected_box3d(img_vis.copy(), kpts_2d[_i])
if "test" not in dset_name.lower():
xyz_path = annos[_i]["xyz_path"]
xyz_info = mmcv.load(xyz_path)
x1, y1, x2, y2 = xyz_info["xyxy"]
xyz_crop = xyz_info["xyz_crop"].astype(np.float32)
xyz = np.zeros((imH, imW, 3), dtype=np.float32)
xyz[y1 : y2 + 1, x1 : x2 + 1, :] = xyz_crop
xyz_show = get_emb_show(xyz)
xyz_crop_show = get_emb_show(xyz_crop)
img_xyz = img.copy() / 255.0
mask_xyz = ((xyz[:, :, 0] != 0) | (xyz[:, :, 1] != 0) | (xyz[:, :, 2] != 0)).astype("uint8")
fg_idx = np.where(mask_xyz != 0)
img_xyz[fg_idx[0], fg_idx[1], :] = xyz_show[fg_idx[0], fg_idx[1], :3]
img_xyz_crop = img_xyz[y1 : y2 + 1, x1 : x2 + 1, :]
img_vis_crop = img_vis[y1 : y2 + 1, x1 : x2 + 1, :]
# diff mask
diff_mask_xyz = np.abs(masks[_i] - mask_xyz)[y1 : y2 + 1, x1 : x2 + 1]
grid_show(
[
img[:, :, [2, 1, 0]],
img_vis[:, :, [2, 1, 0]],
img_vis_kpts2d[:, :, [2, 1, 0]],
depth,
# xyz_show,
diff_mask_xyz,
xyz_crop_show,
img_xyz[:, :, [2, 1, 0]],
img_xyz_crop[:, :, [2, 1, 0]],
img_vis_crop,
],
[
"img",
"vis_img",
"img_vis_kpts2d",
"depth",
"diff_mask_xyz",
"xyz_crop_show",
"img_xyz",
"img_xyz_crop",
"img_vis_crop",
],
row=3,
col=3,
)
else:
grid_show(
[
img[:, :, [2, 1, 0]],
img_vis[:, :, [2, 1, 0]],
img_vis_kpts2d[:, :, [2, 1, 0]],
depth,
],
["img", "vis_img", "img_vis_kpts2d", "depth"],
row=2,
col=2,
)
if __name__ == "__main__":
"""Test the dataset loader.
python this_file.py dataset_name
"""
from lib.vis_utils.image import grid_show
from lib.utils.setup_logger import setup_my_logger
import detectron2.data.datasets # noqa # add pre-defined metadata
from lib.vis_utils.image import vis_image_mask_bbox_cv2
from core.utils.utils import get_emb_show
from core.utils.data_utils import read_image_mmcv
print("sys.argv:", sys.argv)
logger = setup_my_logger(name="core")
register_with_name_cfg(sys.argv[1])
print("dataset catalog: ", DatasetCatalog.list())
test_vis()
# ================ lm_syn_imgn (synthetic LM data from DeepIM) ================
import hashlib
import logging
import os
import os.path as osp
import sys
import time
from collections import OrderedDict
import mmcv
import numpy as np
from tqdm import tqdm
from transforms3d.quaternions import mat2quat, quat2mat
from detectron2.data import DatasetCatalog, MetadataCatalog
from detectron2.structures import BoxMode
cur_dir = osp.dirname(osp.abspath(__file__))
PROJ_ROOT = osp.normpath(osp.join(cur_dir, "../../../.."))
sys.path.insert(0, PROJ_ROOT)
import ref
from lib.pysixd import inout, misc
from lib.utils.mask_utils import (
binary_mask_to_rle,
cocosegm2mask,
mask2bbox_xywh,
)
from lib.utils.utils import dprint, lazy_property
logger = logging.getLogger(__name__)
DATASETS_ROOT = osp.normpath(osp.join(PROJ_ROOT, "datasets"))
class LM_SYN_IMGN_Dataset(object):
"""lm synthetic data, imgn(imagine) from DeepIM."""
def __init__(self, data_cfg):
"""
Set with_depth and with_masks default to True,
and decide whether to load them into dataloader/network later
with_masks:
"""
self.name = data_cfg["name"]
self.data_cfg = data_cfg
self.objs = data_cfg["objs"] # selected objects
self.ann_files = data_cfg["ann_files"] # idx files with image ids
self.image_prefixes = data_cfg["image_prefixes"]
self.dataset_root = data_cfg["dataset_root"] # lm_imgn
self.models_root = data_cfg["models_root"] # BOP_DATASETS/lm/models
self.scale_to_meter = data_cfg["scale_to_meter"] # 0.001
self.with_masks = data_cfg["with_masks"] # True (load masks but may not use it)
self.with_depth = data_cfg["with_depth"] # True (load depth path here, but may not use it)
self.depth_factor = data_cfg["depth_factor"] # 1000.0
self.cam = data_cfg["cam"] #
self.height = data_cfg["height"] # 480
self.width = data_cfg["width"] # 640
self.cache_dir = data_cfg["cache_dir"] # .cache
self.use_cache = data_cfg["use_cache"] # True
# sample uniformly to get n items
self.n_per_obj = data_cfg.get("n_per_obj", 1000)
self.filter_invalid = data_cfg["filter_invalid"]
self.filter_scene = data_cfg.get("filter_scene", False)
##################################################
if self.cam is None:
self.cam = np.array([[572.4114, 0, 325.2611], [0, 573.57043, 242.04899], [0, 0, 1]])
# NOTE: careful! Only the selected objects
self.cat_ids = [cat_id for cat_id, obj_name in ref.lm_full.id2obj.items() if obj_name in self.objs]
# map selected objs to [0, num_objs-1]
self.cat2label = {v: i for i, v in enumerate(self.cat_ids)} # id_map
self.label2cat = {label: cat for cat, label in self.cat2label.items()}
self.obj2label = OrderedDict((obj, obj_id) for obj_id, obj in enumerate(self.objs))
##########################################################
def __call__(self): # LM_SYN_IMGN_Dataset
"""Load light-weight instance annotations of all images into a list of
dicts in Detectron2 format.
Do not load heavy data into memory in this file, since we will
load the annotations of all images into memory.
"""
# cache the dataset_dicts to avoid loading masks from files
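# the md5 key folds in the selected objects and the main config fields, so
# changing objs / with_masks / with_depth / n_per_obj invalidates the cache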
hashed_file_name = hashlib.md5(
(
"".join([str(fn) for fn in self.objs])
+ "dataset_dicts_{}_{}_{}_{}_{}_{}".format(
self.name,
self.dataset_root,
self.with_masks,
self.with_depth,
self.n_per_obj,
__name__,
)
).encode("utf-8")
).hexdigest()
cache_path = osp.join(
self.dataset_root,
"dataset_dicts_{}_{}.pkl".format(self.name, hashed_file_name),
)
if osp.exists(cache_path) and self.use_cache:
logger.info("load cached dataset dicts from {}".format(cache_path))
return mmcv.load(cache_path)
t_start = time.perf_counter()
logger.info("loading dataset dicts: {}".format(self.name))
self.num_instances_without_valid_segmentation = 0
self.num_instances_without_valid_box = 0
dataset_dicts = [] #######################################################
assert len(self.ann_files) == len(self.image_prefixes), f"{len(self.ann_files)} != {len(self.image_prefixes)}"
for ann_file, scene_root in zip(self.ann_files, self.image_prefixes):
# linemod each scene is an object
with open(ann_file, "r") as f_ann:
indices = [line.strip("\r\n").split()[-1] for line in f_ann.readlines()] # string ids
# sample uniformly (equal space)
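# e.g. with n_per_obj=1000 and 10000 frames listed, np.linspace keeps
# roughly every 10th index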
if self.n_per_obj > 0:
sample_num = min(self.n_per_obj, len(indices))
sel_indices_idx = np.linspace(0, len(indices) - 1, sample_num, dtype=np.int32)
sel_indices = [indices[int(_i)] for _i in sel_indices_idx]
else:
sel_indices = indices
for im_id in tqdm(sel_indices):
rgb_path = osp.join(scene_root, "{}-color.png".format(im_id))
assert osp.exists(rgb_path), rgb_path
depth_path = osp.join(scene_root, "{}-depth.png".format(im_id))
obj_name = im_id.split("/")[0]
if obj_name == "benchviseblue":
obj_name = "benchvise"
obj_id = ref.lm_full.obj2id[obj_name]
if self.filter_scene:
if obj_name not in self.objs:
continue
record = {
"dataset_name": self.name,
"file_name": osp.relpath(rgb_path, PROJ_ROOT),
"depth_file": osp.relpath(depth_path, PROJ_ROOT),
"height": self.height,
"width": self.width,
"image_id": im_id.split("/")[-1],
"scene_im_id": im_id,
"cam": self.cam,
"img_type": "syn",
}
cur_label = self.obj2label[obj_name] # 0-based label
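# {im_id}-pose.txt stores a 3x4 [R|t] matrix below a one-line header
# (hence skiprows=1); the 2D centroid is t projected by the camera matrix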
pose_path = osp.join(scene_root, "{}-pose.txt".format(im_id))
pose = np.loadtxt(pose_path, skiprows=1)
R = pose[:3, :3]
t = pose[:3, 3]
quat = mat2quat(R).astype("float32")
proj = record["cam"] @ t  # t is 1-D, so no transpose is needed
proj = proj[:2] / proj[2]
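# this synthetic set has no mask files: each render contains a single
# object, so the foreground mask is recovered from nonzero depth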
depth = mmcv.imread(depth_path, "unchanged") / 1000.0
mask = (depth > 0).astype(np.uint8)
bbox_obj = mask2bbox_xywh(mask)
x1, y1, w, h = bbox_obj
if self.filter_invalid:
if h <= 1 or w <= 1:
self.num_instances_without_valid_box += 1
continue
area = mask.sum()
if area < 3: # filter out too small or nearly invisible instances
self.num_instances_without_valid_segmentation += 1
continue
mask_rle = binary_mask_to_rle(mask, compressed=True)
inst = {
"category_id": cur_label, # 0-based label
"bbox": bbox_obj, # TODO: load both bbox_obj and bbox_visib
"bbox_mode": BoxMode.XYWH_ABS,
"pose": pose,
"quat": quat,
"trans": t,
"centroid_2d": proj, # absolute (cx, cy)
"segmentation": mask_rle,
}
model_info = self.models_info[str(obj_id)]
inst["model_info"] = model_info
# TODO: using full mask
for key in ["bbox3d_and_center"]:
inst[key] = self.models[cur_label][key]
record["annotations"] = [inst]
dataset_dicts.append(record)
if self.num_instances_without_valid_segmentation > 0:
logger.warning(
"Filtered out {} instances without valid segmentation. "
"There might be issues in your dataset generation process.".format(
self.num_instances_without_valid_segmentation
)
)
if self.num_instances_without_valid_box > 0:
logger.warning(
"Filtered out {} instances without valid box. "
"There might be issues in your dataset generation process.".format(self.num_instances_without_valid_box)
)
##########################################################################
# if self.num_to_load > 0:
# self.num_to_load = min(int(self.num_to_load), len(dataset_dicts))
# random.shuffle(dataset_dicts)
# dataset_dicts = dataset_dicts[: self.num_to_load]
logger.info(
"loaded dataset dicts, num_images: {}, using {}s".format(len(dataset_dicts), time.perf_counter() - t_start)
)
mmcv.dump(dataset_dicts, cache_path, protocol=4)
logger.info("Dumped dataset_dicts to {}".format(cache_path))
return dataset_dicts
@lazy_property
def models_info(self):
models_info_path = osp.join(self.models_root, "models_info.json")
assert osp.exists(models_info_path), models_info_path
models_info = mmcv.load(models_info_path) # key is str(obj_id)
return models_info
@lazy_property
def models(self):
"""Load models into a list."""
cache_path = osp.join(self.models_root, "models_{}.pkl".format("_".join(self.objs)))
if osp.exists(cache_path) and self.use_cache:
# logger.info("load cached object models from {}".format(cache_path))
return mmcv.load(cache_path)
models = []
for obj_name in self.objs:
model = inout.load_ply(
osp.join(
self.models_root,
f"obj_{ref.lm_full.obj2id[obj_name]:06d}.ply",
),
vertex_scale=self.scale_to_meter,
)
# NOTE: bbox3d_and_center is not computed from re-centered vertices;
# this is fine for BOP models since they are already centered
model["bbox3d_and_center"] = misc.get_bbox3d_and_center(model["pts"])
models.append(model)
logger.info("cache models to {}".format(cache_path))
mmcv.dump(models, cache_path, protocol=4)
return models
def image_aspect_ratio(self):
# return 1
return self.width / self.height # 4/3
########### register datasets ############################################################
def get_lm_metadata(obj_names, ref_key):
"""task specific metadata."""
data_ref = ref.__dict__[ref_key]
cur_sym_infos = {} # label based key
loaded_models_info = data_ref.get_models_info()
for i, obj_name in enumerate(obj_names):
obj_id = data_ref.obj2id[obj_name]
model_info = loaded_models_info[str(obj_id)]
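# precompute per-object symmetry rotations for symmetry-aware training and
# evaluation; continuous symmetries are discretized by the BOP-toolkit
# helper, with max_sym_disc_step bounding the discretization step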
if "symmetries_discrete" in model_info or "symmetries_continuous" in model_info:
sym_transforms = misc.get_symmetry_transformations(model_info, max_sym_disc_step=0.01)
sym_info = np.array([sym["R"] for sym in sym_transforms], dtype=np.float32)
else:
sym_info = None
cur_sym_infos[i] = sym_info
meta = {"thing_classes": obj_names, "sym_infos": cur_sym_infos}
return meta
LM_13_OBJECTS = [
"ape",
"benchvise",
"camera",
"can",
"cat",
"driller",
"duck",
"eggbox",
"glue",
"holepuncher",
"iron",
"lamp",
"phone",
] # no bowl, cup
################################################################################
SPLITS_LM_IMGN_13 = dict(
lm_imgn_13_train_1k_per_obj=dict(
name="lm_imgn_13_train_1k_per_obj", # BB8 training set
dataset_root=osp.join(DATASETS_ROOT, "lm_imgn/"),
models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/models"),
objs=LM_13_OBJECTS, # selected objects
ann_files=[
osp.join(
DATASETS_ROOT,
"lm_imgn/image_set/{}_{}.txt".format("train", _obj),
)
for _obj in LM_13_OBJECTS
],
image_prefixes=[osp.join(DATASETS_ROOT, "lm_imgn/imgn") for _obj in LM_13_OBJECTS],
scale_to_meter=0.001,
with_masks=True, # (load masks but may not use it)
with_depth=True, # (load depth path here, but may not use it)
depth_factor=1000.0,
cam=ref.lm_full.camera_matrix,
height=480,
width=640,
cache_dir=osp.join(PROJ_ROOT, ".cache"),
use_cache=True,
n_per_obj=1000, # 1000 per class
filter_scene=True,
filter_invalid=False,
ref_key="lm_full",
)
)
# single obj splits
for obj in ref.lm_full.objects:
for split in ["train"]:
name = "lm_imgn_13_{}_{}_1k".format(obj, split)
ann_files = [osp.join(DATASETS_ROOT, "lm_imgn/image_set/{}_{}.txt".format(split, obj))]
if split in ["train"]:
filter_invalid = True
elif split in ["test"]:
filter_invalid = False
else:
raise ValueError("{}".format(split))
if name not in SPLITS_LM_IMGN_13:
SPLITS_LM_IMGN_13[name] = dict(
name=name,
dataset_root=osp.join(DATASETS_ROOT, "lm_imgn/"),
models_root=osp.join(DATASETS_ROOT, "BOP_DATASETS/lm/models"),
objs=[obj], # only this obj
ann_files=ann_files,
image_prefixes=[osp.join(DATASETS_ROOT, "lm_imgn/imgn/")],
scale_to_meter=0.001,
with_masks=True, # (load masks but may not use it)
with_depth=True, # (load depth path here, but may not use it)
depth_factor=1000.0,
cam=ref.lm_full.camera_matrix,
height=480,
width=640,
cache_dir=osp.join(PROJ_ROOT, ".cache"),
use_cache=True,
n_per_obj=1000,
filter_invalid=False,
filter_scene=True,
ref_key="lm_full",
)
def register_with_name_cfg(name, data_cfg=None):
"""Assume pre-defined datasets live in `./datasets`.
Args:
name: dataset_name
data_cfg: if name is in the pre-defined SPLITS, its data_cfg is used;
otherwise data_cfg must be provided (it can be set via cfg.DATA_CFG.name)
"""
dprint("register dataset: {}".format(name))
if name in SPLITS_LM_IMGN_13:
used_cfg = SPLITS_LM_IMGN_13[name]
else:
assert data_cfg is not None, f"dataset name {name} is not registered"
used_cfg = data_cfg
DatasetCatalog.register(name, LM_SYN_IMGN_Dataset(used_cfg))
# something like eval_types
MetadataCatalog.get(name).set(
ref_key=used_cfg["ref_key"],
objs=used_cfg["objs"],
eval_error_types=["ad", "rete", "proj"],
evaluator_type="coco_bop",
**get_lm_metadata(obj_names=used_cfg["objs"], ref_key=used_cfg["ref_key"]),
)
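# example (a minimal sketch using a split defined above):
#   register_with_name_cfg("lm_imgn_13_train_1k_per_obj")
#   dicts = DatasetCatalog.get("lm_imgn_13_train_1k_per_obj")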
def get_available_datasets():
return list(SPLITS_LM_IMGN_13.keys())
#### tests ###############################################
def test_vis():
dset_name = sys.argv[1]
assert dset_name in DatasetCatalog.list()
meta = MetadataCatalog.get(dset_name)
dprint("MetadataCatalog: ", meta)
objs = meta.objs
t_start = time.perf_counter()
dicts = DatasetCatalog.get(dset_name)
logger.info("Done loading {} samples with {:.3f}s.".format(len(dicts), time.perf_counter() - t_start))
dirname = "output/{}-data-vis".format(dset_name)
os.makedirs(dirname, exist_ok=True)
for d in dicts:
img = read_image_mmcv(d["file_name"], format="BGR")
depth = mmcv.imread(d["depth_file"], "unchanged") / 1000.0
anno = d["annotations"][0] # only one instance per image
imH, imW = img.shape[:2]
mask = cocosegm2mask(anno["segmentation"], imH, imW)
bbox = anno["bbox"]
bbox_mode = anno["bbox_mode"]
bbox_xyxy = np.array(BoxMode.convert(bbox, bbox_mode, BoxMode.XYXY_ABS))
kpt3d = anno["bbox3d_and_center"]
quat = anno["quat"]
trans = anno["trans"]
R = quat2mat(quat)
# 0-based label
cat_id = anno["category_id"]
K = d["cam"]
kpt_2d = misc.project_pts(kpt3d, K, R, trans)
# TODO: visualize pose and keypoints
label = objs[cat_id]
# img_vis = vis_image_bboxes_cv2(img, bboxes=bboxes_xyxy, labels=labels)
img_vis = vis_image_mask_bbox_cv2(img, [mask], bboxes=[bbox_xyxy], labels=[label])
img_vis_kpt2d = img.copy()
img_vis_kpt2d = misc.draw_projected_box3d(
img_vis_kpt2d,
kpt_2d,
middle_color=None,
bottom_color=(128, 128, 128),
)
grid_show(
[
img[:, :, [2, 1, 0]],
img_vis[:, :, [2, 1, 0]],
img_vis_kpt2d[:, :, [2, 1, 0]],
depth,
],
["img", "vis_img", "img_vis_kpts2d", "depth"],
row=2,
col=2,
)
if __name__ == "__main__":
"""Test the dataset loader.
Usage:
python -m det.yolov4.datasets.lm_syn_imgn dataset_name
"""
from lib.vis_utils.image import grid_show
from lib.utils.setup_logger import setup_my_logger
import detectron2.data.datasets # noqa # add pre-defined metadata
from lib.vis_utils.image import vis_image_mask_bbox_cv2
from core.utils.data_utils import read_image_mmcv
print("sys.argv:", sys.argv)
logger = setup_my_logger(name="core")
register_with_name_cfg(sys.argv[1])
print("dataset catalog: ", DatasetCatalog.list())
test_vis()