From ee847a37785e660a3e11cb3cce834b9773e83e1a Mon Sep 17 00:00:00 2001
From: jwangzzz <j96w@qq.com>
Date: Tue, 15 Jan 2019 23:08:17 +0800
Subject: [PATCH] clean

---
 README.md           |   6 +-
 lib/extractors.py   | 213 ++------------------------------------------
 lib/loss_refiner.py |   1 -
 lib/network.py      |  11 +--
 lib/pspnet.py       |   2 +-
 5 files changed, 11 insertions(+), 222 deletions(-)

diff --git a/README.md b/README.md
index 7f25ec3..60acb30 100644
--- a/README.md
+++ b/README.md
@@ -20,7 +20,7 @@
 
 ## Overview
 
-This repository is the implementation code of the paper "DenseFusion: 6D Object Pose Estimation by Iterative Dense Fusion"([arXiv](), [website](https://sites.google.com/view/densefusion), [video](https://www.youtube.com/watch?v=SsE5-FuK5jo)) by Chen et al. The model takes an RGB-D image as input and predicts the 6D pose of the each object in the frame. This network is implemented using [PyTorch](https://pytorch.org/) and the rest of the framework is in Python. Since this project focuses on the 6D pose estimation process, we do not specifically limit the choice of the segmentation models. You can choose your preferred semantic-segmentation/instance-segmentation methods according to your needs. In this repo, we provide our full implementation code of the DenseFusion model, Iterative Refinement model and a vanilla SegNet semantic-segmentation model used in our real-robot grasping experiment. The ROS code of the real robot grasping experiment is not included.
+This repository contains the implementation code of the paper "DenseFusion: 6D Object Pose Estimation by Iterative Dense Fusion" ([arXiv](), [Project](https://sites.google.com/view/densefusion), [Video](https://www.youtube.com/watch?v=SsE5-FuK5jo)) by Wang et al. at the [Stanford Vision and Learning Lab](http://svl.stanford.edu/). The model takes an RGB-D image as input and predicts the 6D pose of each object in the frame. The network is implemented using [PyTorch](https://pytorch.org/) and the rest of the framework is in Python. Since this project focuses on the 6D pose estimation process, we do not specifically limit the choice of segmentation model; you can choose your preferred semantic-segmentation or instance-segmentation method according to your needs. In this repo we provide our full implementation code of the DenseFusion model, the Iterative Refinement model, and the vanilla SegNet semantic-segmentation model used in our real-robot grasping experiment. The ROS code for the real-robot grasping experiment is not included.
 
 
 ## Requirements
@@ -56,7 +56,7 @@ This repository is the implementation code of the paper "DenseFusion: 6D Object
 	* **lib/loss_refiner.py**: Loss calculation for iterative refinement model.
 	* **lib/transformations.py**: [Transformation Function Library](https://www.lfd.uci.edu/~gohlke/code/transformations.py.html).
     * **lib/network.py**: Network architecture.
-    * **lib/extractors.py**: Encoder network architecture adapted from [pspnet-pytorch](https://github.com/Lextal/pspnet-pytorch)
+    * **lib/extractors.py**: Encoder network architecture adapted from [pspnet-pytorch](https://github.com/Lextal/pspnet-pytorch).
     * **lib/pspnet.py**: Decoder network architecture.
     * **lib/utils.py**: Logger code.
     * **lib/knn/**: CUDA K-nearest neighbours library adapted from [pytorch_knn_cuda](https://github.com/chrischoy/pytorch_knn_cuda).
@@ -134,7 +134,7 @@ Please run:
 ```
 ./experiments/scripts/eval_ycb.sh
 ```
-This script will first download the `YCB_Video_toolbox` to the root folder of this repo and test the selected DenseFusion and Iterative Refinement models on the 2949 keyframes of the 10 testing video in YCB_Video Dataset with the same segmentation result of PoseCNN. The result without refinement is stored in `eval_result/ycb/Densefusion_wo_refine_result` and the refined result is in `eval_result/ycb/Densefusion_iterative_result`.
+This script will first download the `YCB_Video_toolbox` to the root folder of this repo and then test the selected DenseFusion and Iterative Refinement models on the 2949 keyframes of the 10 testing videos in the YCB_Video dataset, using the same segmentation results as PoseCNN. The results without refinement are stored in `experiments/eval_result/ycb/Densefusion_wo_refine_result` and the refined results are in `experiments/eval_result/ycb/Densefusion_iterative_result`.
 
 After that, you can add the paths of `experiments/eval_result/ycb/Densefusion_wo_refine_result/` and `experiments/eval_result/ycb/Densefusion_iterative_result/` to `YCB_Video_toolbox/evaluate_poses_keyframe.m` and run it with [MATLAB](https://www.mathworks.com/products/matlab.html). The script `YCB_Video_toolbox/plot_accuracy_keyframe.m` can then show you the comparison plots. The easiest way is to copy the adapted scripts from the `replace_ycb_toolbox/` folder into the `YCB_Video_toolbox/` folder, replacing the originals. You might still need to change the path of your `YCB_Video Dataset/` in `globals.m` and copy the two result folders (`Densefusion_wo_refine_result/` and `Densefusion_iterative_result/`) into the `YCB_Video_toolbox/` folder.
 
diff --git a/lib/extractors.py b/lib/extractors.py
index 8facb37..1a5d77c 100755
--- a/lib/extractors.py
+++ b/lib/extractors.py
@@ -4,10 +4,6 @@ import random
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
-from torch.utils import model_zoo
-from torchvision.models.densenet import densenet121, densenet161
-from torchvision.models.squeezenet import squeezenet1_1
-
 
 def load_weights_sequential(target, source_state):
     new_dict = OrderedDict()
@@ -15,21 +11,10 @@ def load_weights_sequential(target, source_state):
         new_dict[k1] = v2
     target.load_state_dict(new_dict)
 
-
-model_urls = {
-    'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
-    'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
-    'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
-    'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
-    'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
-}
-
-
 def conv3x3(in_planes, out_planes, stride=1, dilation=1):
     return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
                      padding=dilation, dilation=dilation, bias=False)
 
-
 class BasicBlock(nn.Module):
     expansion = 1
 
@@ -60,7 +45,6 @@ class BasicBlock(nn.Module):
 
 class Bottleneck(nn.Module):
     expansion = 4
-
     def __init__(self, inplanes, planes, stride=1, downsample=None, dilation=1):
         super(Bottleneck, self).__init__()
         self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
@@ -140,207 +124,22 @@ class ResNet(nn.Module):
         return x, x_3
 
 
-
-class _DenseLayer(nn.Sequential):
-    def __init__(self, num_input_features, growth_rate, bn_size, drop_rate):
-        super(_DenseLayer, self).__init__()
-        self.add_module('norm.1', nn.BatchNorm2d(num_input_features)),
-        self.add_module('relu.1', nn.ReLU(inplace=True)),
-        self.add_module('conv.1', nn.Conv2d(num_input_features, bn_size *
-                                            growth_rate, kernel_size=1, stride=1, bias=False)),
-        self.add_module('norm.2', nn.BatchNorm2d(bn_size * growth_rate)),
-        self.add_module('relu.2', nn.ReLU(inplace=True)),
-        self.add_module('conv.2', nn.Conv2d(bn_size * growth_rate, growth_rate,
-                                            kernel_size=3, stride=1, padding=1, bias=False)),
-        self.drop_rate = drop_rate
-
-    def forward(self, x):
-        new_features = super(_DenseLayer, self).forward(x)
-        if self.drop_rate > 0:
-            new_features = F.dropout(new_features, p=self.drop_rate, training=self.training)
-        return torch.cat([x, new_features], 1)
-
-
-class _DenseBlock(nn.Sequential):
-    def __init__(self, num_layers, num_input_features, bn_size, growth_rate, drop_rate):
-        super(_DenseBlock, self).__init__()
-        for i in range(num_layers):
-            layer = _DenseLayer(num_input_features + i * growth_rate, growth_rate, bn_size, drop_rate)
-            self.add_module('denselayer%d' % (i + 1), layer)
-
-
-class _Transition(nn.Sequential):
-    def __init__(self, num_input_features, num_output_features, downsample=True):
-        super(_Transition, self).__init__()
-        self.add_module('norm', nn.BatchNorm2d(num_input_features))
-        self.add_module('relu', nn.ReLU(inplace=True))
-        self.add_module('conv', nn.Conv2d(num_input_features, num_output_features,
-                                          kernel_size=1, stride=1, bias=False))
-        if downsample:
-            self.add_module('pool', nn.AvgPool2d(kernel_size=2, stride=2))
-        else:
-            self.add_module('pool', nn.AvgPool2d(kernel_size=1, stride=1))  # compatibility hack
-
-
-class DenseNet(nn.Module):
-    def __init__(self, growth_rate=32, block_config=(6, 12, 24, 16),
-                 num_init_features=64, bn_size=4, drop_rate=0, pretrained=True):
-
-        super(DenseNet, self).__init__()
-
-        # First convolution
-        self.start_features = nn.Sequential(OrderedDict([
-            ('conv0', nn.Conv2d(3, num_init_features, kernel_size=7, stride=2, padding=3, bias=False)),
-            ('norm0', nn.BatchNorm2d(num_init_features)),
-            ('relu0', nn.ReLU(inplace=True)),
-            ('pool0', nn.MaxPool2d(kernel_size=3, stride=2, padding=1)),
-        ]))
-
-        # Each denseblock
-        num_features = num_init_features
-
-        init_weights = list(densenet121(pretrained=True).features.children())
-        start = 0
-        for i, c in enumerate(self.start_features.children()):
-            if pretrained:
-                c.load_state_dict(init_weights[i].state_dict())
-            start += 1
-        self.blocks = nn.ModuleList()
-        for i, num_layers in enumerate(block_config):
-            block = _DenseBlock(num_layers=num_layers, num_input_features=num_features,
-                                bn_size=bn_size, growth_rate=growth_rate, drop_rate=drop_rate)
-            if pretrained:
-                block.load_state_dict(init_weights[start].state_dict())
-            start += 1
-            self.blocks.append(block)
-            setattr(self, 'denseblock%d' % (i + 1), block)
-
-            num_features = num_features + num_layers * growth_rate
-            if i != len(block_config) - 1:
-                downsample = i < 1
-                trans = _Transition(num_input_features=num_features, num_output_features=num_features // 2,
-                                    downsample=downsample)
-                if pretrained:
-                    trans.load_state_dict(init_weights[start].state_dict())
-                start += 1
-                self.blocks.append(trans)
-                setattr(self, 'transition%d' % (i + 1), trans)
-                num_features = num_features // 2
-
-    def forward(self, x):
-        out = self.start_features(x)
-        deep_features = None
-        for i, block in enumerate(self.blocks):
-            out = block(out)
-            if i == 5:
-                deep_features = out
-
-        return out, deep_features
-
-
-class Fire(nn.Module):
-
-    def __init__(self, inplanes, squeeze_planes,
-                 expand1x1_planes, expand3x3_planes, dilation=1):
-        super(Fire, self).__init__()
-        self.inplanes = inplanes
-        self.squeeze = nn.Conv2d(inplanes, squeeze_planes, kernel_size=1)
-        self.squeeze_activation = nn.ReLU(inplace=True)
-        self.expand1x1 = nn.Conv2d(squeeze_planes, expand1x1_planes,
-                                   kernel_size=1)
-        self.expand1x1_activation = nn.ReLU(inplace=True)
-        self.expand3x3 = nn.Conv2d(squeeze_planes, expand3x3_planes,
-                                   kernel_size=3, padding=dilation, dilation=dilation)
-        self.expand3x3_activation = nn.ReLU(inplace=True)
-
-    def forward(self, x):
-        x = self.squeeze_activation(self.squeeze(x))
-        return torch.cat([
-            self.expand1x1_activation(self.expand1x1(x)),
-            self.expand3x3_activation(self.expand3x3(x))
-        ], 1)
-
-
-class SqueezeNet(nn.Module):
-
-    def __init__(self, pretrained=False):
-        super(SqueezeNet, self).__init__()
-
-        self.feat_1 = nn.Sequential(
-            nn.Conv2d(3, 64, kernel_size=3, stride=2, padding=1),
-            nn.ReLU(inplace=True)
-        )
-        self.feat_2 = nn.Sequential(
-            nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
-            Fire(64, 16, 64, 64),
-            Fire(128, 16, 64, 64)
-        )
-        self.feat_3 = nn.Sequential(
-            nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
-            Fire(128, 32, 128, 128, 2),
-            Fire(256, 32, 128, 128, 2)
-        )
-        self.feat_4 = nn.Sequential(
-            Fire(256, 48, 192, 192, 4),
-            Fire(384, 48, 192, 192, 4),
-            Fire(384, 64, 256, 256, 4),
-            Fire(512, 64, 256, 256, 4)
-        )
-        if pretrained:
-            weights = squeezenet1_1(pretrained=True).features.state_dict()
-            load_weights_sequential(self, weights)
-
-    def forward(self, x):
-        f1 = self.feat_1(x)
-        f2 = self.feat_2(f1)
-        f3 = self.feat_3(f2)
-        f4 = self.feat_4(f3)
-        return f4, f3
-
-
-'''
-    Handy methods for construction
-'''
-
-
-def squeezenet(pretrained=True):
-    return SqueezeNet(pretrained)
-
-
-def densenet(pretrained=True):
-    return DenseNet(pretrained=pretrained)
-
-
-def resnet18(pretrained=True):
+def resnet18(pretrained=False):
     model = ResNet(BasicBlock, [2, 2, 2, 2])
-    if pretrained:
-        load_weights_sequential(model, model_zoo.load_url(model_urls['resnet18']))
     return model
 
-
-def resnet34(pretrained=True):
+def resnet34(pretrained=False):
     model = ResNet(BasicBlock, [3, 4, 6, 3])
-    if pretrained:
-        load_weights_sequential(model, model_zoo.load_url(model_urls['resnet34']))
     return model
 
-
-def resnet50(pretrained=True):
+def resnet50(pretrained=False):
     model = ResNet(Bottleneck, [3, 4, 6, 3])
-    if pretrained:
-        load_weights_sequential(model, model_zoo.load_url(model_urls['resnet50']))
     return model
 
-
-def resnet101(pretrained=True):
+def resnet101(pretrained=False):
     model = ResNet(Bottleneck, [3, 4, 23, 3])
-    if pretrained:
-        load_weights_sequential(model, model_zoo.load_url(model_urls['resnet101']))
     return model
 
-
-def resnet152(pretrained=True):
+def resnet152(pretrained=False):
     model = ResNet(Bottleneck, [3, 8, 36, 3])
-    if pretrained:
-        load_weights_sequential(model, model_zoo.load_url(model_urls['resnet152']))
-    return model
+    return model
\ No newline at end of file
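A minimal usage sketch of the trimmed extractors (not part of the patch; it assumes this repo's `lib/` package is importable from the project root). After this change the `resnet*` constructors still accept a `pretrained` flag, but no `model_zoo` download happens and the backbone starts from random weights:

```
# Minimal sketch, assuming lib/ is importable; not part of the patch itself.
import torch
from lib.extractors import resnet18

backbone = resnet18(pretrained=False)   # after this patch, no ImageNet weights are fetched
x = torch.randn(1, 3, 480, 640)         # an RGB image tensor; the shape is only illustrative
feats, mid_feats = backbone(x)          # ResNet.forward returns (x, x_3): deep and mid-level feature maps
```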
diff --git a/lib/loss_refiner.py b/lib/loss_refiner.py
index 04430db..d2c2931 100755
--- a/lib/loss_refiner.py
+++ b/lib/loss_refiner.py
@@ -59,7 +59,6 @@ def loss_calculation(pred_r, pred_t, target, model_points, idx, points, num_poin
     ori_t = t.repeat(num_point_mesh, 1).contiguous().view(1, num_point_mesh, 3)
     new_target = torch.bmm((new_target - ori_t), ori_base).contiguous()
 
-
     # print('------------> ', dis.item(), idx[0].item())
 
     return dis, new_points.detach(), new_target.detach()
diff --git a/lib/network.py b/lib/network.py
index 30f79e4..f3fd47d 100755
--- a/lib/network.py
+++ b/lib/network.py
@@ -17,8 +17,6 @@ import torch.nn.functional as F
 from lib.pspnet import PSPNet
 
 psp_models = {
-    'squeezenet': lambda: PSPNet(sizes=(1, 2, 3, 6), psp_size=512, deep_features_size=256, backend='squeezenet'),
-    'densenet': lambda: PSPNet(sizes=(1, 2, 3, 6), psp_size=1024, deep_features_size=512, backend='densenet'),
     'resnet18': lambda: PSPNet(sizes=(1, 2, 3, 6), psp_size=512, deep_features_size=256, backend='resnet18'),
     'resnet34': lambda: PSPNet(sizes=(1, 2, 3, 6), psp_size=512, deep_features_size=256, backend='resnet34'),
     'resnet50': lambda: PSPNet(sizes=(1, 2, 3, 6), psp_size=2048, deep_features_size=1024, backend='resnet50'),
@@ -126,10 +124,7 @@ class PoseNet(nn.Module):
         out_rx = torch.index_select(rx[b], 0, obj[b])
         out_tx = torch.index_select(tx[b], 0, obj[b])
         out_cx = torch.index_select(cx[b], 0, obj[b])
-        # for b in range(1, bs):
-        #     out_rx = torch.cat((out_rx, torch.index_select(rx[b], 0, obj[b])), dim=0)
-        #     out_tx = torch.cat((out_tx, torch.index_select(tx[b], 0, obj[b])), dim=0)
-        #     out_cx = torch.cat((out_cx, torch.index_select(cx[b], 0, obj[b])), dim=0)
+
         out_rx = out_rx.contiguous().transpose(2, 1).contiguous()
         out_cx = out_cx.contiguous().transpose(2, 1).contiguous()
         out_tx = out_tx.contiguous().transpose(2, 1).contiguous()
@@ -208,8 +203,4 @@ class PoseRefineNet(nn.Module):
         out_rx = torch.index_select(rx[b], 0, obj[b])
         out_tx = torch.index_select(tx[b], 0, obj[b])
 
-        # for b in range(1, bs):
-        #     out_rx = torch.cat((out_rx, torch.index_select(rx[b], 0, obj[b])), dim=0)
-        #     out_tx = torch.cat((out_tx, torch.index_select(tx[b], 0, obj[b])), dim=0)
-
         return out_rx, out_tx
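The lines removed above were already commented out; the surviving `torch.index_select` call keeps, for each sample, only the rotation/translation/confidence heads belonging to that sample's object class. A small illustration with assumed DenseFusion-like shapes (batch size 1, 21 object classes, 500 points), not taken from the patch:

```
# Hedged illustration with assumed shapes; mirrors the per-object selection in PoseNet.
import torch

bs, num_obj, num_points = 1, 21, 500
rx = torch.randn(bs, num_obj, 4, num_points)     # one quaternion head per object class
obj = torch.tensor([[3]])                        # each sample's object class index

b = 0
out_rx = torch.index_select(rx[b], 0, obj[b])    # -> (1, 4, num_points): keep only class 3
out_rx = out_rx.contiguous().transpose(2, 1).contiguous()   # -> (1, num_points, 4), as in the code above
```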
diff --git a/lib/pspnet.py b/lib/pspnet.py
index 64c87a8..2e39bbf 100755
--- a/lib/pspnet.py
+++ b/lib/pspnet.py
@@ -38,7 +38,7 @@ class PSPUpsample(nn.Module):
 
 
 class PSPNet(nn.Module):
-    def __init__(self, n_classes=21, sizes=(1, 2, 3, 6), psp_size=2048, deep_features_size=1024, backend='resnet34',
+    def __init__(self, n_classes=21, sizes=(1, 2, 3, 6), psp_size=2048, deep_features_size=1024, backend='resnet18',
                  pretrained=False):
         super(PSPNet, self).__init__()
         self.feats = getattr(extractors, backend)(pretrained)
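This changes the default backend of `PSPNet` from `resnet34` to `resnet18`. Either way, the backend string is resolved via `getattr(extractors, backend)(pretrained)`, so with the trimmed extractors above `pretrained=False` builds a randomly initialized encoder. A hedged construction sketch, with parameter values copied from the `resnet18` entry of `psp_models` in `lib/network.py` rather than relying on the other defaults:

```
# Sketch only; values match the 'resnet18' entry of psp_models in lib/network.py.
from lib.pspnet import PSPNet

net = PSPNet(sizes=(1, 2, 3, 6), psp_size=512, deep_features_size=256,
             backend='resnet18', pretrained=False)
# Internally this resolves to: getattr(extractors, 'resnet18')(False)
```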
-- 
GitLab