From 4f593d08619646efc7a1f9254187ef1427bb7cf5 Mon Sep 17 00:00:00 2001
From: Thomas Pickles <thomas.pickles@ens-lyon.fr>
Date: Mon, 6 Mar 2023 13:27:03 +0100
Subject: [PATCH] Greyscale now implemented as a command-line option

Options can now be selected at the command line without requiring
separate builds of the code.
---
 include/neural-graphics-primitives/common.h  |   3 +-
 include/neural-graphics-primitives/testbed.h |   5 +
 scripts/run.py                               |  19 +++-
 src/python_api.cu                            |   4 +
 src/testbed.cu                               |   1 +
 src/testbed_nerf.cu                          | 111 ++++++++++++++-----
 6 files changed, 114 insertions(+), 29 deletions(-)

diff --git a/include/neural-graphics-primitives/common.h b/include/neural-graphics-primitives/common.h
index 3a632b4..b9401c4 100644
--- a/include/neural-graphics-primitives/common.h
+++ b/include/neural-graphics-primitives/common.h
@@ -135,12 +135,13 @@ enum class ELossType : int {
 static constexpr const char* LossTypeStr = "L2\0L1\0MAPE\0SMAPE\0Huber\0LogL1\0RelativeL2\0\0";
 
 enum class ENerfActivation : int {
+	Constant,
 	None,
 	ReLU,
 	Logistic,
 	Exponential,
 };
-static constexpr const char* NerfActivationStr = "None\0ReLU\0Logistic\0Exponential\0\0";
+static constexpr const char* NerfActivationStr = "Constant\0None\0ReLU\0Logistic\0Exponential\0\0";
 
 enum class EMeshSdfMode : int {
 	Watertight,
diff --git a/include/neural-graphics-primitives/testbed.h b/include/neural-graphics-primitives/testbed.h
index af403d8..12e366a 100644
--- a/include/neural-graphics-primitives/testbed.h
+++ b/include/neural-graphics-primitives/testbed.h
@@ -191,6 +191,7 @@ public:
 		int visualized_dim,
 		ENerfActivation rgb_activation,
 		ENerfActivation density_activation,
+		int render_no_attenuation,
 		int show_accel,
 		float min_transmittance,
 		float glow_y_cutoff,
@@ -683,6 +684,7 @@ public:
 			NerfCounters counters_rgb;
 
 			bool random_bg_color = true;
+			bool grey_loss = false;
 			bool linear_colors = false;
 			ELossType loss_type = ELossType::L2;
 			ELossType depth_loss_type = ELossType::L1;
@@ -755,6 +757,9 @@ public:
 
 		float render_min_transmittance = 0.01f;
 
+		bool render_no_attenuation = false;
+		bool train_no_attenuation = false;
+
 		float glow_y_cutoff = 0.f;
 		int glow_mode = 0;
 	} m_nerf;
diff --git a/scripts/run.py b/scripts/run.py
index 526b191..ca20be5 100644
--- a/scripts/run.py
+++ b/scripts/run.py
@@ -57,7 +57,7 @@ def parse_args():
 	parser.add_argument("--save_mesh", default="", help="Output a marching-cubes based mesh from the NeRF or SDF model. Supports OBJ and PLY format.")
 	parser.add_argument("--marching_cubes_res", default=256, type=int, help="Sets the resolution for the marching cubes grid.")
 
-	parser.add_argument("--save_slices", action="store_true", help="Output slices after training.")
+	parser.add_argument("--save_slices", action="store_true", help="Output slices of density after training.")
 	parser.add_argument("--slices_res", default=256, type=int, help="Sets the resolution for the slices png.")
 
 	parser.add_argument("--width", "--screenshot_w", type=int, default=0, help="Resolution width of GUI and screenshots.")
@@ -75,6 +75,11 @@ def parse_args():
 	parser.add_argument("--tomonerf", type=int, default=0, help="Which transforms to apply [single_channel, exponentiate img data, crop].")
 	parser.add_argument("--crop", nargs=3, type=float, help="Percentage to crop unit cube to.")
 
+	parser.add_argument("--grey_loss", action="store_true", help="Ignore colour info in the target loss function; fit the target's alpha channel instead.")
+	parser.add_argument("--no_colour", action="store_true", help="Clamp the network's RGB output to a constant shade [1,1,1], so only density is learnt.")
+	parser.add_argument("--train_no_attenuation", action="store_true", help="Experimental: composite training rays with alpha instead of alpha * transmittance. Currently makes training explode.")
+	parser.add_argument("--render_no_attenuation", action="store_true", help="Render images without attenuation of voxels in front.")
+
 	return parser.parse_args()
 
 def get_scene(scene):
@@ -189,6 +194,18 @@ if __name__ == "__main__":
 
 	testbed.nerf.render_with_lens_distortion = True
 
+	if args.grey_loss:
+		testbed.nerf.training.grey_loss = True
+
+	if args.no_colour:
+		testbed.nerf.rgb_activation = ngp.NerfActivation.Constant
+
+	if args.train_no_attenuation:
+		testbed.nerf.train_no_attenuation = True
+
+	if args.render_no_attenuation:
+		testbed.nerf.render_no_attenuation = True
+
 	network_stem = os.path.splitext(os.path.basename(args.network))[0] if args.network else "base"
 	if testbed.mode == ngp.TestbedMode.Sdf:
 		setup_colored_sdf(testbed, args.scene)
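For reference, a minimal sketch of driving the same options directly through the pyngp bindings added below, bypassing run.py. The scene path and step count are illustrative only; the grey_loss, rgb_activation and render_no_attenuation attributes are the ones this patch exposes:

    import pyngp as ngp  # built from this branch

    testbed = ngp.Testbed(ngp.TestbedMode.Nerf)
    testbed.load_training_data("data/nerf/fox")  # illustrative scene path

    # --no_colour: the RGB activation returns a constant 1, so only density is learnt.
    testbed.nerf.rgb_activation = ngp.NerfActivation.Constant
    # --grey_loss: fit the target's alpha channel instead of its colour.
    testbed.nerf.training.grey_loss = True
    # --render_no_attenuation: composite with alpha rather than alpha * transmittance.
    testbed.nerf.render_no_attenuation = True

    testbed.shall_train = True
    while testbed.frame() and testbed.training_step < 1000:
        pass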
diff --git a/src/python_api.cu b/src/python_api.cu
index 7f8b4f8..eabab2e 100644
--- a/src/python_api.cu
+++ b/src/python_api.cu
@@ -292,6 +292,7 @@ PYBIND11_MODULE(pyngp, m) {
 		.export_values();
 
 	py::enum_<ENerfActivation>(m, "NerfActivation")
+		.value("Constant", ENerfActivation::Constant)
 		.value("None", ENerfActivation::None)
 		.value("ReLU", ENerfActivation::ReLU)
 		.value("Logistic", ENerfActivation::Logistic)
@@ -570,6 +571,8 @@ PYBIND11_MODULE(pyngp, m) {
 		.def_readwrite("render_lens", &Testbed::Nerf::render_lens)
 		.def_readwrite("rendering_min_transmittance", &Testbed::Nerf::render_min_transmittance)
 		.def_readwrite("render_min_transmittance", &Testbed::Nerf::render_min_transmittance)
+		.def_readwrite("render_no_attenuation", &Testbed::Nerf::render_no_attenuation)
+		.def_readwrite("train_no_attenuation", &Testbed::Nerf::train_no_attenuation)
 		.def_readwrite("cone_angle_constant", &Testbed::Nerf::cone_angle_constant)
 		.def_readwrite("visualize_cameras", &Testbed::Nerf::visualize_cameras)
 		.def_readwrite("glow_y_cutoff", &Testbed::Nerf::glow_y_cutoff)
@@ -619,6 +622,7 @@ PYBIND11_MODULE(pyngp, m) {
 		;
 
 	py::class_<Testbed::Nerf::Training>(nerf, "Training")
+		.def_readwrite("grey_loss", &Testbed::Nerf::Training::grey_loss)
 		.def_readwrite("random_bg_color", &Testbed::Nerf::Training::random_bg_color)
 		.def_readwrite("n_images_for_training", &Testbed::Nerf::Training::n_images_for_training)
 		.def_readwrite("linear_colors", &Testbed::Nerf::Training::linear_colors)
diff --git a/src/testbed.cu b/src/testbed.cu
index 2f06b23..8c41134 100644
--- a/src/testbed.cu
+++ b/src/testbed.cu
@@ -858,6 +858,7 @@ void Testbed::imgui() {
 	if (m_testbed_mode == ETestbedMode::Nerf && ImGui::TreeNode("NeRF training options")) {
 		ImGui::Checkbox("Random bg color", &m_nerf.training.random_bg_color);
+		ImGui::Checkbox("Learn grey", &m_nerf.training.grey_loss);
 		ImGui::SameLine();
 		ImGui::Checkbox("Snap to pixel centers", &m_nerf.training.snap_to_pixel_centers);
 		ImGui::SliderFloat("Near distance", &m_nerf.training.near_distance, 0.0f, 1.0f);
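For reference, the compositing rule that both composite_kernel_nerf and compute_loss_kernel_train_nerf implement in the testbed_nerf.cu changes below, together with the no-attenuation variant this patch adds. The notation is mine, not from the code:

    % Per-sample opacity and transmittance along a ray:
    \[
      \alpha_i = 1 - e^{-\sigma_i \delta_i}, \qquad
      T_i = \prod_{j<i} (1 - \alpha_j), \qquad
      w_i = \alpha_i T_i
    \]
    % Default compositing attenuates each sample by what lies in front of it;
    % the *_no_attenuation options drop the T_i factor from the colour term:
    \[
      C_{\text{default}} = \sum_i w_i \, c_i, \qquad
      C_{\text{no-atten}} = \sum_i \alpha_i \, c_i
    \]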
diff --git a/src/testbed_nerf.cu b/src/testbed_nerf.cu
index bf08d18..708ffce 100644
--- a/src/testbed_nerf.cu
+++ b/src/testbed_nerf.cu
@@ -214,27 +214,27 @@ inline __device__ float advance_to_next_voxel(float t, float cone_angle, const V
 __device__ float network_to_rgb(float val, ENerfActivation activation) {
-	return 1.f; // always return 1
-	// switch (activation) {
-	// 	case ENerfActivation::None: return val;
-	// 	case ENerfActivation::ReLU: return val > 0.0f ? val : 0.0f;
-	// 	case ENerfActivation::Logistic: return tcnn::logistic(val);
-	// 	case ENerfActivation::Exponential: return __expf(tcnn::clamp(val, -10.0f, 10.0f));
-	// 	default: assert(false);
-	// }
-	// return 0.0f;
+	switch (activation) {
+		case ENerfActivation::Constant: return 1.0f;
+		case ENerfActivation::None: return val;
+		case ENerfActivation::ReLU: return val > 0.0f ? val : 0.0f;
+		case ENerfActivation::Logistic: return tcnn::logistic(val);
+		case ENerfActivation::Exponential: return __expf(tcnn::clamp(val, -10.0f, 10.0f));
+		default: assert(false);
+	}
+	return 0.0f;
 }
 
-// No way to modify the derivative for rgb
+// Derivative of the RGB activation with respect to the raw network output.
 __device__ float network_to_rgb_derivative(float val, ENerfActivation activation) {
-	return 0.f; // no way to change rgb value
-	// switch (activation) {
-	// 	case ENerfActivation::None: return 1.0f;
-	// 	case ENerfActivation::ReLU: return val > 0.0f ? 1.0f : 0.0f;
-	// 	case ENerfActivation::Logistic: { float density = tcnn::logistic(val); return density * (1 - density); };
-	// 	case ENerfActivation::Exponential: return __expf(tcnn::clamp(val, -10.0f, 10.0f));
-	// 	default: assert(false);
-	// }
+	switch (activation) {
+		case ENerfActivation::Constant: return 0.0f;
+		case ENerfActivation::None: return 1.0f;
+		case ENerfActivation::ReLU: return val > 0.0f ? 1.0f : 0.0f;
+		case ENerfActivation::Logistic: { float density = tcnn::logistic(val); return density * (1 - density); };
+		case ENerfActivation::Exponential: return __expf(tcnn::clamp(val, -10.0f, 10.0f));
+		default: assert(false);
+	}
+	return 0.0f;
 }
 
 __device__ float network_to_density(float val, ENerfActivation activation) {
@@ -259,7 +259,6 @@ __device__ float network_to_density_derivative(float val, ENerfActivation activa
 	return 0.0f;
 }
 
-// Ignore neurons 0, 1 and 2!
 __device__ Array3f network_to_rgb(const tcnn::vector_t<tcnn::network_precision_t, 4>& local_network_output, ENerfActivation activation) {
 	return {
 		network_to_rgb(float(local_network_output[0]), activation),
@@ -806,6 +805,30 @@ __global__ void generate_next_nerf_network_inputs(
 	payload.n_steps = n_steps;
 }
 
+// `composite_kernel_nerf` is the rendering path: given the desired
+// viewpoint, it converts network outputs to rgba values and accumulates
+// them along each ray via the volume-rendering equation to produce the
+// 2D image from the NeRF cloud. The training path,
+// `compute_loss_kernel_train_nerf` below, evaluates the same rendering
+// equation in its forward pass before assembling the loss gradients, so
+// any change to how samples are composited here must be mirrored there.
+//
+// Note that we can also choose to render different types of images,
+// such as normals, albedo, etc.
+//
+// This might also be the place to change our projection method, so that
+// we can generate images in the same manner that Astra does while
+// keeping the NeRF learning unchanged.
 __global__ void composite_kernel_nerf(
 	const uint32_t n_elements,
 	const uint32_t stride,
@@ -827,6 +850,7 @@ __global__ void composite_kernel_nerf(
 	const uint8_t* __restrict__ density_grid,
 	ENerfActivation rgb_activation,
 	ENerfActivation density_activation,
+	int render_no_attenuation,
 	int show_accel,
 	float min_transmittance
 ) {
@@ -993,8 +1017,20 @@ __global__ void composite_kernel_nerf(
 			rgb = Array3f::Constant(alpha);
 		}
 
-		local_rgba.head<3>() += rgb * weight;
-		local_rgba.w() += weight;
+		// Without attenuation, voxels at the back should shine through
+		// just as brightly as voxels at the front in the rendered image,
+		// so we composite with alpha instead of alpha * transmittance.
+		float contribution = render_no_attenuation == 1 ? alpha : weight;
+
+		// TODO: the alpha channel (.w()) currently accumulates `contribution`
+		// as well; check whether it should instead keep the attenuated
+		// `weight`, as the depth and max-weight logic below does.
+		local_rgba.head<3>() += rgb * contribution;
+		local_rgba.w() += contribution;
+
 		if (weight > payload.max_weight) {
 			payload.max_weight = weight;
 			local_depth = cam_fwd.dot(pos - camera_matrix.col(3));
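In isolation, the accumulation above behaves like the following sketch (Python with numpy, for illustration only; the function and variable names are mine, not from the codebase):

    import numpy as np

    def composite_ray(samples, no_attenuation=False):
        # samples: iterable of (rgb, sigma, dt) along one ray, front to back.
        rgb_acc, alpha_acc, T = np.zeros(3), 0.0, 1.0
        for rgb, sigma, dt in samples:
            alpha = 1.0 - np.exp(-sigma * dt)   # opacity of this segment
            weight = alpha * T                  # attenuated by what lies in front
            contribution = alpha if no_attenuation else weight
            rgb_acc += contribution * np.asarray(rgb, dtype=float)
            alpha_acc += contribution
            T *= 1.0 - alpha                    # remaining transmittance
        return rgb_acc, alpha_acc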
@@ -1281,6 +1317,7 @@ __device__ LossAndGradient loss_and_gradient(const Vector3f& target, const Vecto
 	}
 }
 
+// Forward pass of training: composite each ray, then compute its loss and gradients.
 __global__ void compute_loss_kernel_train_nerf(
 	const uint32_t n_rays,
 	BoundingBox aabb,
@@ -1297,6 +1334,7 @@ __global__ void compute_loss_kernel_train_nerf(
 	Array3f background_color,
 	EColorSpace color_space,
 	bool train_with_random_bg_color,
+	bool train_with_grey_loss,
 	bool train_in_linear_colors,
 	const uint32_t n_training_images,
 	const TrainingImageMetadata* __restrict__ metadata,
@@ -1316,6 +1354,7 @@ __global__ void compute_loss_kernel_train_nerf(
 	ENerfActivation rgb_activation,
 	ENerfActivation density_activation,
 	bool snap_to_pixel_centers,
+	bool train_no_attenuation,
 	float* __restrict__ error_map,
 	const float* __restrict__ cdf_x_cond_y,
 	const float* __restrict__ cdf_y,
@@ -1367,7 +1406,13 @@ __global__ void compute_loss_kernel_train_nerf(
 		const float alpha = 1.f - __expf(-density * dt);
 		const float weight = alpha * T;
-		rgb_ray += weight * rgb;
+
+		const float contribution = train_no_attenuation ? alpha : weight;
+
+		rgb_ray += contribution * rgb;
+
+		// TODO: should the depth and hitpoint accumulations below
+		// also use `contribution`?
 		hitpoint += weight * pos;
 		depth_ray += weight * cur_depth;
 		T *= (1.f - alpha);
@@ -1417,11 +1462,15 @@ __global__ void compute_loss_kernel_train_nerf(
-	// of our training data and ask the network to only learn
-	// the alpha. rgb values are now irrelevant, so shade
-	// will always be [1,1,1]
-	Array3f grey = Array3f::Constant(texsamp.w());
+	// With grey loss, we ignore the colour of our training data and ask
+	// the network to learn only the alpha: rgb values are irrelevant, so
+	// the target shade is the alpha value replicated across channels.
+	Array3f pixel_colour = texsamp.head<3>();
+
+	if (train_with_grey_loss) {
+		pixel_colour = Array3f::Constant(texsamp.w());
+	}
 
 	Array3f rgbtarget;
 	if (train_in_linear_colors || color_space == EColorSpace::Linear) {
-		rgbtarget = exposure_scale * grey + (1.0f - texsamp.w()) * background_color;
+		rgbtarget = exposure_scale * pixel_colour + (1.0f - texsamp.w()) * background_color;
 
 		if (!train_in_linear_colors) {
 			rgbtarget = linear_to_srgb(rgbtarget);
@@ -1430,7 +1479,7 @@ __global__ void compute_loss_kernel_train_nerf(
 	} else if (color_space == EColorSpace::SRGB) {
 		background_color = linear_to_srgb(background_color);
 		if (texsamp.w() > 0) {
-			rgbtarget = linear_to_srgb(exposure_scale * grey / texsamp.w()) * texsamp.w() + (1.0f - texsamp.w()) * background_color;
+			rgbtarget = linear_to_srgb(exposure_scale * pixel_colour / texsamp.w()) * texsamp.w() + (1.0f - texsamp.w()) * background_color;
 		} else {
 			rgbtarget = background_color;
 		}
@@ -1534,7 +1583,12 @@ __global__ void compute_loss_kernel_train_nerf(
 		const float density = network_to_density(float(local_network_output[3]), density_activation);
 		const float alpha = 1.f - __expf(-density * dt);
 		const float weight = alpha * T;
-		rgb_ray2 += weight * rgb;
+
+		const float contribution = train_no_attenuation ? alpha : weight;
+
+		rgb_ray2 += contribution * rgb;
+
+		// TODO: should the depth accumulation use `contribution` too?
 		depth_ray2 += weight * depth;
 		T *= (1.f - alpha);
@@ -1544,8 +1598,6 @@ __global__ void compute_loss_kernel_train_nerf(
 
 		tcnn::vector_t<tcnn::network_precision_t, 4> local_dL_doutput;
 
-		// TURN OFF COLOUR-BASED TRAINING:
-
 		// chain rule to go from dloss/drgb to dloss/dmlp_output
 		local_dL_doutput[0] = loss_scale * (dloss_by_drgb.x() * network_to_rgb_derivative(local_network_output[0], rgb_activation) + fmaxf(0.0f, output_l2_reg * (float)local_network_output[0])); // Penalize way too large color values
 		local_dL_doutput[1] = loss_scale * (dloss_by_drgb.y() * network_to_rgb_derivative(local_network_output[1], rgb_activation) + fmaxf(0.0f, output_l2_reg * (float)local_network_output[1]));
@@ -2100,6 +2152,7 @@ uint32_t Testbed::NerfTracer::trace(
 	int visualized_dim,
 	ENerfActivation rgb_activation,
 	ENerfActivation density_activation,
+	int render_no_attenuation,
 	int show_accel,
 	float min_transmittance,
 	float glow_y_cutoff,
@@ -2194,6 +2247,7 @@ uint32_t Testbed::NerfTracer::trace(
 			grid,
 			rgb_activation,
 			density_activation,
+			render_no_attenuation,
 			show_accel,
 			min_transmittance
 		);
@@ -2362,6 +2416,7 @@ void Testbed::render_nerf(
 		visualized_dimension,
 		m_nerf.rgb_activation,
 		m_nerf.density_activation,
+		m_nerf.render_no_attenuation ? 1 : 0,
 		m_nerf.show_accel,
 		m_nerf.render_min_transmittance,
 		m_nerf.glow_y_cutoff,
@@ -3271,6 +3326,7 @@ void Testbed::train_nerf_step(uint32_t target_batch_size, Testbed::NerfCounters&
 		m_background_color.head<3>(),
 		m_color_space,
 		m_nerf.training.random_bg_color,
+		m_nerf.training.grey_loss,
 		m_nerf.training.linear_colors,
 		m_nerf.training.n_images_for_training,
 		m_nerf.training.dataset.metadata_gpu.data(),
@@ -3290,6 +3346,7 @@ void Testbed::train_nerf_step(uint32_t target_batch_size, Testbed::NerfCounters&
 		m_nerf.rgb_activation,
 		m_nerf.density_activation,
 		m_nerf.training.snap_to_pixel_centers,
+		m_nerf.train_no_attenuation,
 		accumulate_error ? m_nerf.training.error_map.data.data() : nullptr,
 		sample_focal_plane_proportional_to_error ? m_nerf.training.error_map.cdf_x_cond_y.data() : nullptr,
 		sample_focal_plane_proportional_to_error ? m_nerf.training.error_map.cdf_y.data() : nullptr,
--
GitLab
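For completeness, a sketch of what --grey_loss does to the training target, mirroring the linear-colour branch of compute_loss_kernel_train_nerf above (Python for illustration; names are mine, not from the codebase):

    import numpy as np

    def training_target(texsamp, background_color, grey_loss, exposure_scale=1.0):
        # texsamp: RGBA sample of the training image at the ray's pixel;
        # background_color may be randomised per ray when random_bg_color is set.
        rgb, a = np.asarray(texsamp[:3], dtype=float), float(texsamp[3])
        # --grey_loss: discard the colour channels and fit the alpha channel,
        # replicated across channels as a grey value, instead.
        pixel_colour = np.full(3, a) if grey_loss else rgb
        # Composite the target over the background colour.
        return exposure_scale * pixel_colour + (1.0 - a) * np.asarray(background_color)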