From 4f593d08619646efc7a1f9254187ef1427bb7cf5 Mon Sep 17 00:00:00 2001
From: Thomas Pickles <thomas.pickles@ens-lyon.fr>
Date: Mon, 6 Mar 2023 13:27:03 +0100
Subject: [PATCH] Greyscale now implemented as a command-line option

Options can now be selected at the command line without requiring
separate builds of the code.
---
 include/neural-graphics-primitives/common.h  |   3 +-
 include/neural-graphics-primitives/testbed.h |   5 +
 scripts/run.py                               |  19 +++-
 src/python_api.cu                            |   4 +
 src/testbed.cu                               |   1 +
 src/testbed_nerf.cu                          | 111 ++++++++++++++-----
 6 files changed, 114 insertions(+), 29 deletions(-)

diff --git a/include/neural-graphics-primitives/common.h b/include/neural-graphics-primitives/common.h
index 3a632b4..b9401c4 100644
--- a/include/neural-graphics-primitives/common.h
+++ b/include/neural-graphics-primitives/common.h
@@ -135,12 +135,13 @@ enum class ELossType : int {
 static constexpr const char* LossTypeStr = "L2\0L1\0MAPE\0SMAPE\0Huber\0LogL1\0RelativeL2\0\0";
 
 enum class ENerfActivation : int {
+	Constant,
 	None,
 	ReLU,
 	Logistic,
 	Exponential,
 };
-static constexpr const char* NerfActivationStr = "None\0ReLU\0Logistic\0Exponential\0\0";
+static constexpr const char* NerfActivationStr = "Constant\0None\0ReLU\0Logistic\0Exponential\0\0";
 
 enum class EMeshSdfMode : int {
 	Watertight,
diff --git a/include/neural-graphics-primitives/testbed.h b/include/neural-graphics-primitives/testbed.h
index af403d8..12e366a 100644
--- a/include/neural-graphics-primitives/testbed.h
+++ b/include/neural-graphics-primitives/testbed.h
@@ -191,6 +191,7 @@ public:
 		int visualized_dim,
 		ENerfActivation rgb_activation,
 		ENerfActivation density_activation,
+		int render_no_attenuation,
 		int show_accel,
 		float min_transmittance,
 		float glow_y_cutoff,
@@ -683,6 +684,7 @@ public:
 			NerfCounters counters_rgb;
 
 			bool random_bg_color = true;
+			bool grey_loss = false;
 			bool linear_colors = false;
 			ELossType loss_type = ELossType::L2;
 			ELossType depth_loss_type = ELossType::L1;
@@ -755,6 +757,9 @@ public:
 
 		float render_min_transmittance = 0.01f;
 
+		bool render_no_attenuation = false;
+		bool train_no_attenuation = false;
+
 		float glow_y_cutoff = 0.f;
 		int glow_mode = 0;
 	} m_nerf;
diff --git a/scripts/run.py b/scripts/run.py
index 526b191..ca20be5 100644
--- a/scripts/run.py
+++ b/scripts/run.py
@@ -57,7 +57,7 @@ def parse_args():
 	parser.add_argument("--save_mesh", default="", help="Output a marching-cubes based mesh from the NeRF or SDF model. Supports OBJ and PLY format.")
 	parser.add_argument("--marching_cubes_res", default=256, type=int, help="Sets the resolution for the marching cubes grid.")
 
-	parser.add_argument("--save_slices", action="store_true", help="Output slices after training.")
+	parser.add_argument("--save_slices", action="store_true", help="Output slices of density after training.")
 	parser.add_argument("--slices_res", default=256, type=int, help="Sets the resolution for the slices png.")
 
 	parser.add_argument("--width", "--screenshot_w", type=int, default=0, help="Resolution width of GUI and screenshots.")
@@ -75,6 +75,11 @@ def parse_args():
 	parser.add_argument("--tomonerf", type=int, default=0, help="Which transforms to apply [single_channel, exponentiate img data, crop].")
 	parser.add_argument("--crop", nargs=3, type=float, help="Percentage to crop unit cube to.")
 
+	parser.add_argument("--grey_loss", action="store_true", help="Ignore colour info in the target loss function; fit the target's alpha channel instead.")
+	parser.add_argument("--no_colour", action="store_true", help="Clamp the network's RGB output to a constant shade [1,1,1], so only density is learnt.")
+	parser.add_argument("--train_no_attenuation", action="store_true", help="Experimental: composite training rays with alpha instead of alpha * transmittance. Currently makes training explode.")
+	parser.add_argument("--render_no_attenuation", action="store_true", help="Render images without attenuation of voxels in front.")
+
 	return parser.parse_args()
 
 def get_scene(scene):
@@ -189,6 +194,18 @@ if __name__ == "__main__":
 
 	testbed.nerf.render_with_lens_distortion = True
 
+	if args.grey_loss:
+		testbed.nerf.training.grey_loss = True
+
+	if args.no_colour:
+		testbed.nerf.rgb_activation = ngp.NerfActivation.Constant
+
+	if args.train_no_attenuation:
+		testbed.nerf.train_no_attenuation = True
+
+	if args.render_no_attenuation:
+		testbed.nerf.render_no_attenuation = True
+
 	network_stem = os.path.splitext(os.path.basename(args.network))[0] if args.network else "base"
 	if testbed.mode == ngp.TestbedMode.Sdf:
 		setup_colored_sdf(testbed, args.scene)
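For reference, a minimal sketch of driving the same options directly through the pyngp bindings added below, bypassing run.py. The scene path and step count are illustrative only; the grey_loss, rgb_activation and render_no_attenuation attributes are the ones this patch exposes:

    import pyngp as ngp  # built from this branch

    testbed = ngp.Testbed(ngp.TestbedMode.Nerf)
    testbed.load_training_data("data/nerf/fox")  # illustrative scene path

    # --no_colour: the RGB activation returns a constant 1, so only density is learnt.
    testbed.nerf.rgb_activation = ngp.NerfActivation.Constant
    # --grey_loss: fit the target's alpha channel instead of its colour.
    testbed.nerf.training.grey_loss = True
    # --render_no_attenuation: composite with alpha rather than alpha * transmittance.
    testbed.nerf.render_no_attenuation = True

    testbed.shall_train = True
    while testbed.frame() and testbed.training_step < 1000:
        pass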
diff --git a/src/python_api.cu b/src/python_api.cu
index 7f8b4f8..eabab2e 100644
--- a/src/python_api.cu
+++ b/src/python_api.cu
@@ -292,6 +292,7 @@ PYBIND11_MODULE(pyngp, m) {
 		.export_values();
 
 	py::enum_<ENerfActivation>(m, "NerfActivation")
+		.value("Constant", ENerfActivation::Constant)
 		.value("None", ENerfActivation::None)
 		.value("ReLU", ENerfActivation::ReLU)
 		.value("Logistic", ENerfActivation::Logistic)
@@ -570,6 +571,8 @@ PYBIND11_MODULE(pyngp, m) {
 		.def_readwrite("render_lens", &Testbed::Nerf::render_lens)
 		.def_readwrite("rendering_min_transmittance", &Testbed::Nerf::render_min_transmittance)
 		.def_readwrite("render_min_transmittance", &Testbed::Nerf::render_min_transmittance)
+		.def_readwrite("render_no_attenuation", &Testbed::Nerf::render_no_attenuation)
+		.def_readwrite("train_no_attenuation", &Testbed::Nerf::train_no_attenuation)
 		.def_readwrite("cone_angle_constant", &Testbed::Nerf::cone_angle_constant)
 		.def_readwrite("visualize_cameras", &Testbed::Nerf::visualize_cameras)
 		.def_readwrite("glow_y_cutoff", &Testbed::Nerf::glow_y_cutoff)
@@ -619,6 +622,7 @@ PYBIND11_MODULE(pyngp, m) {
 		;
 
 	py::class_<Testbed::Nerf::Training>(nerf, "Training")
+		.def_readwrite("grey_loss", &Testbed::Nerf::Training::grey_loss)
 		.def_readwrite("random_bg_color", &Testbed::Nerf::Training::random_bg_color)
 		.def_readwrite("n_images_for_training", &Testbed::Nerf::Training::n_images_for_training)
 		.def_readwrite("linear_colors", &Testbed::Nerf::Training::linear_colors)
diff --git a/src/testbed.cu b/src/testbed.cu
index 2f06b23..8c41134 100644
--- a/src/testbed.cu
+++ b/src/testbed.cu
@@ -858,6 +858,7 @@ void Testbed::imgui() {
 	if (m_testbed_mode == ETestbedMode::Nerf && ImGui::TreeNode("NeRF training options")) {
 		ImGui::Checkbox("Random bg color", &m_nerf.training.random_bg_color);
+		ImGui::Checkbox("Learn grey", &m_nerf.training.grey_loss);
 		ImGui::SameLine();
 		ImGui::Checkbox("Snap to pixel centers", &m_nerf.training.snap_to_pixel_centers);
 		ImGui::SliderFloat("Near distance", &m_nerf.training.near_distance, 0.0f, 1.0f);
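For reference, the compositing rule that both composite_kernel_nerf and compute_loss_kernel_train_nerf implement in the testbed_nerf.cu changes below, together with the no-attenuation variant this patch adds. The notation is mine, not from the code:

    % Per-sample opacity and transmittance along a ray:
    \[
      \alpha_i = 1 - e^{-\sigma_i \delta_i}, \qquad
      T_i = \prod_{j<i} (1 - \alpha_j), \qquad
      w_i = \alpha_i T_i
    \]
    % Default compositing attenuates each sample by what lies in front of it;
    % the *_no_attenuation options drop the T_i factor from the colour term:
    \[
      C_{\text{default}} = \sum_i w_i \, c_i, \qquad
      C_{\text{no-atten}} = \sum_i \alpha_i \, c_i
    \]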
diff --git a/src/testbed_nerf.cu b/src/testbed_nerf.cu
index bf08d18..708ffce 100644
--- a/src/testbed_nerf.cu
+++ b/src/testbed_nerf.cu
@@ -214,27 +214,27 @@ inline __device__ float advance_to_next_voxel(float t, float cone_angle, const V
 __device__ float network_to_rgb(float val, ENerfActivation activation) {
-	return 1.f; // always return 1
-	// switch (activation) {
-	// 	case ENerfActivation::None: return val;
-	// 	case ENerfActivation::ReLU: return val > 0.0f ? val : 0.0f;
-	// 	case ENerfActivation::Logistic: return tcnn::logistic(val);
-	// 	case ENerfActivation::Exponential: return __expf(tcnn::clamp(val, -10.0f, 10.0f));
-	// 	default: assert(false);
-	// }
-	// return 0.0f;
+	switch (activation) {
+		case ENerfActivation::Constant: return 1.0f;
+		case ENerfActivation::None: return val;
+		case ENerfActivation::ReLU: return val > 0.0f ? val : 0.0f;
+		case ENerfActivation::Logistic: return tcnn::logistic(val);
+		case ENerfActivation::Exponential: return __expf(tcnn::clamp(val, -10.0f, 10.0f));
+		default: assert(false);
+	}
+	return 0.0f;
 }
 
-// No way to modify the derivative for rgb
+// Derivative of the RGB activation with respect to the raw network output.
 __device__ float network_to_rgb_derivative(float val, ENerfActivation activation) {
-	return 0.f; // no way to change rgb value
-	// switch (activation) {
-	// 	case ENerfActivation::None: return 1.0f;
-	// 	case ENerfActivation::ReLU: return val > 0.0f ? 1.0f : 0.0f;
-	// 	case ENerfActivation::Logistic: { float density = tcnn::logistic(val); return density * (1 - density); };
-	// 	case ENerfActivation::Exponential: return __expf(tcnn::clamp(val, -10.0f, 10.0f));
-	// 	default: assert(false);
-	// }
+	switch (activation) {
+		case ENerfActivation::Constant: return 0.0f;
+		case ENerfActivation::None: return 1.0f;
+		case ENerfActivation::ReLU: return val > 0.0f ? 1.0f : 0.0f;
+		case ENerfActivation::Logistic: { float density = tcnn::logistic(val); return density * (1 - density); };
+		case ENerfActivation::Exponential: return __expf(tcnn::clamp(val, -10.0f, 10.0f));
+		default: assert(false);
+	}
+	return 0.0f;
 }
 
 __device__ float network_to_density(float val, ENerfActivation activation) {
@@ -259,7 +259,6 @@ __device__ float network_to_density_derivative(float val, ENerfActivation activa
 	return 0.0f;
 }
 
-// Ignore neurons 0, 1 and 2!
 __device__ Array3f network_to_rgb(const tcnn::vector_t<tcnn::network_precision_t, 4>& local_network_output, ENerfActivation activation) {
 	return {
 		network_to_rgb(float(local_network_output[0]), activation),
@@ -806,6 +805,30 @@ __global__ void generate_next_nerf_network_inputs(
 	payload.n_steps = n_steps;
 }
 
+// `composite_kernel_nerf` is the rendering path: given the desired
+// viewpoint, it converts network outputs to rgba values and accumulates
+// them along each ray via the volume-rendering equation to produce the
+// 2D image from the NeRF cloud. The training path,
+// `compute_loss_kernel_train_nerf` below, evaluates the same rendering
+// equation in its forward pass before assembling the loss gradients, so
+// any change to how samples are composited here must be mirrored there.
+//
+// Note that we can also choose to render different types of images,
+// such as normals, albedo, etc.
+//
+// This might also be the place to change our projection method, so that
+// we can generate images in the same manner that Astra does while
+// keeping the NeRF learning unchanged.
 __global__ void composite_kernel_nerf(
 	const uint32_t n_elements,
 	const uint32_t stride,
@@ -827,6 +850,7 @@ __global__ void composite_kernel_nerf(
 	const uint8_t* __restrict__ density_grid,
 	ENerfActivation rgb_activation,
 	ENerfActivation density_activation,
+	int render_no_attenuation,
 	int show_accel,
 	float min_transmittance
 ) {
@@ -993,8 +1017,20 @@ __global__ void composite_kernel_nerf(
 			rgb = Array3f::Constant(alpha);
 		}
 
-		local_rgba.head<3>() += rgb * weight;
-		local_rgba.w() += weight;
+		// Without attenuation, voxels at the back should shine through
+		// just as brightly as voxels at the front in the rendered image,
+		// so we composite with alpha instead of alpha * transmittance.
+		float contribution = render_no_attenuation == 1 ? alpha : weight;
+
+		// TODO: the alpha channel (.w()) currently accumulates `contribution`
+		// as well; check whether it should instead keep the attenuated
+		// `weight`, as the depth and max-weight logic below does.
+		local_rgba.head<3>() += rgb * contribution;
+		local_rgba.w() += contribution;
+
 		if (weight > payload.max_weight) {
 			payload.max_weight = weight;
 			local_depth = cam_fwd.dot(pos - camera_matrix.col(3));
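In isolation, the accumulation above behaves like the following sketch (Python with numpy, for illustration only; the function and variable names are mine, not from the codebase):

    import numpy as np

    def composite_ray(samples, no_attenuation=False):
        # samples: iterable of (rgb, sigma, dt) along one ray, front to back.
        rgb_acc, alpha_acc, T = np.zeros(3), 0.0, 1.0
        for rgb, sigma, dt in samples:
            alpha = 1.0 - np.exp(-sigma * dt)   # opacity of this segment
            weight = alpha * T                  # attenuated by what lies in front
            contribution = alpha if no_attenuation else weight
            rgb_acc += contribution * np.asarray(rgb, dtype=float)
            alpha_acc += contribution
            T *= 1.0 - alpha                    # remaining transmittance
        return rgb_acc, alpha_acc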
@@ -1281,6 +1317,7 @@ __device__ LossAndGradient loss_and_gradient(const Vector3f& target, const Vecto
 	}
 }
 
+// Forward pass of training: composite each ray, then compute its loss and gradients.
 __global__ void compute_loss_kernel_train_nerf(
 	const uint32_t n_rays,
 	BoundingBox aabb,
@@ -1297,6 +1334,7 @@ __global__ void compute_loss_kernel_train_nerf(
 	Array3f background_color,
 	EColorSpace color_space,
 	bool train_with_random_bg_color,
+	bool train_with_grey_loss,
 	bool train_in_linear_colors,
 	const uint32_t n_training_images,
 	const TrainingImageMetadata* __restrict__ metadata,
@@ -1316,6 +1354,7 @@ __global__ void compute_loss_kernel_train_nerf(
 	ENerfActivation rgb_activation,
 	ENerfActivation density_activation,
 	bool snap_to_pixel_centers,
+	bool train_no_attenuation,
 	float* __restrict__ error_map,
 	const float* __restrict__ cdf_x_cond_y,
 	const float* __restrict__ cdf_y,
@@ -1367,7 +1406,13 @@ __global__ void compute_loss_kernel_train_nerf(
 		const float alpha = 1.f - __expf(-density * dt);
 		const float weight = alpha * T;
-		rgb_ray += weight * rgb;
+
+		const float contribution = train_no_attenuation ? alpha : weight;
+
+		rgb_ray += contribution * rgb;
+
+		// TODO: should the depth and hitpoint accumulations below
+		// also use `contribution`?
 		hitpoint += weight * pos;
 		depth_ray += weight * cur_depth;
 		T *= (1.f - alpha);
@@ -1417,11 +1462,15 @@ __global__ void compute_loss_kernel_train_nerf(
-	// of our training data and ask the network to only learn
-	// the alpha. rgb values are now irrelevant, so shade
-	// will always be [1,1,1]
-	Array3f grey = Array3f::Constant(texsamp.w());
+	// With grey loss, we ignore the colour of our training data and ask
+	// the network to learn only the alpha: rgb values are irrelevant, so
+	// the target shade is the alpha value replicated across channels.
+	Array3f pixel_colour = texsamp.head<3>();
+
+	if (train_with_grey_loss) {
+		pixel_colour = Array3f::Constant(texsamp.w());
+	}
 
 	Array3f rgbtarget;
 	if (train_in_linear_colors || color_space == EColorSpace::Linear) {
-		rgbtarget = exposure_scale * grey + (1.0f - texsamp.w()) * background_color;
+		rgbtarget = exposure_scale * pixel_colour + (1.0f - texsamp.w()) * background_color;
 
 		if (!train_in_linear_colors) {
 			rgbtarget = linear_to_srgb(rgbtarget);
@@ -1430,7 +1479,7 @@ __global__ void compute_loss_kernel_train_nerf(
 	} else if (color_space == EColorSpace::SRGB) {
 		background_color = linear_to_srgb(background_color);
 		if (texsamp.w() > 0) {
-			rgbtarget = linear_to_srgb(exposure_scale * grey / texsamp.w()) * texsamp.w() + (1.0f - texsamp.w()) * background_color;
+			rgbtarget = linear_to_srgb(exposure_scale * pixel_colour / texsamp.w()) * texsamp.w() + (1.0f - texsamp.w()) * background_color;
 		} else {
 			rgbtarget = background_color;
 		}
@@ -1534,7 +1583,12 @@ __global__ void compute_loss_kernel_train_nerf(
 		const float density = network_to_density(float(local_network_output[3]), density_activation);
 		const float alpha = 1.f - __expf(-density * dt);
 		const float weight = alpha * T;
-		rgb_ray2 += weight * rgb;
+
+		const float contribution = train_no_attenuation ? alpha : weight;
+
+		rgb_ray2 += contribution * rgb;
+
+		// TODO: should the depth accumulation use `contribution` too?
 		depth_ray2 += weight * depth;
 		T *= (1.f - alpha);
@@ -1544,8 +1598,6 @@ __global__ void compute_loss_kernel_train_nerf(
 
 		tcnn::vector_t<tcnn::network_precision_t, 4> local_dL_doutput;
 
-		// TURN OFF COLOUR-BASED TRAINING:
-
 		// chain rule to go from dloss/drgb to dloss/dmlp_output
 		local_dL_doutput[0] = loss_scale * (dloss_by_drgb.x() * network_to_rgb_derivative(local_network_output[0], rgb_activation) + fmaxf(0.0f, output_l2_reg * (float)local_network_output[0])); // Penalize way too large color values
 		local_dL_doutput[1] = loss_scale * (dloss_by_drgb.y() * network_to_rgb_derivative(local_network_output[1], rgb_activation) + fmaxf(0.0f, output_l2_reg * (float)local_network_output[1]));
@@ -2100,6 +2152,7 @@ uint32_t Testbed::NerfTracer::trace(
 	int visualized_dim,
 	ENerfActivation rgb_activation,
 	ENerfActivation density_activation,
+	int render_no_attenuation,
 	int show_accel,
 	float min_transmittance,
 	float glow_y_cutoff,
@@ -2194,6 +2247,7 @@ uint32_t Testbed::NerfTracer::trace(
 			grid,
 			rgb_activation,
 			density_activation,
+			render_no_attenuation,
 			show_accel,
 			min_transmittance
 		);
@@ -2362,6 +2416,7 @@ void Testbed::render_nerf(
 		visualized_dimension,
 		m_nerf.rgb_activation,
 		m_nerf.density_activation,
+		m_nerf.render_no_attenuation ? 1 : 0,
 		m_nerf.show_accel,
 		m_nerf.render_min_transmittance,
 		m_nerf.glow_y_cutoff,
@@ -3271,6 +3326,7 @@ void Testbed::train_nerf_step(uint32_t target_batch_size, Testbed::NerfCounters&
 		m_background_color.head<3>(),
 		m_color_space,
 		m_nerf.training.random_bg_color,
+		m_nerf.training.grey_loss,
 		m_nerf.training.linear_colors,
 		m_nerf.training.n_images_for_training,
 		m_nerf.training.dataset.metadata_gpu.data(),
@@ -3290,6 +3346,7 @@ void Testbed::train_nerf_step(uint32_t target_batch_size, Testbed::NerfCounters&
 		m_nerf.rgb_activation,
 		m_nerf.density_activation,
 		m_nerf.training.snap_to_pixel_centers,
+		m_nerf.train_no_attenuation,
 		accumulate_error ? m_nerf.training.error_map.data.data() : nullptr,
 		sample_focal_plane_proportional_to_error ? m_nerf.training.error_map.cdf_x_cond_y.data() : nullptr,
 		sample_focal_plane_proportional_to_error ? m_nerf.training.error_map.cdf_y.data() : nullptr,
--
GitLab
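For completeness, a sketch of what --grey_loss does to the training target, mirroring the linear-colour branch of compute_loss_kernel_train_nerf above (Python for illustration; names are mine, not from the codebase):

    import numpy as np

    def training_target(texsamp, background_color, grey_loss, exposure_scale=1.0):
        # texsamp: RGBA sample of the training image at the ray's pixel;
        # background_color may be randomised per ray when random_bg_color is set.
        rgb, a = np.asarray(texsamp[:3], dtype=float), float(texsamp[3])
        # --grey_loss: discard the colour channels and fit the alpha channel,
        # replicated across channels as a grey value, instead.
        pixel_colour = np.full(3, a) if grey_loss else rgb
        # Composite the target over the background colour.
        return exposure_scale * pixel_colour + (1.0 - a) * np.asarray(background_color)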