diff --git a/include/neural-graphics-primitives/camera_path.h b/include/neural-graphics-primitives/camera_path.h
index c4cba346525477883d45d474609ef51926506f81..b520b69576be19a8e0dde37792d06cc70cf36a24 100644
--- a/include/neural-graphics-primitives/camera_path.h
+++ b/include/neural-graphics-primitives/camera_path.h
@@ -53,11 +53,11 @@ struct CameraKeyframe {
 	CameraKeyframe() = default;
 	CameraKeyframe(const Eigen::Vector4f &r, const Eigen::Vector3f &t, float sl, float sc, float fv, float df, int gm, float gyc) : R(r), T(t), slice(sl), scale(sc), fov(fv), aperture_size(df), glow_mode(gm), glow_y_cutoff(gyc) {}
 	CameraKeyframe(Eigen::Matrix<float, 3, 4> m, float sl, float sc, float fv, float df, int gm, float gyc) : slice(sl), scale(sc), fov(fv), aperture_size(df), glow_mode(gm), glow_y_cutoff(gyc) { T=m.col(3); R=Eigen::Quaternionf(m.block<3,3>(0,0)).coeffs();  }
-	CameraKeyframe operator*(float f) const { return {R*f,T*f,slice*f,scale*f,fov*f,aperture_size*f,glow_mode,glow_y_cutoff*f}; }
+	CameraKeyframe operator*(float f) const { return {R*f, T*f, slice*f, scale*f, fov*f, aperture_size*f, glow_mode, glow_y_cutoff*f}; } // glow_mode is copied unscaled; all other listed fields are multiplied by f
 	CameraKeyframe operator+(const CameraKeyframe &rhs) const {
 		Eigen::Vector4f Rr=rhs.R;
 		if (Rr.dot(R)<0.f) Rr=-Rr;
-		return {R+Rr,T+rhs.T,slice+rhs.slice,scale+rhs.scale,fov+rhs.fov,aperture_size+rhs.aperture_size,glow_mode,glow_y_cutoff+rhs.glow_y_cutoff};
+		return {R+Rr, T+rhs.T, slice+rhs.slice, scale+rhs.scale, fov+rhs.fov, aperture_size+rhs.aperture_size, glow_mode, glow_y_cutoff+rhs.glow_y_cutoff}; // Rr is rhs.R negated when its dot product with R is negative; glow_mode is taken from *this, rhs.glow_mode is ignored
 	}
 	bool SamePosAs(const CameraKeyframe &rhs) const {
 		return (T-rhs.T).norm()<0.0001f && fabsf(R.dot(rhs.R))>=0.999f;
@@ -88,8 +88,8 @@ struct CameraPath {
 #ifdef NGP_GUI
 	ImGuizmo::MODE m_gizmo_mode = ImGuizmo::LOCAL;
 	ImGuizmo::OPERATION m_gizmo_op = ImGuizmo::TRANSLATE;
-	int imgui(char path_filename_buf[128], float frame_milliseconds, Eigen::Matrix<float, 3, 4> &camera, float slice_plane_z, float scale, float fov, float aperture_size, float bounding_radius, const Eigen::Matrix<float, 3, 4> &first_xform, int glow_mode, float glow_y_cutoff);
-	bool imgui_viz(ImDrawList* list, Eigen::Matrix<float, 4, 4> &view2proj, Eigen::Matrix<float, 4, 4> &world2proj, Eigen::Matrix<float, 4, 4> &world2view, Eigen::Vector2f focal, float aspect);
+	int imgui(char path_filename_buf[128], float frame_milliseconds, Eigen::Matrix<float, 3, 4>& camera, float slice_plane_z, float scale, float fov, float aperture_size, float bounding_radius, const Eigen::Matrix<float, 3, 4>& first_xform, int glow_mode, float glow_y_cutoff);
+	bool imgui_viz(ImDrawList* list, Eigen::Matrix<float, 4, 4>& view2proj, Eigen::Matrix<float, 4, 4>& world2proj, Eigen::Matrix<float, 4, 4>& world2view, Eigen::Vector2f focal, float aspect);
 #endif
 };
 
diff --git a/include/neural-graphics-primitives/takikawa_encoding.cuh b/include/neural-graphics-primitives/takikawa_encoding.cuh
index af8bb168962f759a60875fa7c21fd004b99923fe..37ee159431e888af5ff6b84297d8dc7df3bf8553 100644
--- a/include/neural-graphics-primitives/takikawa_encoding.cuh
+++ b/include/neural-graphics-primitives/takikawa_encoding.cuh
@@ -62,12 +62,12 @@ __global__ void kernel_takikawa(
 			Eigen::Vector3f pos_derivative;
 
 			if (interpolation_type == tcnn::InterpolationType::Linear) {
-				#pragma unroll
+				NGP_PRAGMA_UNROLL
 				for (uint32_t dim = 0; dim < 3; ++dim) {
 					pos_derivative[dim] = 1.0f;
 				}
 			} else {
-				#pragma unroll
+				NGP_PRAGMA_UNROLL
 				for (uint32_t dim = 0; dim < 3; ++dim) {
 					pos_derivative[dim] = tcnn::smoothstep_derivative(pos[dim]);
 					pos[dim] = tcnn::smoothstep(pos[dim]);
@@ -78,11 +78,11 @@ __global__ void kernel_takikawa(
 				// Tri-linear interpolation
 				tcnn::vector_t<T, N_FEATURES_PER_LEVEL> result = {0};
 
-				#pragma unroll
+				NGP_PRAGMA_UNROLL
 				for (uint32_t idx = 0; idx < 8; ++idx) {
 					float weight = 1;
 
-					#pragma unroll
+					NGP_PRAGMA_UNROLL
 					for (uint32_t dim = 0; dim < 3; ++dim) {
 						if ((idx & (1<<dim)) == 0) {
 							weight *= 1 - pos[dim];
@@ -95,13 +95,13 @@ __global__ void kernel_takikawa(
 					auto val = *(tcnn::vector_t<T, N_FEATURES_PER_LEVEL>*)&grid[param_idx];
 
 					// Read params
-					#pragma unroll
+					NGP_PRAGMA_UNROLL
 					for (uint32_t feature = 0; feature < N_FEATURES_PER_LEVEL; ++feature) {
 						result[feature] += (T)(weight * (float)val[feature]);
 					}
 				}
 
-				#pragma unroll
+				NGP_PRAGMA_UNROLL
 				for (uint32_t feature = 0; feature < N_FEATURES_PER_LEVEL; ++feature) {
 					data_out(level * N_FEATURES_PER_LEVEL + feature, i) = result[feature];
 				}
@@ -111,16 +111,16 @@ __global__ void kernel_takikawa(
 			if (dy_dx) {
 				const float scale = scalbnf(1.0f, level + starting_level);
 
-				#pragma unroll
+				NGP_PRAGMA_UNROLL
 				for (uint32_t grad_dim = 0; grad_dim < 3; ++grad_dim) {
 					tcnn::vector_fullp_t<N_FEATURES_PER_LEVEL> grad = {0};
 
-					#pragma unroll
+					NGP_PRAGMA_UNROLL
 					for (uint32_t idx = 0; idx < 4; ++idx) {
 						float weight = scale;
 						uint32_t child_idx = 0;
 
-						#pragma unroll
+						NGP_PRAGMA_UNROLL
 						for (uint32_t non_grad_dim = 0; non_grad_dim < 2; ++non_grad_dim) {
 							const uint32_t dim = non_grad_dim >= grad_dim ? (non_grad_dim+1) : non_grad_dim;
 
@@ -139,7 +139,7 @@ __global__ void kernel_takikawa(
 						param_idx = node.vertices[child_idx] * N_FEATURES_PER_LEVEL;
 						auto val_right = *(tcnn::vector_t<T, N_FEATURES_PER_LEVEL>*)&grid[param_idx];
 
-						#pragma unroll
+						NGP_PRAGMA_UNROLL
 						for (uint32_t feature = 0; feature < N_FEATURES_PER_LEVEL; ++feature) {
 							((float*)&grad)[feature] += weight * ((float)((T*)&val_right)[feature] - (float)((T*)&val_left)[feature]) * pos_derivative[grad_dim];
 						}
@@ -156,7 +156,7 @@ __global__ void kernel_takikawa(
 		// Set output to zero for levels that were not reached
 		level = max(0, level-(int)starting_level);
 		for (; level < n_levels; ++level) {
-			#pragma unroll
+			NGP_PRAGMA_UNROLL
 			for (uint32_t f = 0; f < N_FEATURES_PER_LEVEL; ++f) {
 				data_out(level * N_FEATURES_PER_LEVEL + f, i) = (T)0.0f;
 			}
@@ -219,7 +219,7 @@ __global__ void kernel_takikawa_backward(
 			level -= starting_level;
 
 			if (interpolation_type == tcnn::InterpolationType::Smoothstep) {
-				#pragma unroll
+				NGP_PRAGMA_UNROLL
 				for (uint32_t dim = 0; dim < 3; ++dim) {
 					pos[dim] = tcnn::smoothstep(pos[dim]);
 				}
@@ -227,18 +227,18 @@ __global__ void kernel_takikawa_backward(
 
 			tcnn::vector_t<T, N_FEATURES_PER_LEVEL> grad;
 
-			#pragma unroll
+			NGP_PRAGMA_UNROLL
 			for (uint32_t f = 0; f < N_FEATURES_PER_LEVEL; ++f) {
 				grad[f] = dL_dy(N_FEATURES_PER_LEVEL * level + f, i);
 			}
 
 			// Tri-linear interpolation
 
-			#pragma unroll
+			NGP_PRAGMA_UNROLL
 			for (uint32_t idx = 0; idx < 8; ++idx) {
 				float weight = 1;
 
-				#pragma unroll
+				NGP_PRAGMA_UNROLL
 				for (uint32_t dim = 0; dim < 3; ++dim) {
 					if ((idx & (1<<dim)) == 0) {
 						weight *= 1 - pos[dim];
@@ -251,7 +251,7 @@ __global__ void kernel_takikawa_backward(
 
 #if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 600 // atomicAdd(__half2) is only supported with compute capability 60 and above
 				if (N_FEATURES_PER_LEVEL > 1 && std::is_same<GRAD_T, __half>::value) {
-					#pragma unroll
+					NGP_PRAGMA_UNROLL
 					for (uint32_t feature = 0; feature < N_FEATURES_PER_LEVEL; feature += 2) {
 						__half2 v = {(__half)((float)grad[feature] * weight), (__half)((float)grad[feature+1] * weight)};
 						atomicAdd((__half2*)&params_gradient[param_idx + feature], v);
@@ -263,7 +263,7 @@ __global__ void kernel_takikawa_backward(
 						// Should never happen
 						//printf("Attempted to use atomicAdd(__half)\n")
 					} else {
-						#pragma unroll
+						NGP_PRAGMA_UNROLL
 						for (uint32_t f = 0; f < N_FEATURES_PER_LEVEL; ++f) {
 							atomicAdd((float*)&params_gradient[param_idx], (float)grad[f] * weight);
 						}
diff --git a/include/neural-graphics-primitives/triangle_octree.cuh b/include/neural-graphics-primitives/triangle_octree.cuh
index 6e1bfeaa0abf0fe3864ec8a6c4f1cd83cb29f826..8ce4b89ff70ee1fdc1114a4e01eb8128e8deb7eb 100644
--- a/include/neural-graphics-primitives/triangle_octree.cuh
+++ b/include/neural-graphics-primitives/triangle_octree.cuh
@@ -237,7 +237,7 @@ public:
 
 			uint8_t child_in_node = 0;
 
-			#pragma unroll
+			NGP_PRAGMA_UNROLL
 			for (uint8_t i = 0; i < 3; ++i) {
 				if (pos[i] >= 0.5f) {
 					child_in_node |= (1 << i);
@@ -262,7 +262,7 @@ public:
 		for (uint8_t depth = 0; depth < max_depth-1; ++depth) {
 			uint8_t child_in_node = 0;
 
-			#pragma unroll
+			NGP_PRAGMA_UNROLL
 			for (uint8_t i = 0; i < 3; ++i) {
 				if (pos[i] >= 0.5f) {
 					child_in_node |= (1 << i);
diff --git a/scripts/run.py b/scripts/run.py
index cd7cdd9e245818270862ee38bd5d76bd0ff6e7a0..aed15f889de280c55967dd7a38cebc133b2bd62c 100644
--- a/scripts/run.py
+++ b/scripts/run.py
@@ -65,8 +65,7 @@ def parse_args():
 	parser.add_argument("--sharpen", default=0, help="Set amount of sharpening applied to NeRF training images. Range 0.0 to 1.0.")
 
 
-	args = parser.parse_args()
-	return args
+	return parser.parse_args()
 
 if __name__ == "__main__":
 	args = parse_args()
diff --git a/src/camera_path.cu b/src/camera_path.cu
index 4b57fca1c3e2307c0ec4c8c1f54289df630e74ee..88d8be0bdd865791827726d9d1505ba05bcafe2b 100644
--- a/src/camera_path.cu
+++ b/src/camera_path.cu
@@ -46,7 +46,7 @@ CameraKeyframe lerp(const CameraKeyframe& p0, const CameraKeyframe& p1, float t,
 		p0.aperture_size + (p1.aperture_size - p0.aperture_size) * t,
 		// Note, the glow mode from the previous frame is used, since the modes cannot be interpolated
 		p0.glow_mode,
-		p0.glow_y_cutoff + (p1.glow_y_cutoff - p0.glow_y_cutoff) * t
+		p0.glow_y_cutoff + (p1.glow_y_cutoff - p0.glow_y_cutoff) * t,
 	};
 }
 
@@ -86,16 +86,16 @@ void from_json(bool is_first, const json& j, CameraKeyframe& p, const CameraKeyf
 
 		if (load_relative_to_first) {
 	 		Eigen::Matrix4f ref4 = Eigen::Matrix4f::Identity();
-	 		ref4.block<3,4>(0,0) = ref;
+	 		ref4.block<3, 4>(0, 0) = ref;
 
 	 		Eigen::Matrix4f first4 = Eigen::Matrix4f::Identity();
-	 		first4.block<3,4>(0,0) = first.m();
+	 		first4.block<3, 4>(0, 0) = first.m();
 
 	 		Eigen::Matrix4f p4 = Eigen::Matrix4f::Identity();
-	 		p4.block<3,4>(0,0) = p.m();
+	 		p4.block<3, 4>(0, 0) = p.m();
 
 	 		auto cur4 = ref4 * first4.inverse() * p4;
-	 		p.from_m(cur4.block<3,4>(0,0));
+	 		p.from_m(cur4.block<3, 4>(0, 0));
 		}
 	}
 	j.at("slice").get_to(p.slice);
@@ -141,7 +141,7 @@ void CameraPath::load(const std::string& filepath_string, const Eigen::Matrix<fl
 }
 
 #ifdef NGP_GUI
-int CameraPath::imgui(char path_filename_buf[128], float frame_milliseconds, Matrix<float, 3, 4> &camera, float slice_plane_z, float scale, float fov, float aperture_size, float bounding_radius, const Eigen::Matrix<float, 3, 4> &first_xform, int glow_mode, float glow_y_cutoff) {
+int CameraPath::imgui(char path_filename_buf[128], float frame_milliseconds, Matrix<float, 3, 4>& camera, float slice_plane_z, float scale, float fov, float aperture_size, float bounding_radius, const Eigen::Matrix<float, 3, 4>& first_xform, int glow_mode, float glow_y_cutoff) {
 	int n=std::max(0,int(m_keyframes.size())-1);
 	int read= 0;					// 1=smooth, 2=hard
 	if (!m_keyframes.empty()) {
diff --git a/src/nerf_loader.cu b/src/nerf_loader.cu
index 447bc27ec39268fb399f3b6eac8b6281de648978..01893c0d2b25b11c497db0cc2f8161a6252cb912 100644
--- a/src/nerf_loader.cu
+++ b/src/nerf_loader.cu
@@ -232,7 +232,7 @@ void read_camera_distortion(const nlohmann::json &json, CameraDistortion &camera
 	}
 
 	if (json.contains("rolling_shutter")) {
-		// the rolling shutter is a float4 of [A,B,C,D] where the time
+		// The rolling shutter is a float4 of [A,B,C,D] where the time
 		// for each pixel is t= A + B * u + C * v + D * motionblur_time,
 		// where u and v are the pixel coordinates within (0-1).
 		// The resulting t is used to interpolate between the start
@@ -624,7 +624,7 @@ NerfDataset load_nerf(const std::vector<filesystem::path>& jsonpaths, float shar
 					if (wa != dst.res.x() || ha != dst.res.y()) {
 						throw std::runtime_error{fmt::format("Depth image {} has wrong resolution.", depthpath.str())};
 					}
-					// tlog::success() << "Depth loaded from " << depthpath;
+					//tlog::success() << "Depth loaded from " << depthpath;
 				}
 			}
 
diff --git a/src/render_buffer.cu b/src/render_buffer.cu
index 8f0831effb20e11faf00b9ac40398263ca473947..55971646a3dd0eb4abc105ba7ea3e9d7cc0c6f7c 100644
--- a/src/render_buffer.cu
+++ b/src/render_buffer.cu
@@ -440,18 +440,6 @@ __device__ Array3f colormap_turbo(float x) {
 	};
 }
 
-__device__ Array3f colormap_viridis(float x) {
-	const Array3f c0 = Array3f{0.2777273272234177f, 0.005407344544966578f, 0.3340998053353061f};
-	const Array3f c1 = Array3f{0.1050930431085774f, 1.404613529898575f, 1.384590162594685f};
-	const Array3f c2 = Array3f{-0.3308618287255563f, 0.214847559468213f, 0.09509516302823659f};
-	const Array3f c3 = Array3f{-4.634230498983486f, -5.799100973351585f, -19.33244095627987f};
-	const Array3f c4 = Array3f{6.228269936347081f, 14.17993336680509f, 56.69055260068105f};
-	const Array3f c5 = Array3f{4.776384997670288f, -13.74514537774601f, -65.35303263337234f};
-	const Array3f c6 = Array3f{-5.435455855934631f, 4.645852612178535f, 26.3124352495832f};
-	x = __saturatef(x);
-	return (c0+x*(c1+x*(c2+x*(c3+x*(c4+x*(c5+x*c6))))));
-}
-
 __global__ void overlay_depth_kernel(
 	Vector2i resolution,
 	float alpha,
@@ -485,22 +473,33 @@ __global__ void overlay_depth_kernel(
 	uint32_t idx = x + resolution.x() * y;
 	uint32_t srcidx = srcx + image_resolution.x() * srcy;
 
-    Array4f color;
+	Array4f color;
 	if (srcx >= image_resolution.x() || srcy >= image_resolution.y() || srcx < 0 || srcy < 0) {
-        color = {0.0f, 0.0f, 0.0f, 0.0f};
+		color = {0.0f, 0.0f, 0.0f, 0.0f};
 	} else {
-        float depth_value = depth[srcidx] * depth_scale;
-        Array3f c = colormap_turbo(depth_value);
-        color = {c[0], c[1], c[2], 1.0f};
+		float depth_value = depth[srcidx] * depth_scale;
+		Array3f c = colormap_turbo(depth_value);
+		color = {c[0], c[1], c[2], 1.0f};
 	}
 
-
 	Array4f prev_color;
 	surf2Dread((float4*)&prev_color, surface, x * sizeof(float4), y);
 	color = color * alpha + prev_color * (1.f-alpha);
 	surf2Dwrite(to_float4(color), surface, x * sizeof(float4), y);
 }
 
+__device__ Array3f colormap_viridis(float x) { // Maps a scalar to a 3-component value via a per-component degree-6 polynomial (presumably RGB -- usage is not visible in this hunk)
+	const Array3f c0 = Array3f{0.2777273272234177f, 0.005407344544966578f, 0.3340998053353061f};
+	const Array3f c1 = Array3f{0.1050930431085774f, 1.404613529898575f, 1.384590162594685f};
+	const Array3f c2 = Array3f{-0.3308618287255563f, 0.214847559468213f, 0.09509516302823659f};
+	const Array3f c3 = Array3f{-4.634230498983486f, -5.799100973351585f, -19.33244095627987f};
+	const Array3f c4 = Array3f{6.228269936347081f, 14.17993336680509f, 56.69055260068105f};
+	const Array3f c5 = Array3f{4.776384997670288f, -13.74514537774601f, -65.35303263337234f};
+	const Array3f c6 = Array3f{-5.435455855934631f, 4.645852612178535f, 26.3124352495832f};
+	x = __saturatef(x); // clamp the input to [0, 1] before evaluating the polynomial
+	return (c0+x*(c1+x*(c2+x*(c3+x*(c4+x*(c5+x*c6)))))); // Horner form of c0 + c1*x + c2*x^2 + ... + c6*x^6
+}
+
 __global__ void overlay_false_color_kernel(Vector2i resolution, Vector2i training_resolution, bool to_srgb, int fov_axis, cudaSurfaceObject_t surface, const float *error_map, Vector2i error_map_resolution, const float *average, float brightness, bool viridis) {
 	uint32_t x = threadIdx.x + blockDim.x * blockIdx.x;
 	uint32_t y = threadIdx.y + blockDim.y * blockIdx.y;
diff --git a/src/testbed.cu b/src/testbed.cu
index afb1b5ba8ab4880b4320cdfda559a5ee0dbf912b..478e1c5630538ea671a541bba3efde59b5c7562e 100644
--- a/src/testbed.cu
+++ b/src/testbed.cu
@@ -1273,9 +1273,7 @@ bool Testbed::keyboard_event() {
 	}
 	if (ImGui::IsKeyPressed('M')) {
 		m_single_view = !m_single_view;
-		if (m_single_view) {
-			set_visualized_dim(-1);
-		}
+		set_visualized_dim(-1);
 		reset_accumulation();
 	}
 	if (ImGui::IsKeyPressed('T')) {
@@ -2753,7 +2751,7 @@ void Testbed::render_frame(const Matrix<float, 3, 4>& camera_matrix0, const Matr
 		// Overlay the ground truth image if requested
 		if (m_render_ground_truth) {
 			auto const& metadata = m_nerf.training.dataset.metadata[m_nerf.training.view];
-			if(m_ground_truth_render_mode == EGroundTruthRenderMode::Shade) {
+			if (m_ground_truth_render_mode == EGroundTruthRenderMode::Shade) {
 				render_buffer.overlay_image(
 					m_ground_truth_alpha,
 					Array3f::Constant(m_exposure) + m_nerf.training.cam_exposure[m_nerf.training.view].variable(),
@@ -2767,18 +2765,17 @@ void Testbed::render_frame(const Matrix<float, 3, 4>& camera_matrix0, const Matr
 					Vector2f::Constant(0.5f),
 					m_stream.get()
 				);
-			}
-			else if(m_ground_truth_render_mode == EGroundTruthRenderMode::Depth && metadata.depth) {
-                render_buffer.overlay_depth(
-                    m_ground_truth_alpha,
-                    metadata.depth,
+			} else if (m_ground_truth_render_mode == EGroundTruthRenderMode::Depth && metadata.depth) {
+				render_buffer.overlay_depth(
+					m_ground_truth_alpha,
+					metadata.depth,
 					1.0f/m_nerf.training.dataset.scale,
-                    metadata.resolution,
-                    m_fov_axis,
-                    m_zoom,
+					metadata.resolution,
+					m_fov_axis,
+					m_zoom,
 					Vector2f::Constant(0.5f),
-                    m_stream.get()
-                ); 
+					m_stream.get()
+				);
 			}
 		}
 
diff --git a/src/testbed_nerf.cu b/src/testbed_nerf.cu
index 36ae7d1df03fb0924762e1ea296a95496478d2ff..b604496c49092d8391f34eb33cd9538db474d50a 100644
--- a/src/testbed_nerf.cu
+++ b/src/testbed_nerf.cu
@@ -572,7 +572,7 @@ __global__ void grid_to_bitfield(
 
 	float thresh = std::min(NERF_MIN_OPTICAL_THICKNESS(), *mean_density_ptr);
 
-	#pragma unroll
+	NGP_PRAGMA_UNROLL
 	for (uint8_t j = 0; j < 8; ++j) {
 		bits |= grid[i*8+j] > thresh ? ((uint8_t)1 << j) : 0;
 	}
@@ -589,7 +589,7 @@ __global__ void bitfield_max_pool(const uint32_t n_elements,
 
 	uint8_t bits = 0;
 
-	#pragma unroll
+	NGP_PRAGMA_UNROLL
 	for (uint8_t j = 0; j < 8; ++j) {
 		// If any bit is set in the previous level, set this
 		// level's bit. (Max pooling.)
@@ -1659,7 +1659,7 @@ __global__ void compute_cam_gradient_train_nerf(
 
 	if (cam_pos_gradient) {
 		// Atomically reduce the ray gradient into the xform gradient
-		#pragma unroll
+		NGP_PRAGMA_UNROLL
 		for (uint32_t j = 0; j < 3; ++j) {
 			atomicAdd(&cam_pos_gradient[img][j], ray_gradient.o[j] / xy_pdf);
 		}
@@ -1673,7 +1673,7 @@ __global__ void compute_cam_gradient_train_nerf(
 		Vector3f angle_axis = ray.d.cross(ray_gradient.d);
 
 		// Atomically reduce the ray gradient into the xform gradient
-		#pragma unroll
+		NGP_PRAGMA_UNROLL
 		for (uint32_t j = 0; j < 3; ++j) {
 			atomicAdd(&cam_rot_gradient[img][j], angle_axis[j] / xy_pdf);
 		}