Skip to content
Snippets Groups Projects
Commit 395e2437 authored by Thomas Müller's avatar Thomas Müller
Browse files

DLSS: use non-premultiplied alpha (doesn't completely remove jitter, but fixes black halos)

parent 9143822f
No related branches found
No related tags found
No related merge requests found
......@@ -534,7 +534,7 @@ __global__ void overlay_false_color_kernel(Vector2i resolution, Vector2i trainin
surf2Dwrite(to_float4(color), surface, x * sizeof(float4), y);
}
__global__ void tonemap_kernel(Vector2i resolution, float exposure, Array4f background_color, Array4f* accumulate_buffer, EColorSpace color_space, EColorSpace output_color_space, ETonemapCurve tonemap_curve, bool clamp_output_color, cudaSurfaceObject_t surface) {
__global__ void tonemap_kernel(Vector2i resolution, float exposure, Array4f background_color, Array4f* accumulate_buffer, EColorSpace color_space, EColorSpace output_color_space, ETonemapCurve tonemap_curve, bool clamp_output_color, bool unmultiply_alpha, cudaSurfaceObject_t surface) {
uint32_t x = threadIdx.x + blockDim.x * blockIdx.x;
uint32_t y = threadIdx.y + blockDim.y * blockIdx.y;
......@@ -556,6 +556,11 @@ __global__ void tonemap_kernel(Vector2i resolution, float exposure, Array4f back
color.w() += weight;
color.head<3>() = tonemap(color.head<3>(), Array3f::Constant(exposure), tonemap_curve, color_space, output_color_space);
if (unmultiply_alpha && color.w() > 0.0f) {
color.head<3>() /= color.w();
}
if (clamp_output_color) {
color = color.cwiseMax(0.0f).cwiseMin(1.0f);
}
......@@ -577,6 +582,12 @@ __global__ void dlss_splat_kernel(
float4 color;
surf2Dread(&color, dlss_surface, x * sizeof(float4), y);
// DLSS operates on non-premultiplied alpha, so multiply it back in
color.x *= color.w;
color.y *= color.w;
color.z *= color.w;
surf2Dwrite(color, surface, x * sizeof(float4), y);
}
......@@ -663,6 +674,7 @@ void CudaRenderBuffer::tonemap(float exposure, const Array4f& background_color,
output_color_space,
m_tonemap_curve,
m_dlss && output_color_space == EColorSpace::SRGB,
(bool)m_dlss, // DLSS seems to perform best with non-premultiplied alpha (probably trained on such data)
m_dlss ? m_dlss->frame() : surface()
);
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment