/*
* Copyright (c) 2020-2022, NVIDIA CORPORATION. All rights reserved.
*
* NVIDIA CORPORATION and its licensors retain all intellectual property
* and proprietary rights in and to this software, related documentation
* and any modifications thereto. Any use, reproduction, disclosure or
* distribution of this software and related documentation without an express
* license agreement from NVIDIA CORPORATION is strictly prohibited.
*/
/** @file testbed.h
* @author Thomas Müller & Alex Evans, NVIDIA
*/
#pragma once
#include <neural-graphics-primitives/adam_optimizer.h>
#include <neural-graphics-primitives/camera_path.h>
#include <neural-graphics-primitives/common.h>
#include <neural-graphics-primitives/discrete_distribution.h>
#include <neural-graphics-primitives/nerf.h>
#include <neural-graphics-primitives/nerf_loader.h>
#include <neural-graphics-primitives/render_buffer.h>
#include <neural-graphics-primitives/sdf.h>
#include <neural-graphics-primitives/shared_queue.h>
#include <neural-graphics-primitives/thread_pool.h>
#include <neural-graphics-primitives/trainable_buffer.cuh>
#ifdef NGP_GUI
# include <neural-graphics-primitives/openxr_hmd.h>
#endif
#include <tiny-cuda-nn/multi_stream.h>
#include <tiny-cuda-nn/random.h>
#include <json/json.hpp>
#ifdef NGP_PYTHON
# include <pybind11/pybind11.h>
# include <pybind11/numpy.h>
#endif
#include <thread>
struct GLFWwindow;
TCNN_NAMESPACE_BEGIN
template <typename T> class Loss;
template <typename T> class Optimizer;
template <typename T> class Encoding;
template <typename T, typename PARAMS_T> class Network;
template <typename T, typename PARAMS_T, typename COMPUTE_T> class Trainer;
template <uint32_t N_DIMS, uint32_t RANK, typename T> class TrainableBuffer;
TCNN_NAMESPACE_END
NGP_NAMESPACE_BEGIN
template <typename T> class NerfNetwork;
class TriangleOctree;
class TriangleBvh;
struct Triangle;
class GLTexture;
class Testbed {
public:
EIGEN_MAKE_ALIGNED_OPERATOR_NEW
Testbed(ETestbedMode mode = ETestbedMode::None);
~Testbed();
Testbed(ETestbedMode mode, const fs::path& data_path) : Testbed(mode) { load_training_data(data_path); }
Testbed(ETestbedMode mode, const fs::path& data_path, const fs::path& network_config_path) : Testbed(mode, data_path) { reload_network_from_file(network_config_path); }
Testbed(ETestbedMode mode, const fs::path& data_path, const nlohmann::json& network_config) : Testbed(mode, data_path) { reload_network_from_json(network_config); }
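// Illustrative construction (a sketch; the dataset path is hypothetical):
//
//   Testbed testbed{ETestbedMode::Nerf, "data/nerf/fox"};
//   testbed.init_window(1920, 1080);
//   while (testbed.frame()) {} // trains and renders until the window closes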
bool clear_tmp_dir();
void update_imgui_paths();
void load_training_data(const fs::path& path);
void reload_training_data();
void clear_training_data();
void set_mode(ETestbedMode mode);
using distance_fun_t = std::function<void(uint32_t, const Eigen::Vector3f*, float*, cudaStream_t)>;
using normals_fun_t = std::function<void(uint32_t, const Eigen::Vector3f*, Eigen::Vector3f*, cudaStream_t)>;
class SphereTracer {
public:
SphereTracer() {}
void init_rays_from_camera(
uint32_t spp,
const Eigen::Vector2i& resolution,
const Eigen::Vector2f& focal_length,
const Eigen::Matrix<float, 3, 4>& camera_matrix,
const Eigen::Vector2f& screen_center,
const Eigen::Vector3f& parallax_shift,
bool snap_to_pixel_centers,
const BoundingBox& aabb,
float floor_y,
float near_distance,
float plane_z,
float aperture_size,
const Foveation& foveation,
const Buffer2DView<const Eigen::Array4f>& envmap,
Eigen::Array4f* frame_buffer,
float* depth_buffer,
const Buffer2DView<const uint8_t>& hidden_area_mask,
const TriangleOctree* octree,
uint32_t n_octree_levels,
cudaStream_t stream
);
void init_rays_from_data(uint32_t n_elements, const RaysSdfSoa& data, cudaStream_t stream);
uint32_t trace_bvh(TriangleBvh* bvh, const Triangle* triangles, cudaStream_t stream);
uint32_t trace(
const distance_fun_t& distance_function,
float zero_offset,
float distance_scale,
float maximum_distance,
const BoundingBox& aabb,
const float floor_y,
const TriangleOctree* octree,
uint32_t n_octree_levels,
cudaStream_t stream
);
void enlarge(size_t n_elements, cudaStream_t stream);
RaysSdfSoa& rays_hit() { return m_rays_hit; }
RaysSdfSoa& rays_init() { return m_rays[0]; }
uint32_t n_rays_initialized() const { return m_n_rays_initialized; }
void set_trace_shadow_rays(bool val) { m_trace_shadow_rays = val; }
void set_shadow_sharpness(float val) { m_shadow_sharpness = val; }
private:
RaysSdfSoa m_rays[2];
RaysSdfSoa m_rays_hit;
uint32_t* m_hit_counter;
uint32_t* m_alive_counter;
uint32_t m_n_rays_initialized = 0;
float m_shadow_sharpness = 2048.f;
bool m_trace_shadow_rays = false;
tcnn::GPUMemoryArena::Allocation m_scratch_alloc;
};
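// A sketch of the sphere-tracing loop that trace() runs per ray (illustrative
// pseudocode, not the actual kernel):
//
//   float t = 0.f;
//   while (aabb.contains(o + t * d)) {
//       float dist = distance_function(o + t * d); // signed distance to surface
//       if (dist < maximum_distance) break;        // close enough: record a hit
//       t += dist;                                 // largest step that cannot overshoot
//   }
//
// zero_offset and distance_scale rescale the distance field before stepping;
// shadow rays (set_trace_shadow_rays) reuse the loop to compute soft shadows
// whose hardness is controlled by m_shadow_sharpness.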
class NerfTracer {
public:
NerfTracer() {}
void init_rays_from_camera(
uint32_t spp,
uint32_t padded_output_width,
uint32_t n_extra_dims,
const Eigen::Vector2i& resolution,
const Eigen::Vector2f& focal_length,
const Eigen::Matrix<float, 3, 4>& camera_matrix0,
const Eigen::Matrix<float, 3, 4>& camera_matrix1,
const Eigen::Vector4f& rolling_shutter,
const Eigen::Vector2f& screen_center,
const Eigen::Vector3f& parallax_shift,
bool snap_to_pixel_centers,
const BoundingBox& render_aabb,
const Eigen::Matrix3f& render_aabb_to_local,
float near_distance,
float plane_z,
float aperture_size,
const Foveation& foveation,
const Lens& lens,
const Buffer2DView<const Eigen::Array4f>& envmap,
const Buffer2DView<const Eigen::Vector2f>& distortion,
Eigen::Array4f* frame_buffer,
float* depth_buffer,
const Buffer2DView<const uint8_t>& hidden_area_mask,
const uint8_t* grid,
int show_accel,
float cone_angle_constant,
ERenderMode render_mode,
cudaStream_t stream
);
uint32_t trace(
NerfNetwork<precision_t>& network,
const BoundingBox& render_aabb,
const Eigen::Matrix3f& render_aabb_to_local,
const BoundingBox& train_aabb,
const Eigen::Vector2f& focal_length,
float cone_angle_constant,
const uint8_t* grid,
ERenderMode render_mode,
const Eigen::Matrix<float, 3, 4> &camera_matrix,
float depth_scale,
int visualized_layer,
int visualized_dim,
ENerfActivation rgb_activation,
ENerfActivation density_activation,
int render_no_attenuation,
int show_accel,
float min_transmittance,
float glow_y_cutoff,
int glow_mode,
const float* extra_dims_gpu,
cudaStream_t stream
);
void enlarge(size_t n_elements, uint32_t padded_output_width, uint32_t n_extra_dims, cudaStream_t stream);
RaysNerfSoa& rays_hit() { return m_rays_hit; }
RaysNerfSoa& rays_init() { return m_rays[0]; }
uint32_t n_rays_initialized() const { return m_n_rays_initialized; }
private:
RaysNerfSoa m_rays[2];
RaysNerfSoa m_rays_hit;
precision_t* m_network_output;
float* m_network_input;
uint32_t* m_hit_counter;
uint32_t* m_alive_counter;
uint32_t m_n_rays_initialized = 0;
tcnn::GPUMemoryArena::Allocation m_scratch_alloc;
};
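// The tracer double-buffers its ray state: each pass marches the rays in
// m_rays[i % 2] through the occupancy grid, compacts the survivors into
// m_rays[(i + 1) % 2], and appends finished rays to m_rays_hit. A sketch of
// the control flow (illustrative; kernel names are assumptions):
//
//   uint32_t i = 0, n_alive = n_rays_initialized();
//   while (n_alive > 0) {
//       march_and_shade(m_rays[i % 2], network, grid /* , ... */);
//       n_alive = compact(m_rays[i % 2], m_rays[(i + 1) % 2], m_rays_hit);
//       ++i;
//   }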
class FiniteDifferenceNormalsApproximator {
public:
void enlarge(uint32_t n_elements, cudaStream_t stream);
void normal(uint32_t n_elements, const distance_fun_t& distance_function, const Eigen::Vector3f* pos, Eigen::Vector3f* normal, float epsilon, cudaStream_t stream);
private:
Eigen::Vector3f* dx;
Eigen::Vector3f* dy;
Eigen::Vector3f* dz;
float* dist_dx_pos;
float* dist_dy_pos;
float* dist_dz_pos;
float* dist_dx_neg;
float* dist_dy_neg;
float* dist_dz_neg;
tcnn::GPUMemoryArena::Allocation m_scratch_alloc;
};
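// normal() samples the distance field at +/- epsilon along each axis and takes
// the central difference, which for a signed distance field approximates the
// surface normal:
//
//   n ~ normalize( f(p + e_x) - f(p - e_x),
//                  f(p + e_y) - f(p - e_y),
//                  f(p + e_z) - f(p - e_z) )   with e_* = epsilon * unit axis
//
// The dist_d*_pos / dist_d*_neg buffers above hold those six samples per point.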
struct LevelStats {
float mean() { return count ? (x / (float)count) : 0.f; }
float variance() { return count ? (xsquared - (x * x) / (float)count) / (float)count : 0.f; }
float sigma() { return sqrtf(variance()); }
float fraczero() { return (count + numzero) ? (float)numzero / (float)(count + numzero) : 0.f; }
float fracquant() { return count ? (float)numquant / (float)count : 0.f; }
float x;        // running sum of values
float xsquared; // running sum of squared values
float min;
float max;
int numzero;    // number of exact zeros (not included in count)
int numquant;   // number of quantized values
int count;      // number of non-zero values
};
// Due to mixed-precision training, small loss values can lead to
// underflow (round to zero) in the gradient computations. Hence,
// scale the loss (and thereby gradients) up by this factor and
// divide it out in the optimizer later on.
static constexpr float LOSS_SCALE = 128.0f;
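// A sketch of the scaling (illustrative; the real work happens inside the
// tcnn trainer/optimizer):
//
//   float scaled_loss = LOSS_SCALE * loss; // gradients grow by LOSS_SCALE,
//   backward(scaled_loss);                 // keeping them representable in fp16
//   param -= lr * grad / LOSS_SCALE;       // the optimizer divides it back out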
struct NetworkDims {
uint32_t n_input;
uint32_t n_output;
uint32_t n_pos;
};
NetworkDims network_dims_volume() const;
NetworkDims network_dims_sdf() const;
NetworkDims network_dims_image() const;
NetworkDims network_dims_nerf() const;
NetworkDims network_dims() const;
void train_volume(size_t target_batch_size, bool get_loss_scalar, cudaStream_t stream);
void training_prep_volume(uint32_t batch_size, cudaStream_t stream) {} // nothing to prepare for volume training
void load_volume(const fs::path& data_path);
class CudaDevice;
const float* get_inference_extra_dims(cudaStream_t stream) const;
void render_nerf(
cudaStream_t stream,
const CudaRenderBufferView& render_buffer,
NerfNetwork<precision_t>& nerf_network,
const uint8_t* density_grid_bitfield,
const Eigen::Vector2f& focal_length,
const Eigen::Matrix<float, 3, 4>& camera_matrix0,
const Eigen::Matrix<float, 3, 4>& camera_matrix1,
const Eigen::Vector4f& rolling_shutter,
const Eigen::Vector2f& screen_center,
const Foveation& foveation,
int visualized_dimension
);
void render_sdf(
cudaStream_t stream,
const distance_fun_t& distance_function,
const normals_fun_t& normals_function,
const CudaRenderBufferView& render_buffer,
const Eigen::Vector2f& focal_length,
const Eigen::Matrix<float, 3, 4>& camera_matrix,
const Eigen::Vector2f& screen_center,
const Foveation& foveation,
int visualized_dimension
);
void render_image(
cudaStream_t stream,
const CudaRenderBufferView& render_buffer,
const Eigen::Vector2f& focal_length,
const Eigen::Matrix<float, 3, 4>& camera_matrix,
const Eigen::Vector2f& screen_center,
const Foveation& foveation,
int visualized_dimension
);
void render_volume(
cudaStream_t stream,
const CudaRenderBufferView& render_buffer,
const Eigen::Vector2f& focal_length,
const Eigen::Matrix<float, 3, 4>& camera_matrix,
const Eigen::Vector2f& screen_center,
const Foveation& foveation
);
void render_frame(
cudaStream_t stream,
const Eigen::Matrix<float, 3, 4>& camera_matrix0,
const Eigen::Matrix<float, 3, 4>& camera_matrix1,
const Eigen::Matrix<float, 3, 4>& prev_camera_matrix,
const Eigen::Vector2f& screen_center,
const Eigen::Vector2f& relative_focal_length,
const Eigen::Vector4f& nerf_rolling_shutter,
const Foveation& foveation,
const Foveation& prev_foveation,
int visualized_dimension,
CudaRenderBuffer& render_buffer,
bool to_srgb = true,
CudaDevice* device = nullptr
);
void render_frame_main(
CudaDevice& device,
const Eigen::Matrix<float, 3, 4>& camera_matrix0,
const Eigen::Matrix<float, 3, 4>& camera_matrix1,
const Eigen::Vector2f& screen_center,
const Eigen::Vector2f& relative_focal_length,
const Eigen::Vector4f& nerf_rolling_shutter,
const Foveation& foveation,
int visualized_dimension
);
void render_frame_epilogue(
cudaStream_t stream,
const Eigen::Matrix<float, 3, 4>& camera_matrix0,
const Eigen::Matrix<float, 3, 4>& prev_camera_matrix,
const Eigen::Vector2f& screen_center,
const Eigen::Vector2f& relative_focal_length,
const Foveation& foveation,
const Foveation& prev_foveation,
CudaRenderBuffer& render_buffer,
bool to_srgb = true
);
void visualize_nerf_cameras(ImDrawList* list, const Eigen::Matrix<float, 4, 4>& world2proj);
fs::path find_network_config(const fs::path& network_config_path);
nlohmann::json load_network_config(const fs::path& network_config_path);
void reload_network_from_file(const fs::path& path = "");
void reload_network_from_json(const nlohmann::json& json, const std::string& config_base_path = ""); // config_base_path is needed so that if the passed-in json uses the 'parent' feature, we know where to look. Be sure to pass a filename, or, if a directory, end it with a trailing slash.
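// An illustrative config using the 'parent' feature (field values are made up):
//
//   {
//       "parent": "base.json",        // resolved relative to config_base_path
//       "encoding": { "n_levels": 8 } // overrides applied on top of the parent
//   }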
void reset_accumulation(bool due_to_camera_movement = false, bool immediate_redraw = true);
void redraw_next_frame() {
m_render_skip_due_to_lack_of_camera_movement_counter = 0;
}
bool reprojection_available() { return m_dlss; }
static ELossType string_to_loss_type(const std::string& str);
void reset_network(bool clear_density_grid = true);
void create_empty_nerf_dataset(size_t n_images, int aabb_scale = 1, bool is_hdr = false);
void load_nerf(const fs::path& data_path);
void load_nerf_post();
void load_mesh(const fs::path& data_path);
void set_exposure(float exposure) { m_exposure = exposure; }
void set_max_level(float maxlevel);
void set_min_level(float minlevel);
void set_visualized_dim(int dim);
void set_visualized_layer(int layer);
void translate_camera(const Eigen::Vector3f& rel, const Eigen::Matrix3f& rot, bool allow_up_down = true);
Eigen::Matrix3f rotation_from_angles(const Eigen::Vector2f& angles) const;
void mouse_drag();
void mouse_wheel();
void load_file(const fs::path& path);
void set_nerf_camera_matrix(const Eigen::Matrix<float, 3, 4>& cam);
Eigen::Vector3f look_at() const;
void set_look_at(const Eigen::Vector3f& pos);
float scale() const { return m_scale; }
void set_scale(float scale);
Eigen::Vector3f view_pos() const { return m_camera.col(3); }
Eigen::Vector3f view_dir() const { return m_camera.col(2); }
Eigen::Vector3f view_up() const { return m_camera.col(1); }
Eigen::Vector3f view_side() const { return m_camera.col(0); }
void set_view_dir(const Eigen::Vector3f& dir);
void first_training_view();
void last_training_view();
void previous_training_view();
void next_training_view();
void set_camera_to_training_view(int trainview);
void reset_camera();
bool keyboard_event();
void generate_training_samples_sdf(Eigen::Vector3f* positions, float* distances, uint32_t n_to_generate, cudaStream_t stream, bool uniform_only);
void update_density_grid_nerf(float decay, uint32_t n_uniform_density_grid_samples, uint32_t n_nonuniform_density_grid_samples, cudaStream_t stream);
void update_density_grid_mean_and_bitfield(cudaStream_t stream);
void mark_density_grid_in_sphere_empty(const Eigen::Vector3f& pos, float radius, cudaStream_t stream);
struct NerfCounters {
tcnn::GPUMemory<uint32_t> numsteps_counter; // number of steps each ray took
tcnn::GPUMemory<uint32_t> numsteps_counter_compacted; // number of steps each ray took after compaction
tcnn::GPUMemory<float> loss;
uint32_t rays_per_batch = 1<<12;
uint32_t n_rays_total = 0;
uint32_t measured_batch_size = 0;
uint32_t measured_batch_size_before_compaction = 0;
void prepare_for_training_steps(cudaStream_t stream);
float update_after_training(uint32_t target_batch_size, bool get_loss_scalar, cudaStream_t stream);
};
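// rays_per_batch is adapted after each step so that the measured number of
// compacted samples tracks the requested batch size; roughly (a sketch, not
// the exact code):
//
//   rays_per_batch = (uint32_t)((float)rays_per_batch * (float)target_batch_size / (float)measured_batch_size);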
void train_nerf(uint32_t target_batch_size, bool get_loss_scalar, cudaStream_t stream);
void train_nerf_step(uint32_t target_batch_size, NerfCounters& counters, cudaStream_t stream);
void train_sdf(size_t target_batch_size, bool get_loss_scalar, cudaStream_t stream);
void train_image(size_t target_batch_size, bool get_loss_scalar, cudaStream_t stream);
void set_train(bool mtrain);
template <typename T>
void dump_parameters_as_images(const T* params, const std::string& filename_base);
void prepare_next_camera_path_frame();
void imgui();
void training_prep_nerf(uint32_t batch_size, cudaStream_t stream);
void training_prep_sdf(uint32_t batch_size, cudaStream_t stream);
void training_prep_image(uint32_t batch_size, cudaStream_t stream) {} // nothing to prepare for image training
void train(uint32_t batch_size);
Eigen::Vector2f calc_focal_length(const Eigen::Vector2i& resolution, const Eigen::Vector2f& relative_focal_length, int fov_axis, float zoom) const;
Eigen::Vector2f render_screen_center(const Eigen::Vector2f& screen_center) const;
void optimise_mesh_step(uint32_t n_steps);
void compute_mesh_vertex_colors();
tcnn::GPUMemory<float> get_density_on_grid(Eigen::Vector3i res3d, const BoundingBox& aabb, const Eigen::Matrix3f& render_aabb_to_local); // network version (nerf or sdf)
tcnn::GPUMemory<float> get_sdf_gt_on_grid(Eigen::Vector3i res3d, const BoundingBox& aabb, const Eigen::Matrix3f& render_aabb_to_local); // sdf gt version (sdf only)
tcnn::GPUMemory<Eigen::Array4f> get_rgba_on_grid(Eigen::Vector3i res3d, Eigen::Vector3f ray_dir, bool voxel_centers, float depth, bool density_as_alpha = false);
int marching_cubes(Eigen::Vector3i res3d, const BoundingBox& render_aabb, const Eigen::Matrix3f& render_aabb_to_local, float thresh);
float get_depth_from_renderbuffer(const CudaRenderBuffer& render_buffer, const Eigen::Vector2f& uv);
Eigen::Vector3f get_3d_pos_from_pixel(const CudaRenderBuffer& render_buffer, const Eigen::Vector2i& focus_pixel);
void autofocus();
size_t n_params();
size_t first_encoder_param();
size_t n_encoding_params();
#ifdef NGP_PYTHON
pybind11::dict compute_marching_cubes_mesh(Eigen::Vector3i res3d = Eigen::Vector3i::Constant(128), BoundingBox aabb = BoundingBox{Eigen::Vector3f::Zero(), Eigen::Vector3f::Ones()}, float thresh = 2.5f);
pybind11::array_t<float> render_to_cpu(int width, int height, int spp, bool linear, float start_t, float end_t, float fps, float shutter_fraction);
pybind11::array_t<float> screenshot(bool linear) const;
void override_sdf_training_data(pybind11::array_t<float> points, pybind11::array_t<float> distances);
#endif
double calculate_iou(uint32_t n_samples = 128*1024*1024, float scale_existing_results_factor = 0.0f, bool blocking = true, bool force_use_octree = true);
void draw_visualizations(ImDrawList* list, const Eigen::Matrix<float, 3, 4>& camera_matrix);
void train_and_render(bool skip_rendering);
fs::path training_data_path() const;
void init_window(int resw, int resh, bool hidden = false, bool second_window = false);
void destroy_window();
void init_vr();
void update_vr_performance_settings();
void apply_camera_smoothing(float elapsed_ms);
int find_best_training_view(int default_view);
bool begin_frame();
void handle_user_input();
Eigen::Vector3f vr_to_world(const Eigen::Vector3f& pos) const;
void begin_vr_frame_and_handle_vr_input();
void gather_histograms();
void draw_gui();
bool frame();
bool want_repl();
void load_image(const fs::path& data_path);
void load_exr_image(const fs::path& data_path);
void load_stbi_image(const fs::path& data_path);
void load_binary_image(const fs::path& data_path);
uint32_t n_dimensions_to_visualize() const;
float fov() const;
void set_fov(float val);
Eigen::Vector2f fov_xy() const;
void set_fov_xy(const Eigen::Vector2f& val);
void save_snapshot(const fs::path& path, bool include_optimizer_state, bool compress);
void load_snapshot(const fs::path& path);
CameraKeyframe copy_camera_to_keyframe() const;
void set_camera_from_keyframe(const CameraKeyframe& k);
void set_camera_from_time(float t);
void update_loss_graph();
void load_camera_path(const fs::path& path);
bool loop_animation();
void set_loop_animation(bool value);
float compute_image_mse(bool quantize_to_byte);
void compute_and_save_marching_cubes_mesh(const char* filename, Eigen::Vector3i res3d = Eigen::Vector3i::Constant(128), BoundingBox aabb = {}, float thresh = 2.5f, bool unwrap_it = false);
Eigen::Vector3i compute_and_save_png_slices(const char* filename, int res, BoundingBox aabb = {}, float thresh = 2.5f, float density_range = 4.f, bool flip_y_and_z_axes = false);
fs::path root_dir();
////////////////////////////////////////////////////////////////
// marching cubes related state
struct MeshState {
float thresh = 2.5f;
int res = 256;
bool unwrap = false;
float smooth_amount = 2048.f;
float density_amount = 128.f;
float inflate_amount = 1.f;
bool optimize_mesh = false;
tcnn::GPUMemory<Eigen::Vector3f> verts;
tcnn::GPUMemory<Eigen::Vector3f> vert_normals;
tcnn::GPUMemory<Eigen::Vector3f> vert_colors;
tcnn::GPUMemory<Eigen::Vector4f> verts_smoothed; // homogeneous
tcnn::GPUMemory<uint32_t> indices;
tcnn::GPUMemory<Eigen::Vector3f> verts_gradient;
std::shared_ptr<TrainableBuffer<3, 1, float>> trainable_verts;
std::shared_ptr<tcnn::Optimizer<float>> verts_optimizer;
void clear() {
indices={};
verts={};
vert_normals={};
vert_colors={};
verts_smoothed={};
verts_gradient={};
trainable_verts=nullptr;
verts_optimizer=nullptr;
}
};
MeshState m_mesh;
bool m_want_repl = false;
bool m_render_window = false;
bool m_gather_histograms = false;
bool m_include_optimizer_state_in_snapshot = false;
bool m_compress_snapshot = true;
bool m_render_ground_truth = false;
EGroundTruthRenderMode m_ground_truth_render_mode = EGroundTruthRenderMode::Shade;
float m_ground_truth_alpha = 1.0f;
bool m_train = false;
bool m_training_data_available = false;
bool m_render = true;
int m_max_spp = 0;
ETestbedMode m_testbed_mode = ETestbedMode::None;
bool m_max_level_rand_training = false;
// Rendering stuff
Eigen::Vector2i m_window_res = Eigen::Vector2i::Constant(0);
bool m_dynamic_res = true;
float m_dynamic_res_target_fps = 20.0f;
int m_fixed_res_factor = 8;
float m_scale = 1.0;
float m_aperture_size = 0.0f;
Eigen::Vector2f m_relative_focal_length = Eigen::Vector2f::Ones();
uint32_t m_fov_axis = 1;
float m_zoom = 1.f; // 2d zoom factor (for insets?)
Eigen::Vector2f m_screen_center = Eigen::Vector2f::Constant(0.5f); // center of 2d zoom
float m_ndc_znear = 1.0f / 32.0f;
float m_ndc_zfar = 128.0f;
Eigen::Matrix<float, 3, 4> m_camera = Eigen::Matrix<float, 3, 4>::Zero();
Eigen::Matrix<float, 3, 4> m_smoothed_camera = Eigen::Matrix<float, 3, 4>::Zero();
size_t m_render_skip_due_to_lack_of_camera_movement_counter = 0;
bool m_fps_camera = false;
bool m_camera_smoothing = false;
bool m_autofocus = false;
Eigen::Vector3f m_autofocus_target = Eigen::Vector3f::Constant(0.5f);
CameraPath m_camera_path = {};
Eigen::Vector3f m_up_dir = {0.0f, 1.0f, 0.0f};
Eigen::Vector3f m_sun_dir = Eigen::Vector3f::Ones().normalized();
float m_bounding_radius = 1;
float m_exposure = 0.f;
ERenderMode m_render_mode = ERenderMode::Shade;
EMeshRenderMode m_mesh_render_mode = EMeshRenderMode::VertexNormals;
uint32_t m_seed = 1337;
#ifdef NGP_GUI
GLFWwindow* m_glfw_window = nullptr;
struct SecondWindow {
GLFWwindow* window = nullptr;
GLuint program = 0;
GLuint vao = 0, vbo = 0;
void draw(GLuint texture);
} m_second_window;
float m_drag_depth = 1.0f;
// The VAO will be empty, but we need a valid one for attribute-less rendering
GLuint m_blit_vao = 0;
GLuint m_blit_program = 0;
void init_opengl_shaders();
void blit_texture(const Foveation& foveation, GLint rgba_texture, GLint rgba_filter_mode, GLint depth_texture, GLint framebuffer, const Eigen::Vector2i& offset, const Eigen::Vector2i& resolution);
void create_second_window();
std::unique_ptr<OpenXRHMD> m_hmd;
OpenXRHMD::FrameInfoPtr m_vr_frame_info;
bool m_vr_use_depth_reproject = false;
bool m_vr_use_hidden_area_mask = true;
void set_n_views(size_t n_views);
std::function<bool()> m_keyboard_event_callback;
std::shared_ptr<GLTexture> m_pip_render_texture;
std::vector<std::shared_ptr<GLTexture>> m_rgba_render_textures;
std::vector<std::shared_ptr<GLTexture>> m_depth_render_textures;
#endif
std::unique_ptr<CudaRenderBuffer> m_pip_render_buffer;
SharedQueue<std::unique_ptr<ICallable>> m_task_queue;
void redraw_gui_next_frame() {
m_gui_redraw = true;
}
bool m_gui_redraw = true;
struct Nerf {
struct Training {
NerfDataset dataset;
int n_images_for_training = 0; // how many images to train from, as a high watermark compared to the dataset size
int n_images_for_training_prev = 0; // how many images we saw last time we updated the density grid
struct ErrorMap {
tcnn::GPUMemory<float> data;
tcnn::GPUMemory<float> cdf_x_cond_y;
tcnn::GPUMemory<float> cdf_y;
tcnn::GPUMemory<float> cdf_img;
std::vector<float> pmf_img_cpu;
Eigen::Vector2i resolution = {16, 16};
Eigen::Vector2i cdf_resolution = {16, 16};
bool is_cdf_valid = false;
} error_map;
std::vector<TrainingXForm> transforms;
tcnn::GPUMemory<TrainingXForm> transforms_gpu;
std::vector<Eigen::Vector3f> cam_pos_gradient;
tcnn::GPUMemory<Eigen::Vector3f> cam_pos_gradient_gpu;
std::vector<Eigen::Vector3f> cam_rot_gradient;
tcnn::GPUMemory<Eigen::Vector3f> cam_rot_gradient_gpu;
tcnn::GPUMemory<Eigen::Array3f> cam_exposure_gpu;
std::vector<Eigen::Array3f> cam_exposure_gradient;
tcnn::GPUMemory<Eigen::Array3f> cam_exposure_gradient_gpu;
Eigen::Vector2f cam_focal_length_gradient = Eigen::Vector2f::Zero();
tcnn::GPUMemory<Eigen::Vector2f> cam_focal_length_gradient_gpu;
std::vector<AdamOptimizer<Eigen::Array3f>> cam_exposure;
std::vector<AdamOptimizer<Eigen::Vector3f>> cam_pos_offset;
std::vector<RotationAdamOptimizer> cam_rot_offset;
AdamOptimizer<Eigen::Vector2f> cam_focal_length_offset = AdamOptimizer<Eigen::Vector2f>(0.f);
tcnn::GPUMemory<float> extra_dims_gpu; // if the model demands a latent code per training image, we put them in here.
tcnn::GPUMemory<float> extra_dims_gradient_gpu;
std::vector<AdamOptimizer<Eigen::ArrayXf>> extra_dims_opt;
void reset_extra_dims(default_rng_t &rng);
float extrinsic_l2_reg = 1e-4f;
float extrinsic_learning_rate = 1e-3f;
float intrinsic_l2_reg = 1e-4f;
float exposure_l2_reg = 0.0f;
NerfCounters counters_rgb;
bool random_bg_color = true;
bool grey_loss = false;
bool linear_colors = false;
ELossType loss_type = ELossType::L2;
ELossType depth_loss_type = ELossType::L1;
bool snap_to_pixel_centers = true;
bool train_envmap = false;
bool optimize_distortion = false;
bool optimize_extrinsics = false;
bool optimize_extra_dims = false;
bool optimize_focal_length = false;
bool optimize_exposure = false;
bool render_error_overlay = false;
float error_overlay_brightness = 0.125f;
uint32_t n_steps_between_cam_updates = 16;
uint32_t n_steps_since_cam_update = 0;
bool sample_focal_plane_proportional_to_error = false;
bool sample_image_proportional_to_error = false;
bool include_sharpness_in_error = false;
uint32_t n_steps_between_error_map_updates = 128;
uint32_t n_steps_since_error_map_update = 0;
uint32_t n_rays_since_error_map_update = 0;
float near_distance = 0.1f;
float density_grid_decay = 0.95f;
default_rng_t density_grid_rng;
int view = 0;
float depth_supervision_lambda = 0.f;
tcnn::GPUMemory<float> sharpness_grid;
void set_camera_intrinsics(int frame_idx, float fx, float fy = 0.0f, float cx = -0.5f, float cy = -0.5f, float k1 = 0.0f, float k2 = 0.0f, float p1 = 0.0f, float p2 = 0.0f, float k3 = 0.0f, float k4 = 0.0f, bool is_fisheye = false);
void set_camera_extrinsics_rolling_shutter(int frame_idx, Eigen::Matrix<float, 3, 4> camera_to_world_start, Eigen::Matrix<float, 3, 4> camera_to_world_end, const Eigen::Vector4f& rolling_shutter, bool convert_to_ngp = true);
void set_camera_extrinsics(int frame_idx, Eigen::Matrix<float, 3, 4> camera_to_world, bool convert_to_ngp = true);
Eigen::Matrix<float, 3, 4> get_camera_extrinsics(int frame_idx);
void update_transforms(int first = 0, int last = -1);
#ifdef NGP_PYTHON
void set_image(int frame_idx, pybind11::array_t<float> img, pybind11::array_t<float> depth_img, float depth_scale);
#endif
void reset_camera_extrinsics();
void export_camera_extrinsics(const fs::path& path, bool export_extrinsics_in_quat_format = true);
} training = {};
tcnn::GPUMemory<float> density_grid; // NERF_GRIDSIZE()^3 grid of EMA smoothed densities from the network
tcnn::GPUMemory<uint8_t> density_grid_bitfield;
uint8_t* get_density_grid_bitfield_mip(uint32_t mip);
tcnn::GPUMemory<float> density_grid_mean;
uint32_t density_grid_ema_step = 0;
uint32_t max_cascade = 0;
ENerfActivation rgb_activation = ENerfActivation::Exponential;
ENerfActivation density_activation = ENerfActivation::Exponential;
Eigen::Vector3f light_dir = Eigen::Vector3f::Constant(0.5f);
uint32_t extra_dim_idx_for_inference = 0; // which training image's latent code should be presented at inference time
int show_accel = -1;
float sharpen = 0.f;
float cone_angle_constant = 1.f/256.f;
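// Step size during ray marching grows with distance t along the ray, roughly
// dt = clamp(t * cone_angle_constant, dt_min, dt_max) (dt_min/dt_max are
// assumptions here), so far-away samples are spaced more coarsely.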
bool visualize_cameras = false;
bool render_with_lens_distortion = false;
Lens render_lens = {};
float render_min_transmittance = 0.01f;
bool render_no_attenuation = false;
bool train_no_attenuation = false;
float glow_y_cutoff = 0.f;
int glow_mode = 0;
} m_nerf;
struct Sdf {
float shadow_sharpness = 2048.0f;
float maximum_distance = 0.00005f;
float fd_normals_epsilon = 0.0005f;
ESDFGroundTruthMode groundtruth_mode = ESDFGroundTruthMode::RaytracedMesh;
BRDFParams brdf;
// Mesh data
EMeshSdfMode mesh_sdf_mode = EMeshSdfMode::Raystab;
float mesh_scale;
tcnn::GPUMemory<Triangle> triangles_gpu;
std::vector<Triangle> triangles_cpu;
std::vector<float> triangle_weights;
DiscreteDistribution triangle_distribution;
tcnn::GPUMemory<float> triangle_cdf;
std::shared_ptr<TriangleBvh> triangle_bvh; // conceptually uniquely owned; shared_ptr for ease of copying
bool uses_takikawa_encoding = false;
bool use_triangle_octree = false;
int octree_depth_target = 0; // we duplicate this state so that you can waggle the slider without triggering it immediately
std::shared_ptr<TriangleOctree> triangle_octree;
tcnn::GPUMemory<float> brick_data;
uint32_t brick_res = 0;
uint32_t brick_level = 10;
uint32_t brick_quantise_bits = 0;
bool brick_smooth_normals = false; // if true, then we space the central difference taps by one voxel
bool analytic_normals = false;
float zero_offset = 0;
float distance_scale = 0.95f;
double iou = 0.0;
float iou_decay = 0.0f;
bool calculate_iou_online = false;
tcnn::GPUMemory<uint32_t> iou_counter;
struct Training {
size_t idx = 0;
size_t size = 0;
size_t max_size = 1 << 24;
bool did_generate_more_training_data = false;
bool generate_sdf_data_online = true;
float surface_offset_scale = 1.0f;
tcnn::GPUMemory<Eigen::Vector3f> positions;
tcnn::GPUMemory<Eigen::Vector3f> positions_shuffled;
tcnn::GPUMemory<float> distances;
tcnn::GPUMemory<float> distances_shuffled;
tcnn::GPUMemory<Eigen::Vector3f> perturbations;
} training = {};
} m_sdf;
enum EDataType {
Float,
Half,
};
struct Image {
tcnn::GPUMemory<char> data;
EDataType type = EDataType::Float;
Eigen::Vector2i resolution = Eigen::Vector2i::Constant(0);
tcnn::GPUMemory<Eigen::Vector2f> render_coords;
tcnn::GPUMemory<Eigen::Array3f> render_out;
struct Training {
tcnn::GPUMemory<float> positions_tmp;
tcnn::GPUMemory<Eigen::Vector2f> positions;
tcnn::GPUMemory<Eigen::Array3f> targets;
bool snap_to_pixel_centers = true;
bool linear_colors = false;
} training = {};
ERandomMode random_mode = ERandomMode::Stratified;
} m_image;
struct VolPayload {
Eigen::Vector3f dir;
Eigen::Array4f col;
uint32_t pixidx;
};
struct Volume {
float albedo = 0.95f;
float scattering = 0.f;
float inv_distance_scale = 100.f;
tcnn::GPUMemory<char> nanovdb_grid;
tcnn::GPUMemory<uint8_t> bitgrid;
float global_majorant = 1.f;
Eigen::Vector3f world2index_offset = {0, 0, 0};
float world2index_scale = 1.f;
struct Training {
tcnn::GPUMemory<Eigen::Vector3f> positions = {};
tcnn::GPUMemory<Eigen::Array4f> targets = {};
} training = {};
// tracing state
tcnn::GPUMemory<Eigen::Vector3f> pos[2] = {};
tcnn::GPUMemory<VolPayload> payload[2] = {};
tcnn::GPUMemory<uint32_t> hit_counter = {};
tcnn::GPUMemory<Eigen::Array4f> radiance_and_density;
} m_volume;
float m_camera_velocity = 1.0f;
EColorSpace m_color_space = EColorSpace::Linear;
ETonemapCurve m_tonemap_curve = ETonemapCurve::Identity;
bool m_dlss = false;
std::shared_ptr<IDlssProvider> m_dlss_provider;
float m_dlss_sharpening = 0.0f;
// 3D stuff
float m_render_near_distance = 0.0f;
float m_slice_plane_z = 0.0f;
bool m_floor_enable = false;
inline float get_floor_y() const { return m_floor_enable ? m_aabb.min.y() + 0.001f : -10000.f; }
BoundingBox m_raw_aabb;
BoundingBox m_aabb;
BoundingBox m_render_aabb;
Eigen::Matrix3f m_render_aabb_to_local;
Eigen::Matrix<float, 3, 4> crop_box(bool nerf_space) const;
std::vector<Eigen::Vector3f> crop_box_corners(bool nerf_space) const;
void set_crop_box(Eigen::Matrix<float, 3, 4> m, bool nerf_space);
// Rendering/UI bookkeeping
Ema m_training_prep_ms = {EEmaType::Time, 100};
Ema m_training_ms = {EEmaType::Time, 100};
Ema m_render_ms = {EEmaType::Time, 100};
// The frame contains everything, i.e. training + rendering + GUI and buffer swapping
Ema m_frame_ms = {EEmaType::Time, 100};
std::chrono::time_point<std::chrono::steady_clock> m_last_frame_time_point;
std::chrono::time_point<std::chrono::steady_clock> m_last_gui_draw_time_point;
std::chrono::time_point<std::chrono::steady_clock> m_training_start_time_point;
Eigen::Array4f m_background_color = {0.0f, 0.0f, 0.0f, 1.0f};
bool m_vsync = false;
bool m_render_transparency_as_checkerboard = false;
// Visualization of neuron activations
int m_visualized_dimension = -1;
int m_visualized_layer = 0;
struct View {
std::shared_ptr<CudaRenderBuffer> render_buffer;
Eigen::Vector2i full_resolution = {1, 1};
int visualized_dimension = 0;
Eigen::Matrix<float, 3, 4> camera0 = Eigen::Matrix<float, 3, 4>::Zero();
Eigen::Matrix<float, 3, 4> camera1 = Eigen::Matrix<float, 3, 4>::Zero();
Eigen::Matrix<float, 3, 4> prev_camera = Eigen::Matrix<float, 3, 4>::Zero();
Foveation foveation;
Foveation prev_foveation;
Eigen::Vector2f relative_focal_length;
Eigen::Vector2f screen_center;
CudaDevice* device = nullptr;
};
std::vector<View> m_views;
Eigen::Vector2i m_n_views = {1, 1};
bool m_single_view = true;
float m_picture_in_picture_res = 0.f; // if non-zero, requests a small second picture :)
struct ImGuiVars {
static const uint32_t MAX_PATH_LEN = 1024;
bool enabled = true; // tab to toggle
char cam_path_path[MAX_PATH_LEN] = "cam.json";
char extrinsics_path[MAX_PATH_LEN] = "extrinsics.json";
char mesh_path[MAX_PATH_LEN] = "base.obj";
char snapshot_path[MAX_PATH_LEN] = "base.ingp";
char video_path[MAX_PATH_LEN] = "video.mp4";
} m_imgui;
fs::path m_root_dir = "";
bool m_visualize_unit_cube = false;
bool m_edit_render_aabb = false;
bool m_edit_world_transform = true;
bool m_snap_to_pixel_centers = false;
Eigen::Vector3f m_parallax_shift = {0.0f, 0.0f, 0.0f}; // to shift the viewer's origin by some amount in camera space
// CUDA stuff
tcnn::StreamAndEvent m_stream;
// Hashgrid encoding analysis
float m_quant_percent = 0.f;
std::vector<LevelStats> m_level_stats;
std::vector<LevelStats> m_first_layer_column_stats;
int m_num_levels = 0;
int m_histo_level = 0; // collect a histogram for this level
uint32_t m_base_grid_resolution;
float m_per_level_scale;
float m_histo[257] = {};
float m_histo_scale = 1.f;
uint32_t m_training_step = 0;
uint32_t m_training_batch_size = 1 << 18;
Ema m_loss_scalar = {EEmaType::Time, 100};
std::vector<float> m_loss_graph = std::vector<float>(256, 0.0f);
size_t m_loss_graph_samples = 0;
bool m_train_encoding = true;
bool m_train_network = true;
class CudaDevice {
public:
struct Data {
tcnn::GPUMemory<uint8_t> density_grid_bitfield;
uint8_t* density_grid_bitfield_ptr;
tcnn::GPUMemory<precision_t> params;
std::shared_ptr<Buffer2D<uint8_t>> hidden_area_mask;
};
CudaDevice(int id, bool is_primary) : m_id{id}, m_is_primary{is_primary} {
auto guard = device_guard();
m_stream = std::make_unique<tcnn::StreamAndEvent>();
m_data = std::make_unique<Data>();
m_render_worker = std::make_unique<ThreadPool>(is_primary ? 0u : 1u);
}
CudaDevice(const CudaDevice&) = delete;
CudaDevice& operator=(const CudaDevice&) = delete;
CudaDevice(CudaDevice&&) = default;
CudaDevice& operator=(CudaDevice&&) = default;
tcnn::ScopeGuard device_guard() {
int prev_device = tcnn::cuda_device();
if (prev_device == m_id) {
return {};
}
tcnn::set_cuda_device(m_id);
return tcnn::ScopeGuard{[prev_device]() {
tcnn::set_cuda_device(prev_device);
}};
}
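// Illustrative use (names are hypothetical): make this device current for the
// enclosing scope only.
//
//   {
//       auto guard = device.device_guard(); // switches CUDA device if necessary
//       launch_work(device.stream());
//   } // the guard restores the previously active device here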
int id() const {
return m_id;
}
bool is_primary() const {
return m_is_primary;
}
std::string name() const {
return tcnn::cuda_device_name(m_id);
}
int compute_capability() const {
return tcnn::cuda_compute_capability(m_id);
}
cudaStream_t stream() const {
return m_stream->get();
}
void wait_for(cudaStream_t stream) const {
CUDA_CHECK_THROW(cudaEventRecord(m_primary_device_event.event, stream));
m_stream->wait_for(m_primary_device_event.event);
}
void signal(cudaStream_t stream) const {
m_stream->signal(stream);
}
const CudaRenderBufferView& render_buffer_view() const {
return m_render_buffer_view;
}
void set_render_buffer_view(const CudaRenderBufferView& view) {
m_render_buffer_view = view;
}
Data& data() const {
return *m_data;
}
bool dirty() const {
return m_dirty;
}
void set_dirty(bool value) {
m_dirty = value;
}
void set_network(const std::shared_ptr<tcnn::Network<float, precision_t>>& network) {
m_network = network;
}
void set_nerf_network(const std::shared_ptr<NerfNetwork<precision_t>>& nerf_network);
const std::shared_ptr<tcnn::Network<float, precision_t>>& network() const {
return m_network;
}
const std::shared_ptr<NerfNetwork<precision_t>>& nerf_network() const {
return m_nerf_network;
}
void clear() {
m_data = std::make_unique<Data>();
m_render_buffer_view = {};
m_network = {};
m_nerf_network = {};
set_dirty(true);
}
template <class F>
auto enqueue_task(F&& f) -> std::future<std::result_of_t<F()>> {
if (is_primary()) {
return std::async(std::launch::deferred, std::forward<F>(f));
} else {
return m_render_worker->enqueue_task(std::forward<F>(f));
}
}
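// On the primary device the task is deferred and runs on the waiting thread;
// on auxiliary devices it runs on that device's worker thread. Illustrative
// use (render_chunk is hypothetical):
//
//   auto future = device.enqueue_task([&] { render_chunk(device); });
//   future.get(); // runs the deferred task or blocks until the worker finishes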
private:
int m_id;
bool m_is_primary;
std::unique_ptr<tcnn::StreamAndEvent> m_stream;
struct Event {
Event() {
CUDA_CHECK_THROW(cudaEventCreate(&event));
}
~Event() {
cudaEventDestroy(event);
}
Event(const Event&) = delete;
Event& operator=(const Event&) = delete;
Event(Event&& other) { *this = std::move(other); }
Event& operator=(Event&& other) {
std::swap(event, other.event);
return *this;
}
cudaEvent_t event = {};
};
Event m_primary_device_event;
std::unique_ptr<Data> m_data;
CudaRenderBufferView m_render_buffer_view = {};
std::shared_ptr<tcnn::Network<float, precision_t>> m_network;
std::shared_ptr<NerfNetwork<precision_t>> m_nerf_network;
bool m_dirty = true;
std::unique_ptr<ThreadPool> m_render_worker;
};
void sync_device(CudaRenderBuffer& render_buffer, CudaDevice& device);
tcnn::ScopeGuard use_device(cudaStream_t stream, CudaRenderBuffer& render_buffer, CudaDevice& device);
void set_all_devices_dirty();
std::vector<CudaDevice> m_devices;
CudaDevice& primary_device() {
return m_devices.front();
}
ThreadPool m_thread_pool;
std::vector<std::future<void>> m_render_futures;
bool m_use_aux_devices = false;
bool m_foveated_rendering = false;
float m_foveated_rendering_max_scaling = 2.0f;
fs::path m_data_path;
fs::path m_network_config_path = "base.json";
nlohmann::json m_network_config;
default_rng_t m_rng;
CudaRenderBuffer m_windowless_render_surface{std::make_shared<CudaSurface2D>()};
uint32_t network_width(uint32_t layer) const;
uint32_t network_num_forward_activations() const;
// Network & training stuff
std::shared_ptr<tcnn::Loss<precision_t>> m_loss;
std::shared_ptr<tcnn::Optimizer<precision_t>> m_optimizer;
std::shared_ptr<tcnn::Encoding<precision_t>> m_encoding;
std::shared_ptr<tcnn::Network<float, precision_t>> m_network;
std::shared_ptr<tcnn::Trainer<float, precision_t, precision_t>> m_trainer;
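// These pieces are wired together when a network config is (re)loaded; a
// sketch, assuming tiny-cuda-nn's Trainer constructor takes (network,
// optimizer, loss, seed):
//
//   m_trainer = std::make_shared<tcnn::Trainer<float, precision_t, precision_t>>(
//       m_network, m_optimizer, m_loss, m_seed);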
struct TrainableEnvmap {
std::shared_ptr<tcnn::Optimizer<float>> optimizer;
std::shared_ptr<TrainableBuffer<4, 2, float>> envmap;
std::shared_ptr<tcnn::Trainer<float, float, float>> trainer;
Eigen::Vector2i resolution;
ELossType loss_type;
Buffer2DView<const Eigen::Array4f> inference_view() const {
if (!envmap) {
return {};
}
return {(const Eigen::Array4f*)envmap->inference_params(), resolution};
}
Buffer2DView<const Eigen::Array4f> view() const {
if (!envmap) {
return {};
}
return {(const Eigen::Array4f*)envmap->params(), resolution};
}
} m_envmap;
struct TrainableDistortionMap {
std::shared_ptr<tcnn::Optimizer<float>> optimizer;
std::shared_ptr<TrainableBuffer<2, 2, float>> map;
std::shared_ptr<tcnn::Trainer<float, float, float>> trainer;
Eigen::Vector2i resolution;
Buffer2DView<const Eigen::Vector2f> inference_view() const {
if (!map) {
return {};
}
return {(const Eigen::Vector2f*)map->inference_params(), resolution};
}
Buffer2DView<const Eigen::Vector2f> view() const {
if (!map) {
return {};
}
return {(const Eigen::Vector2f*)map->params(), resolution};
}
} m_distortion;
std::shared_ptr<NerfNetwork<precision_t>> m_nerf_network;
};
NGP_NAMESPACE_END