From 60d8df3feefd07b955c4b083ad7ab27d90b49fa4 Mon Sep 17 00:00:00 2001 From: clayjohn Date: Sat, 5 Feb 2022 15:03:39 -0800 Subject: [PATCH] Optimize and fix backbuffer gaussian blur --- servers/rendering/renderer_rd/effects_rd.cpp | 20 +--- servers/rendering/renderer_rd/effects_rd.h | 2 +- .../renderer_rd/renderer_storage_rd.cpp | 43 +++------ .../renderer_rd/renderer_storage_rd.h | 7 +- .../rendering/renderer_rd/shaders/copy.glsl | 91 ++++++++----------- 5 files changed, 61 insertions(+), 102 deletions(-) diff --git a/servers/rendering/renderer_rd/effects_rd.cpp b/servers/rendering/renderer_rd/effects_rd.cpp index 25a366aa4b2..9f600f46931 100644 --- a/servers/rendering/renderer_rd/effects_rd.cpp +++ b/servers/rendering/renderer_rd/effects_rd.cpp @@ -482,12 +482,11 @@ void EffectsRD::set_color(RID p_dest_texture, const Color &p_color, const Rect2i RD::get_singleton()->compute_list_end(); } -void EffectsRD::gaussian_blur(RID p_source_rd_texture, RID p_texture, RID p_back_texture, const Rect2i &p_region, bool p_8bit_dst) { +void EffectsRD::gaussian_blur(RID p_source_rd_texture, RID p_texture, const Rect2i &p_region, bool p_8bit_dst) { ERR_FAIL_COND_MSG(prefer_raster_effects, "Can't use the compute version of the gaussian blur with the mobile renderer."); memset(©.push_constant, 0, sizeof(CopyPushConstant)); - uint32_t base_flags = 0; copy.push_constant.section[0] = p_region.position.x; copy.push_constant.section[1] = p_region.position.y; copy.push_constant.section[2] = p_region.size.width; @@ -497,23 +496,12 @@ void EffectsRD::gaussian_blur(RID p_source_rd_texture, RID p_texture, RID p_back RD::DrawListID compute_list = RD::get_singleton()->compute_list_begin(); RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, copy.pipelines[p_8bit_dst ? COPY_MODE_GAUSSIAN_COPY_8BIT : COPY_MODE_GAUSSIAN_COPY]); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_source_rd_texture), 0); - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_back_texture), 3); - - copy.push_constant.flags = base_flags | COPY_FLAG_HORIZONTAL; - RD::get_singleton()->compute_list_set_push_constant(compute_list, ©.push_constant, sizeof(CopyPushConstant)); - - RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_region.size.width, p_region.size.height, 1); - - RD::get_singleton()->compute_list_add_barrier(compute_list); - - //VERTICAL - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_back_texture), 0); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_texture), 3); - copy.push_constant.flags = base_flags; RD::get_singleton()->compute_list_set_push_constant(compute_list, ©.push_constant, sizeof(CopyPushConstant)); RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_region.size.width, p_region.size.height, 1); + RD::get_singleton()->compute_list_end(); } @@ -2344,8 +2332,8 @@ EffectsRD::EffectsRD(bool p_prefer_raster_effects) { Vector copy_modes; copy_modes.push_back("\n#define MODE_GAUSSIAN_BLUR\n"); copy_modes.push_back("\n#define MODE_GAUSSIAN_BLUR\n#define DST_IMAGE_8BIT\n"); - copy_modes.push_back("\n#define MODE_GAUSSIAN_GLOW\n"); - copy_modes.push_back("\n#define MODE_GAUSSIAN_GLOW\n#define GLOW_USE_AUTO_EXPOSURE\n"); + copy_modes.push_back("\n#define MODE_GAUSSIAN_BLUR\n#define MODE_GLOW\n"); + copy_modes.push_back("\n#define MODE_GAUSSIAN_BLUR\n#define MODE_GLOW\n#define GLOW_USE_AUTO_EXPOSURE\n"); copy_modes.push_back("\n#define MODE_SIMPLE_COPY\n"); copy_modes.push_back("\n#define MODE_SIMPLE_COPY\n#define DST_IMAGE_8BIT\n"); copy_modes.push_back("\n#define MODE_SIMPLE_COPY_DEPTH\n"); diff --git a/servers/rendering/renderer_rd/effects_rd.h b/servers/rendering/renderer_rd/effects_rd.h index 1a080756a85..f5e5b1ace7e 100644 --- a/servers/rendering/renderer_rd/effects_rd.h +++ b/servers/rendering/renderer_rd/effects_rd.h @@ -899,7 +899,7 @@ public: void copy_depth_to_rect(RID p_source_rd_texture, RID p_dest_framebuffer, const Rect2i &p_rect, bool p_flip_y = false); void copy_depth_to_rect_and_linearize(RID p_source_rd_texture, RID p_dest_texture, const Rect2i &p_rect, bool p_flip_y, float p_z_near, float p_z_far); void copy_to_atlas_fb(RID p_source_rd_texture, RID p_dest_framebuffer, const Rect2 &p_uv_rect, RD::DrawListID p_draw_list, bool p_flip_y = false, bool p_panorama = false); - void gaussian_blur(RID p_source_rd_texture, RID p_texture, RID p_back_texture, const Rect2i &p_region, bool p_8bit_dst = false); + void gaussian_blur(RID p_source_rd_texture, RID p_texture, const Rect2i &p_region, bool p_8bit_dst = false); void set_color(RID p_dest_texture, const Color &p_color, const Rect2i &p_region, bool p_8bit_dst = false); void gaussian_glow(RID p_source_rd_texture, RID p_back_texture, const Size2i &p_size, float p_strength = 1.0, bool p_high_quality = false, bool p_first_pass = false, float p_luminance_cap = 16.0, float p_exposure = 1.0, float p_bloom = 0.0, float p_hdr_bleed_threshold = 1.0, float p_hdr_bleed_scale = 1.0, RID p_auto_exposure = RID(), float p_auto_exposure_grey = 1.0); void gaussian_glow_raster(RID p_source_rd_texture, RID p_framebuffer_half, RID p_rd_texture_half, RID p_dest_framebuffer, const Vector2 &p_pixel_size, float p_strength = 1.0, bool p_high_quality = false, bool p_first_pass = false, float p_luminance_cap = 16.0, float p_exposure = 1.0, float p_bloom = 0.0, float p_hdr_bleed_threshold = 1.0, float p_hdr_bleed_scale = 1.0, RID p_auto_exposure = RID(), float p_auto_exposure_grey = 1.0); diff --git a/servers/rendering/renderer_rd/renderer_storage_rd.cpp b/servers/rendering/renderer_rd/renderer_storage_rd.cpp index 145c4f902e9..f345001539f 100644 --- a/servers/rendering/renderer_rd/renderer_storage_rd.cpp +++ b/servers/rendering/renderer_rd/renderer_storage_rd.cpp @@ -7516,10 +7516,6 @@ void RendererStorageRD::_clear_render_target(RenderTarget *rt) { if (rt->backbuffer.is_valid()) { RD::get_singleton()->free(rt->backbuffer); rt->backbuffer = RID(); - for (int i = 0; i < rt->backbuffer_mipmaps.size(); i++) { - //just erase copies, since the rest are erased by dependency - RD::get_singleton()->free(rt->backbuffer_mipmaps[i].mipmap_copy); - } rt->backbuffer_mipmaps.clear(); rt->backbuffer_uniform_set = RID(); //chain deleted } @@ -7636,7 +7632,9 @@ void RendererStorageRD::_create_render_target_backbuffer(RenderTarget *rt) { tf.mipmaps = mipmaps_required; rt->backbuffer = RD::get_singleton()->texture_create(tf, RD::TextureView()); + RD::get_singleton()->set_resource_name(rt->backbuffer, "Render Target Back Buffer"); rt->backbuffer_mipmap0 = RD::get_singleton()->texture_create_shared_from_slice(RD::TextureView(), rt->backbuffer, 0, 0); + RD::get_singleton()->set_resource_name(rt->backbuffer_mipmap0, "Back Buffer slice mipmap 0"); { Vector fb_tex; @@ -7651,23 +7649,10 @@ void RendererStorageRD::_create_render_target_backbuffer(RenderTarget *rt) { } //create mipmaps for (uint32_t i = 1; i < mipmaps_required; i++) { - RenderTarget::BackbufferMipmap mm; - { - mm.mipmap = RD::get_singleton()->texture_create_shared_from_slice(RD::TextureView(), rt->backbuffer, 0, i); - } + RID mipmap = RD::get_singleton()->texture_create_shared_from_slice(RD::TextureView(), rt->backbuffer, 0, i); + RD::get_singleton()->set_resource_name(mipmap, "Back Buffer slice mip: " + itos(i)); - { - Size2 mm_size = Image::get_image_mipmap_size(tf.width, tf.height, Image::FORMAT_RGBA8, i); - - RD::TextureFormat mmtf = tf; - mmtf.width = mm_size.width; - mmtf.height = mm_size.height; - mmtf.mipmaps = 1; - - mm.mipmap_copy = RD::get_singleton()->texture_create(mmtf, RD::TextureView()); - } - - rt->backbuffer_mipmaps.push_back(mm); + rt->backbuffer_mipmaps.push_back(mipmap); } } @@ -8111,7 +8096,7 @@ void RendererStorageRD::render_target_copy_to_back_buffer(RID p_render_target, c if (!p_gen_mipmaps) { return; } - + RD::get_singleton()->draw_command_begin_label("Gaussian Blur Mipmaps"); //then mipmap blur RID prev_texture = rt->color; //use color, not backbuffer, as bb has mipmaps. @@ -8121,10 +8106,11 @@ void RendererStorageRD::render_target_copy_to_back_buffer(RID p_render_target, c region.size.x = MAX(1, region.size.x >> 1); region.size.y = MAX(1, region.size.y >> 1); - const RenderTarget::BackbufferMipmap &mm = rt->backbuffer_mipmaps[i]; - effects->gaussian_blur(prev_texture, mm.mipmap, mm.mipmap_copy, region, true); - prev_texture = mm.mipmap; + RID mipmap = rt->backbuffer_mipmaps[i]; + effects->gaussian_blur(prev_texture, mipmap, region, true); + prev_texture = mipmap; } + RD::get_singleton()->draw_command_end_label(); } void RendererStorageRD::render_target_clear_back_buffer(RID p_render_target, const Rect2i &p_region, const Color &p_color) { @@ -8164,7 +8150,7 @@ void RendererStorageRD::render_target_gen_back_buffer_mipmaps(RID p_render_targe return; //nothing to do } } - + RD::get_singleton()->draw_command_begin_label("Gaussian Blur Mipmaps2"); //then mipmap blur RID prev_texture = rt->backbuffer_mipmap0; @@ -8174,10 +8160,11 @@ void RendererStorageRD::render_target_gen_back_buffer_mipmaps(RID p_render_targe region.size.x = MAX(1, region.size.x >> 1); region.size.y = MAX(1, region.size.y >> 1); - const RenderTarget::BackbufferMipmap &mm = rt->backbuffer_mipmaps[i]; - effects->gaussian_blur(prev_texture, mm.mipmap, mm.mipmap_copy, region, true); - prev_texture = mm.mipmap; + RID mipmap = rt->backbuffer_mipmaps[i]; + effects->gaussian_blur(prev_texture, mipmap, region, true); + prev_texture = mipmap; } + RD::get_singleton()->draw_command_end_label(); } RID RendererStorageRD::render_target_get_framebuffer_uniform_set(RID p_render_target) { diff --git a/servers/rendering/renderer_rd/renderer_storage_rd.h b/servers/rendering/renderer_rd/renderer_storage_rd.h index 43bbcf65205..6febb71e93f 100644 --- a/servers/rendering/renderer_rd/renderer_storage_rd.h +++ b/servers/rendering/renderer_rd/renderer_storage_rd.h @@ -1169,12 +1169,7 @@ private: RID backbuffer_fb; RID backbuffer_mipmap0; - struct BackbufferMipmap { - RID mipmap; - RID mipmap_copy; - }; - - Vector backbuffer_mipmaps; + Vector backbuffer_mipmaps; RID framebuffer_uniform_set; RID backbuffer_uniform_set; diff --git a/servers/rendering/renderer_rd/shaders/copy.glsl b/servers/rendering/renderer_rd/shaders/copy.glsl index d4d0ed0f568..ecf7bb98170 100644 --- a/servers/rendering/renderer_rd/shaders/copy.glsl +++ b/servers/rendering/renderer_rd/shaders/copy.glsl @@ -61,7 +61,7 @@ layout(rgba8, set = 3, binding = 0) uniform restrict writeonly image2D dest_buff layout(rgba32f, set = 3, binding = 0) uniform restrict writeonly image2D dest_buffer; #endif -#ifdef MODE_GAUSSIAN_GLOW +#ifdef MODE_GAUSSIAN_BLUR shared vec4 local_cache[256]; shared vec4 temp_cache[128]; #endif @@ -70,7 +70,7 @@ void main() { // Pixel being shaded ivec2 pos = ivec2(gl_GlobalInvocationID.xy); -#ifndef MODE_GAUSSIAN_GLOW // Glow needs the extra threads +#ifndef MODE_GAUSSIAN_BLUR // Gaussian blur needs the extra threads if (any(greaterThanEqual(pos, params.section.zw))) { //too large, do nothing return; } @@ -92,35 +92,11 @@ void main() { #ifdef MODE_GAUSSIAN_BLUR - //Simpler blur uses SIGMA2 for the gaussian kernel for a stronger effect - - if (bool(params.flags & FLAG_HORIZONTAL)) { - ivec2 base_pos = (pos + params.section.xy) << 1; - vec4 color = texelFetch(source_color, base_pos + ivec2(0, 0), 0) * 0.214607; - color += texelFetch(source_color, base_pos + ivec2(1, 0), 0) * 0.189879; - color += texelFetch(source_color, base_pos + ivec2(2, 0), 0) * 0.131514; - color += texelFetch(source_color, base_pos + ivec2(3, 0), 0) * 0.071303; - color += texelFetch(source_color, base_pos + ivec2(-1, 0), 0) * 0.189879; - color += texelFetch(source_color, base_pos + ivec2(-2, 0), 0) * 0.131514; - color += texelFetch(source_color, base_pos + ivec2(-3, 0), 0) * 0.071303; - imageStore(dest_buffer, pos + params.target, color); - } else { - ivec2 base_pos = (pos + params.section.xy); - vec4 color = texelFetch(source_color, base_pos + ivec2(0, 0), 0) * 0.38774; - color += texelFetch(source_color, base_pos + ivec2(0, 1), 0) * 0.24477; - color += texelFetch(source_color, base_pos + ivec2(0, 2), 0) * 0.06136; - color += texelFetch(source_color, base_pos + ivec2(0, -1), 0) * 0.24477; - color += texelFetch(source_color, base_pos + ivec2(0, -2), 0) * 0.06136; - imageStore(dest_buffer, pos + params.target, color); - } -#endif - -#ifdef MODE_GAUSSIAN_GLOW - // First pass copy texture into 16x16 local memory for every 8x8 thread block vec2 quad_center_uv = clamp(vec2(gl_GlobalInvocationID.xy + gl_LocalInvocationID.xy - 3.5) / params.section.zw, vec2(0.5 / params.section.zw), vec2(1.0 - 1.5 / params.section.zw)); uint dest_index = gl_LocalInvocationID.x * 2 + gl_LocalInvocationID.y * 2 * 16; +#ifdef MODE_GLOW if (bool(params.flags & FLAG_HIGH_QUALITY_GLOW)) { vec2 quad_offset_uv = clamp((vec2(gl_GlobalInvocationID.xy + gl_LocalInvocationID.xy - 3.0)) / params.section.zw, vec2(0.5 / params.section.zw), vec2(1.0 - 1.5 / params.section.zw)); @@ -128,12 +104,15 @@ void main() { local_cache[dest_index + 1] = (textureLod(source_color, quad_center_uv + vec2(1.0 / params.section.z, 0.0), 0) + textureLod(source_color, quad_offset_uv + vec2(1.0 / params.section.z, 0.0), 0)) * 0.5; local_cache[dest_index + 16] = (textureLod(source_color, quad_center_uv + vec2(0.0, 1.0 / params.section.w), 0) + textureLod(source_color, quad_offset_uv + vec2(0.0, 1.0 / params.section.w), 0)) * 0.5; local_cache[dest_index + 16 + 1] = (textureLod(source_color, quad_center_uv + vec2(1.0 / params.section.zw), 0) + textureLod(source_color, quad_offset_uv + vec2(1.0 / params.section.zw), 0)) * 0.5; - } else { + } else +#endif + { local_cache[dest_index] = textureLod(source_color, quad_center_uv, 0); local_cache[dest_index + 1] = textureLod(source_color, quad_center_uv + vec2(1.0 / params.section.z, 0.0), 0); local_cache[dest_index + 16] = textureLod(source_color, quad_center_uv + vec2(0.0, 1.0 / params.section.w), 0); local_cache[dest_index + 16 + 1] = textureLod(source_color, quad_center_uv + vec2(1.0 / params.section.zw), 0); } +#ifdef MODE_GLOW if (bool(params.flags & FLAG_GLOW_FIRST_PASS)) { // Tonemap initial samples to reduce weight of fireflies: https://graphicrants.blogspot.com/2013/12/tone-mapping.html local_cache[dest_index] /= 1.0 + dot(local_cache[dest_index].rgb, vec3(0.299, 0.587, 0.114)); @@ -141,29 +120,33 @@ void main() { local_cache[dest_index + 16] /= 1.0 + dot(local_cache[dest_index + 16].rgb, vec3(0.299, 0.587, 0.114)); local_cache[dest_index + 16 + 1] /= 1.0 + dot(local_cache[dest_index + 16 + 1].rgb, vec3(0.299, 0.587, 0.114)); } - + const float kernel[4] = { 0.174938, 0.165569, 0.140367, 0.106595 }; +#else + // Simpler blur uses SIGMA2 for the gaussian kernel for a stronger effect. + const float kernel[4] = { 0.214607, 0.189879, 0.131514, 0.071303 }; +#endif memoryBarrierShared(); barrier(); // Horizontal pass. Needs to copy into 8x16 chunk of local memory so vertical pass has full resolution uint read_index = gl_LocalInvocationID.x + gl_LocalInvocationID.y * 32 + 4; vec4 color_top = vec4(0.0); - color_top += local_cache[read_index] * 0.174938; - color_top += local_cache[read_index + 1] * 0.165569; - color_top += local_cache[read_index + 2] * 0.140367; - color_top += local_cache[read_index + 3] * 0.106595; - color_top += local_cache[read_index - 1] * 0.165569; - color_top += local_cache[read_index - 2] * 0.140367; - color_top += local_cache[read_index - 3] * 0.106595; + color_top += local_cache[read_index] * kernel[0]; + color_top += local_cache[read_index + 1] * kernel[1]; + color_top += local_cache[read_index + 2] * kernel[2]; + color_top += local_cache[read_index + 3] * kernel[3]; + color_top += local_cache[read_index - 1] * kernel[1]; + color_top += local_cache[read_index - 2] * kernel[2]; + color_top += local_cache[read_index - 3] * kernel[3]; vec4 color_bottom = vec4(0.0); - color_bottom += local_cache[read_index + 16] * 0.174938; - color_bottom += local_cache[read_index + 1 + 16] * 0.165569; - color_bottom += local_cache[read_index + 2 + 16] * 0.140367; - color_bottom += local_cache[read_index + 3 + 16] * 0.106595; - color_bottom += local_cache[read_index - 1 + 16] * 0.165569; - color_bottom += local_cache[read_index - 2 + 16] * 0.140367; - color_bottom += local_cache[read_index - 3 + 16] * 0.106595; + color_bottom += local_cache[read_index + 16] * kernel[0]; + color_bottom += local_cache[read_index + 1 + 16] * kernel[1]; + color_bottom += local_cache[read_index + 2 + 16] * kernel[2]; + color_bottom += local_cache[read_index + 3 + 16] * kernel[3]; + color_bottom += local_cache[read_index - 1 + 16] * kernel[1]; + color_bottom += local_cache[read_index - 2 + 16] * kernel[2]; + color_bottom += local_cache[read_index - 3 + 16] * kernel[3]; // rotate samples to take advantage of cache coherency uint write_index = gl_LocalInvocationID.y * 2 + gl_LocalInvocationID.x * 16; @@ -174,18 +157,24 @@ void main() { memoryBarrierShared(); barrier(); + // If destination outside of texture, can stop doing work now + if (any(greaterThanEqual(pos, params.section.zw))) { + return; + } + // Vertical pass uint index = gl_LocalInvocationID.y + gl_LocalInvocationID.x * 16 + 4; vec4 color = vec4(0.0); - color += temp_cache[index] * 0.174938; - color += temp_cache[index + 1] * 0.165569; - color += temp_cache[index + 2] * 0.140367; - color += temp_cache[index + 3] * 0.106595; - color += temp_cache[index - 1] * 0.165569; - color += temp_cache[index - 2] * 0.140367; - color += temp_cache[index - 3] * 0.106595; + color += temp_cache[index] * kernel[0]; + color += temp_cache[index + 1] * kernel[1]; + color += temp_cache[index + 2] * kernel[2]; + color += temp_cache[index + 3] * kernel[3]; + color += temp_cache[index - 1] * kernel[1]; + color += temp_cache[index - 2] * kernel[2]; + color += temp_cache[index - 3] * kernel[3]; +#ifdef MODE_GLOW if (bool(params.flags & FLAG_GLOW_FIRST_PASS)) { // Undo tonemap to restore range: https://graphicrants.blogspot.com/2013/12/tone-mapping.html color /= 1.0 - dot(color.rgb, vec3(0.299, 0.587, 0.114)); @@ -205,7 +194,7 @@ void main() { color = min(color * feedback, vec4(params.glow_luminance_cap)); } - +#endif imageStore(dest_buffer, pos + params.target, color); #endif