From 646543257059b750938c1ab535bf78348c9407e2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pedro=20J=2E=20Est=C3=A9banez?= Date: Mon, 8 May 2023 11:48:17 +0200 Subject: [PATCH] Save cluster render shader from being optimized out entirely --- drivers/vulkan/rendering_device_vulkan.cpp | 3 +++ .../renderer_rd/cluster_builder_rd.cpp | 20 ++++++++++++++++--- .../renderer_rd/shaders/cluster_render.glsl | 18 +++++++++++++---- servers/rendering/rendering_device.h | 2 ++ 4 files changed, 36 insertions(+), 7 deletions(-) diff --git a/drivers/vulkan/rendering_device_vulkan.cpp b/drivers/vulkan/rendering_device_vulkan.cpp index 84b391d18ac..6e62f833a57 100644 --- a/drivers/vulkan/rendering_device_vulkan.cpp +++ b/drivers/vulkan/rendering_device_vulkan.cpp @@ -9380,6 +9380,9 @@ bool RenderingDeviceVulkan::has_feature(const Features p_feature) const { VulkanContext::VRSCapabilities vrs_capabilities = context->get_vrs_capabilities(); return vrs_capabilities.attachment_vrs_supported && context->get_physical_device_features().shaderStorageImageExtendedFormats; } break; + case SUPPORTS_FRAGMENT_SHADER_WITH_ONLY_SIDE_EFFECTS: { + return true; + } break; default: { return false; } diff --git a/servers/rendering/renderer_rd/cluster_builder_rd.cpp b/servers/rendering/renderer_rd/cluster_builder_rd.cpp index 959a752fbab..d2a1a5ab9c3 100644 --- a/servers/rendering/renderer_rd/cluster_builder_rd.cpp +++ b/servers/rendering/renderer_rd/cluster_builder_rd.cpp @@ -47,15 +47,29 @@ ClusterBuilderSharedDataRD::ClusterBuilderSharedDataRD() { } { + RD::FramebufferFormatID fb_format; + RD::PipelineColorBlendState blend_state; + String defines; + if (RD::get_singleton()->has_feature(RD::SUPPORTS_FRAGMENT_SHADER_WITH_ONLY_SIDE_EFFECTS)) { + fb_format = RD::get_singleton()->framebuffer_format_create_empty(); + blend_state = RD::PipelineColorBlendState::create_disabled(); + } else { + Vector afs; + afs.push_back(RD::AttachmentFormat()); + afs.write[0].usage_flags = RD::TEXTURE_USAGE_COLOR_ATTACHMENT_BIT; 
+ fb_format = RD::get_singleton()->framebuffer_format_create(afs); + defines = "\n#define USE_ATTACHMENT\n"; + } + Vector versions; versions.push_back(""); - cluster_render.cluster_render_shader.initialize(versions); + cluster_render.cluster_render_shader.initialize(versions, defines); cluster_render.shader_version = cluster_render.cluster_render_shader.version_create(); cluster_render.shader = cluster_render.cluster_render_shader.version_get_shader(cluster_render.shader_version, 0); - cluster_render.shader_pipelines[ClusterRender::PIPELINE_NORMAL] = RD::get_singleton()->render_pipeline_create(cluster_render.shader, RD::get_singleton()->framebuffer_format_create_empty(), vertex_format, RD::RENDER_PRIMITIVE_TRIANGLES, RD::PipelineRasterizationState(), RD::PipelineMultisampleState(), RD::PipelineDepthStencilState(), RD::PipelineColorBlendState(), 0); + cluster_render.shader_pipelines[ClusterRender::PIPELINE_NORMAL] = RD::get_singleton()->render_pipeline_create(cluster_render.shader, fb_format, vertex_format, RD::RENDER_PRIMITIVE_TRIANGLES, RD::PipelineRasterizationState(), RD::PipelineMultisampleState(), RD::PipelineDepthStencilState(), blend_state, 0); RD::PipelineMultisampleState ms; ms.sample_count = RD::TEXTURE_SAMPLES_4; - cluster_render.shader_pipelines[ClusterRender::PIPELINE_MSAA] = RD::get_singleton()->render_pipeline_create(cluster_render.shader, RD::get_singleton()->framebuffer_format_create_empty(), vertex_format, RD::RENDER_PRIMITIVE_TRIANGLES, RD::PipelineRasterizationState(), ms, RD::PipelineDepthStencilState(), RD::PipelineColorBlendState(), 0); + cluster_render.shader_pipelines[ClusterRender::PIPELINE_MSAA] = RD::get_singleton()->render_pipeline_create(cluster_render.shader, fb_format, vertex_format, RD::RENDER_PRIMITIVE_TRIANGLES, RD::PipelineRasterizationState(), ms, RD::PipelineDepthStencilState(), blend_state, 0); } { Vector versions; diff --git a/servers/rendering/renderer_rd/shaders/cluster_render.glsl 
b/servers/rendering/renderer_rd/shaders/cluster_render.glsl index 8c26a67926d..bfc98445c55 100644 --- a/servers/rendering/renderer_rd/shaders/cluster_render.glsl +++ b/servers/rendering/renderer_rd/shaders/cluster_render.glsl @@ -100,6 +100,10 @@ layout(set = 0, binding = 3, std430) buffer restrict ClusterRender { } cluster_render; +#ifdef USE_ATTACHMENT +layout(location = 0) out vec4 frag_color; +#endif + void main() { //convert from screen to cluster uvec2 cluster = uvec2(gl_FragCoord.xy) >> state.screen_to_clusters_shift; @@ -113,6 +117,8 @@ void main() { uint usage_write_offset = cluster_offset + (element_index >> 5); uint usage_write_bit = 1 << (element_index & 0x1F); + uint aux = 0; + #ifdef USE_SUBGROUPS uint cluster_thread_group_index; @@ -138,7 +144,7 @@ void main() { cluster_thread_group_index = subgroupBallotExclusiveBitCount(mask); if (cluster_thread_group_index == 0) { - atomicOr(cluster_render.data[usage_write_offset], usage_write_bit); + aux = atomicOr(cluster_render.data[usage_write_offset], usage_write_bit); } } #else @@ -147,7 +153,7 @@ void main() { if (!gl_HelperInvocation) #endif { - atomicOr(cluster_render.data[usage_write_offset], usage_write_bit); + aux = atomicOr(cluster_render.data[usage_write_offset], usage_write_bit); } #endif //find the current element in the depth usage list and mark the current depth as used @@ -162,7 +168,7 @@ void main() { if (!gl_HelperInvocation) { z_write_bit = subgroupOr(z_write_bit); //merge all Zs if (cluster_thread_group_index == 0) { - atomicOr(cluster_render.data[z_write_offset], z_write_bit); + aux = atomicOr(cluster_render.data[z_write_offset], z_write_bit); } } #else @@ -171,7 +177,11 @@ void main() { if (!gl_HelperInvocation) #endif { - atomicOr(cluster_render.data[z_write_offset], z_write_bit); + aux = atomicOr(cluster_render.data[z_write_offset], z_write_bit); } #endif + +#ifdef USE_ATTACHMENT + frag_color = vec4(float(aux)); +#endif } diff --git a/servers/rendering/rendering_device.h 
b/servers/rendering/rendering_device.h index e55db0a2373..d2a29f61bd3 100644 --- a/servers/rendering/rendering_device.h +++ b/servers/rendering/rendering_device.h @@ -704,6 +704,8 @@ public: SUPPORTS_MULTIVIEW, SUPPORTS_FSR_HALF_FLOAT, SUPPORTS_ATTACHMENT_VRS, + // If not supported, a fragment shader with only side effects (i.e., writes to buffers, but doesn't output to attachments) may be optimized down to a no-op by the GPU driver. + SUPPORTS_FRAGMENT_SHADER_WITH_ONLY_SIDE_EFFECTS, }; virtual bool has_feature(const Features p_feature) const = 0;