diff --git a/drivers/d3d12/rendering_device_driver_d3d12.cpp b/drivers/d3d12/rendering_device_driver_d3d12.cpp index ad63cd82047..a52fbc44c79 100644 --- a/drivers/d3d12/rendering_device_driver_d3d12.cpp +++ b/drivers/d3d12/rendering_device_driver_d3d12.cpp @@ -1222,7 +1222,7 @@ RDD::TextureID RenderingDeviceDriverD3D12::texture_create(const TextureFormat &p if ((p_format.usage_bits & TEXTURE_USAGE_STORAGE_BIT)) { resource_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS; } - if ((p_format.usage_bits & TEXTURE_USAGE_VRS_ATTACHMENT_BIT)) { + if ((p_format.usage_bits & TEXTURE_USAGE_VRS_ATTACHMENT_BIT) && (p_format.usage_bits & TEXTURE_USAGE_VRS_FRAGMENT_SHADING_RATE_BIT)) { // For VRS images we can't use the typeless format. resource_desc.Format = DXGI_FORMAT_R8_UINT; } @@ -1827,8 +1827,11 @@ static D3D12_BARRIER_ACCESS _rd_texture_layout_access_mask(RDD::TextureLayout p_ return D3D12_BARRIER_ACCESS_RESOLVE_SOURCE; case RDD::TEXTURE_LAYOUT_RESOLVE_DST_OPTIMAL: return D3D12_BARRIER_ACCESS_RESOLVE_DEST; - case RDD::TEXTURE_LAYOUT_VRS_ATTACHMENT_OPTIMAL: + case RDD::TEXTURE_LAYOUT_FRAGMENT_SHADING_RATE_ATTACHMENT_OPTIMAL: return D3D12_BARRIER_ACCESS_SHADING_RATE_SOURCE; + case RDD::TEXTURE_LAYOUT_FRAGMENT_DENSITY_MAP_ATTACHMENT_OPTIMAL: + DEV_ASSERT(false && "Fragment density maps are not supported in D3D12."); + return D3D12_BARRIER_ACCESS_NO_ACCESS; default: return D3D12_BARRIER_ACCESS_NO_ACCESS; } @@ -1947,7 +1950,7 @@ static void _rd_stages_to_d3d12(BitField p_stages, D3D12 r_sync |= D3D12_BARRIER_SYNC_VERTEX_SHADING; } - if (p_stages.has_flag(RDD::PIPELINE_STAGE_FRAGMENT_SHADER_BIT)) { + if (p_stages.has_flag(RDD::PIPELINE_STAGE_FRAGMENT_SHADER_BIT) || p_stages.has_flag(RDD::PIPELINE_STAGE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT)) { r_sync |= D3D12_BARRIER_SYNC_PIXEL_SHADING; } @@ -2042,8 +2045,11 @@ static D3D12_BARRIER_LAYOUT _rd_texture_layout_to_d3d12_barrier_layout(RDD::Text return D3D12_BARRIER_LAYOUT_RESOLVE_SOURCE; case RDD::TEXTURE_LAYOUT_RESOLVE_DST_OPTIMAL: return D3D12_BARRIER_LAYOUT_RESOLVE_DEST; - case RDD::TEXTURE_LAYOUT_VRS_ATTACHMENT_OPTIMAL: + case RDD::TEXTURE_LAYOUT_FRAGMENT_SHADING_RATE_ATTACHMENT_OPTIMAL: return D3D12_BARRIER_LAYOUT_SHADING_RATE_SOURCE; + case RDD::TEXTURE_LAYOUT_FRAGMENT_DENSITY_MAP_ATTACHMENT_OPTIMAL: + DEV_ASSERT(false && "Fragment density maps are not supported in D3D12."); + return D3D12_BARRIER_LAYOUT_UNDEFINED; default: DEV_ASSERT(false && "Unknown texture layout."); return D3D12_BARRIER_LAYOUT_UNDEFINED; @@ -2451,7 +2457,7 @@ RDD::SwapChainID RenderingDeviceDriverD3D12::swap_chain_create(RenderingContextD color_ref.aspect.set_flag(RDD::TEXTURE_ASPECT_COLOR_BIT); subpass.color_references.push_back(color_ref); - RenderPassID render_pass = render_pass_create(attachment, subpass, {}, 1); + RenderPassID render_pass = render_pass_create(attachment, subpass, {}, 1, AttachmentReference()); ERR_FAIL_COND_V(!render_pass, SwapChainID()); // Create the empty swap chain until it is resized. @@ -2811,8 +2817,8 @@ RDD::FramebufferID RenderingDeviceDriverD3D12::_framebuffer_create(RenderPassID uint32_t vrs_index = UINT32_MAX; for (const Subpass &E : pass_info->subpasses) { - if (E.vrs_reference.attachment != AttachmentReference::UNUSED) { - vrs_index = E.vrs_reference.attachment; + if (E.fragment_shading_rate_reference.attachment != AttachmentReference::UNUSED) { + vrs_index = E.fragment_shading_rate_reference.attachment; } } @@ -4963,7 +4969,9 @@ Vector RenderingDeviceDriverD3D12::pipeline_cache_serialize() { // ----- SUBPASS ----- -RDD::RenderPassID RenderingDeviceDriverD3D12::render_pass_create(VectorView p_attachments, VectorView p_subpasses, VectorView p_subpass_dependencies, uint32_t p_view_count) { +RDD::RenderPassID RenderingDeviceDriverD3D12::render_pass_create(VectorView p_attachments, VectorView p_subpasses, VectorView p_subpass_dependencies, uint32_t p_view_count, AttachmentReference p_fragment_density_map_attachment) { + ERR_FAIL_COND_V_MSG(p_fragment_density_map_attachment.attachment != AttachmentReference::UNUSED, RenderPassID(), "Fragment density maps are not supported in D3D12."); + // Pre-bookkeep. RenderPassInfo *pass_info = VersatileResource::allocate(resources_allocator); @@ -5064,7 +5072,7 @@ void RenderingDeviceDriverD3D12::command_begin_render_pass(CommandBufferID p_cmd } } - if (fb_info->vrs_attachment && vrs_capabilities.ss_image_supported) { + if (fb_info->vrs_attachment && fsr_capabilities.attachment_supported) { ComPtr cmd_list_5; cmd_buf_info->cmd_list->QueryInterface(cmd_list_5.GetAddressOf()); if (cmd_list_5) { @@ -5184,7 +5192,7 @@ void RenderingDeviceDriverD3D12::command_end_render_pass(CommandBufferID p_cmd_b const FramebufferInfo *fb_info = cmd_buf_info->render_pass_state.fb_info; const RenderPassInfo *pass_info = cmd_buf_info->render_pass_state.pass_info; - if (vrs_capabilities.ss_image_supported) { + if (fsr_capabilities.attachment_supported) { ComPtr cmd_list_5; cmd_buf_info->cmd_list->QueryInterface(cmd_list_5.GetAddressOf()); if (cmd_list_5) { @@ -6244,12 +6252,6 @@ uint64_t RenderingDeviceDriverD3D12::limit_get(Limit p_limit) { return subgroup_capabilities.supported_stages_flags_rd(); case LIMIT_SUBGROUP_OPERATIONS: return subgroup_capabilities.supported_operations_flags_rd(); - case LIMIT_VRS_TEXEL_WIDTH: - case LIMIT_VRS_TEXEL_HEIGHT: - return vrs_capabilities.ss_image_tile_size; - case LIMIT_VRS_MAX_FRAGMENT_WIDTH: - case LIMIT_VRS_MAX_FRAGMENT_HEIGHT: - return vrs_capabilities.ss_max_fragment_size; case LIMIT_MAX_SHADER_VARYINGS: return MIN(D3D12_VS_OUTPUT_REGISTER_COUNT, D3D12_PS_INPUT_REGISTER_COUNT); default: { @@ -6286,12 +6288,8 @@ uint64_t RenderingDeviceDriverD3D12::api_trait_get(ApiTrait p_trait) { bool RenderingDeviceDriverD3D12::has_feature(Features p_feature) { switch (p_feature) { - case SUPPORTS_MULTIVIEW: - return multiview_capabilities.is_supported && multiview_capabilities.max_view_count > 1; case SUPPORTS_FSR_HALF_FLOAT: return shader_capabilities.native_16bit_ops && storage_buffer_capabilities.storage_buffer_16_bit_access_is_supported; - case SUPPORTS_ATTACHMENT_VRS: - return vrs_capabilities.ss_image_supported; case SUPPORTS_FRAGMENT_SHADER_WITH_ONLY_SIDE_EFFECTS: return true; case SUPPORTS_BUFFER_DEVICE_ADDRESS: @@ -6305,6 +6303,14 @@ const RDD::MultiviewCapabilities &RenderingDeviceDriverD3D12::get_multiview_capa return multiview_capabilities; } +const RDD::FragmentShadingRateCapabilities &RenderingDeviceDriverD3D12::get_fragment_shading_rate_capabilities() { + return fsr_capabilities; +} + +const RDD::FragmentDensityMapCapabilities &RenderingDeviceDriverD3D12::get_fragment_density_map_capabilities() { + return fdm_capabilities; +} + String RenderingDeviceDriverD3D12::get_api_name() const { return "D3D12"; } @@ -6466,12 +6472,6 @@ Error RenderingDeviceDriverD3D12::_check_capabilities() { device_capabilities.version_minor = feature_level % 10; // Assume not supported until proven otherwise. - vrs_capabilities.draw_call_supported = false; - vrs_capabilities.primitive_supported = false; - vrs_capabilities.primitive_in_multiviewport = false; - vrs_capabilities.ss_image_supported = false; - vrs_capabilities.ss_image_tile_size = 1; - vrs_capabilities.additional_rates_supported = false; multiview_capabilities.is_supported = false; multiview_capabilities.geometry_shader_is_supported = false; multiview_capabilities.tessellation_shader_is_supported = false; @@ -6562,14 +6562,12 @@ Error RenderingDeviceDriverD3D12::_check_capabilities() { res = device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS6, &options6, sizeof(options6)); if (SUCCEEDED(res)) { if (options6.VariableShadingRateTier >= D3D12_VARIABLE_SHADING_RATE_TIER_1) { - vrs_capabilities.draw_call_supported = true; + fsr_capabilities.pipeline_supported = true; if (options6.VariableShadingRateTier >= D3D12_VARIABLE_SHADING_RATE_TIER_2) { - vrs_capabilities.primitive_supported = true; - vrs_capabilities.primitive_in_multiviewport = options6.PerPrimitiveShadingRateSupportedWithViewportIndexing; - vrs_capabilities.ss_image_supported = true; - vrs_capabilities.ss_image_tile_size = options6.ShadingRateImageTileSize; - vrs_capabilities.ss_max_fragment_size = 8; // TODO figure out if this is supplied and/or needed - vrs_capabilities.additional_rates_supported = options6.AdditionalShadingRatesSupported; + fsr_capabilities.primitive_supported = true; + fsr_capabilities.attachment_supported = true; + fsr_capabilities.min_texel_size = Size2i(options6.ShadingRateImageTileSize, options6.ShadingRateImageTileSize); + fsr_capabilities.max_texel_size = Size2i(8, 8); } } } @@ -6581,19 +6579,16 @@ Error RenderingDeviceDriverD3D12::_check_capabilities() { barrier_capabilities.enhanced_barriers_supported = options12.EnhancedBarriersSupported; } - if (vrs_capabilities.draw_call_supported || vrs_capabilities.primitive_supported || vrs_capabilities.ss_image_supported) { + if (fsr_capabilities.pipeline_supported || fsr_capabilities.primitive_supported || fsr_capabilities.attachment_supported) { print_verbose("- D3D12 Variable Rate Shading supported:"); - if (vrs_capabilities.draw_call_supported) { + if (fsr_capabilities.pipeline_supported) { print_verbose(" Draw call"); } - if (vrs_capabilities.primitive_supported) { - print_verbose(String(" Per-primitive (multi-viewport: ") + (vrs_capabilities.primitive_in_multiviewport ? "yes" : "no") + ")"); + if (fsr_capabilities.primitive_supported) { + print_verbose(" Primitive"); } - if (vrs_capabilities.ss_image_supported) { - print_verbose(String(" Screen-space image (tile size: ") + itos(vrs_capabilities.ss_image_tile_size) + ")"); - } - if (vrs_capabilities.additional_rates_supported) { - print_verbose(String(" Additional rates: ") + (vrs_capabilities.additional_rates_supported ? "yes" : "no")); + if (fsr_capabilities.attachment_supported) { + print_verbose(String(" Screen-space image (tile size: ") + itos(fsr_capabilities.min_texel_size.x) + ")"); } } else { print_verbose("- D3D12 Variable Rate Shading not supported"); diff --git a/drivers/d3d12/rendering_device_driver_d3d12.h b/drivers/d3d12/rendering_device_driver_d3d12.h index 50d0f2cddf8..c2f75096616 100644 --- a/drivers/d3d12/rendering_device_driver_d3d12.h +++ b/drivers/d3d12/rendering_device_driver_d3d12.h @@ -116,16 +116,6 @@ class RenderingDeviceDriverD3D12 : public RenderingDeviceDriver { uint32_t supported_operations_flags_rd() const; }; - struct VRSCapabilities { - bool draw_call_supported = false; // We can specify our fragment rate on a draw call level. - bool primitive_supported = false; // We can specify our fragment rate on each drawcall. - bool primitive_in_multiviewport = false; - bool ss_image_supported = false; // We can provide a density map attachment on our framebuffer. - uint32_t ss_image_tile_size = 0; - uint32_t ss_max_fragment_size = 0; - bool additional_rates_supported = false; - }; - struct ShaderCapabilities { D3D_SHADER_MODEL shader_model = (D3D_SHADER_MODEL)0; bool native_16bit_ops = false; @@ -157,7 +147,8 @@ class RenderingDeviceDriverD3D12 : public RenderingDeviceDriver { uint32_t feature_level = 0; // Major * 10 + minor. SubgroupCapabilities subgroup_capabilities; RDD::MultiviewCapabilities multiview_capabilities; - VRSCapabilities vrs_capabilities; + FragmentShadingRateCapabilities fsr_capabilities; + FragmentDensityMapCapabilities fdm_capabilities; ShaderCapabilities shader_capabilities; StorageBufferCapabilities storage_buffer_capabilities; FormatCapabilities format_capabilities; @@ -834,7 +825,7 @@ private: }; public: - virtual RenderPassID render_pass_create(VectorView p_attachments, VectorView p_subpasses, VectorView p_subpass_dependencies, uint32_t p_view_count) override final; + virtual RenderPassID render_pass_create(VectorView p_attachments, VectorView p_subpasses, VectorView p_subpass_dependencies, uint32_t p_view_count, AttachmentReference p_fragment_density_map_attachment) override final; virtual void render_pass_free(RenderPassID p_render_pass) override final; // ----- COMMANDS ----- @@ -1002,6 +993,8 @@ public: virtual uint64_t api_trait_get(ApiTrait p_trait) override final; virtual bool has_feature(Features p_feature) override final; virtual const MultiviewCapabilities &get_multiview_capabilities() override final; + virtual const FragmentShadingRateCapabilities &get_fragment_shading_rate_capabilities() override final; + virtual const FragmentDensityMapCapabilities &get_fragment_density_map_capabilities() override final; virtual String get_api_name() const override final; virtual String get_api_version() const override final; virtual String get_pipeline_cache_uuid() const override final; diff --git a/drivers/metal/rendering_device_driver_metal.h b/drivers/metal/rendering_device_driver_metal.h index 5ac1bab4c1b..0e970447b9f 100644 --- a/drivers/metal/rendering_device_driver_metal.h +++ b/drivers/metal/rendering_device_driver_metal.h @@ -66,6 +66,8 @@ class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) RenderingDeviceDriverMet RDD::Capabilities capabilities; RDD::MultiviewCapabilities multiview_capabilities; + RDD::FragmentShadingRateCapabilities fsr_capabilities; + RDD::FragmentDensityMapCapabilities fdm_capabilities; id archive = nil; uint32_t archive_count = 0; @@ -316,7 +318,7 @@ public: // ----- SUBPASS ----- - virtual RenderPassID render_pass_create(VectorView p_attachments, VectorView p_subpasses, VectorView p_subpass_dependencies, uint32_t p_view_count) override final; + virtual RenderPassID render_pass_create(VectorView p_attachments, VectorView p_subpasses, VectorView p_subpass_dependencies, uint32_t p_view_count, AttachmentReference p_fragment_density_map_attachment) override final; virtual void render_pass_free(RenderPassID p_render_pass) override final; // ----- COMMANDS ----- @@ -421,6 +423,8 @@ public: virtual uint64_t api_trait_get(ApiTrait p_trait) override final; virtual bool has_feature(Features p_feature) override final; virtual const MultiviewCapabilities &get_multiview_capabilities() override final; + virtual const FragmentShadingRateCapabilities &get_fragment_shading_rate_capabilities() override final; + virtual const FragmentDensityMapCapabilities &get_fragment_density_map_capabilities() override final; virtual String get_api_name() const override final { return "Metal"; } virtual String get_api_version() const override final; virtual String get_pipeline_cache_uuid() const override final; diff --git a/drivers/metal/rendering_device_driver_metal.mm b/drivers/metal/rendering_device_driver_metal.mm index b8c12385fcf..6e7aece912d 100644 --- a/drivers/metal/rendering_device_driver_metal.mm +++ b/drivers/metal/rendering_device_driver_metal.mm @@ -974,7 +974,7 @@ RDD::SwapChainID RenderingDeviceDriverMetal::swap_chain_create(RenderingContextD color_ref.aspect.set_flag(RDD::TEXTURE_ASPECT_COLOR_BIT); subpass.color_references.push_back(color_ref); - RenderPassID render_pass = render_pass_create(attachment, subpass, {}, 1); + RenderPassID render_pass = render_pass_create(attachment, subpass, {}, 1, RDD::AttachmentReference()); ERR_FAIL_COND_V(!render_pass, SwapChainID()); // Create the empty swap chain until it is resized. @@ -3104,7 +3104,7 @@ Vector RenderingDeviceDriverMetal::pipeline_cache_serialize() { // ----- SUBPASS ----- -RDD::RenderPassID RenderingDeviceDriverMetal::render_pass_create(VectorView p_attachments, VectorView p_subpasses, VectorView p_subpass_dependencies, uint32_t p_view_count) { +RDD::RenderPassID RenderingDeviceDriverMetal::render_pass_create(VectorView p_attachments, VectorView p_subpasses, VectorView p_subpass_dependencies, uint32_t p_view_count, AttachmentReference p_fragment_density_map_attachment) { PixelFormats &pf = *pixel_formats; size_t subpass_count = p_subpasses.size(); @@ -4006,10 +4006,6 @@ uint64_t RenderingDeviceDriverMetal::limit_get(Limit p_limit) { return (uint64_t)((1.0 / limits.temporalScalerInputContentMinScale) * 1000'000); case LIMIT_MAX_SHADER_VARYINGS: return limits.maxShaderVaryings; - UNKNOWN(LIMIT_VRS_TEXEL_WIDTH); - UNKNOWN(LIMIT_VRS_TEXEL_HEIGHT); - UNKNOWN(LIMIT_VRS_MAX_FRAGMENT_WIDTH); - UNKNOWN(LIMIT_VRS_MAX_FRAGMENT_HEIGHT); default: { #ifdef DEV_ENABLED WARN_PRINT("Returning maximum value for unknown limit " + itos(p_limit) + "."); @@ -4032,17 +4028,8 @@ uint64_t RenderingDeviceDriverMetal::api_trait_get(ApiTrait p_trait) { bool RenderingDeviceDriverMetal::has_feature(Features p_feature) { switch (p_feature) { - case SUPPORTS_MULTIVIEW: - return multiview_capabilities.is_supported; case SUPPORTS_FSR_HALF_FLOAT: return true; - case SUPPORTS_ATTACHMENT_VRS: - // TODO(sgc): Maybe supported via https://developer.apple.com/documentation/metal/render_passes/rendering_at_different_rasterization_rates?language=objc - // See also: - // - // * https://forum.beyond3d.com/threads/variable-rate-shading-vs-variable-rate-rasterization.62243/post-2191363 - // - return false; case SUPPORTS_FRAGMENT_SHADER_WITH_ONLY_SIDE_EFFECTS: return true; case SUPPORTS_BUFFER_DEVICE_ADDRESS: @@ -4060,6 +4047,14 @@ const RDD::MultiviewCapabilities &RenderingDeviceDriverMetal::get_multiview_capa return multiview_capabilities; } +const RDD::FragmentShadingRateCapabilities &RenderingDeviceDriverMetal::get_fragment_shading_rate_capabilities() { + return fsr_capabilities; +} + +const RDD::FragmentDensityMapCapabilities &RenderingDeviceDriverMetal::get_fragment_density_map_capabilities() { + return fdm_capabilities; +} + String RenderingDeviceDriverMetal::get_api_version() const { return vformat("%d.%d", version_major, version_minor); } diff --git a/drivers/vulkan/rendering_device_driver_vulkan.cpp b/drivers/vulkan/rendering_device_driver_vulkan.cpp index 5eb8ccf69b6..6a9604f2eed 100644 --- a/drivers/vulkan/rendering_device_driver_vulkan.cpp +++ b/drivers/vulkan/rendering_device_driver_vulkan.cpp @@ -304,7 +304,8 @@ static VkImageLayout RD_TO_VK_LAYOUT[RDD::TEXTURE_LAYOUT_MAX] = { VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, // TEXTURE_LAYOUT_COPY_DST_OPTIMAL VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, // TEXTURE_LAYOUT_RESOLVE_SRC_OPTIMAL VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, // TEXTURE_LAYOUT_RESOLVE_DST_OPTIMAL - VK_IMAGE_LAYOUT_FRAGMENT_SHADING_RATE_ATTACHMENT_OPTIMAL_KHR, // TEXTURE_LAYOUT_VRS_ATTACHMENT_OPTIMAL + VK_IMAGE_LAYOUT_FRAGMENT_SHADING_RATE_ATTACHMENT_OPTIMAL_KHR, // TEXTURE_LAYOUT_FRAGMENT_SHADING_RATE_ATTACHMENT_OPTIMAL + VK_IMAGE_LAYOUT_FRAGMENT_DENSITY_MAP_OPTIMAL_EXT, // TEXTURE_LAYOUT_FRAGMENT_DENSITY_MAP_ATTACHMENT_OPTIMAL }; static VkPipelineStageFlags _rd_to_vk_pipeline_stages(BitField p_stages) { @@ -518,6 +519,7 @@ Error RenderingDeviceDriverVulkan::_initialize_device_extensions() { _register_requested_device_extension(VK_KHR_SWAPCHAIN_EXTENSION_NAME, true); _register_requested_device_extension(VK_KHR_MULTIVIEW_EXTENSION_NAME, false); _register_requested_device_extension(VK_KHR_FRAGMENT_SHADING_RATE_EXTENSION_NAME, false); + _register_requested_device_extension(VK_EXT_FRAGMENT_DENSITY_MAP_EXTENSION_NAME, false); _register_requested_device_extension(VK_KHR_CREATE_RENDERPASS_2_EXTENSION_NAME, false); _register_requested_device_extension(VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME, false); _register_requested_device_extension(VK_KHR_STORAGE_BUFFER_STORAGE_CLASS_EXTENSION_NAME, false); @@ -530,6 +532,10 @@ Error RenderingDeviceDriverVulkan::_initialize_device_extensions() { _register_requested_device_extension(VK_KHR_BUFFER_DEVICE_ADDRESS_EXTENSION_NAME, false); _register_requested_device_extension(VK_EXT_TEXTURE_COMPRESSION_ASTC_HDR_EXTENSION_NAME, false); + // We don't actually use this extension, but some runtime components on some platforms + // can and will fill the validation layers with useless info otherwise if not enabled. + _register_requested_device_extension(VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME, false); + if (Engine::get_singleton()->is_generate_spirv_debug_info_enabled()) { _register_requested_device_extension(VK_KHR_SHADER_NON_SEMANTIC_INFO_EXTENSION_NAME, true); } @@ -747,7 +753,8 @@ Error RenderingDeviceDriverVulkan::_check_device_capabilities() { VkPhysicalDeviceVulkan12Features device_features_vk_1_2 = {}; VkPhysicalDeviceShaderFloat16Int8FeaturesKHR shader_features = {}; VkPhysicalDeviceBufferDeviceAddressFeaturesKHR buffer_device_address_features = {}; - VkPhysicalDeviceFragmentShadingRateFeaturesKHR vrs_features = {}; + VkPhysicalDeviceFragmentShadingRateFeaturesKHR fsr_features = {}; + VkPhysicalDeviceFragmentDensityMapFeaturesEXT fdm_features = {}; VkPhysicalDevice16BitStorageFeaturesKHR storage_feature = {}; VkPhysicalDeviceMultiviewFeatures multiview_features = {}; VkPhysicalDevicePipelineCreationCacheControlFeatures pipeline_cache_control_features = {}; @@ -771,9 +778,15 @@ Error RenderingDeviceDriverVulkan::_check_device_capabilities() { } if (enabled_device_extension_names.has(VK_KHR_FRAGMENT_SHADING_RATE_EXTENSION_NAME)) { - vrs_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_FEATURES_KHR; - vrs_features.pNext = next_features; - next_features = &vrs_features; + fsr_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_FEATURES_KHR; + fsr_features.pNext = next_features; + next_features = &fsr_features; + } + + if (enabled_device_extension_names.has(VK_EXT_FRAGMENT_DENSITY_MAP_EXTENSION_NAME)) { + fdm_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_DENSITY_MAP_FEATURES_EXT; + fdm_features.pNext = next_features; + next_features = &fdm_features; } if (enabled_device_extension_names.has(VK_KHR_16BIT_STORAGE_EXTENSION_NAME)) { @@ -821,11 +834,21 @@ Error RenderingDeviceDriverVulkan::_check_device_capabilities() { } if (enabled_device_extension_names.has(VK_KHR_FRAGMENT_SHADING_RATE_EXTENSION_NAME)) { - vrs_capabilities.pipeline_vrs_supported = vrs_features.pipelineFragmentShadingRate; - vrs_capabilities.primitive_vrs_supported = vrs_features.primitiveFragmentShadingRate; - vrs_capabilities.attachment_vrs_supported = vrs_features.attachmentFragmentShadingRate; + fsr_capabilities.pipeline_supported = fsr_features.pipelineFragmentShadingRate; + fsr_capabilities.primitive_supported = fsr_features.primitiveFragmentShadingRate; + fsr_capabilities.attachment_supported = fsr_features.attachmentFragmentShadingRate; } + if (enabled_device_extension_names.has(VK_EXT_FRAGMENT_DENSITY_MAP_EXTENSION_NAME)) { + fdm_capabilities.attachment_supported = fdm_features.fragmentDensityMap; + fdm_capabilities.dynamic_attachment_supported = fdm_features.fragmentDensityMapDynamic; + fdm_capabilities.non_subsampled_images_supported = fdm_features.fragmentDensityMapNonSubsampledImages; + } + + // Multiple VRS techniques can't co-exist during the existence of one device, so we must + // choose one at creation time and only report one of them as available. + _choose_vrs_capabilities(); + if (enabled_device_extension_names.has(VK_KHR_MULTIVIEW_EXTENSION_NAME)) { multiview_capabilities.is_supported = multiview_features.multiview; multiview_capabilities.geometry_shader_is_supported = multiview_features.multiviewGeometryShader; @@ -855,7 +878,8 @@ Error RenderingDeviceDriverVulkan::_check_device_capabilities() { if (functions.GetPhysicalDeviceProperties2 != nullptr) { void *next_properties = nullptr; - VkPhysicalDeviceFragmentShadingRatePropertiesKHR vrs_properties = {}; + VkPhysicalDeviceFragmentShadingRatePropertiesKHR fsr_properties = {}; + VkPhysicalDeviceFragmentDensityMapPropertiesEXT fdm_properties = {}; VkPhysicalDeviceMultiviewProperties multiview_properties = {}; VkPhysicalDeviceSubgroupProperties subgroup_properties = {}; VkPhysicalDeviceSubgroupSizeControlProperties subgroup_size_control_properties = {}; @@ -881,10 +905,16 @@ Error RenderingDeviceDriverVulkan::_check_device_capabilities() { next_properties = &multiview_properties; } - if (vrs_capabilities.attachment_vrs_supported) { - vrs_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_PROPERTIES_KHR; - vrs_properties.pNext = next_properties; - next_properties = &vrs_properties; + if (fsr_capabilities.attachment_supported) { + fsr_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_PROPERTIES_KHR; + fsr_properties.pNext = next_properties; + next_properties = &fsr_properties; + } + + if (fdm_capabilities.attachment_supported) { + fdm_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_DENSITY_MAP_PROPERTIES_EXT; + fdm_properties.pNext = next_properties; + next_properties = &fdm_properties; } physical_device_properties_2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2; @@ -907,33 +937,53 @@ Error RenderingDeviceDriverVulkan::_check_device_capabilities() { subgroup_capabilities.max_size = subgroup_size_control_properties.maxSubgroupSize; } - if (vrs_capabilities.pipeline_vrs_supported || vrs_capabilities.primitive_vrs_supported || vrs_capabilities.attachment_vrs_supported) { - print_verbose("- Vulkan Variable Rate Shading supported:"); - if (vrs_capabilities.pipeline_vrs_supported) { + if (fsr_capabilities.pipeline_supported || fsr_capabilities.primitive_supported || fsr_capabilities.attachment_supported) { + print_verbose("- Vulkan Fragment Shading Rate supported:"); + if (fsr_capabilities.pipeline_supported) { print_verbose(" Pipeline fragment shading rate"); } - if (vrs_capabilities.primitive_vrs_supported) { + if (fsr_capabilities.primitive_supported) { print_verbose(" Primitive fragment shading rate"); } - if (vrs_capabilities.attachment_vrs_supported) { + if (fsr_capabilities.attachment_supported) { // TODO: Expose these somehow to the end user. - vrs_capabilities.min_texel_size.x = vrs_properties.minFragmentShadingRateAttachmentTexelSize.width; - vrs_capabilities.min_texel_size.y = vrs_properties.minFragmentShadingRateAttachmentTexelSize.height; - vrs_capabilities.max_texel_size.x = vrs_properties.maxFragmentShadingRateAttachmentTexelSize.width; - vrs_capabilities.max_texel_size.y = vrs_properties.maxFragmentShadingRateAttachmentTexelSize.height; - vrs_capabilities.max_fragment_size.x = vrs_properties.maxFragmentSize.width; // either 4 or 8 - vrs_capabilities.max_fragment_size.y = vrs_properties.maxFragmentSize.height; // generally the same as width + fsr_capabilities.min_texel_size.x = fsr_properties.minFragmentShadingRateAttachmentTexelSize.width; + fsr_capabilities.min_texel_size.y = fsr_properties.minFragmentShadingRateAttachmentTexelSize.height; + fsr_capabilities.max_texel_size.x = fsr_properties.maxFragmentShadingRateAttachmentTexelSize.width; + fsr_capabilities.max_texel_size.y = fsr_properties.maxFragmentShadingRateAttachmentTexelSize.height; + fsr_capabilities.max_fragment_size.x = fsr_properties.maxFragmentSize.width; // either 4 or 8 + fsr_capabilities.max_fragment_size.y = fsr_properties.maxFragmentSize.height; // generally the same as width - // We'll attempt to default to a texel size of 16x16. - vrs_capabilities.texel_size = Vector2i(16, 16).clamp(vrs_capabilities.min_texel_size, vrs_capabilities.max_texel_size); - - print_verbose(String(" Attachment fragment shading rate") + String(", min texel size: (") + itos(vrs_capabilities.min_texel_size.x) + String(", ") + itos(vrs_capabilities.min_texel_size.y) + String(")") + String(", max texel size: (") + itos(vrs_capabilities.max_texel_size.x) + String(", ") + itos(vrs_capabilities.max_texel_size.y) + String(")") + String(", max fragment size: (") + itos(vrs_capabilities.max_fragment_size.x) + String(", ") + itos(vrs_capabilities.max_fragment_size.y) + String(")")); + print_verbose(String(" Attachment fragment shading rate") + + String(", min texel size: (") + itos(fsr_capabilities.min_texel_size.x) + String(", ") + itos(fsr_capabilities.min_texel_size.y) + String(")") + + String(", max texel size: (") + itos(fsr_capabilities.max_texel_size.x) + String(", ") + itos(fsr_capabilities.max_texel_size.y) + String(")") + + String(", max fragment size: (") + itos(fsr_capabilities.max_fragment_size.x) + String(", ") + itos(fsr_capabilities.max_fragment_size.y) + String(")")); } } else { print_verbose("- Vulkan Variable Rate Shading not supported"); } + if (fdm_capabilities.attachment_supported || fdm_capabilities.dynamic_attachment_supported || fdm_capabilities.non_subsampled_images_supported) { + print_verbose("- Vulkan Fragment Density Map supported"); + + fdm_capabilities.min_texel_size.x = fdm_properties.minFragmentDensityTexelSize.width; + fdm_capabilities.min_texel_size.y = fdm_properties.minFragmentDensityTexelSize.height; + fdm_capabilities.max_texel_size.x = fdm_properties.maxFragmentDensityTexelSize.width; + fdm_capabilities.max_texel_size.y = fdm_properties.maxFragmentDensityTexelSize.height; + fdm_capabilities.invocations_supported = fdm_properties.fragmentDensityInvocations; + + if (fdm_capabilities.dynamic_attachment_supported) { + print_verbose(" - dynamic fragment density map supported"); + } + + if (fdm_capabilities.non_subsampled_images_supported) { + print_verbose(" - non-subsampled images supported"); + } + } else { + print_verbose("- Vulkan Fragment Density Map not supported"); + } + if (multiview_capabilities.is_supported) { multiview_capabilities.max_view_count = multiview_properties.maxMultiviewViewCount; multiview_capabilities.max_instance_count = multiview_properties.maxMultiviewInstanceIndex; @@ -959,6 +1009,22 @@ Error RenderingDeviceDriverVulkan::_check_device_capabilities() { return OK; } +void RenderingDeviceDriverVulkan::_choose_vrs_capabilities() { + bool prefer_fdm_on_qualcomm = physical_device_properties.vendorID == RenderingContextDriver::Vendor::VENDOR_QUALCOMM; + if (fdm_capabilities.attachment_supported && (!fsr_capabilities.attachment_supported || prefer_fdm_on_qualcomm)) { + // If available, we prefer using fragment density maps on Qualcomm as they adjust tile distribution when using + // this technique. Performance as a result is higher than when using fragment shading rate. + fsr_capabilities = FragmentShadingRateCapabilities(); + } else if (fsr_capabilities.attachment_supported) { + // Disable any possibility of fragment density maps being used. + fdm_capabilities = FragmentDensityMapCapabilities(); + } else { + // Do not report or enable any VRS capabilities if attachment is not supported. + fsr_capabilities = FragmentShadingRateCapabilities(); + fdm_capabilities = FragmentDensityMapCapabilities(); + } +} + Error RenderingDeviceDriverVulkan::_add_queue_create_info(LocalVector &r_queue_create_info) { uint32_t queue_family_count = queue_family_properties.size(); queue_families.resize(queue_family_count); @@ -1009,14 +1075,24 @@ Error RenderingDeviceDriverVulkan::_initialize_device(const LocalVector RenderingDeviceDriverVulkan::texture_get_usages_ if (!(flags & VK_FORMAT_FEATURE_STORAGE_IMAGE_ATOMIC_BIT)) { supported.clear_flag(TEXTURE_USAGE_STORAGE_ATOMIC_BIT); } - // Validation via VK_FORMAT_FEATURE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR fails if VRS attachment is not supported. - if (p_format != DATA_FORMAT_R8_UINT) { + if (p_format != DATA_FORMAT_R8_UINT && p_format != DATA_FORMAT_R8G8_UNORM) { supported.clear_flag(TEXTURE_USAGE_VRS_ATTACHMENT_BIT); } @@ -2317,6 +2396,8 @@ static_assert(ENUM_MEMBERS_EQUAL(RDD::PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPE static_assert(ENUM_MEMBERS_EQUAL(RDD::PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT)); static_assert(ENUM_MEMBERS_EQUAL(RDD::PIPELINE_STAGE_ALL_GRAPHICS_BIT, VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT)); static_assert(ENUM_MEMBERS_EQUAL(RDD::PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT)); +static_assert(ENUM_MEMBERS_EQUAL(RDD::PIPELINE_STAGE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR)); +static_assert(ENUM_MEMBERS_EQUAL(RDD::PIPELINE_STAGE_FRAGMENT_DENSITY_PROCESS_BIT, VK_PIPELINE_STAGE_FRAGMENT_DENSITY_PROCESS_BIT_EXT)); // RDD::BarrierAccessBits == VkAccessFlagBits. static_assert(ENUM_MEMBERS_EQUAL(RDD::BARRIER_ACCESS_INDIRECT_COMMAND_READ_BIT, VK_ACCESS_INDIRECT_COMMAND_READ_BIT)); @@ -2335,6 +2416,7 @@ static_assert(ENUM_MEMBERS_EQUAL(RDD::BARRIER_ACCESS_HOST_WRITE_BIT, VK_ACCESS_H static_assert(ENUM_MEMBERS_EQUAL(RDD::BARRIER_ACCESS_MEMORY_READ_BIT, VK_ACCESS_MEMORY_READ_BIT)); static_assert(ENUM_MEMBERS_EQUAL(RDD::BARRIER_ACCESS_MEMORY_WRITE_BIT, VK_ACCESS_MEMORY_WRITE_BIT)); static_assert(ENUM_MEMBERS_EQUAL(RDD::BARRIER_ACCESS_FRAGMENT_SHADING_RATE_ATTACHMENT_READ_BIT, VK_ACCESS_FRAGMENT_SHADING_RATE_ATTACHMENT_READ_BIT_KHR)); +static_assert(ENUM_MEMBERS_EQUAL(RDD::BARRIER_ACCESS_FRAGMENT_DENSITY_MAP_ATTACHMENT_READ_BIT, VK_ACCESS_FRAGMENT_DENSITY_MAP_READ_BIT_EXT)); void RenderingDeviceDriverVulkan::command_pipeline_barrier( CommandBufferID p_cmd_buffer, @@ -2400,8 +2482,9 @@ void RenderingDeviceDriverVulkan::command_pipeline_barrier( } #endif + const CommandBufferInfo *command_buffer = (const CommandBufferInfo *)p_cmd_buffer.id; vkCmdPipelineBarrier( - (VkCommandBuffer)p_cmd_buffer.id, + command_buffer->vk_command_buffer, _rd_to_vk_pipeline_stages(p_src_stages), _rd_to_vk_pipeline_stages(p_dst_stages), 0, @@ -2600,7 +2683,8 @@ Error RenderingDeviceDriverVulkan::command_queue_execute_and_present(CommandQueu signal_semaphores.clear(); for (uint32_t i = 0; i < p_cmd_buffers.size(); i++) { - command_buffers.push_back(VkCommandBuffer(p_cmd_buffers[i].id)); + const CommandBufferInfo *command_buffer = (const CommandBufferInfo *)(p_cmd_buffers[i].id); + command_buffers.push_back(command_buffer->vk_command_buffer); } for (uint32_t i = 0; i < p_cmd_semaphores.size(); i++) { @@ -2810,6 +2894,10 @@ void RenderingDeviceDriverVulkan::command_pool_free(CommandPoolID p_cmd_pool) { DEV_ASSERT(p_cmd_pool); CommandPool *command_pool = (CommandPool *)(p_cmd_pool.id); + for (CommandBufferInfo *command_buffer : command_pool->command_buffers_created) { + VersatileResource::free(resources_allocator, command_buffer); + } + vkDestroyCommandPool(vk_device, command_pool->vk_command_pool, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_COMMAND_POOL)); memdelete(command_pool); } @@ -2819,7 +2907,7 @@ void RenderingDeviceDriverVulkan::command_pool_free(CommandPoolID p_cmd_pool) { RDD::CommandBufferID RenderingDeviceDriverVulkan::command_buffer_create(CommandPoolID p_cmd_pool) { DEV_ASSERT(p_cmd_pool); - const CommandPool *command_pool = (const CommandPool *)(p_cmd_pool.id); + CommandPool *command_pool = (CommandPool *)(p_cmd_pool.id); VkCommandBufferAllocateInfo cmd_buf_info = {}; cmd_buf_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO; cmd_buf_info.commandPool = command_pool->vk_command_pool; @@ -2831,19 +2919,24 @@ RDD::CommandBufferID RenderingDeviceDriverVulkan::command_buffer_create(CommandP cmd_buf_info.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY; } - VkCommandBuffer vk_cmd_buffer = VK_NULL_HANDLE; - VkResult err = vkAllocateCommandBuffers(vk_device, &cmd_buf_info, &vk_cmd_buffer); + VkCommandBuffer vk_command_buffer = VK_NULL_HANDLE; + VkResult err = vkAllocateCommandBuffers(vk_device, &cmd_buf_info, &vk_command_buffer); ERR_FAIL_COND_V_MSG(err, CommandBufferID(), "vkAllocateCommandBuffers failed with error " + itos(err) + "."); - return CommandBufferID(vk_cmd_buffer); + CommandBufferInfo *command_buffer = VersatileResource::allocate(resources_allocator); + command_buffer->vk_command_buffer = vk_command_buffer; + command_pool->command_buffers_created.push_back(command_buffer); + return CommandBufferID(command_buffer); } bool RenderingDeviceDriverVulkan::command_buffer_begin(CommandBufferID p_cmd_buffer) { + CommandBufferInfo *command_buffer = (CommandBufferInfo *)(p_cmd_buffer.id); + VkCommandBufferBeginInfo cmd_buf_begin_info = {}; cmd_buf_begin_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; cmd_buf_begin_info.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; - VkResult err = vkBeginCommandBuffer((VkCommandBuffer)p_cmd_buffer.id, &cmd_buf_begin_info); + VkResult err = vkBeginCommandBuffer(command_buffer->vk_command_buffer, &cmd_buf_begin_info); ERR_FAIL_COND_V_MSG(err, false, "vkBeginCommandBuffer failed with error " + itos(err) + "."); return true; @@ -2851,10 +2944,12 @@ bool RenderingDeviceDriverVulkan::command_buffer_begin(CommandBufferID p_cmd_buf bool RenderingDeviceDriverVulkan::command_buffer_begin_secondary(CommandBufferID p_cmd_buffer, RenderPassID p_render_pass, uint32_t p_subpass, FramebufferID p_framebuffer) { Framebuffer *framebuffer = (Framebuffer *)(p_framebuffer.id); + RenderPassInfo *render_pass = (RenderPassInfo *)(p_render_pass.id); + CommandBufferInfo *command_buffer = (CommandBufferInfo *)(p_cmd_buffer.id); VkCommandBufferInheritanceInfo inheritance_info = {}; inheritance_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_INHERITANCE_INFO; - inheritance_info.renderPass = (VkRenderPass)p_render_pass.id; + inheritance_info.renderPass = render_pass->vk_render_pass; inheritance_info.subpass = p_subpass; inheritance_info.framebuffer = framebuffer->vk_framebuffer; @@ -2863,18 +2958,27 @@ bool RenderingDeviceDriverVulkan::command_buffer_begin_secondary(CommandBufferID cmd_buf_begin_info.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT | VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT; cmd_buf_begin_info.pInheritanceInfo = &inheritance_info; - VkResult err = vkBeginCommandBuffer((VkCommandBuffer)p_cmd_buffer.id, &cmd_buf_begin_info); + VkResult err = vkBeginCommandBuffer(command_buffer->vk_command_buffer, &cmd_buf_begin_info); ERR_FAIL_COND_V_MSG(err, false, "vkBeginCommandBuffer failed with error " + itos(err) + "."); return true; } void RenderingDeviceDriverVulkan::command_buffer_end(CommandBufferID p_cmd_buffer) { - vkEndCommandBuffer((VkCommandBuffer)p_cmd_buffer.id); + CommandBufferInfo *command_buffer = (CommandBufferInfo *)(p_cmd_buffer.id); + vkEndCommandBuffer(command_buffer->vk_command_buffer); } void RenderingDeviceDriverVulkan::command_buffer_execute_secondary(CommandBufferID p_cmd_buffer, VectorView p_secondary_cmd_buffers) { - vkCmdExecuteCommands((VkCommandBuffer)p_cmd_buffer.id, p_secondary_cmd_buffers.size(), (const VkCommandBuffer *)p_secondary_cmd_buffers.ptr()); + thread_local LocalVector secondary_command_buffers; + CommandBufferInfo *command_buffer = (CommandBufferInfo *)(p_cmd_buffer.id); + secondary_command_buffers.resize(p_secondary_cmd_buffers.size()); + for (uint32_t i = 0; i < p_secondary_cmd_buffers.size(); i++) { + CommandBufferInfo *secondary_command_buffer = (CommandBufferInfo *)(p_secondary_cmd_buffers[i].id); + secondary_command_buffers[i] = secondary_command_buffer->vk_command_buffer; + } + + vkCmdExecuteCommands(command_buffer->vk_command_buffer, p_secondary_cmd_buffers.size(), secondary_command_buffers.ptr()); } /********************/ @@ -2986,15 +3090,18 @@ RenderingDeviceDriver::SwapChainID RenderingDeviceDriverVulkan::swap_chain_creat pass_info.subpassCount = 1; pass_info.pSubpasses = &subpass; - VkRenderPass render_pass = VK_NULL_HANDLE; - err = _create_render_pass(vk_device, &pass_info, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_RENDER_PASS), &render_pass); + VkRenderPass vk_render_pass = VK_NULL_HANDLE; + err = _create_render_pass(vk_device, &pass_info, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_RENDER_PASS), &vk_render_pass); ERR_FAIL_COND_V(err != VK_SUCCESS, SwapChainID()); + RenderPassInfo *render_pass_info = VersatileResource::allocate(resources_allocator); + render_pass_info->vk_render_pass = vk_render_pass; + SwapChain *swap_chain = memnew(SwapChain); swap_chain->surface = p_surface; swap_chain->format = format; swap_chain->color_space = color_space; - swap_chain->render_pass = RenderPassID(render_pass); + swap_chain->render_pass = RenderPassID(render_pass_info); return SwapChainID(swap_chain); } @@ -3226,9 +3333,10 @@ Error RenderingDeviceDriverVulkan::swap_chain_resize(CommandQueueID p_cmd_queue, swap_chain->framebuffers.reserve(image_count); + const RenderPassInfo *render_pass = (const RenderPassInfo *)(swap_chain->render_pass.id); VkFramebufferCreateInfo fb_create_info = {}; fb_create_info.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO; - fb_create_info.renderPass = VkRenderPass(swap_chain->render_pass.id); + fb_create_info.renderPass = render_pass->vk_render_pass; fb_create_info.attachmentCount = 1; fb_create_info.width = surface->width; fb_create_info.height = surface->height; @@ -3369,8 +3477,8 @@ void RenderingDeviceDriverVulkan::swap_chain_free(SwapChainID p_swap_chain) { SwapChain *swap_chain = (SwapChain *)(p_swap_chain.id); _swap_chain_release(swap_chain); - if (swap_chain->render_pass.id != 0) { - vkDestroyRenderPass(vk_device, VkRenderPass(swap_chain->render_pass.id), VKC::get_allocation_callbacks(VK_OBJECT_TYPE_RENDER_PASS)); + if (swap_chain->render_pass) { + render_pass_free(swap_chain->render_pass); } memdelete(swap_chain); @@ -3381,14 +3489,23 @@ void RenderingDeviceDriverVulkan::swap_chain_free(SwapChainID p_swap_chain) { /*********************/ RDD::FramebufferID RenderingDeviceDriverVulkan::framebuffer_create(RenderPassID p_render_pass, VectorView p_attachments, uint32_t p_width, uint32_t p_height) { + RenderPassInfo *render_pass = (RenderPassInfo *)(p_render_pass.id); + + uint32_t fragment_density_map_offsets_layers = 0; VkImageView *vk_img_views = ALLOCA_ARRAY(VkImageView, p_attachments.size()); for (uint32_t i = 0; i < p_attachments.size(); i++) { - vk_img_views[i] = ((const TextureInfo *)p_attachments[i].id)->vk_view; + const TextureInfo *texture = (const TextureInfo *)p_attachments[i].id; + vk_img_views[i] = texture->vk_view; + + if (render_pass->uses_fragment_density_map_offsets && (texture->vk_create_info.usage & VK_IMAGE_USAGE_FRAGMENT_DENSITY_MAP_BIT_EXT)) { + // If the render pass uses the FDM and the usage fits, we store the amount of layers to use it later on the render pass's end. + fragment_density_map_offsets_layers = texture->vk_create_info.arrayLayers; + } } VkFramebufferCreateInfo framebuffer_create_info = {}; framebuffer_create_info.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO; - framebuffer_create_info.renderPass = (VkRenderPass)p_render_pass.id; + framebuffer_create_info.renderPass = render_pass->vk_render_pass; framebuffer_create_info.attachmentCount = p_attachments.size(); framebuffer_create_info.pAttachments = vk_img_views; framebuffer_create_info.width = p_width; @@ -3409,6 +3526,7 @@ RDD::FramebufferID RenderingDeviceDriverVulkan::framebuffer_create(RenderPassID Framebuffer *framebuffer = memnew(Framebuffer); framebuffer->vk_framebuffer = vk_framebuffer; + framebuffer->fragment_density_map_offsets_layers = fragment_density_map_offsets_layers; return FramebufferID(framebuffer); } @@ -4366,14 +4484,16 @@ static void _texture_copy_region_to_vk(const RDD::TextureCopyRegion &p_copy_regi } void RenderingDeviceDriverVulkan::command_clear_buffer(CommandBufferID p_cmd_buffer, BufferID p_buffer, uint64_t p_offset, uint64_t p_size) { + const CommandBufferInfo *command_buffer = (const CommandBufferInfo *)p_cmd_buffer.id; const BufferInfo *buf_info = (const BufferInfo *)p_buffer.id; - vkCmdFillBuffer((VkCommandBuffer)p_cmd_buffer.id, buf_info->vk_buffer, p_offset, p_size, 0); + vkCmdFillBuffer(command_buffer->vk_command_buffer, buf_info->vk_buffer, p_offset, p_size, 0); } void RenderingDeviceDriverVulkan::command_copy_buffer(CommandBufferID p_cmd_buffer, BufferID p_src_buffer, BufferID p_dst_buffer, VectorView p_regions) { + const CommandBufferInfo *command_buffer = (const CommandBufferInfo *)p_cmd_buffer.id; const BufferInfo *src_buf_info = (const BufferInfo *)p_src_buffer.id; const BufferInfo *dst_buf_info = (const BufferInfo *)p_dst_buffer.id; - vkCmdCopyBuffer((VkCommandBuffer)p_cmd_buffer.id, src_buf_info->vk_buffer, dst_buf_info->vk_buffer, p_regions.size(), (const VkBufferCopy *)p_regions.ptr()); + vkCmdCopyBuffer(command_buffer->vk_command_buffer, src_buf_info->vk_buffer, dst_buf_info->vk_buffer, p_regions.size(), (const VkBufferCopy *)p_regions.ptr()); } void RenderingDeviceDriverVulkan::command_copy_texture(CommandBufferID p_cmd_buffer, TextureID p_src_texture, TextureLayout p_src_texture_layout, TextureID p_dst_texture, TextureLayout p_dst_texture_layout, VectorView p_regions) { @@ -4382,6 +4502,7 @@ void RenderingDeviceDriverVulkan::command_copy_texture(CommandBufferID p_cmd_buf _texture_copy_region_to_vk(p_regions[i], &vk_copy_regions[i]); } + const CommandBufferInfo *command_buffer = (const CommandBufferInfo *)p_cmd_buffer.id; const TextureInfo *src_tex_info = (const TextureInfo *)p_src_texture.id; const TextureInfo *dst_tex_info = (const TextureInfo *)p_dst_texture.id; @@ -4394,10 +4515,11 @@ void RenderingDeviceDriverVulkan::command_copy_texture(CommandBufferID p_cmd_buf } #endif - vkCmdCopyImage((VkCommandBuffer)p_cmd_buffer.id, src_tex_info->vk_view_create_info.image, RD_TO_VK_LAYOUT[p_src_texture_layout], dst_tex_info->vk_view_create_info.image, RD_TO_VK_LAYOUT[p_dst_texture_layout], p_regions.size(), vk_copy_regions); + vkCmdCopyImage(command_buffer->vk_command_buffer, src_tex_info->vk_view_create_info.image, RD_TO_VK_LAYOUT[p_src_texture_layout], dst_tex_info->vk_view_create_info.image, RD_TO_VK_LAYOUT[p_dst_texture_layout], p_regions.size(), vk_copy_regions); } void RenderingDeviceDriverVulkan::command_resolve_texture(CommandBufferID p_cmd_buffer, TextureID p_src_texture, TextureLayout p_src_texture_layout, uint32_t p_src_layer, uint32_t p_src_mipmap, TextureID p_dst_texture, TextureLayout p_dst_texture_layout, uint32_t p_dst_layer, uint32_t p_dst_mipmap) { + const CommandBufferInfo *command_buffer = (const CommandBufferInfo *)p_cmd_buffer.id; const TextureInfo *src_tex_info = (const TextureInfo *)p_src_texture.id; const TextureInfo *dst_tex_info = (const TextureInfo *)p_dst_texture.id; @@ -4423,7 +4545,7 @@ void RenderingDeviceDriverVulkan::command_resolve_texture(CommandBufferID p_cmd_ } #endif - vkCmdResolveImage((VkCommandBuffer)p_cmd_buffer.id, src_tex_info->vk_view_create_info.image, RD_TO_VK_LAYOUT[p_src_texture_layout], dst_tex_info->vk_view_create_info.image, RD_TO_VK_LAYOUT[p_dst_texture_layout], 1, &vk_resolve); + vkCmdResolveImage(command_buffer->vk_command_buffer, src_tex_info->vk_view_create_info.image, RD_TO_VK_LAYOUT[p_src_texture_layout], dst_tex_info->vk_view_create_info.image, RD_TO_VK_LAYOUT[p_dst_texture_layout], 1, &vk_resolve); } void RenderingDeviceDriverVulkan::command_clear_color_texture(CommandBufferID p_cmd_buffer, TextureID p_texture, TextureLayout p_texture_layout, const Color &p_color, const TextureSubresourceRange &p_subresources) { @@ -4433,13 +4555,14 @@ void RenderingDeviceDriverVulkan::command_clear_color_texture(CommandBufferID p_ VkImageSubresourceRange vk_subresources = {}; _texture_subresource_range_to_vk(p_subresources, &vk_subresources); + const CommandBufferInfo *command_buffer = (const CommandBufferInfo *)p_cmd_buffer.id; const TextureInfo *tex_info = (const TextureInfo *)p_texture.id; #ifdef DEBUG_ENABLED if (tex_info->transient) { ERR_PRINT("TEXTURE_USAGE_TRANSIENT_BIT p_texture must not be used in command_clear_color_texture. Use a clear store action pass instead."); } #endif - vkCmdClearColorImage((VkCommandBuffer)p_cmd_buffer.id, tex_info->vk_view_create_info.image, RD_TO_VK_LAYOUT[p_texture_layout], &vk_color, 1, &vk_subresources); + vkCmdClearColorImage(command_buffer->vk_command_buffer, tex_info->vk_view_create_info.image, RD_TO_VK_LAYOUT[p_texture_layout], &vk_color, 1, &vk_subresources); } void RenderingDeviceDriverVulkan::command_copy_buffer_to_texture(CommandBufferID p_cmd_buffer, BufferID p_src_buffer, TextureID p_dst_texture, TextureLayout p_dst_texture_layout, VectorView p_regions) { @@ -4448,6 +4571,7 @@ void RenderingDeviceDriverVulkan::command_copy_buffer_to_texture(CommandBufferID _buffer_texture_copy_region_to_vk(p_regions[i], &vk_copy_regions[i]); } + const CommandBufferInfo *command_buffer = (const CommandBufferInfo *)p_cmd_buffer.id; const BufferInfo *buf_info = (const BufferInfo *)p_src_buffer.id; const TextureInfo *tex_info = (const TextureInfo *)p_dst_texture.id; #ifdef DEBUG_ENABLED @@ -4455,7 +4579,7 @@ void RenderingDeviceDriverVulkan::command_copy_buffer_to_texture(CommandBufferID ERR_PRINT("TEXTURE_USAGE_TRANSIENT_BIT p_dst_texture must not be used in command_copy_buffer_to_texture."); } #endif - vkCmdCopyBufferToImage((VkCommandBuffer)p_cmd_buffer.id, buf_info->vk_buffer, tex_info->vk_view_create_info.image, RD_TO_VK_LAYOUT[p_dst_texture_layout], p_regions.size(), vk_copy_regions); + vkCmdCopyBufferToImage(command_buffer->vk_command_buffer, buf_info->vk_buffer, tex_info->vk_view_create_info.image, RD_TO_VK_LAYOUT[p_dst_texture_layout], p_regions.size(), vk_copy_regions); } void RenderingDeviceDriverVulkan::command_copy_texture_to_buffer(CommandBufferID p_cmd_buffer, TextureID p_src_texture, TextureLayout p_src_texture_layout, BufferID p_dst_buffer, VectorView p_regions) { @@ -4464,6 +4588,7 @@ void RenderingDeviceDriverVulkan::command_copy_texture_to_buffer(CommandBufferID _buffer_texture_copy_region_to_vk(p_regions[i], &vk_copy_regions[i]); } + const CommandBufferInfo *command_buffer = (const CommandBufferInfo *)p_cmd_buffer.id; const TextureInfo *tex_info = (const TextureInfo *)p_src_texture.id; const BufferInfo *buf_info = (const BufferInfo *)p_dst_buffer.id; #ifdef DEBUG_ENABLED @@ -4471,7 +4596,7 @@ void RenderingDeviceDriverVulkan::command_copy_texture_to_buffer(CommandBufferID ERR_PRINT("TEXTURE_USAGE_TRANSIENT_BIT p_src_texture must not be used in command_copy_texture_to_buffer."); } #endif - vkCmdCopyImageToBuffer((VkCommandBuffer)p_cmd_buffer.id, tex_info->vk_view_create_info.image, RD_TO_VK_LAYOUT[p_src_texture_layout], buf_info->vk_buffer, p_regions.size(), vk_copy_regions); + vkCmdCopyImageToBuffer(command_buffer->vk_command_buffer, tex_info->vk_view_create_info.image, RD_TO_VK_LAYOUT[p_src_texture_layout], buf_info->vk_buffer, p_regions.size(), vk_copy_regions); } /******************/ @@ -4485,8 +4610,9 @@ void RenderingDeviceDriverVulkan::pipeline_free(PipelineID p_pipeline) { // ----- BINDING ----- void RenderingDeviceDriverVulkan::command_bind_push_constants(CommandBufferID p_cmd_buffer, ShaderID p_shader, uint32_t p_dst_first_index, VectorView p_data) { + const CommandBufferInfo *command_buffer = (const CommandBufferInfo *)p_cmd_buffer.id; const ShaderInfo *shader_info = (const ShaderInfo *)p_shader.id; - vkCmdPushConstants((VkCommandBuffer)p_cmd_buffer.id, shader_info->vk_pipeline_layout, shader_info->vk_push_constant_stages, p_dst_first_index * sizeof(uint32_t), p_data.size() * sizeof(uint32_t), p_data.ptr()); + vkCmdPushConstants(command_buffer->vk_command_buffer, shader_info->vk_pipeline_layout, shader_info->vk_push_constant_stages, p_dst_first_index * sizeof(uint32_t), p_data.size() * sizeof(uint32_t), p_data.ptr()); } // ----- CACHE ----- @@ -4617,7 +4743,7 @@ static void _attachment_reference_to_vk(const RDD::AttachmentReference &p_attach r_vk_attachment_reference->aspectMask = (VkImageAspectFlags)p_attachment_reference.aspect; } -RDD::RenderPassID RenderingDeviceDriverVulkan::render_pass_create(VectorView p_attachments, VectorView p_subpasses, VectorView p_subpass_dependencies, uint32_t p_view_count) { +RDD::RenderPassID RenderingDeviceDriverVulkan::render_pass_create(VectorView p_attachments, VectorView p_subpasses, VectorView p_subpass_dependencies, uint32_t p_view_count, AttachmentReference p_fragment_density_map_attachment) { // These are only used if we use multiview but we need to define them in scope. const uint32_t view_mask = (1 << p_view_count) - 1; const uint32_t correlation_mask = (1 << p_view_count) - 1; @@ -4672,22 +4798,22 @@ RDD::RenderPassID RenderingDeviceDriverVulkan::render_pass_create(VectorViewsType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2_KHR; - vk_subpass_vrs_attachment->attachment = p_subpasses[i].vrs_reference.attachment; - vk_subpass_vrs_attachment->layout = VK_IMAGE_LAYOUT_FRAGMENT_SHADING_RATE_ATTACHMENT_OPTIMAL_KHR; + // Fragment shading rate. + if (fsr_capabilities.attachment_supported && p_subpasses[i].fragment_shading_rate_reference.attachment != AttachmentReference::UNUSED) { + VkAttachmentReference2KHR *vk_subpass_fsr_attachment = ALLOCA_SINGLE(VkAttachmentReference2KHR); + *vk_subpass_fsr_attachment = {}; + vk_subpass_fsr_attachment->sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2_KHR; + vk_subpass_fsr_attachment->attachment = p_subpasses[i].fragment_shading_rate_reference.attachment; + vk_subpass_fsr_attachment->layout = VK_IMAGE_LAYOUT_FRAGMENT_SHADING_RATE_ATTACHMENT_OPTIMAL_KHR; - VkFragmentShadingRateAttachmentInfoKHR *vk_vrs_info = ALLOCA_SINGLE(VkFragmentShadingRateAttachmentInfoKHR); - *vk_vrs_info = {}; - vk_vrs_info->sType = VK_STRUCTURE_TYPE_FRAGMENT_SHADING_RATE_ATTACHMENT_INFO_KHR; - vk_vrs_info->pFragmentShadingRateAttachment = vk_subpass_vrs_attachment; - vk_vrs_info->shadingRateAttachmentTexelSize.width = vrs_capabilities.texel_size.x; - vk_vrs_info->shadingRateAttachmentTexelSize.height = vrs_capabilities.texel_size.y; + VkFragmentShadingRateAttachmentInfoKHR *vk_fsr_info = ALLOCA_SINGLE(VkFragmentShadingRateAttachmentInfoKHR); + *vk_fsr_info = {}; + vk_fsr_info->sType = VK_STRUCTURE_TYPE_FRAGMENT_SHADING_RATE_ATTACHMENT_INFO_KHR; + vk_fsr_info->pFragmentShadingRateAttachment = vk_subpass_fsr_attachment; + vk_fsr_info->shadingRateAttachmentTexelSize.width = p_subpasses[i].fragment_shading_rate_texel_size.x; + vk_fsr_info->shadingRateAttachmentTexelSize.height = p_subpasses[i].fragment_shading_rate_texel_size.y; - vk_subpasses[i].pNext = vk_vrs_info; + vk_subpasses[i].pNext = vk_fsr_info; } } @@ -4736,15 +4862,30 @@ RDD::RenderPassID RenderingDeviceDriverVulkan::render_pass_create(VectorViewsType = VK_STRUCTURE_TYPE_RENDER_PASS_FRAGMENT_DENSITY_MAP_CREATE_INFO_EXT; + vk_fdm_info->fragmentDensityMapAttachment.attachment = p_fragment_density_map_attachment.attachment; + vk_fdm_info->fragmentDensityMapAttachment.layout = RD_TO_VK_LAYOUT[p_fragment_density_map_attachment.layout]; + vk_fdm_info->pNext = create_info.pNext; + create_info.pNext = vk_fdm_info; + } + VkRenderPass vk_render_pass = VK_NULL_HANDLE; VkResult res = _create_render_pass(vk_device, &create_info, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_RENDER_PASS), &vk_render_pass); ERR_FAIL_COND_V_MSG(res, RenderPassID(), "vkCreateRenderPass2KHR failed with error " + itos(res) + "."); - return RenderPassID(vk_render_pass); + RenderPassInfo *render_pass = VersatileResource::allocate(resources_allocator); + render_pass->vk_render_pass = vk_render_pass; + return RenderPassID(render_pass); } void RenderingDeviceDriverVulkan::render_pass_free(RenderPassID p_render_pass) { - vkDestroyRenderPass(vk_device, (VkRenderPass)p_render_pass.id, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_RENDER_PASS)); + RenderPassInfo *render_pass = (RenderPassInfo *)(p_render_pass.id); + vkDestroyRenderPass(vk_device, render_pass->vk_render_pass, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_RENDER_PASS)); + VersatileResource::free(resources_allocator, render_pass); } // ----- COMMANDS ----- @@ -4752,7 +4893,10 @@ void RenderingDeviceDriverVulkan::render_pass_free(RenderPassID p_render_pass) { static_assert(ARRAYS_COMPATIBLE_FIELDWISE(RDD::RenderPassClearValue, VkClearValue)); void RenderingDeviceDriverVulkan::command_begin_render_pass(CommandBufferID p_cmd_buffer, RenderPassID p_render_pass, FramebufferID p_framebuffer, CommandBufferType p_cmd_buffer_type, const Rect2i &p_rect, VectorView p_clear_values) { + CommandBufferInfo *command_buffer = (CommandBufferInfo *)(p_cmd_buffer.id); + RenderPassInfo *render_pass = (RenderPassInfo *)(p_render_pass.id); Framebuffer *framebuffer = (Framebuffer *)(p_framebuffer.id); + if (framebuffer->swap_chain_acquired) { // Insert a barrier to wait for the acquisition of the framebuffer before the render pass begins. VkImageMemoryBarrier image_barrier = {}; @@ -4763,13 +4907,13 @@ void RenderingDeviceDriverVulkan::command_begin_render_pass(CommandBufferID p_cm image_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; image_barrier.image = framebuffer->swap_chain_image; image_barrier.subresourceRange = framebuffer->swap_chain_image_subresource_range; - vkCmdPipelineBarrier((VkCommandBuffer)p_cmd_buffer.id, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, 0, 0, nullptr, 0, nullptr, 1, &image_barrier); + vkCmdPipelineBarrier(command_buffer->vk_command_buffer, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, 0, 0, nullptr, 0, nullptr, 1, &image_barrier); framebuffer->swap_chain_acquired = false; } VkRenderPassBeginInfo render_pass_begin = {}; render_pass_begin.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO; - render_pass_begin.renderPass = (VkRenderPass)p_render_pass.id; + render_pass_begin.renderPass = render_pass->vk_render_pass; render_pass_begin.framebuffer = framebuffer->vk_framebuffer; render_pass_begin.renderArea.offset.x = p_rect.position.x; @@ -4781,7 +4925,10 @@ void RenderingDeviceDriverVulkan::command_begin_render_pass(CommandBufferID p_cm render_pass_begin.pClearValues = (const VkClearValue *)p_clear_values.ptr(); VkSubpassContents vk_subpass_contents = p_cmd_buffer_type == COMMAND_BUFFER_TYPE_PRIMARY ? VK_SUBPASS_CONTENTS_INLINE : VK_SUBPASS_CONTENTS_SECONDARY_COMMAND_BUFFERS; - vkCmdBeginRenderPass((VkCommandBuffer)p_cmd_buffer.id, &render_pass_begin, vk_subpass_contents); + vkCmdBeginRenderPass(command_buffer->vk_command_buffer, &render_pass_begin, vk_subpass_contents); + + command_buffer->active_framebuffer = framebuffer; + command_buffer->active_render_pass = render_pass; #if PRINT_NATIVE_COMMANDS print_line(vformat("vkCmdBeginRenderPass Pass 0x%uX Framebuffer 0x%uX", p_render_pass.id, p_framebuffer.id)); @@ -4789,7 +4936,14 @@ void RenderingDeviceDriverVulkan::command_begin_render_pass(CommandBufferID p_cm } void RenderingDeviceDriverVulkan::command_end_render_pass(CommandBufferID p_cmd_buffer) { - vkCmdEndRenderPass((VkCommandBuffer)p_cmd_buffer.id); + CommandBufferInfo *command_buffer = (CommandBufferInfo *)(p_cmd_buffer.id); + DEV_ASSERT(command_buffer->active_framebuffer != nullptr && "A framebuffer must be active."); + DEV_ASSERT(command_buffer->active_render_pass != nullptr && "A render pass must be active."); + + vkCmdEndRenderPass(command_buffer->vk_command_buffer); + + command_buffer->active_render_pass = nullptr; + command_buffer->active_framebuffer = nullptr; #if PRINT_NATIVE_COMMANDS print_line("vkCmdEndRenderPass"); @@ -4797,11 +4951,13 @@ void RenderingDeviceDriverVulkan::command_end_render_pass(CommandBufferID p_cmd_ } void RenderingDeviceDriverVulkan::command_next_render_subpass(CommandBufferID p_cmd_buffer, CommandBufferType p_cmd_buffer_type) { + const CommandBufferInfo *command_buffer = (const CommandBufferInfo *)p_cmd_buffer.id; VkSubpassContents vk_subpass_contents = p_cmd_buffer_type == COMMAND_BUFFER_TYPE_PRIMARY ? VK_SUBPASS_CONTENTS_INLINE : VK_SUBPASS_CONTENTS_SECONDARY_COMMAND_BUFFERS; - vkCmdNextSubpass((VkCommandBuffer)p_cmd_buffer.id, vk_subpass_contents); + vkCmdNextSubpass(command_buffer->vk_command_buffer, vk_subpass_contents); } void RenderingDeviceDriverVulkan::command_render_set_viewport(CommandBufferID p_cmd_buffer, VectorView p_viewports) { + const CommandBufferInfo *command_buffer = (const CommandBufferInfo *)p_cmd_buffer.id; VkViewport *vk_viewports = ALLOCA_ARRAY(VkViewport, p_viewports.size()); for (uint32_t i = 0; i < p_viewports.size(); i++) { vk_viewports[i] = {}; @@ -4812,14 +4968,17 @@ void RenderingDeviceDriverVulkan::command_render_set_viewport(CommandBufferID p_ vk_viewports[i].minDepth = 0.0f; vk_viewports[i].maxDepth = 1.0f; } - vkCmdSetViewport((VkCommandBuffer)p_cmd_buffer.id, 0, p_viewports.size(), vk_viewports); + vkCmdSetViewport(command_buffer->vk_command_buffer, 0, p_viewports.size(), vk_viewports); } void RenderingDeviceDriverVulkan::command_render_set_scissor(CommandBufferID p_cmd_buffer, VectorView p_scissors) { - vkCmdSetScissor((VkCommandBuffer)p_cmd_buffer.id, 0, p_scissors.size(), (VkRect2D *)p_scissors.ptr()); + const CommandBufferInfo *command_buffer = (const CommandBufferInfo *)p_cmd_buffer.id; + vkCmdSetScissor(command_buffer->vk_command_buffer, 0, p_scissors.size(), (VkRect2D *)p_scissors.ptr()); } void RenderingDeviceDriverVulkan::command_render_clear_attachments(CommandBufferID p_cmd_buffer, VectorView p_attachment_clears, VectorView p_rects) { + const CommandBufferInfo *command_buffer = (const CommandBufferInfo *)p_cmd_buffer.id; + VkClearAttachment *vk_clears = ALLOCA_ARRAY(VkClearAttachment, p_attachment_clears.size()); for (uint32_t i = 0; i < p_attachment_clears.size(); i++) { vk_clears[i] = {}; @@ -4839,17 +4998,19 @@ void RenderingDeviceDriverVulkan::command_render_clear_attachments(CommandBuffer vk_rects[i].layerCount = 1; } - vkCmdClearAttachments((VkCommandBuffer)p_cmd_buffer.id, p_attachment_clears.size(), vk_clears, p_rects.size(), vk_rects); + vkCmdClearAttachments(command_buffer->vk_command_buffer, p_attachment_clears.size(), vk_clears, p_rects.size(), vk_rects); } void RenderingDeviceDriverVulkan::command_bind_render_pipeline(CommandBufferID p_cmd_buffer, PipelineID p_pipeline) { - vkCmdBindPipeline((VkCommandBuffer)p_cmd_buffer.id, VK_PIPELINE_BIND_POINT_GRAPHICS, (VkPipeline)p_pipeline.id); + const CommandBufferInfo *command_buffer = (const CommandBufferInfo *)p_cmd_buffer.id; + vkCmdBindPipeline(command_buffer->vk_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, (VkPipeline)p_pipeline.id); } void RenderingDeviceDriverVulkan::command_bind_render_uniform_set(CommandBufferID p_cmd_buffer, UniformSetID p_uniform_set, ShaderID p_shader, uint32_t p_set_index) { + const CommandBufferInfo *command_buffer = (const CommandBufferInfo *)p_cmd_buffer.id; const ShaderInfo *shader_info = (const ShaderInfo *)p_shader.id; const UniformSetInfo *usi = (const UniformSetInfo *)p_uniform_set.id; - vkCmdBindDescriptorSets((VkCommandBuffer)p_cmd_buffer.id, VK_PIPELINE_BIND_POINT_GRAPHICS, shader_info->vk_pipeline_layout, p_set_index, 1, &usi->vk_descriptor_set, 0, nullptr); + vkCmdBindDescriptorSets(command_buffer->vk_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, shader_info->vk_pipeline_layout, p_set_index, 1, &usi->vk_descriptor_set, 0, nullptr); } void RenderingDeviceDriverVulkan::command_bind_render_uniform_sets(CommandBufferID p_cmd_buffer, VectorView p_uniform_sets, ShaderID p_shader, uint32_t p_first_set_index, uint32_t p_set_count) { @@ -4865,59 +5026,71 @@ void RenderingDeviceDriverVulkan::command_bind_render_uniform_sets(CommandBuffer sets[i] = ((const UniformSetInfo *)p_uniform_sets[i].id)->vk_descriptor_set; } + const CommandBufferInfo *command_buffer = (const CommandBufferInfo *)p_cmd_buffer.id; const ShaderInfo *shader_info = (const ShaderInfo *)p_shader.id; - vkCmdBindDescriptorSets((VkCommandBuffer)p_cmd_buffer.id, VK_PIPELINE_BIND_POINT_GRAPHICS, shader_info->vk_pipeline_layout, p_first_set_index, p_set_count, &sets[0], 0, nullptr); + vkCmdBindDescriptorSets(command_buffer->vk_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, shader_info->vk_pipeline_layout, p_first_set_index, p_set_count, &sets[0], 0, nullptr); } void RenderingDeviceDriverVulkan::command_render_draw(CommandBufferID p_cmd_buffer, uint32_t p_vertex_count, uint32_t p_instance_count, uint32_t p_base_vertex, uint32_t p_first_instance) { - vkCmdDraw((VkCommandBuffer)p_cmd_buffer.id, p_vertex_count, p_instance_count, p_base_vertex, p_first_instance); + const CommandBufferInfo *command_buffer = (const CommandBufferInfo *)p_cmd_buffer.id; + vkCmdDraw(command_buffer->vk_command_buffer, p_vertex_count, p_instance_count, p_base_vertex, p_first_instance); } void RenderingDeviceDriverVulkan::command_render_draw_indexed(CommandBufferID p_cmd_buffer, uint32_t p_index_count, uint32_t p_instance_count, uint32_t p_first_index, int32_t p_vertex_offset, uint32_t p_first_instance) { - vkCmdDrawIndexed((VkCommandBuffer)p_cmd_buffer.id, p_index_count, p_instance_count, p_first_index, p_vertex_offset, p_first_instance); + const CommandBufferInfo *command_buffer = (const CommandBufferInfo *)p_cmd_buffer.id; + vkCmdDrawIndexed(command_buffer->vk_command_buffer, p_index_count, p_instance_count, p_first_index, p_vertex_offset, p_first_instance); } void RenderingDeviceDriverVulkan::command_render_draw_indexed_indirect(CommandBufferID p_cmd_buffer, BufferID p_indirect_buffer, uint64_t p_offset, uint32_t p_draw_count, uint32_t p_stride) { + const CommandBufferInfo *command_buffer = (const CommandBufferInfo *)p_cmd_buffer.id; const BufferInfo *buf_info = (const BufferInfo *)p_indirect_buffer.id; - vkCmdDrawIndexedIndirect((VkCommandBuffer)p_cmd_buffer.id, buf_info->vk_buffer, p_offset, p_draw_count, p_stride); + vkCmdDrawIndexedIndirect(command_buffer->vk_command_buffer, buf_info->vk_buffer, p_offset, p_draw_count, p_stride); } void RenderingDeviceDriverVulkan::command_render_draw_indexed_indirect_count(CommandBufferID p_cmd_buffer, BufferID p_indirect_buffer, uint64_t p_offset, BufferID p_count_buffer, uint64_t p_count_buffer_offset, uint32_t p_max_draw_count, uint32_t p_stride) { + const CommandBufferInfo *command_buffer = (const CommandBufferInfo *)p_cmd_buffer.id; const BufferInfo *indirect_buf_info = (const BufferInfo *)p_indirect_buffer.id; const BufferInfo *count_buf_info = (const BufferInfo *)p_count_buffer.id; - vkCmdDrawIndexedIndirectCount((VkCommandBuffer)p_cmd_buffer.id, indirect_buf_info->vk_buffer, p_offset, count_buf_info->vk_buffer, p_count_buffer_offset, p_max_draw_count, p_stride); + vkCmdDrawIndexedIndirectCount(command_buffer->vk_command_buffer, indirect_buf_info->vk_buffer, p_offset, count_buf_info->vk_buffer, p_count_buffer_offset, p_max_draw_count, p_stride); } void RenderingDeviceDriverVulkan::command_render_draw_indirect(CommandBufferID p_cmd_buffer, BufferID p_indirect_buffer, uint64_t p_offset, uint32_t p_draw_count, uint32_t p_stride) { + const CommandBufferInfo *command_buffer = (const CommandBufferInfo *)p_cmd_buffer.id; const BufferInfo *buf_info = (const BufferInfo *)p_indirect_buffer.id; - vkCmdDrawIndirect((VkCommandBuffer)p_cmd_buffer.id, buf_info->vk_buffer, p_offset, p_draw_count, p_stride); + vkCmdDrawIndirect(command_buffer->vk_command_buffer, buf_info->vk_buffer, p_offset, p_draw_count, p_stride); } void RenderingDeviceDriverVulkan::command_render_draw_indirect_count(CommandBufferID p_cmd_buffer, BufferID p_indirect_buffer, uint64_t p_offset, BufferID p_count_buffer, uint64_t p_count_buffer_offset, uint32_t p_max_draw_count, uint32_t p_stride) { + const CommandBufferInfo *command_buffer = (const CommandBufferInfo *)p_cmd_buffer.id; const BufferInfo *indirect_buf_info = (const BufferInfo *)p_indirect_buffer.id; const BufferInfo *count_buf_info = (const BufferInfo *)p_count_buffer.id; - vkCmdDrawIndirectCount((VkCommandBuffer)p_cmd_buffer.id, indirect_buf_info->vk_buffer, p_offset, count_buf_info->vk_buffer, p_count_buffer_offset, p_max_draw_count, p_stride); + vkCmdDrawIndirectCount(command_buffer->vk_command_buffer, indirect_buf_info->vk_buffer, p_offset, count_buf_info->vk_buffer, p_count_buffer_offset, p_max_draw_count, p_stride); } void RenderingDeviceDriverVulkan::command_render_bind_vertex_buffers(CommandBufferID p_cmd_buffer, uint32_t p_binding_count, const BufferID *p_buffers, const uint64_t *p_offsets) { + const CommandBufferInfo *command_buffer = (const CommandBufferInfo *)p_cmd_buffer.id; + VkBuffer *vk_buffers = ALLOCA_ARRAY(VkBuffer, p_binding_count); for (uint32_t i = 0; i < p_binding_count; i++) { vk_buffers[i] = ((const BufferInfo *)p_buffers[i].id)->vk_buffer; } - vkCmdBindVertexBuffers((VkCommandBuffer)p_cmd_buffer.id, 0, p_binding_count, vk_buffers, p_offsets); + vkCmdBindVertexBuffers(command_buffer->vk_command_buffer, 0, p_binding_count, vk_buffers, p_offsets); } void RenderingDeviceDriverVulkan::command_render_bind_index_buffer(CommandBufferID p_cmd_buffer, BufferID p_buffer, IndexBufferFormat p_format, uint64_t p_offset) { + const CommandBufferInfo *command_buffer = (const CommandBufferInfo *)p_cmd_buffer.id; const BufferInfo *buf_info = (const BufferInfo *)p_buffer.id; - vkCmdBindIndexBuffer((VkCommandBuffer)p_cmd_buffer.id, buf_info->vk_buffer, p_offset, p_format == INDEX_BUFFER_FORMAT_UINT16 ? VK_INDEX_TYPE_UINT16 : VK_INDEX_TYPE_UINT32); + vkCmdBindIndexBuffer(command_buffer->vk_command_buffer, buf_info->vk_buffer, p_offset, p_format == INDEX_BUFFER_FORMAT_UINT16 ? VK_INDEX_TYPE_UINT16 : VK_INDEX_TYPE_UINT32); } void RenderingDeviceDriverVulkan::command_render_set_blend_constants(CommandBufferID p_cmd_buffer, const Color &p_constants) { - vkCmdSetBlendConstants((VkCommandBuffer)p_cmd_buffer.id, p_constants.components); + const CommandBufferInfo *command_buffer = (const CommandBufferInfo *)p_cmd_buffer.id; + vkCmdSetBlendConstants(command_buffer->vk_command_buffer, p_constants.components); } void RenderingDeviceDriverVulkan::command_render_set_line_width(CommandBufferID p_cmd_buffer, float p_width) { - vkCmdSetLineWidth((VkCommandBuffer)p_cmd_buffer.id, p_width); + const CommandBufferInfo *command_buffer = (const CommandBufferInfo *)p_cmd_buffer.id; + vkCmdSetLineWidth(command_buffer->vk_command_buffer, p_width); } // ----- PIPELINE ----- @@ -5189,23 +5362,22 @@ RDD::PipelineID RenderingDeviceDriverVulkan::render_pipeline_create( dynamic_state_create_info.dynamicStateCount = vk_dynamic_states_count; dynamic_state_create_info.pDynamicStates = vk_dynamic_states; - // VRS. - void *graphics_pipeline_nextptr = nullptr; - if (vrs_capabilities.attachment_vrs_supported) { - // If VRS is used, this defines how the different VRS types are combined. - // combinerOps[0] decides how we use the output of pipeline and primitive (drawcall) VRS. - // combinerOps[1] decides how we use the output of combinerOps[0] and our attachment VRS. + if (fsr_capabilities.attachment_supported) { + // Fragment shading rate. + // If FSR is used, this defines how the different FSR types are combined. + // combinerOps[0] decides how we use the output of pipeline and primitive (drawcall) FSR. + // combinerOps[1] decides how we use the output of combinerOps[0] and our attachment FSR. - VkPipelineFragmentShadingRateStateCreateInfoKHR *vrs_create_info = ALLOCA_SINGLE(VkPipelineFragmentShadingRateStateCreateInfoKHR); - *vrs_create_info = {}; - vrs_create_info->sType = VK_STRUCTURE_TYPE_PIPELINE_FRAGMENT_SHADING_RATE_STATE_CREATE_INFO_KHR; - vrs_create_info->fragmentSize = { 4, 4 }; - vrs_create_info->combinerOps[0] = VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR; // We don't use pipeline/primitive VRS so this really doesn't matter. - vrs_create_info->combinerOps[1] = VK_FRAGMENT_SHADING_RATE_COMBINER_OP_REPLACE_KHR; // Always use the outcome of attachment VRS if enabled. + VkPipelineFragmentShadingRateStateCreateInfoKHR *fsr_create_info = ALLOCA_SINGLE(VkPipelineFragmentShadingRateStateCreateInfoKHR); + *fsr_create_info = {}; + fsr_create_info->sType = VK_STRUCTURE_TYPE_PIPELINE_FRAGMENT_SHADING_RATE_STATE_CREATE_INFO_KHR; + fsr_create_info->fragmentSize = { 4, 4 }; + fsr_create_info->combinerOps[0] = VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR; // We don't use pipeline/primitive FSR so this really doesn't matter. + fsr_create_info->combinerOps[1] = VK_FRAGMENT_SHADING_RATE_COMBINER_OP_REPLACE_KHR; // Always use the outcome of attachment FSR if enabled. - graphics_pipeline_nextptr = vrs_create_info; + graphics_pipeline_nextptr = fsr_create_info; } // Finally, pipeline create info. @@ -5245,6 +5417,7 @@ RDD::PipelineID RenderingDeviceDriverVulkan::render_pipeline_create( } } + const RenderPassInfo *render_pass = (const RenderPassInfo *)(p_render_pass.id); pipeline_create_info.pStages = vk_pipeline_stages; pipeline_create_info.pVertexInputState = vertex_input_state_create_info; pipeline_create_info.pInputAssemblyState = &input_assembly_create_info; @@ -5256,7 +5429,7 @@ RDD::PipelineID RenderingDeviceDriverVulkan::render_pipeline_create( pipeline_create_info.pColorBlendState = &color_blend_state_create_info; pipeline_create_info.pDynamicState = &dynamic_state_create_info; pipeline_create_info.layout = shader_info->vk_pipeline_layout; - pipeline_create_info.renderPass = (VkRenderPass)p_render_pass.id; + pipeline_create_info.renderPass = render_pass->vk_render_pass; pipeline_create_info.subpass = p_render_subpass; // --- @@ -5275,13 +5448,15 @@ RDD::PipelineID RenderingDeviceDriverVulkan::render_pipeline_create( // ----- COMMANDS ----- void RenderingDeviceDriverVulkan::command_bind_compute_pipeline(CommandBufferID p_cmd_buffer, PipelineID p_pipeline) { - vkCmdBindPipeline((VkCommandBuffer)p_cmd_buffer.id, VK_PIPELINE_BIND_POINT_COMPUTE, (VkPipeline)p_pipeline.id); + const CommandBufferInfo *command_buffer = (const CommandBufferInfo *)p_cmd_buffer.id; + vkCmdBindPipeline(command_buffer->vk_command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, (VkPipeline)p_pipeline.id); } void RenderingDeviceDriverVulkan::command_bind_compute_uniform_set(CommandBufferID p_cmd_buffer, UniformSetID p_uniform_set, ShaderID p_shader, uint32_t p_set_index) { + const CommandBufferInfo *command_buffer = (const CommandBufferInfo *)p_cmd_buffer.id; const ShaderInfo *shader_info = (const ShaderInfo *)p_shader.id; const UniformSetInfo *usi = (const UniformSetInfo *)p_uniform_set.id; - vkCmdBindDescriptorSets((VkCommandBuffer)p_cmd_buffer.id, VK_PIPELINE_BIND_POINT_COMPUTE, shader_info->vk_pipeline_layout, p_set_index, 1, &usi->vk_descriptor_set, 0, nullptr); + vkCmdBindDescriptorSets(command_buffer->vk_command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, shader_info->vk_pipeline_layout, p_set_index, 1, &usi->vk_descriptor_set, 0, nullptr); } void RenderingDeviceDriverVulkan::command_bind_compute_uniform_sets(CommandBufferID p_cmd_buffer, VectorView p_uniform_sets, ShaderID p_shader, uint32_t p_first_set_index, uint32_t p_set_count) { @@ -5297,17 +5472,20 @@ void RenderingDeviceDriverVulkan::command_bind_compute_uniform_sets(CommandBuffe sets[i] = ((const UniformSetInfo *)p_uniform_sets[i].id)->vk_descriptor_set; } + const CommandBufferInfo *command_buffer = (const CommandBufferInfo *)p_cmd_buffer.id; const ShaderInfo *shader_info = (const ShaderInfo *)p_shader.id; - vkCmdBindDescriptorSets((VkCommandBuffer)p_cmd_buffer.id, VK_PIPELINE_BIND_POINT_COMPUTE, shader_info->vk_pipeline_layout, p_first_set_index, p_set_count, &sets[0], 0, nullptr); + vkCmdBindDescriptorSets(command_buffer->vk_command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, shader_info->vk_pipeline_layout, p_first_set_index, p_set_count, &sets[0], 0, nullptr); } void RenderingDeviceDriverVulkan::command_compute_dispatch(CommandBufferID p_cmd_buffer, uint32_t p_x_groups, uint32_t p_y_groups, uint32_t p_z_groups) { - vkCmdDispatch((VkCommandBuffer)p_cmd_buffer.id, p_x_groups, p_y_groups, p_z_groups); + const CommandBufferInfo *command_buffer = (const CommandBufferInfo *)p_cmd_buffer.id; + vkCmdDispatch(command_buffer->vk_command_buffer, p_x_groups, p_y_groups, p_z_groups); } void RenderingDeviceDriverVulkan::command_compute_dispatch_indirect(CommandBufferID p_cmd_buffer, BufferID p_indirect_buffer, uint64_t p_offset) { + const CommandBufferInfo *command_buffer = (const CommandBufferInfo *)p_cmd_buffer.id; const BufferInfo *buf_info = (const BufferInfo *)p_indirect_buffer.id; - vkCmdDispatchIndirect((VkCommandBuffer)p_cmd_buffer.id, buf_info->vk_buffer, p_offset); + vkCmdDispatchIndirect(command_buffer->vk_command_buffer, buf_info->vk_buffer, p_offset); } // ----- PIPELINE ----- @@ -5406,11 +5584,13 @@ uint64_t RenderingDeviceDriverVulkan::timestamp_query_result_to_time(uint64_t p_ } void RenderingDeviceDriverVulkan::command_timestamp_query_pool_reset(CommandBufferID p_cmd_buffer, QueryPoolID p_pool_id, uint32_t p_query_count) { - vkCmdResetQueryPool((VkCommandBuffer)p_cmd_buffer.id, (VkQueryPool)p_pool_id.id, 0, p_query_count); + const CommandBufferInfo *command_buffer = (const CommandBufferInfo *)p_cmd_buffer.id; + vkCmdResetQueryPool(command_buffer->vk_command_buffer, (VkQueryPool)p_pool_id.id, 0, p_query_count); } void RenderingDeviceDriverVulkan::command_timestamp_write(CommandBufferID p_cmd_buffer, QueryPoolID p_pool_id, uint32_t p_index) { - vkCmdWriteTimestamp((VkCommandBuffer)p_cmd_buffer.id, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, (VkQueryPool)p_pool_id.id, p_index); + const CommandBufferInfo *command_buffer = (const CommandBufferInfo *)p_cmd_buffer.id; + vkCmdWriteTimestamp(command_buffer->vk_command_buffer, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, (VkQueryPool)p_pool_id.id, p_index); } /****************/ @@ -5418,6 +5598,7 @@ void RenderingDeviceDriverVulkan::command_timestamp_write(CommandBufferID p_cmd_ /****************/ void RenderingDeviceDriverVulkan::command_begin_label(CommandBufferID p_cmd_buffer, const char *p_label_name, const Color &p_color) { + const CommandBufferInfo *command_buffer = (const CommandBufferInfo *)p_cmd_buffer.id; const RenderingContextDriverVulkan::Functions &functions = context_driver->functions_get(); if (!functions.CmdBeginDebugUtilsLabelEXT) { if (functions.CmdDebugMarkerBeginEXT) { @@ -5430,7 +5611,7 @@ void RenderingDeviceDriverVulkan::command_begin_label(CommandBufferID p_cmd_buff marker.color[1] = p_color[1]; marker.color[2] = p_color[2]; marker.color[3] = p_color[3]; - functions.CmdDebugMarkerBeginEXT((VkCommandBuffer)p_cmd_buffer.id, &marker); + functions.CmdDebugMarkerBeginEXT(command_buffer->vk_command_buffer, &marker); } return; } @@ -5442,19 +5623,20 @@ void RenderingDeviceDriverVulkan::command_begin_label(CommandBufferID p_cmd_buff label.color[1] = p_color[1]; label.color[2] = p_color[2]; label.color[3] = p_color[3]; - functions.CmdBeginDebugUtilsLabelEXT((VkCommandBuffer)p_cmd_buffer.id, &label); + functions.CmdBeginDebugUtilsLabelEXT(command_buffer->vk_command_buffer, &label); } void RenderingDeviceDriverVulkan::command_end_label(CommandBufferID p_cmd_buffer) { + const CommandBufferInfo *command_buffer = (const CommandBufferInfo *)p_cmd_buffer.id; const RenderingContextDriverVulkan::Functions &functions = context_driver->functions_get(); if (!functions.CmdEndDebugUtilsLabelEXT) { if (functions.CmdDebugMarkerEndEXT) { // Debug marker extensions. - functions.CmdDebugMarkerEndEXT((VkCommandBuffer)p_cmd_buffer.id); + functions.CmdDebugMarkerEndEXT(command_buffer->vk_command_buffer); } return; } - functions.CmdEndDebugUtilsLabelEXT((VkCommandBuffer)p_cmd_buffer.id); + functions.CmdEndDebugUtilsLabelEXT(command_buffer->vk_command_buffer); } /****************/ @@ -5466,6 +5648,7 @@ void RenderingDeviceDriverVulkan::command_insert_breadcrumb(CommandBufferID p_cm return; } + const CommandBufferInfo *command_buffer = (const CommandBufferInfo *)p_cmd_buffer.id; if (Engine::get_singleton()->is_accurate_breadcrumbs_enabled()) { // Force a full barrier so commands are not executed in parallel. // This will mean that the last breadcrumb to see was actually the @@ -5505,7 +5688,7 @@ void RenderingDeviceDriverVulkan::command_insert_breadcrumb(CommandBufferID p_cm VK_ACCESS_HOST_WRITE_BIT; vkCmdPipelineBarrier( - (VkCommandBuffer)p_cmd_buffer.id, + command_buffer->vk_command_buffer, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, 1u, &memoryBarrier, 0u, nullptr, 0u, nullptr); @@ -5513,8 +5696,8 @@ void RenderingDeviceDriverVulkan::command_insert_breadcrumb(CommandBufferID p_cm // We write to a circular buffer. If you're getting barrier sync errors here, // increase the value of BREADCRUMB_BUFFER_ENTRIES. - vkCmdFillBuffer((VkCommandBuffer)p_cmd_buffer.id, ((BufferInfo *)breadcrumb_buffer.id)->vk_buffer, breadcrumb_offset, sizeof(uint32_t), breadcrumb_id++); - vkCmdFillBuffer((VkCommandBuffer)p_cmd_buffer.id, ((BufferInfo *)breadcrumb_buffer.id)->vk_buffer, breadcrumb_offset + sizeof(uint32_t), sizeof(uint32_t), p_data); + vkCmdFillBuffer(command_buffer->vk_command_buffer, ((BufferInfo *)breadcrumb_buffer.id)->vk_buffer, breadcrumb_offset, sizeof(uint32_t), breadcrumb_id++); + vkCmdFillBuffer(command_buffer->vk_command_buffer, ((BufferInfo *)breadcrumb_buffer.id)->vk_buffer, breadcrumb_offset + sizeof(uint32_t), sizeof(uint32_t), p_data); breadcrumb_offset += sizeof(uint32_t) * 2u; if (breadcrumb_offset >= BREADCRUMB_BUFFER_ENTRIES * sizeof(uint32_t) * 2u) { breadcrumb_offset = 0u; @@ -5933,14 +6116,6 @@ uint64_t RenderingDeviceDriverVulkan::limit_get(Limit p_limit) { return subgroup_capabilities.supported_stages_flags_rd(); case LIMIT_SUBGROUP_OPERATIONS: return subgroup_capabilities.supported_operations_flags_rd(); - case LIMIT_VRS_TEXEL_WIDTH: - return vrs_capabilities.texel_size.x; - case LIMIT_VRS_TEXEL_HEIGHT: - return vrs_capabilities.texel_size.y; - case LIMIT_VRS_MAX_FRAGMENT_WIDTH: - return vrs_capabilities.max_fragment_size.x; - case LIMIT_VRS_MAX_FRAGMENT_HEIGHT: - return vrs_capabilities.max_fragment_size.y; case LIMIT_MAX_SHADER_VARYINGS: // The Vulkan spec states that built in varyings like gl_FragCoord should count against this, but in // practice, that doesn't seem to be the case. The validation layers don't even complain. @@ -5967,12 +6142,8 @@ uint64_t RenderingDeviceDriverVulkan::api_trait_get(ApiTrait p_trait) { bool RenderingDeviceDriverVulkan::has_feature(Features p_feature) { switch (p_feature) { - case SUPPORTS_MULTIVIEW: - return multiview_capabilities.is_supported && multiview_capabilities.max_view_count > 1; case SUPPORTS_FSR_HALF_FLOAT: return shader_capabilities.shader_float16_is_supported && physical_device_features.shaderInt16 && storage_buffer_capabilities.storage_buffer_16_bit_access_is_supported; - case SUPPORTS_ATTACHMENT_VRS: - return vrs_capabilities.attachment_vrs_supported && physical_device_features.shaderStorageImageExtendedFormats; case SUPPORTS_FRAGMENT_SHADER_WITH_ONLY_SIDE_EFFECTS: return true; case SUPPORTS_BUFFER_DEVICE_ADDRESS: @@ -5986,6 +6157,14 @@ const RDD::MultiviewCapabilities &RenderingDeviceDriverVulkan::get_multiview_cap return multiview_capabilities; } +const RDD::FragmentShadingRateCapabilities &RenderingDeviceDriverVulkan::get_fragment_shading_rate_capabilities() { + return fsr_capabilities; +} + +const RDD::FragmentDensityMapCapabilities &RenderingDeviceDriverVulkan::get_fragment_density_map_capabilities() { + return fdm_capabilities; +} + String RenderingDeviceDriverVulkan::get_api_name() const { return "Vulkan"; } diff --git a/drivers/vulkan/rendering_device_driver_vulkan.h b/drivers/vulkan/rendering_device_driver_vulkan.h index ea12450a4cd..d36ad7984d0 100644 --- a/drivers/vulkan/rendering_device_driver_vulkan.h +++ b/drivers/vulkan/rendering_device_driver_vulkan.h @@ -54,6 +54,9 @@ class RenderingDeviceDriverVulkan : public RenderingDeviceDriver { struct CommandQueue; struct SwapChain; + struct CommandBufferInfo; + struct RenderPassInfo; + struct Framebuffer; struct Queue { VkQueue queue = VK_NULL_HANDLE; @@ -76,18 +79,6 @@ class RenderingDeviceDriverVulkan : public RenderingDeviceDriver { String supported_operations_desc() const; }; - struct VRSCapabilities { - bool pipeline_vrs_supported = false; // We can specify our fragment rate on a pipeline level. - bool primitive_vrs_supported = false; // We can specify our fragment rate on each drawcall. - bool attachment_vrs_supported = false; // We can provide a density map attachment on our framebuffer. - - Size2i min_texel_size; - Size2i max_texel_size; - Size2i max_fragment_size; - - Size2i texel_size; // The texel size we'll use - }; - struct ShaderCapabilities { bool shader_float16_is_supported = false; bool shader_int8_is_supported = false; @@ -107,6 +98,7 @@ class RenderingDeviceDriverVulkan : public RenderingDeviceDriver { PFN_vkAcquireNextImageKHR AcquireNextImageKHR = nullptr; PFN_vkQueuePresentKHR QueuePresentKHR = nullptr; PFN_vkCreateRenderPass2KHR CreateRenderPass2KHR = nullptr; + PFN_vkCmdEndRenderPass2KHR EndRenderPass2KHR = nullptr; // Debug marker extensions. PFN_vkCmdDebugMarkerBeginEXT CmdDebugMarkerBeginEXT = nullptr; @@ -135,7 +127,8 @@ class RenderingDeviceDriverVulkan : public RenderingDeviceDriver { RDD::Capabilities device_capabilities; SubgroupCapabilities subgroup_capabilities; MultiviewCapabilities multiview_capabilities; - VRSCapabilities vrs_capabilities; + FragmentShadingRateCapabilities fsr_capabilities; + FragmentDensityMapCapabilities fdm_capabilities; ShaderCapabilities shader_capabilities; StorageBufferCapabilities storage_buffer_capabilities; bool buffer_device_address_support = false; @@ -155,6 +148,7 @@ class RenderingDeviceDriverVulkan : public RenderingDeviceDriver { Error _initialize_device_extensions(); Error _check_device_features(); Error _check_device_capabilities(); + void _choose_vrs_capabilities(); Error _add_queue_create_info(LocalVector &r_queue_create_info); Error _initialize_device(const LocalVector &p_queue_create_info); Error _initialize_allocator(); @@ -332,6 +326,7 @@ private: struct CommandPool { VkCommandPool vk_command_pool = VK_NULL_HANDLE; CommandBufferType buffer_type = COMMAND_BUFFER_TYPE_PRIMARY; + LocalVector command_buffers_created; }; public: @@ -339,8 +334,16 @@ public: virtual bool command_pool_reset(CommandPoolID p_cmd_pool) override final; virtual void command_pool_free(CommandPoolID p_cmd_pool) override final; +private: // ----- BUFFER ----- + struct CommandBufferInfo { + VkCommandBuffer vk_command_buffer = VK_NULL_HANDLE; + Framebuffer *active_framebuffer = nullptr; + RenderPassInfo *active_render_pass = nullptr; + }; + +public: virtual CommandBufferID command_buffer_create(CommandPoolID p_cmd_pool) override final; virtual bool command_buffer_begin(CommandBufferID p_cmd_buffer) override final; virtual bool command_buffer_begin_secondary(CommandBufferID p_cmd_buffer, RenderPassID p_render_pass, uint32_t p_subpass, FramebufferID p_framebuffer) override final; @@ -382,6 +385,7 @@ public: virtual void swap_chain_set_max_fps(SwapChainID p_swap_chain, int p_max_fps) override final; virtual void swap_chain_free(SwapChainID p_swap_chain) override final; +private: /*********************/ /**** FRAMEBUFFER ****/ /*********************/ @@ -389,12 +393,16 @@ public: struct Framebuffer { VkFramebuffer vk_framebuffer = VK_NULL_HANDLE; + // Only filled in if the framebuffer uses a fragment density map with offsets. Unused otherwise. + uint32_t fragment_density_map_offsets_layers = 0; + // Only filled in by a framebuffer created by a swap chain. Unused otherwise. VkImage swap_chain_image = VK_NULL_HANDLE; VkImageSubresourceRange swap_chain_image_subresource_range = {}; bool swap_chain_acquired = false; }; +public: virtual FramebufferID framebuffer_create(RenderPassID p_render_pass, VectorView p_attachments, uint32_t p_width, uint32_t p_height) override final; virtual void framebuffer_free(FramebufferID p_framebuffer) override final; @@ -572,9 +580,16 @@ public: /**** RENDERING ****/ /*******************/ +private: // ----- SUBPASS ----- - virtual RenderPassID render_pass_create(VectorView p_attachments, VectorView p_subpasses, VectorView p_subpass_dependencies, uint32_t p_view_count) override final; + struct RenderPassInfo { + VkRenderPass vk_render_pass = VK_NULL_HANDLE; + bool uses_fragment_density_map_offsets = false; + }; + +public: + virtual RenderPassID render_pass_create(VectorView p_attachments, VectorView p_subpasses, VectorView p_subpass_dependencies, uint32_t p_view_count, AttachmentReference p_fragment_density_map_attachment) override final; virtual void render_pass_free(RenderPassID p_render_pass) override final; // ----- COMMANDS ----- @@ -692,6 +707,8 @@ public: virtual uint64_t api_trait_get(ApiTrait p_trait) override final; virtual bool has_feature(Features p_feature) override final; virtual const MultiviewCapabilities &get_multiview_capabilities() override final; + virtual const FragmentShadingRateCapabilities &get_fragment_shading_rate_capabilities() override final; + virtual const FragmentDensityMapCapabilities &get_fragment_density_map_capabilities() override final; virtual String get_api_name() const override final; virtual String get_api_version() const override final; virtual String get_pipeline_cache_uuid() const override final; @@ -709,7 +726,9 @@ private: TextureInfo, VertexFormatInfo, ShaderInfo, - UniformSetInfo>; + UniformSetInfo, + RenderPassInfo, + CommandBufferInfo>; PagedAllocator resources_allocator; /******************/ diff --git a/servers/rendering/renderer_rd/effects/vrs.cpp b/servers/rendering/renderer_rd/effects/vrs.cpp index 9cc22f6f5ef..cbeefdd4d18 100644 --- a/servers/rendering/renderer_rd/effects/vrs.cpp +++ b/servers/rendering/renderer_rd/effects/vrs.cpp @@ -44,6 +44,8 @@ VRS::VRS() { Vector vrs_modes; vrs_modes.push_back("\n"); // VRS_DEFAULT vrs_modes.push_back("\n#define USE_MULTIVIEW\n"); // VRS_MULTIVIEW + vrs_modes.push_back("\n#define SPLIT_RG\n"); // VRS_RG + vrs_modes.push_back("\n#define SPLIT_RG\n#define USE_MULTIVIEW\n"); // VRS_RG_MULTIVIEW vrs_shader.shader.initialize(vrs_modes); @@ -80,14 +82,16 @@ void VRS::copy_vrs(RID p_source_rd_texture, RID p_dest_framebuffer, bool p_multi RD::Uniform u_source_rd_texture(RD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE, 0, Vector({ default_sampler, p_source_rd_texture })); + int mode = 0; VRSPushConstant push_constant = {}; - - int mode = p_multiview ? VRS_MULTIVIEW : VRS_DEFAULT; - - // Set maximum texel factor based on maximum fragment size, some GPUs do not support 8x8 (fragment shading rate approach). - if (MIN(RD::get_singleton()->limit_get(RD::LIMIT_VRS_MAX_FRAGMENT_WIDTH), RD::get_singleton()->limit_get(RD::LIMIT_VRS_MAX_FRAGMENT_HEIGHT)) > 4) { - push_constant.max_texel_factor = 3.0; + bool uses_rg_format = RD::get_singleton()->vrs_get_format() == RD::DATA_FORMAT_R8G8_UNORM; + if (uses_rg_format) { + mode = p_multiview ? VRS_RG_MULTIVIEW : VRS_RG; } else { + mode = p_multiview ? VRS_MULTIVIEW : VRS_DEFAULT; + + // Default to 4x4 as it's not possible to query the max fragment size from RenderingDevice. This can be improved to use the largest size + // available if this code is moved over to RenderingDevice at some point. push_constant.max_texel_factor = 2.0; } @@ -103,18 +107,8 @@ void VRS::copy_vrs(RID p_source_rd_texture, RID p_dest_framebuffer, bool p_multi } Size2i VRS::get_vrs_texture_size(const Size2i p_base_size) const { - int32_t texel_width = RD::get_singleton()->limit_get(RD::LIMIT_VRS_TEXEL_WIDTH); - int32_t texel_height = RD::get_singleton()->limit_get(RD::LIMIT_VRS_TEXEL_HEIGHT); - - int width = p_base_size.x / texel_width; - if (p_base_size.x % texel_width != 0) { - width++; - } - int height = p_base_size.y / texel_height; - if (p_base_size.y % texel_height != 0) { - height++; - } - return Size2i(width, height); + Size2i vrs_texel_size = RD::get_singleton()->vrs_get_texel_size(); + return Size2i((p_base_size.x + vrs_texel_size.x - 1) / vrs_texel_size.x, (p_base_size.y + vrs_texel_size.y - 1) / vrs_texel_size.y); } void VRS::update_vrs_texture(RID p_vrs_fb, RID p_render_target) { diff --git a/servers/rendering/renderer_rd/effects/vrs.h b/servers/rendering/renderer_rd/effects/vrs.h index bac86898f73..0c13e0a44b8 100644 --- a/servers/rendering/renderer_rd/effects/vrs.h +++ b/servers/rendering/renderer_rd/effects/vrs.h @@ -41,6 +41,8 @@ private: enum VRSMode { VRS_DEFAULT, VRS_MULTIVIEW, + VRS_RG, + VRS_RG_MULTIVIEW, VRS_MAX, }; diff --git a/servers/rendering/renderer_rd/forward_clustered/render_forward_clustered.cpp b/servers/rendering/renderer_rd/forward_clustered/render_forward_clustered.cpp index 7770013bcd8..eba29e096bd 100644 --- a/servers/rendering/renderer_rd/forward_clustered/render_forward_clustered.cpp +++ b/servers/rendering/renderer_rd/forward_clustered/render_forward_clustered.cpp @@ -167,7 +167,6 @@ RID RenderForwardClustered::RenderBufferDataForwardClustered::get_color_only_fb( if (render_buffers->has_texture(RB_SCOPE_VRS, RB_TEXTURE)) { RID vrs_texture = render_buffers->get_texture(RB_SCOPE_VRS, RB_TEXTURE); - return FramebufferCacheRD::get_singleton()->get_cache_multiview(render_buffers->get_view_count(), color, depth, vrs_texture); } else { return FramebufferCacheRD::get_singleton()->get_cache_multiview(render_buffers->get_view_count(), color, depth); @@ -197,7 +196,6 @@ RID RenderForwardClustered::RenderBufferDataForwardClustered::get_color_pass_fb( if (render_buffers->has_texture(RB_SCOPE_VRS, RB_TEXTURE)) { RID vrs_texture = render_buffers->get_texture(RB_SCOPE_VRS, RB_TEXTURE); - return FramebufferCacheRD::get_singleton()->get_cache_multiview(v_count, color, specular, velocity_buffer, depth, vrs_texture); } else { return FramebufferCacheRD::get_singleton()->get_cache_multiview(v_count, color, specular, velocity_buffer, depth); diff --git a/servers/rendering/renderer_rd/forward_mobile/render_forward_mobile.cpp b/servers/rendering/renderer_rd/forward_mobile/render_forward_mobile.cpp index a4b241c26ae..bcba8f58d0f 100644 --- a/servers/rendering/renderer_rd/forward_mobile/render_forward_mobile.cpp +++ b/servers/rendering/renderer_rd/forward_mobile/render_forward_mobile.cpp @@ -208,9 +208,6 @@ RID RenderForwardMobile::RenderBufferDataForwardMobile::get_color_fbs(Framebuffe RD::FramebufferPass pass; pass.color_attachments.push_back(0); pass.depth_attachment = 1; - if (vrs_texture.is_valid()) { - pass.vrs_attachment = 2; - } if (use_msaa) { // Add resolve @@ -231,9 +228,6 @@ RID RenderForwardMobile::RenderBufferDataForwardMobile::get_color_fbs(Framebuffe RD::FramebufferPass pass; pass.color_attachments.push_back(0); pass.depth_attachment = 1; - if (vrs_texture.is_valid()) { - pass.vrs_attachment = 2; - } if (use_msaa) { // add resolve @@ -2905,6 +2899,7 @@ static RD::FramebufferFormatID _get_color_framebuffer_format_for_pipeline(RD::Da attachments.push_back(attachment); if (p_vrs) { + // VRS attachment. attachment.samples = RD::TEXTURE_SAMPLES_1; attachment.format = RenderSceneBuffersRD::get_vrs_format(); attachment.usage_flags = RenderSceneBuffersRD::get_vrs_usage_bits(); @@ -2926,10 +2921,6 @@ static RD::FramebufferFormatID _get_color_framebuffer_format_for_pipeline(RD::Da pass.color_attachments.push_back(0); pass.depth_attachment = 1; - if (p_vrs) { - pass.vrs_attachment = 2; - } - if (multisampling) { pass.resolve_attachments.push_back(attachments.size() - 1); } @@ -2955,7 +2946,8 @@ static RD::FramebufferFormatID _get_color_framebuffer_format_for_pipeline(RD::Da passes.push_back(blit_pass); } - return RD::get_singleton()->framebuffer_format_create_multipass(attachments, passes, p_view_count); + int32_t vrs_attachment = p_vrs ? 2 : -1; + return RD::get_singleton()->framebuffer_format_create_multipass(attachments, passes, p_view_count, vrs_attachment); } static RD::FramebufferFormatID _get_reflection_probe_color_framebuffer_format_for_pipeline() { diff --git a/servers/rendering/renderer_rd/framebuffer_cache_rd.h b/servers/rendering/renderer_rd/framebuffer_cache_rd.h index abb2a5808de..e245e3bd0d0 100644 --- a/servers/rendering/renderer_rd/framebuffer_cache_rd.h +++ b/servers/rendering/renderer_rd/framebuffer_cache_rd.h @@ -59,7 +59,6 @@ class FramebufferCacheRD : public Object { static _FORCE_INLINE_ uint32_t _hash_pass(const RD::FramebufferPass &p, uint32_t h) { h = hash_murmur3_one_32(p.depth_attachment, h); - h = hash_murmur3_one_32(p.vrs_attachment, h); h = hash_murmur3_one_32(p.color_attachments.size(), h); for (int i = 0; i < p.color_attachments.size(); i++) { @@ -84,10 +83,6 @@ class FramebufferCacheRD : public Object { return false; } - if (a.vrs_attachment != b.vrs_attachment) { - return false; - } - if (a.color_attachments.size() != b.color_attachments.size()) { return false; } diff --git a/servers/rendering/renderer_rd/shaders/effects/vrs.glsl b/servers/rendering/renderer_rd/shaders/effects/vrs.glsl index b4fcaa46734..1d3463dd2bf 100644 --- a/servers/rendering/renderer_rd/shaders/effects/vrs.glsl +++ b/servers/rendering/renderer_rd/shaders/effects/vrs.glsl @@ -59,7 +59,11 @@ layout(location = 0) in vec2 uv_interp; layout(set = 0, binding = 0) uniform sampler2D source_color; #endif /* USE_MULTIVIEW */ +#ifdef SPLIT_RG +layout(location = 0) out vec2 frag_color; +#else layout(location = 0) out uint frag_color; +#endif layout(push_constant, std430) uniform Params { float max_texel_factor; @@ -79,6 +83,10 @@ void main() { // Input is standardized. R for X, G for Y, 0.0 (0) = 1, 0.33 (85) = 2, 0.66 (170) = 3, 1.0 (255) = 8 vec4 color = textureLod(source_color, uv, 0.0); +#ifdef SPLIT_RG + // Density map for VRS according to VK_EXT_fragment_density_map, we can use as is. + frag_color = max(vec2(1.0f) - color.rg, vec2(1.0f / 255.0f)); +#else // Output image shading rate image for VRS according to VK_KHR_fragment_shading_rate. color.r = clamp(floor(color.r * params.max_texel_factor + 0.1), 0.0, params.max_texel_factor); color.g = clamp(floor(color.g * params.max_texel_factor + 0.1), 0.0, params.max_texel_factor); @@ -94,4 +102,5 @@ void main() { // Encode to frag_color; frag_color = int(color.r + 0.1) << 2; frag_color += int(color.g + 0.1); +#endif } diff --git a/servers/rendering/renderer_rd/storage_rd/render_scene_buffers_rd.cpp b/servers/rendering/renderer_rd/storage_rd/render_scene_buffers_rd.cpp index 26dec52d24f..a286af91db3 100644 --- a/servers/rendering/renderer_rd/storage_rd/render_scene_buffers_rd.cpp +++ b/servers/rendering/renderer_rd/storage_rd/render_scene_buffers_rd.cpp @@ -756,9 +756,9 @@ uint32_t RenderSceneBuffersRD::get_velocity_usage_bits(bool p_resolve, bool p_ms } RD::DataFormat RenderSceneBuffersRD::get_vrs_format() { - return RD::DATA_FORMAT_R8_UINT; + return RD::get_singleton()->vrs_get_format(); } uint32_t RenderSceneBuffersRD::get_vrs_usage_bits() { - return RD::TEXTURE_USAGE_COLOR_ATTACHMENT_BIT | RD::TEXTURE_USAGE_VRS_ATTACHMENT_BIT | RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_STORAGE_BIT; + return RD::TEXTURE_USAGE_COLOR_ATTACHMENT_BIT | RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_STORAGE_BIT | RD::TEXTURE_USAGE_VRS_ATTACHMENT_BIT; } diff --git a/servers/rendering/renderer_rd/storage_rd/texture_storage.cpp b/servers/rendering/renderer_rd/storage_rd/texture_storage.cpp index 6808ebb0637..d041df236f2 100644 --- a/servers/rendering/renderer_rd/storage_rd/texture_storage.cpp +++ b/servers/rendering/renderer_rd/storage_rd/texture_storage.cpp @@ -482,17 +482,15 @@ TextureStorage::TextureStorage() { } } - { //create default VRS - + { + // Create default VRS texture. + bool vrs_supported = RD::get_singleton()->has_feature(RD::SUPPORTS_ATTACHMENT_VRS); RD::TextureFormat tformat; - tformat.format = RD::DATA_FORMAT_R8_UINT; + tformat.format = vrs_supported ? RD::get_singleton()->vrs_get_format() : RD::DATA_FORMAT_R8_UINT; tformat.width = 4; tformat.height = 4; - tformat.usage_bits = RD::TEXTURE_USAGE_COLOR_ATTACHMENT_BIT | RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_STORAGE_BIT | RD::TEXTURE_USAGE_CAN_UPDATE_BIT | RD::TEXTURE_USAGE_VRS_ATTACHMENT_BIT; + tformat.usage_bits = RD::TEXTURE_USAGE_COLOR_ATTACHMENT_BIT | RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_STORAGE_BIT | RD::TEXTURE_USAGE_CAN_UPDATE_BIT | (vrs_supported ? RD::TEXTURE_USAGE_VRS_ATTACHMENT_BIT : 0); tformat.texture_type = RD::TEXTURE_TYPE_2D; - if (!RD::get_singleton()->has_feature(RD::SUPPORTS_ATTACHMENT_VRS)) { - tformat.usage_bits = RD::TEXTURE_USAGE_COLOR_ATTACHMENT_BIT | RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_CAN_UPDATE_BIT; - } Vector pv; pv.resize(4 * 4); diff --git a/servers/rendering/rendering_device.cpp b/servers/rendering/rendering_device.cpp index 65fd680c44e..6d6d710b499 100644 --- a/servers/rendering/rendering_device.cpp +++ b/servers/rendering/rendering_device.cpp @@ -951,22 +951,38 @@ RID RenderingDevice::texture_create(const TextureFormat &p_format, const Texture ERR_FAIL_COND_V_MSG(required_mipmaps < format.mipmaps, RID(), "Too many mipmaps requested for texture format and dimensions (" + itos(format.mipmaps) + "), maximum allowed: (" + itos(required_mipmaps) + ")."); - uint32_t forced_usage_bits = 0; - if (p_data.size()) { - ERR_FAIL_COND_V_MSG(p_data.size() != (int)format.array_layers, RID(), - "Default supplied data for image format is of invalid length (" + itos(p_data.size()) + "), should be (" + itos(format.array_layers) + ")."); + Vector> data = p_data; + bool immediate_flush = false; + + // If this is a VRS texture, we make sure that it is created with valid initial data. This prevents a crash on Qualcomm Snapdragon XR2 Gen 1 + // (used in Quest 2, Quest Pro, Pico 4, HTC Vive XR Elite and others) where the driver will read the texture before we've had time to finish updating it. + if (data.is_empty() && (p_format.usage_bits & TEXTURE_USAGE_VRS_ATTACHMENT_BIT)) { + immediate_flush = true; + for (uint32_t i = 0; i < format.array_layers; i++) { + uint32_t required_size = get_image_format_required_size(format.format, format.width, format.height, format.depth, format.mipmaps); + Vector layer; + layer.resize(required_size); + layer.fill(255); + data.push_back(layer); + } + } + + uint32_t forced_usage_bits = _texture_vrs_method_to_usage_bits(); + if (data.size()) { + ERR_FAIL_COND_V_MSG(data.size() != (int)format.array_layers, RID(), + "Default supplied data for image format is of invalid length (" + itos(data.size()) + "), should be (" + itos(format.array_layers) + ")."); for (uint32_t i = 0; i < format.array_layers; i++) { uint32_t required_size = get_image_format_required_size(format.format, format.width, format.height, format.depth, format.mipmaps); - ERR_FAIL_COND_V_MSG((uint32_t)p_data[i].size() != required_size, RID(), - "Data for slice index " + itos(i) + " (mapped to layer " + itos(i) + ") differs in size (supplied: " + itos(p_data[i].size()) + ") than what is required by the format (" + itos(required_size) + ")."); + ERR_FAIL_COND_V_MSG((uint32_t)data[i].size() != required_size, RID(), + "Data for slice index " + itos(i) + " (mapped to layer " + itos(i) + ") differs in size (supplied: " + itos(data[i].size()) + ") than what is required by the format (" + itos(required_size) + ")."); } ERR_FAIL_COND_V_MSG(format.usage_bits & TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT, RID(), "Textures created as depth attachments can't be initialized with data directly. Use RenderingDevice::texture_update() instead."); if (!(format.usage_bits & TEXTURE_USAGE_CAN_UPDATE_BIT)) { - forced_usage_bits = TEXTURE_USAGE_CAN_UPDATE_BIT; + forced_usage_bits |= TEXTURE_USAGE_CAN_UPDATE_BIT; } } @@ -993,7 +1009,7 @@ RID RenderingDevice::texture_create(const TextureFormat &p_format, const Texture ERR_FAIL_V_MSG(RID(), "Format " + format_text + " does not support usage as atomic storage image."); } if ((format.usage_bits & TEXTURE_USAGE_VRS_ATTACHMENT_BIT) && !supported_usage.has_flag(TEXTURE_USAGE_VRS_ATTACHMENT_BIT)) { - ERR_FAIL_V_MSG(RID(), "Format " + format_text + " does not support usage as VRS attachment."); + ERR_FAIL_V_MSG(RID(), "Format " + format_text + " does not support usage as variable shading rate attachment."); } } @@ -1035,7 +1051,7 @@ RID RenderingDevice::texture_create(const TextureFormat &p_format, const Texture texture.usage_flags = format.usage_bits & ~forced_usage_bits; texture.samples = format.samples; texture.allowed_shared_formats = format.shareable_formats; - texture.has_initial_data = !p_data.is_empty(); + texture.has_initial_data = !data.is_empty(); if ((format.usage_bits & TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT)) { texture.read_aspect_flags.set_flag(RDD::TEXTURE_ASPECT_DEPTH_BIT); @@ -1051,8 +1067,8 @@ RID RenderingDevice::texture_create(const TextureFormat &p_format, const Texture texture.bound = false; // Textures are only assumed to be immutable if they have initial data and none of the other bits that indicate write usage are enabled. - bool texture_mutable_by_default = texture.usage_flags & (TEXTURE_USAGE_COLOR_ATTACHMENT_BIT | TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | TEXTURE_USAGE_STORAGE_BIT | TEXTURE_USAGE_STORAGE_ATOMIC_BIT | TEXTURE_USAGE_VRS_ATTACHMENT_BIT); - if (p_data.is_empty() || texture_mutable_by_default) { + bool texture_mutable_by_default = texture.usage_flags & (TEXTURE_USAGE_COLOR_ATTACHMENT_BIT | TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | TEXTURE_USAGE_STORAGE_BIT | TEXTURE_USAGE_STORAGE_ATOMIC_BIT); + if (data.is_empty() || texture_mutable_by_default) { _texture_make_mutable(&texture, RID()); } @@ -1063,9 +1079,9 @@ RID RenderingDevice::texture_create(const TextureFormat &p_format, const Texture set_resource_name(id, "RID:" + itos(id.get_id())); #endif - if (p_data.size()) { + if (data.size()) { for (uint32_t i = 0; i < p_format.array_layers; i++) { - _texture_initialize(id, i, p_data[i]); + _texture_initialize(id, i, data[i], immediate_flush); } if (texture.draw_tracker != nullptr) { @@ -1399,7 +1415,7 @@ uint32_t RenderingDevice::_texture_alignment(Texture *p_texture) const { return STEPIFY(alignment, driver->api_trait_get(RDD::API_TRAIT_TEXTURE_TRANSFER_ALIGNMENT)); } -Error RenderingDevice::_texture_initialize(RID p_texture, uint32_t p_layer, const Vector &p_data) { +Error RenderingDevice::_texture_initialize(RID p_texture, uint32_t p_layer, const Vector &p_data, bool p_immediate_flush) { Texture *texture = texture_owner.get_or_null(p_texture); ERR_FAIL_NULL_V(texture, ERR_INVALID_PARAMETER); @@ -1531,6 +1547,12 @@ Error RenderingDevice::_texture_initialize(RID p_texture, uint32_t p_layer, cons transfer_worker->texture_barriers.push_back(tb); } + if (p_immediate_flush) { + _end_transfer_worker(transfer_worker); + _submit_transfer_worker(transfer_worker); + _wait_for_transfer_worker(transfer_worker); + } + _release_transfer_worker(transfer_worker); } } @@ -1863,6 +1885,17 @@ void RenderingDevice::_texture_create_reinterpret_buffer(Texture *p_texture) { p_texture->shared_fallback->buffer_tracker = tracker; } +uint32_t RenderingDevice::_texture_vrs_method_to_usage_bits() const { + switch (vrs_method) { + case VRS_METHOD_FRAGMENT_SHADING_RATE: + return RDD::TEXTURE_USAGE_VRS_FRAGMENT_SHADING_RATE_BIT; + case VRS_METHOD_FRAGMENT_DENSITY_MAP: + return RDD::TEXTURE_USAGE_VRS_FRAGMENT_DENSITY_MAP_BIT; + default: + return 0; + } +} + Vector RenderingDevice::_texture_get_data(Texture *tex, uint32_t p_layer, bool p_2d) { uint32_t width, height, depth; uint32_t tight_mip_size = get_image_format_required_size(tex->format, tex->width, tex->height, p_2d ? 1 : tex->depth, tex->mipmaps, &width, &height, &depth); @@ -2424,7 +2457,7 @@ bool RenderingDevice::texture_is_format_supported_for_usage(DataFormat p_format, /**** FRAMEBUFFER ****/ /*********************/ -RDD::RenderPassID RenderingDevice::_render_pass_create(RenderingDeviceDriver *p_driver, const Vector &p_attachments, const Vector &p_passes, VectorView p_load_ops, VectorView p_store_ops, uint32_t p_view_count, Vector *r_samples) { +RDD::RenderPassID RenderingDevice::_render_pass_create(RenderingDeviceDriver *p_driver, const Vector &p_attachments, const Vector &p_passes, VectorView p_load_ops, VectorView p_store_ops, uint32_t p_view_count, VRSMethod p_vrs_method, int32_t p_vrs_attachment, Size2i p_vrs_texel_size, Vector *r_samples) { // NOTE: // Before the refactor to RenderingDevice-RenderingDeviceDriver, there was commented out code to // specify dependencies to external subpasses. Since it had been unused for a long timel it wasn't ported @@ -2464,15 +2497,14 @@ RDD::RenderPassID RenderingDevice::_render_pass_create(RenderingDeviceDriver *p_ // We can setup a framebuffer where we write to our VRS texture to set it up. // We make the assumption here that if our texture is actually used as our VRS attachment. // It is used as such for each subpass. This is fairly certain seeing the restrictions on subpasses. - bool is_vrs = (p_attachments[i].usage_flags & TEXTURE_USAGE_VRS_ATTACHMENT_BIT) && i == p_passes[0].vrs_attachment; - + bool is_vrs = (p_attachments[i].usage_flags & TEXTURE_USAGE_VRS_ATTACHMENT_BIT) && i == p_vrs_attachment; if (is_vrs) { description.load_op = RDD::ATTACHMENT_LOAD_OP_LOAD; description.store_op = RDD::ATTACHMENT_STORE_OP_DONT_CARE; - description.stencil_load_op = RDD::ATTACHMENT_LOAD_OP_LOAD; + description.stencil_load_op = RDD::ATTACHMENT_LOAD_OP_DONT_CARE; description.stencil_store_op = RDD::ATTACHMENT_STORE_OP_DONT_CARE; - description.initial_layout = RDD::TEXTURE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; - description.final_layout = RDD::TEXTURE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + description.initial_layout = _vrs_layout_from_method(p_vrs_method); + description.final_layout = _vrs_layout_from_method(p_vrs_method); } else { if (p_attachments[i].usage_flags & TEXTURE_USAGE_COLOR_ATTACHMENT_BIT) { description.load_op = p_load_ops[i]; @@ -2605,14 +2637,15 @@ RDD::RenderPassID RenderingDevice::_render_pass_create(RenderingDeviceDriver *p_ subpass.depth_stencil_reference.layout = RDD::TEXTURE_LAYOUT_UNDEFINED; } - if (pass->vrs_attachment != ATTACHMENT_UNUSED) { - int32_t attachment = pass->vrs_attachment; + if (p_vrs_method == VRS_METHOD_FRAGMENT_SHADING_RATE && p_vrs_attachment >= 0) { + int32_t attachment = p_vrs_attachment; ERR_FAIL_INDEX_V_MSG(attachment, p_attachments.size(), RDD::RenderPassID(), "Invalid framebuffer VRS format attachment(" + itos(attachment) + "), in pass (" + itos(i) + "), VRS attachment."); ERR_FAIL_COND_V_MSG(!(p_attachments[attachment].usage_flags & TEXTURE_USAGE_VRS_ATTACHMENT_BIT), RDD::RenderPassID(), "Invalid framebuffer VRS format attachment(" + itos(attachment) + "), in pass (" + itos(i) + "), it's marked as VRS, but it's not a VRS attachment."); ERR_FAIL_COND_V_MSG(attachment_last_pass[attachment] == i, RDD::RenderPassID(), "Invalid framebuffer VRS attachment(" + itos(attachment) + "), in pass (" + itos(i) + "), it already was used for something else before in this pass."); - subpass.vrs_reference.attachment = attachment_remap[attachment]; - subpass.vrs_reference.layout = RDD::TEXTURE_LAYOUT_VRS_ATTACHMENT_OPTIMAL; + subpass.fragment_shading_rate_reference.attachment = attachment_remap[attachment]; + subpass.fragment_shading_rate_reference.layout = RDD::TEXTURE_LAYOUT_FRAGMENT_SHADING_RATE_ATTACHMENT_OPTIMAL; + subpass.fragment_shading_rate_texel_size = p_vrs_texel_size; attachment_last_pass[attachment] = i; } @@ -2647,7 +2680,13 @@ RDD::RenderPassID RenderingDevice::_render_pass_create(RenderingDeviceDriver *p_ } } - RDD::RenderPassID render_pass = p_driver->render_pass_create(attachments, subpasses, subpass_dependencies, p_view_count); + RDD::AttachmentReference fragment_density_map_attachment_reference; + if (p_vrs_method == VRS_METHOD_FRAGMENT_DENSITY_MAP && p_vrs_attachment >= 0) { + fragment_density_map_attachment_reference.attachment = p_vrs_attachment; + fragment_density_map_attachment_reference.layout = RDD::TEXTURE_LAYOUT_FRAGMENT_DENSITY_MAP_ATTACHMENT_OPTIMAL; + } + + RDD::RenderPassID render_pass = p_driver->render_pass_create(attachments, subpasses, subpass_dependencies, p_view_count, fragment_density_map_attachment_reference); ERR_FAIL_COND_V(!render_pass, RDD::RenderPassID()); return render_pass; @@ -2661,10 +2700,74 @@ RDD::RenderPassID RenderingDevice::_render_pass_create_from_graph(RenderingDevic // resolving the dependencies between commands. This function creates a render pass for the framebuffer accordingly. Framebuffer *framebuffer = (Framebuffer *)(p_user_data); const FramebufferFormatKey &key = framebuffer->rendering_device->framebuffer_formats[framebuffer->format_id].E->key(); - return _render_pass_create(p_driver, key.attachments, key.passes, p_load_ops, p_store_ops, framebuffer->view_count); + return _render_pass_create(p_driver, key.attachments, key.passes, p_load_ops, p_store_ops, framebuffer->view_count, key.vrs_method, key.vrs_attachment, key.vrs_texel_size); } -RenderingDevice::FramebufferFormatID RenderingDevice::framebuffer_format_create(const Vector &p_format, uint32_t p_view_count) { +RDG::ResourceUsage RenderingDevice::_vrs_usage_from_method(VRSMethod p_method) { + switch (p_method) { + case VRS_METHOD_FRAGMENT_SHADING_RATE: + return RDG::RESOURCE_USAGE_ATTACHMENT_FRAGMENT_SHADING_RATE_READ; + case VRS_METHOD_FRAGMENT_DENSITY_MAP: + return RDG::RESOURCE_USAGE_ATTACHMENT_FRAGMENT_DENSITY_MAP_READ; + default: + return RDG::RESOURCE_USAGE_NONE; + } +} + +RDD::PipelineStageBits RenderingDevice::_vrs_stages_from_method(VRSMethod p_method) { + switch (p_method) { + case VRS_METHOD_FRAGMENT_SHADING_RATE: + return RDD::PIPELINE_STAGE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT; + case VRS_METHOD_FRAGMENT_DENSITY_MAP: + return RDD::PIPELINE_STAGE_FRAGMENT_DENSITY_PROCESS_BIT; + default: + return RDD::PipelineStageBits(0); + } +} + +RDD::TextureLayout RenderingDevice::_vrs_layout_from_method(VRSMethod p_method) { + switch (p_method) { + case VRS_METHOD_FRAGMENT_SHADING_RATE: + return RDD::TEXTURE_LAYOUT_FRAGMENT_SHADING_RATE_ATTACHMENT_OPTIMAL; + case VRS_METHOD_FRAGMENT_DENSITY_MAP: + return RDD::TEXTURE_LAYOUT_FRAGMENT_DENSITY_MAP_ATTACHMENT_OPTIMAL; + default: + return RDD::TEXTURE_LAYOUT_UNDEFINED; + } +} + +void RenderingDevice::_vrs_detect_method() { + const RDD::FragmentShadingRateCapabilities &fsr_capabilities = driver->get_fragment_shading_rate_capabilities(); + const RDD::FragmentDensityMapCapabilities &fdm_capabilities = driver->get_fragment_density_map_capabilities(); + if (fsr_capabilities.attachment_supported) { + vrs_method = VRS_METHOD_FRAGMENT_SHADING_RATE; + } else if (fdm_capabilities.attachment_supported) { + vrs_method = VRS_METHOD_FRAGMENT_DENSITY_MAP; + } + + switch (vrs_method) { + case VRS_METHOD_FRAGMENT_SHADING_RATE: + vrs_format = DATA_FORMAT_R8_UINT; + vrs_texel_size = Vector2i(16, 16).clamp(fsr_capabilities.min_texel_size, fsr_capabilities.max_texel_size); + break; + case VRS_METHOD_FRAGMENT_DENSITY_MAP: + vrs_format = DATA_FORMAT_R8G8_UNORM; + vrs_texel_size = Vector2i(32, 32).clamp(fdm_capabilities.min_texel_size, fdm_capabilities.max_texel_size); + break; + default: + break; + } +} + +RD::DataFormat RenderingDevice::vrs_get_format() const { + return vrs_format; +} + +Size2i RenderingDevice::vrs_get_texel_size() const { + return vrs_texel_size; +} + +RenderingDevice::FramebufferFormatID RenderingDevice::framebuffer_format_create(const Vector &p_format, uint32_t p_view_count, int32_t p_fragment_density_map_attachment) { FramebufferPass pass; for (int i = 0; i < p_format.size(); i++) { if (p_format[i].usage_flags & TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) { @@ -2676,16 +2779,19 @@ RenderingDevice::FramebufferFormatID RenderingDevice::framebuffer_format_create( Vector passes; passes.push_back(pass); - return framebuffer_format_create_multipass(p_format, passes, p_view_count); + return framebuffer_format_create_multipass(p_format, passes, p_view_count, p_fragment_density_map_attachment); } -RenderingDevice::FramebufferFormatID RenderingDevice::framebuffer_format_create_multipass(const Vector &p_attachments, const Vector &p_passes, uint32_t p_view_count) { +RenderingDevice::FramebufferFormatID RenderingDevice::framebuffer_format_create_multipass(const Vector &p_attachments, const Vector &p_passes, uint32_t p_view_count, int32_t p_vrs_attachment) { _THREAD_SAFE_METHOD_ FramebufferFormatKey key; key.attachments = p_attachments; key.passes = p_passes; key.view_count = p_view_count; + key.vrs_method = vrs_method; + key.vrs_attachment = p_vrs_attachment; + key.vrs_texel_size = vrs_texel_size; const RBMap::Element *E = framebuffer_format_cache.find(key); if (E) { @@ -2701,7 +2807,7 @@ RenderingDevice::FramebufferFormatID RenderingDevice::framebuffer_format_create_ store_ops.push_back(RDD::ATTACHMENT_STORE_OP_STORE); } - RDD::RenderPassID render_pass = _render_pass_create(driver, p_attachments, p_passes, load_ops, store_ops, p_view_count, &samples); // Actions don't matter for this use case. + RDD::RenderPassID render_pass = _render_pass_create(driver, p_attachments, p_passes, load_ops, store_ops, p_view_count, vrs_method, p_vrs_attachment, vrs_texel_size, &samples); // Actions don't matter for this use case. if (!render_pass) { // Was likely invalid. return INVALID_ID; } @@ -2741,7 +2847,7 @@ RenderingDevice::FramebufferFormatID RenderingDevice::framebuffer_format_create_ LocalVector subpass; subpass.resize(1); - RDD::RenderPassID render_pass = driver->render_pass_create({}, subpass, {}, 1); + RDD::RenderPassID render_pass = driver->render_pass_create({}, subpass, {}, 1, RDD::AttachmentReference()); ERR_FAIL_COND_V(!render_pass, FramebufferFormatID()); FramebufferFormatID id = FramebufferFormatID(framebuffer_format_cache.size()) | (FramebufferFormatID(ID_TYPE_FRAMEBUFFER_FORMAT) << FramebufferFormatID(ID_BASE_SHIFT)); @@ -2812,8 +2918,6 @@ RID RenderingDevice::framebuffer_create(const Vector &p_texture_attachments if (texture && texture->usage_flags & TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) { pass.depth_attachment = i; - } else if (texture && texture->usage_flags & TEXTURE_USAGE_VRS_ATTACHMENT_BIT) { - pass.vrs_attachment = i; } else { if (texture && texture->is_resolve_buffer) { pass.resolve_attachments.push_back(i); @@ -2835,6 +2939,7 @@ RID RenderingDevice::framebuffer_create_multipass(const Vector &p_texture_a Vector attachments; LocalVector textures; LocalVector trackers; + int32_t vrs_attachment = -1; attachments.resize(p_texture_attachments.size()); Size2i size; bool size_set = false; @@ -2849,6 +2954,11 @@ RID RenderingDevice::framebuffer_create_multipass(const Vector &p_texture_a _check_transfer_worker_texture(texture); + if (i != 0 && texture->usage_flags & TEXTURE_USAGE_VRS_ATTACHMENT_BIT) { + // Detect if the texture is the fragment density map and it's not the first attachment. + vrs_attachment = i; + } + if (!size_set) { size.width = texture->width; size.height = texture->height; @@ -2876,7 +2986,7 @@ RID RenderingDevice::framebuffer_create_multipass(const Vector &p_texture_a ERR_FAIL_COND_V_MSG(!size_set, RID(), "All attachments unused."); - FramebufferFormatID format_id = framebuffer_format_create_multipass(attachments, p_passes, p_view_count); + FramebufferFormatID format_id = framebuffer_format_create_multipass(attachments, p_passes, p_view_count, vrs_attachment); if (format_id == INVALID_ID) { return RID(); } @@ -4259,7 +4369,7 @@ RenderingDevice::DrawListID RenderingDevice::draw_list_begin_for_screen(DisplayS clear_value.color = p_clear_color; RDD::RenderPassID render_pass = driver->swap_chain_get_render_pass(sc_it->value); - draw_graph.add_draw_list_begin(render_pass, fb_it->value, viewport, RDG::ATTACHMENT_OPERATION_CLEAR, clear_value, true, false, RDD::BreadcrumbMarker::BLIT_PASS, split_swapchain_into_its_own_cmd_buffer); + draw_graph.add_draw_list_begin(render_pass, fb_it->value, viewport, RDG::ATTACHMENT_OPERATION_CLEAR, clear_value, RDD::PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, RDD::BreadcrumbMarker::BLIT_PASS, split_swapchain_into_its_own_cmd_buffer); draw_graph.add_draw_list_set_viewport(viewport); draw_graph.add_draw_list_set_scissor(viewport); @@ -4275,6 +4385,7 @@ RenderingDevice::DrawListID RenderingDevice::draw_list_begin(RID p_framebuffer, Framebuffer *framebuffer = framebuffer_owner.get_or_null(p_framebuffer); ERR_FAIL_NULL_V(framebuffer, INVALID_ID); + const FramebufferFormatKey &framebuffer_key = framebuffer_formats[framebuffer->format_id].E->key(); Point2i viewport_offset; Point2i viewport_size = framebuffer->size; @@ -4295,12 +4406,12 @@ RenderingDevice::DrawListID RenderingDevice::draw_list_begin(RID p_framebuffer, thread_local LocalVector clear_values; thread_local LocalVector resource_trackers; thread_local LocalVector resource_usages; - bool uses_color = false; - bool uses_depth = false; + BitField stages; operations.resize(framebuffer->texture_ids.size()); clear_values.resize(framebuffer->texture_ids.size()); resource_trackers.clear(); resource_usages.clear(); + stages.clear(); uint32_t color_index = 0; for (int i = 0; i < framebuffer->texture_ids.size(); i++) { @@ -4317,7 +4428,11 @@ RenderingDevice::DrawListID RenderingDevice::draw_list_begin(RID p_framebuffer, RDG::AttachmentOperation operation = RDG::ATTACHMENT_OPERATION_DEFAULT; RDD::RenderPassClearValue clear_value; - if (texture->usage_flags & TEXTURE_USAGE_COLOR_ATTACHMENT_BIT) { + if (framebuffer_key.vrs_attachment == i && (texture->usage_flags & TEXTURE_USAGE_VRS_ATTACHMENT_BIT)) { + resource_trackers.push_back(texture->draw_tracker); + resource_usages.push_back(_vrs_usage_from_method(framebuffer_key.vrs_method)); + stages.set_flag(_vrs_stages_from_method(framebuffer_key.vrs_method)); + } else if (texture->usage_flags & TEXTURE_USAGE_COLOR_ATTACHMENT_BIT) { if (p_draw_flags.has_flag(DrawFlags(DRAW_CLEAR_COLOR_0 << color_index))) { ERR_FAIL_COND_V_MSG(color_index >= p_clear_color_values.size(), INVALID_ID, vformat("Color texture (%d) was specified to be cleared but no color value was provided.", color_index)); operation = RDG::ATTACHMENT_OPERATION_CLEAR; @@ -4328,7 +4443,7 @@ RenderingDevice::DrawListID RenderingDevice::draw_list_begin(RID p_framebuffer, resource_trackers.push_back(texture->draw_tracker); resource_usages.push_back(RDG::RESOURCE_USAGE_ATTACHMENT_COLOR_READ_WRITE); - uses_color = true; + stages.set_flag(RDD::PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT); color_index++; } else if (texture->usage_flags & TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) { if (p_draw_flags.has_flag(DRAW_CLEAR_DEPTH) || p_draw_flags.has_flag(DRAW_CLEAR_STENCIL)) { @@ -4341,14 +4456,15 @@ RenderingDevice::DrawListID RenderingDevice::draw_list_begin(RID p_framebuffer, resource_trackers.push_back(texture->draw_tracker); resource_usages.push_back(RDG::RESOURCE_USAGE_ATTACHMENT_DEPTH_STENCIL_READ_WRITE); - uses_depth = true; + stages.set_flag(RDD::PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT); + stages.set_flag(RDD::PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT); } operations[i] = operation; clear_values[i] = clear_value; } - draw_graph.add_draw_list_begin(framebuffer->framebuffer_cache, Rect2i(viewport_offset, viewport_size), operations, clear_values, uses_color, uses_depth, p_breadcrumb); + draw_graph.add_draw_list_begin(framebuffer->framebuffer_cache, Rect2i(viewport_offset, viewport_size), operations, clear_values, stages, p_breadcrumb); draw_graph.add_draw_list_usages(resource_trackers, resource_usages); // Mark textures as bound. @@ -4369,9 +4485,7 @@ RenderingDevice::DrawListID RenderingDevice::draw_list_begin(RID p_framebuffer, draw_list_framebuffer_format = framebuffer->format_id; #endif draw_list_current_subpass = 0; - - const FramebufferFormatKey &key = framebuffer_formats[framebuffer->format_id].E->key(); - draw_list_subpass_count = key.passes.size(); + draw_list_subpass_count = framebuffer_key.passes.size(); Rect2i viewport_rect(viewport_offset, viewport_size); draw_graph.add_draw_list_set_viewport(viewport_rect); @@ -6832,6 +6946,9 @@ Error RenderingDevice::initialize(RenderingContextDriver *p_context, DisplayServ } } + // Find the best method available for VRS on the current hardware. + _vrs_detect_method(); + return OK; } @@ -7259,7 +7376,20 @@ RenderingDevice *RenderingDevice::create_local_device() { } bool RenderingDevice::has_feature(const Features p_feature) const { - return driver->has_feature(p_feature); + // Some features can be deduced from the capabilities without querying the driver and looking at the capabilities. + switch (p_feature) { + case SUPPORTS_MULTIVIEW: { + const RDD::MultiviewCapabilities &multiview_capabilities = driver->get_multiview_capabilities(); + return multiview_capabilities.is_supported && multiview_capabilities.max_view_count > 1; + } + case SUPPORTS_ATTACHMENT_VRS: { + const RDD::FragmentShadingRateCapabilities &fsr_capabilities = driver->get_fragment_shading_rate_capabilities(); + const RDD::FragmentDensityMapCapabilities &fdm_capabilities = driver->get_fragment_density_map_capabilities(); + return fsr_capabilities.attachment_supported || fdm_capabilities.attachment_supported; + } + default: + return driver->has_feature(p_feature); + } } void RenderingDevice::_bind_methods() { diff --git a/servers/rendering/rendering_device.h b/servers/rendering/rendering_device.h index 6d480258e12..5d641702f1c 100644 --- a/servers/rendering/rendering_device.h +++ b/servers/rendering/rendering_device.h @@ -254,6 +254,8 @@ public: CALLBACK_RESOURCE_USAGE_STORAGE_IMAGE_READ_WRITE, CALLBACK_RESOURCE_USAGE_ATTACHMENT_COLOR_READ_WRITE, CALLBACK_RESOURCE_USAGE_ATTACHMENT_DEPTH_STENCIL_READ_WRITE, + CALLBACK_RESOURCE_USAGE_ATTACHMENT_FRAGMENT_SHADING_RATE_READ, + CALLBACK_RESOURCE_USAGE_ATTACHMENT_FRAGMENT_DENSITY_MAP_READ, CALLBACK_RESOURCE_USAGE_MAX }; @@ -359,12 +361,13 @@ public: Vector _texture_get_data(Texture *tex, uint32_t p_layer, bool p_2d = false); uint32_t _texture_layer_count(Texture *p_texture) const; uint32_t _texture_alignment(Texture *p_texture) const; - Error _texture_initialize(RID p_texture, uint32_t p_layer, const Vector &p_data); + Error _texture_initialize(RID p_texture, uint32_t p_layer, const Vector &p_data, bool p_immediate_flush = false); void _texture_check_shared_fallback(Texture *p_texture); void _texture_update_shared_fallback(RID p_texture_rid, Texture *p_texture, bool p_for_writing); void _texture_free_shared_fallback(Texture *p_texture); void _texture_copy_shared(RID p_src_texture_rid, Texture *p_src_texture, RID p_dst_texture_rid, Texture *p_dst_texture); void _texture_create_reinterpret_buffer(Texture *p_texture); + uint32_t _texture_vrs_method_to_usage_bits() const; struct TextureGetDataRequest { uint32_t frame_local_index = 0; @@ -426,6 +429,30 @@ public: void texture_set_discardable(RID p_texture, bool p_discardable); bool texture_is_discardable(RID p_texture); +private: + /*************/ + /**** VRS ****/ + /*************/ + + enum VRSMethod { + VRS_METHOD_NONE, + VRS_METHOD_FRAGMENT_SHADING_RATE, + VRS_METHOD_FRAGMENT_DENSITY_MAP, + }; + + VRSMethod vrs_method = VRS_METHOD_NONE; + DataFormat vrs_format = DATA_FORMAT_MAX; + Size2i vrs_texel_size; + + static RDG::ResourceUsage _vrs_usage_from_method(VRSMethod p_method); + static RDD::PipelineStageBits _vrs_stages_from_method(VRSMethod p_method); + static RDD::TextureLayout _vrs_layout_from_method(VRSMethod p_method); + void _vrs_detect_method(); + +public: + DataFormat vrs_get_format() const; + Size2i vrs_get_texel_size() const; + /*********************/ /**** FRAMEBUFFER ****/ /*********************/ @@ -456,7 +483,6 @@ public: Vector resolve_attachments; Vector preserve_attachments; int32_t depth_attachment = ATTACHMENT_UNUSED; - int32_t vrs_attachment = ATTACHMENT_UNUSED; // density map for VRS, only used if supported }; typedef int64_t FramebufferFormatID; @@ -466,8 +492,23 @@ private: Vector attachments; Vector passes; uint32_t view_count = 1; + VRSMethod vrs_method = VRS_METHOD_NONE; + int32_t vrs_attachment = ATTACHMENT_UNUSED; + Size2i vrs_texel_size; bool operator<(const FramebufferFormatKey &p_key) const { + if (vrs_texel_size != p_key.vrs_texel_size) { + return vrs_texel_size < p_key.vrs_texel_size; + } + + if (vrs_attachment != p_key.vrs_attachment) { + return vrs_attachment < p_key.vrs_attachment; + } + + if (vrs_method != p_key.vrs_method) { + return vrs_method < p_key.vrs_method; + } + if (view_count != p_key.view_count) { return view_count < p_key.view_count; } @@ -572,7 +613,7 @@ private: } }; - static RDD::RenderPassID _render_pass_create(RenderingDeviceDriver *p_driver, const Vector &p_attachments, const Vector &p_passes, VectorView p_load_ops, VectorView p_store_ops, uint32_t p_view_count = 1, Vector *r_samples = nullptr); + static RDD::RenderPassID _render_pass_create(RenderingDeviceDriver *p_driver, const Vector &p_attachments, const Vector &p_passes, VectorView p_load_ops, VectorView p_store_ops, uint32_t p_view_count = 1, VRSMethod p_vrs_method = VRS_METHOD_NONE, int32_t p_vrs_attachment = -1, Size2i p_vrs_texel_size = Size2i(), Vector *r_samples = nullptr); static RDD::RenderPassID _render_pass_create_from_graph(RenderingDeviceDriver *p_driver, VectorView p_load_ops, VectorView p_store_ops, void *p_user_data); // This is a cache and it's never freed, it ensures @@ -603,8 +644,8 @@ private: public: // This ID is warranted to be unique for the same formats, does not need to be freed - FramebufferFormatID framebuffer_format_create(const Vector &p_format, uint32_t p_view_count = 1); - FramebufferFormatID framebuffer_format_create_multipass(const Vector &p_attachments, const Vector &p_passes, uint32_t p_view_count = 1); + FramebufferFormatID framebuffer_format_create(const Vector &p_format, uint32_t p_view_count = 1, int32_t p_vrs_attachment = -1); + FramebufferFormatID framebuffer_format_create_multipass(const Vector &p_attachments, const Vector &p_passes, uint32_t p_view_count = 1, int32_t p_vrs_attachment = -1); FramebufferFormatID framebuffer_format_create_empty(TextureSamples p_samples = TEXTURE_SAMPLES_1); TextureSamples framebuffer_format_get_texture_samples(FramebufferFormatID p_format, uint32_t p_pass = 0); diff --git a/servers/rendering/rendering_device_commons.h b/servers/rendering/rendering_device_commons.h index 796e3c2b66c..7212edfb9be 100644 --- a/servers/rendering/rendering_device_commons.h +++ b/servers/rendering/rendering_device_commons.h @@ -391,6 +391,7 @@ public: // Try to set this bit as much as possible. If you set it, validation doesn't complain // and it works fine on mobile, then go ahead. TEXTURE_USAGE_TRANSIENT_BIT = (1 << 11), + TEXTURE_USAGE_MAX_BIT = TEXTURE_USAGE_TRANSIENT_BIT, }; struct TextureFormat { @@ -883,11 +884,7 @@ public: LIMIT_SUBGROUP_MAX_SIZE, LIMIT_SUBGROUP_IN_SHADERS, // Set flags using SHADER_STAGE_VERTEX_BIT, SHADER_STAGE_FRAGMENT_BIT, etc. LIMIT_SUBGROUP_OPERATIONS, - LIMIT_VRS_TEXEL_WIDTH, - LIMIT_VRS_TEXEL_HEIGHT, - LIMIT_VRS_MAX_FRAGMENT_WIDTH, - LIMIT_VRS_MAX_FRAGMENT_HEIGHT, - LIMIT_METALFX_TEMPORAL_SCALER_MIN_SCALE, + LIMIT_METALFX_TEMPORAL_SCALER_MIN_SCALE = 46, LIMIT_METALFX_TEMPORAL_SCALER_MAX_SCALE, LIMIT_MAX_SHADER_VARYINGS, }; diff --git a/servers/rendering/rendering_device_driver.h b/servers/rendering/rendering_device_driver.h index 1f65cdcaa9a..0f95265694b 100644 --- a/servers/rendering/rendering_device_driver.h +++ b/servers/rendering/rendering_device_driver.h @@ -238,7 +238,8 @@ public: TEXTURE_LAYOUT_COPY_DST_OPTIMAL, TEXTURE_LAYOUT_RESOLVE_SRC_OPTIMAL, TEXTURE_LAYOUT_RESOLVE_DST_OPTIMAL, - TEXTURE_LAYOUT_VRS_ATTACHMENT_OPTIMAL, + TEXTURE_LAYOUT_FRAGMENT_SHADING_RATE_ATTACHMENT_OPTIMAL, + TEXTURE_LAYOUT_FRAGMENT_DENSITY_MAP_ATTACHMENT_OPTIMAL, TEXTURE_LAYOUT_MAX }; @@ -249,6 +250,11 @@ public: TEXTURE_ASPECT_MAX }; + enum TextureUsageMethod { + TEXTURE_USAGE_VRS_FRAGMENT_SHADING_RATE_BIT = TEXTURE_USAGE_MAX_BIT << 1, + TEXTURE_USAGE_VRS_FRAGMENT_DENSITY_MAP_BIT = TEXTURE_USAGE_MAX_BIT << 2, + }; + enum TextureAspectBits { TEXTURE_ASPECT_COLOR_BIT = (1 << TEXTURE_ASPECT_COLOR), TEXTURE_ASPECT_DEPTH_BIT = (1 << TEXTURE_ASPECT_DEPTH), @@ -335,6 +341,8 @@ public: PIPELINE_STAGE_ALL_GRAPHICS_BIT = (1 << 15), PIPELINE_STAGE_ALL_COMMANDS_BIT = (1 << 16), PIPELINE_STAGE_CLEAR_STORAGE_BIT = (1 << 17), + PIPELINE_STAGE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT = (1 << 22), + PIPELINE_STAGE_FRAGMENT_DENSITY_PROCESS_BIT = (1 << 23), }; enum BarrierAccessBits { @@ -356,8 +364,9 @@ public: BARRIER_ACCESS_MEMORY_READ_BIT = (1 << 15), BARRIER_ACCESS_MEMORY_WRITE_BIT = (1 << 16), BARRIER_ACCESS_FRAGMENT_SHADING_RATE_ATTACHMENT_READ_BIT = (1 << 23), - BARRIER_ACCESS_RESOLVE_READ_BIT = (1 << 24), - BARRIER_ACCESS_RESOLVE_WRITE_BIT = (1 << 25), + BARRIER_ACCESS_FRAGMENT_DENSITY_MAP_ATTACHMENT_READ_BIT = (1 << 24), + BARRIER_ACCESS_RESOLVE_READ_BIT = (1 << 25), + BARRIER_ACCESS_RESOLVE_WRITE_BIT = (1 << 26), BARRIER_ACCESS_STORAGE_CLEAR_BIT = (1 << 27), }; @@ -629,7 +638,8 @@ public: AttachmentReference depth_stencil_reference; LocalVector resolve_references; LocalVector preserve_attachments; - AttachmentReference vrs_reference; + AttachmentReference fragment_shading_rate_reference; + Size2i fragment_shading_rate_texel_size; }; struct SubpassDependency { @@ -641,7 +651,7 @@ public: BitField dst_access; }; - virtual RenderPassID render_pass_create(VectorView p_attachments, VectorView p_subpasses, VectorView p_subpass_dependencies, uint32_t p_view_count) = 0; + virtual RenderPassID render_pass_create(VectorView p_attachments, VectorView p_subpasses, VectorView p_subpass_dependencies, uint32_t p_view_count, AttachmentReference p_fragment_density_map_attachment) = 0; virtual void render_pass_free(RenderPassID p_render_pass) = 0; // ----- COMMANDS ----- @@ -787,6 +797,26 @@ public: uint32_t max_instance_count = 0; }; + struct FragmentShadingRateCapabilities { + Size2i min_texel_size; + Size2i max_texel_size; + Size2i max_fragment_size; + bool pipeline_supported = false; + bool primitive_supported = false; + bool attachment_supported = false; + }; + + struct FragmentDensityMapCapabilities { + Size2i min_texel_size; + Size2i max_texel_size; + Size2i offset_granularity; + bool attachment_supported = false; + bool dynamic_attachment_supported = false; + bool non_subsampled_images_supported = false; + bool invocations_supported = false; + bool offset_supported = false; + }; + enum ApiTrait { API_TRAIT_HONORS_PIPELINE_BARRIERS, API_TRAIT_SHADER_CHANGE_INVALIDATION, @@ -828,6 +858,8 @@ public: virtual uint64_t api_trait_get(ApiTrait p_trait); virtual bool has_feature(Features p_feature) = 0; virtual const MultiviewCapabilities &get_multiview_capabilities() = 0; + virtual const FragmentShadingRateCapabilities &get_fragment_shading_rate_capabilities() = 0; + virtual const FragmentDensityMapCapabilities &get_fragment_density_map_capabilities() = 0; virtual String get_api_name() const = 0; virtual String get_api_version() const = 0; virtual String get_pipeline_cache_uuid() const = 0; diff --git a/servers/rendering/rendering_device_graph.cpp b/servers/rendering/rendering_device_graph.cpp index ae91e1b3e4a..26ce6cab188 100644 --- a/servers/rendering/rendering_device_graph.cpp +++ b/servers/rendering/rendering_device_graph.cpp @@ -98,6 +98,8 @@ bool RenderingDeviceGraph::_is_write_usage(ResourceUsage p_usage) { case RESOURCE_USAGE_INDEX_BUFFER_READ: case RESOURCE_USAGE_TEXTURE_SAMPLE: case RESOURCE_USAGE_STORAGE_IMAGE_READ: + case RESOURCE_USAGE_ATTACHMENT_FRAGMENT_SHADING_RATE_READ: + case RESOURCE_USAGE_ATTACHMENT_FRAGMENT_DENSITY_MAP_READ: return false; case RESOURCE_USAGE_COPY_TO: case RESOURCE_USAGE_RESOLVE_TO: @@ -132,6 +134,10 @@ RDD::TextureLayout RenderingDeviceGraph::_usage_to_image_layout(ResourceUsage p_ return RDD::TEXTURE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; case RESOURCE_USAGE_ATTACHMENT_DEPTH_STENCIL_READ_WRITE: return RDD::TEXTURE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; + case RESOURCE_USAGE_ATTACHMENT_FRAGMENT_SHADING_RATE_READ: + return RDD::TEXTURE_LAYOUT_FRAGMENT_SHADING_RATE_ATTACHMENT_OPTIMAL; + case RESOURCE_USAGE_ATTACHMENT_FRAGMENT_DENSITY_MAP_READ: + return RDD::TEXTURE_LAYOUT_FRAGMENT_DENSITY_MAP_ATTACHMENT_OPTIMAL; case RESOURCE_USAGE_NONE: return RDD::TEXTURE_LAYOUT_UNDEFINED; default: @@ -176,6 +182,10 @@ RDD::BarrierAccessBits RenderingDeviceGraph::_usage_to_access_bits(ResourceUsage return RDD::BarrierAccessBits(RDD::BARRIER_ACCESS_COLOR_ATTACHMENT_READ_BIT | RDD::BARRIER_ACCESS_COLOR_ATTACHMENT_WRITE_BIT); case RESOURCE_USAGE_ATTACHMENT_DEPTH_STENCIL_READ_WRITE: return RDD::BarrierAccessBits(RDD::BARRIER_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | RDD::BARRIER_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT); + case RESOURCE_USAGE_ATTACHMENT_FRAGMENT_SHADING_RATE_READ: + return RDD::BARRIER_ACCESS_FRAGMENT_SHADING_RATE_ATTACHMENT_READ_BIT; + case RESOURCE_USAGE_ATTACHMENT_FRAGMENT_DENSITY_MAP_READ: + return RDD::BARRIER_ACCESS_FRAGMENT_DENSITY_MAP_ATTACHMENT_READ_BIT; default: DEV_ASSERT(false && "Invalid usage."); return RDD::BarrierAccessBits(0); @@ -918,7 +928,7 @@ void RenderingDeviceGraph::_run_draw_list_command(RDD::CommandBufferID p_command } } -void RenderingDeviceGraph::_add_draw_list_begin(FramebufferCache *p_framebuffer_cache, RDD::RenderPassID p_render_pass, RDD::FramebufferID p_framebuffer, Rect2i p_region, VectorView p_attachment_operations, VectorView p_attachment_clear_values, bool p_uses_color, bool p_uses_depth, uint32_t p_breadcrumb, bool p_split_cmd_buffer) { +void RenderingDeviceGraph::_add_draw_list_begin(FramebufferCache *p_framebuffer_cache, RDD::RenderPassID p_render_pass, RDD::FramebufferID p_framebuffer, Rect2i p_region, VectorView p_attachment_operations, VectorView p_attachment_clear_values, BitField p_stages, uint32_t p_breadcrumb, bool p_split_cmd_buffer) { DEV_ASSERT(p_attachment_operations.size() == p_attachment_clear_values.size()); draw_instruction_list.clear(); @@ -927,6 +937,7 @@ void RenderingDeviceGraph::_add_draw_list_begin(FramebufferCache *p_framebuffer_ draw_instruction_list.render_pass = p_render_pass; draw_instruction_list.framebuffer = p_framebuffer; draw_instruction_list.region = p_region; + draw_instruction_list.stages = p_stages; draw_instruction_list.attachment_operations.resize(p_attachment_operations.size()); draw_instruction_list.attachment_clear_values.resize(p_attachment_clear_values.size()); @@ -935,15 +946,6 @@ void RenderingDeviceGraph::_add_draw_list_begin(FramebufferCache *p_framebuffer_ draw_instruction_list.attachment_clear_values[i] = p_attachment_clear_values[i]; } - if (p_uses_color) { - draw_instruction_list.stages.set_flag(RDD::PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT); - } - - if (p_uses_depth) { - draw_instruction_list.stages.set_flag(RDD::PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT); - draw_instruction_list.stages.set_flag(RDD::PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT); - } - draw_instruction_list.split_cmd_buffer = p_split_cmd_buffer; #if defined(DEBUG_ENABLED) || defined(DEV_ENABLED) @@ -1789,12 +1791,12 @@ void RenderingDeviceGraph::add_compute_list_end() { _add_command_to_graph(compute_instruction_list.command_trackers.ptr(), compute_instruction_list.command_tracker_usages.ptr(), compute_instruction_list.command_trackers.size(), command_index, command); } -void RenderingDeviceGraph::add_draw_list_begin(FramebufferCache *p_framebuffer_cache, Rect2i p_region, VectorView p_attachment_operations, VectorView p_attachment_clear_values, bool p_uses_color, bool p_uses_depth, uint32_t p_breadcrumb, bool p_split_cmd_buffer) { - _add_draw_list_begin(p_framebuffer_cache, RDD::RenderPassID(), RDD::FramebufferID(), p_region, p_attachment_operations, p_attachment_clear_values, p_uses_color, p_uses_depth, p_breadcrumb, p_split_cmd_buffer); +void RenderingDeviceGraph::add_draw_list_begin(FramebufferCache *p_framebuffer_cache, Rect2i p_region, VectorView p_attachment_operations, VectorView p_attachment_clear_values, BitField p_stages, uint32_t p_breadcrumb, bool p_split_cmd_buffer) { + _add_draw_list_begin(p_framebuffer_cache, RDD::RenderPassID(), RDD::FramebufferID(), p_region, p_attachment_operations, p_attachment_clear_values, p_stages, p_breadcrumb, p_split_cmd_buffer); } -void RenderingDeviceGraph::add_draw_list_begin(RDD::RenderPassID p_render_pass, RDD::FramebufferID p_framebuffer, Rect2i p_region, VectorView p_attachment_operations, VectorView p_attachment_clear_values, bool p_uses_color, bool p_uses_depth, uint32_t p_breadcrumb, bool p_split_cmd_buffer) { - _add_draw_list_begin(nullptr, p_render_pass, p_framebuffer, p_region, p_attachment_operations, p_attachment_clear_values, p_uses_color, p_uses_depth, p_breadcrumb, p_split_cmd_buffer); +void RenderingDeviceGraph::add_draw_list_begin(RDD::RenderPassID p_render_pass, RDD::FramebufferID p_framebuffer, Rect2i p_region, VectorView p_attachment_operations, VectorView p_attachment_clear_values, BitField p_stages, uint32_t p_breadcrumb, bool p_split_cmd_buffer) { + _add_draw_list_begin(nullptr, p_render_pass, p_framebuffer, p_region, p_attachment_operations, p_attachment_clear_values, p_stages, p_breadcrumb, p_split_cmd_buffer); } void RenderingDeviceGraph::add_draw_list_bind_index_buffer(RDD::BufferID p_buffer, RDD::IndexBufferFormat p_format, uint32_t p_offset) { diff --git a/servers/rendering/rendering_device_graph.h b/servers/rendering/rendering_device_graph.h index 567893e6fe8..2fad8fa5d32 100644 --- a/servers/rendering/rendering_device_graph.h +++ b/servers/rendering/rendering_device_graph.h @@ -149,6 +149,8 @@ public: RESOURCE_USAGE_STORAGE_IMAGE_READ_WRITE, RESOURCE_USAGE_ATTACHMENT_COLOR_READ_WRITE, RESOURCE_USAGE_ATTACHMENT_DEPTH_STENCIL_READ_WRITE, + RESOURCE_USAGE_ATTACHMENT_FRAGMENT_SHADING_RATE_READ, + RESOURCE_USAGE_ATTACHMENT_FRAGMENT_DENSITY_MAP_READ, RESOURCE_USAGE_MAX }; @@ -752,7 +754,7 @@ private: void _run_compute_list_command(RDD::CommandBufferID p_command_buffer, const uint8_t *p_instruction_data, uint32_t p_instruction_data_size); void _get_draw_list_render_pass_and_framebuffer(const RecordedDrawListCommand *p_draw_list_command, RDD::RenderPassID &r_render_pass, RDD::FramebufferID &r_framebuffer); void _run_draw_list_command(RDD::CommandBufferID p_command_buffer, const uint8_t *p_instruction_data, uint32_t p_instruction_data_size); - void _add_draw_list_begin(FramebufferCache *p_framebuffer_cache, RDD::RenderPassID p_render_pass, RDD::FramebufferID p_framebuffer, Rect2i p_region, VectorView p_attachment_operations, VectorView p_attachment_clear_values, bool p_uses_color, bool p_uses_depth, uint32_t p_breadcrumb, bool p_split_cmd_buffer); + void _add_draw_list_begin(FramebufferCache *p_framebuffer_cache, RDD::RenderPassID p_render_pass, RDD::FramebufferID p_framebuffer, Rect2i p_region, VectorView p_attachment_operations, VectorView p_attachment_clear_values, BitField p_stages, uint32_t p_breadcrumb, bool p_split_cmd_buffer); void _run_secondary_command_buffer_task(const SecondaryCommandBuffer *p_secondary); void _wait_for_secondary_command_buffer_tasks(); void _run_render_commands(int32_t p_level, const RecordedCommandSort *p_sorted_commands, uint32_t p_sorted_commands_count, RDD::CommandBufferID &r_command_buffer, CommandBufferPool &r_command_buffer_pool, int32_t &r_current_label_index, int32_t &r_current_label_level); @@ -785,8 +787,8 @@ public: void add_compute_list_usage(ResourceTracker *p_tracker, ResourceUsage p_usage); void add_compute_list_usages(VectorView p_trackers, VectorView p_usages); void add_compute_list_end(); - void add_draw_list_begin(FramebufferCache *p_framebuffer_cache, Rect2i p_region, VectorView p_attachment_operations, VectorView p_attachment_clear_values, bool p_uses_color, bool p_uses_depth, uint32_t p_breadcrumb = 0, bool p_split_cmd_buffer = false); - void add_draw_list_begin(RDD::RenderPassID p_render_pass, RDD::FramebufferID p_framebuffer, Rect2i p_region, VectorView p_attachment_operations, VectorView p_attachment_clear_values, bool p_uses_color, bool p_uses_depth, uint32_t p_breadcrumb = 0, bool p_split_cmd_buffer = false); + void add_draw_list_begin(FramebufferCache *p_framebuffer_cache, Rect2i p_region, VectorView p_attachment_operations, VectorView p_attachment_clear_values, BitField p_stages, uint32_t p_breadcrumb = 0, bool p_split_cmd_buffer = false); + void add_draw_list_begin(RDD::RenderPassID p_render_pass, RDD::FramebufferID p_framebuffer, Rect2i p_region, VectorView p_attachment_operations, VectorView p_attachment_clear_values, BitField p_stages, uint32_t p_breadcrumb = 0, bool p_split_cmd_buffer = false); void add_draw_list_bind_index_buffer(RDD::BufferID p_buffer, RDD::IndexBufferFormat p_format, uint32_t p_offset); void add_draw_list_bind_pipeline(RDD::PipelineID p_pipeline, BitField p_pipeline_stage_bits); void add_draw_list_bind_uniform_set(RDD::ShaderID p_shader, RDD::UniformSetID p_uniform_set, uint32_t set_index); diff --git a/servers/xr/xr_vrs.cpp b/servers/xr/xr_vrs.cpp index 8edefda889c..ff4b09dfeff 100644 --- a/servers/xr/xr_vrs.cpp +++ b/servers/xr/xr_vrs.cpp @@ -104,13 +104,12 @@ void XRVRS::set_vrs_render_region(const Rect2i &p_vrs_render_region) { RID XRVRS::make_vrs_texture(const Size2 &p_target_size, const PackedVector2Array &p_eye_foci) { ERR_FAIL_COND_V(p_eye_foci.is_empty(), RID()); - int32_t texel_width = RD::get_singleton()->limit_get(RD::LIMIT_VRS_TEXEL_WIDTH); - int32_t texel_height = RD::get_singleton()->limit_get(RD::LIMIT_VRS_TEXEL_HEIGHT); + Size2i texel_size = RD::get_singleton()->vrs_get_texel_size(); // Should return sensible data or graphics API does not support VRS. - ERR_FAIL_COND_V(texel_width < 1 || texel_height < 1, RID()); + ERR_FAIL_COND_V(texel_size.x < 1 || texel_size.y < 1, RID()); - Size2 vrs_size = Size2(0.5 + p_target_size.x / texel_width, 0.5 + p_target_size.y / texel_height).round(); + Size2 vrs_size = Size2(0.5 + p_target_size.x / texel_size.x, 0.5 + p_target_size.y / texel_size.y).floor(); // Make sure we have at least one pixel. vrs_size = vrs_size.maxf(1.0); @@ -150,16 +149,18 @@ RID XRVRS::make_vrs_texture(const Size2 &p_target_size, const PackedVector2Array Vector2i view_center; view_center.x = int(vrs_size.x * (eye_foci[i].x + 1.0) * region_ratio.x * 0.5) + region_offset.x; - view_center.y = int(vrs_size.y * (eye_foci[i].y + 1.0) * region_ratio.y * 0.5) + region_offset.y; + view_center.y = int(vrs_size.y * (-eye_foci[i].y + 1.0) * region_ratio.y * 0.5) + region_offset.y; int d = 0; for (int y = 0; y < vrs_sizei.y; y++) { for (int x = 0; x < vrs_sizei.x; x++) { + // Generate a density map that represents the distance to the view focus point. While this leaves the opportunities + // offered by the density map being different in each direction currently unused, it was found to give better tile + // distribution on hardware that supports the feature natively. This area is open to improvements in the future. Vector2 offset = Vector2(x - view_center.x, y - view_center.y) / region_ratio; - real_t density = 255.0 * MAX(0.0, (Math::abs(offset.x) - min_radius) / outer_radius); - data_ptr[d++] = MIN(255, density); - density = 255.0 * MAX(0.0, (Math::abs(offset.y) - min_radius) / outer_radius); - data_ptr[d++] = MIN(255, density); + real_t density = MAX(offset.length() - min_radius, 0.0) / outer_radius; + data_ptr[d++] = CLAMP(255.0 * density, 0, 255); + data_ptr[d++] = CLAMP(255.0 * density, 0, 255); } } images.push_back(Image::create_from_data(vrs_sizei.x, vrs_sizei.y, false, Image::FORMAT_RG8, data));