diff --git a/drivers/vulkan/rendering_device_driver_vulkan.cpp b/drivers/vulkan/rendering_device_driver_vulkan.cpp
index 8378e266660..45137407a28 100644
--- a/drivers/vulkan/rendering_device_driver_vulkan.cpp
+++ b/drivers/vulkan/rendering_device_driver_vulkan.cpp
@@ -35,6 +35,8 @@
 #include "thirdparty/misc/smolv.h"
 #include "vulkan_context.h"
 
+#define PRINT_NATIVE_COMMANDS 0 // Set to 1 to print_line() the native Vulkan calls instrumented below; handles and masks are printed as hex.
+
 /*****************/
 /**** GENERIC ****/
 /*****************/
@@ -622,6 +624,10 @@ RDD::TextureID RenderingDeviceDriverVulkan::texture_create(const TextureFormat &
 	tex_info->allocation.handle = allocation;
 	vmaGetAllocationInfo(allocator, tex_info->allocation.handle, &tex_info->allocation.info);
 
+#if PRINT_NATIVE_COMMANDS
+	print_line(vformat("vkCreateImageView: 0x%X for 0x%X", uint64_t(vk_image_view), uint64_t(vk_image)));
+#endif
+
 	return TextureID(tex_info);
 }
 
@@ -710,6 +716,10 @@ RDD::TextureID RenderingDeviceDriverVulkan::texture_create_shared(TextureID p_or
 	tex_info->vk_view_create_info = image_view_create_info;
 	tex_info->allocation = {};
 
+#if PRINT_NATIVE_COMMANDS
+	print_line(vformat("vkCreateImageView: 0x%X for 0x%X", uint64_t(new_vk_image_view), uint64_t(owner_tex_info->vk_view_create_info.image)));
+#endif
+
 	return TextureID(tex_info);
 }
 
@@ -759,6 +769,10 @@ RDD::TextureID RenderingDeviceDriverVulkan::texture_create_shared_from_slice(Tex
 	tex_info->vk_view_create_info = image_view_create_info;
 	tex_info->allocation = {};
 
+#if PRINT_NATIVE_COMMANDS
+	print_line(vformat("vkCreateImageView: 0x%X for 0x%X (%d %d %d %d)", uint64_t(new_vk_image_view), uint64_t(owner_tex_info->vk_view_create_info.image), p_mipmap, p_mipmaps, p_layer, p_layers));
+#endif
+
 	return TextureID(tex_info);
 }
 
@@ -1071,6 +1085,23 @@ void RenderingDeviceDriverVulkan::command_pipeline_barrier(
 		vk_image_barriers[i].subresourceRange.layerCount = p_texture_barriers[i].subresources.layer_count;
 	}
 
+#if PRINT_NATIVE_COMMANDS
+	print_line(vformat("vkCmdPipelineBarrier MEMORY %d BUFFER %d TEXTURE %d", p_memory_barriers.size(), p_buffer_barriers.size(), p_texture_barriers.size()));
+	for (uint32_t i = 0; i < p_memory_barriers.size(); i++) {
+		print_line(vformat(" VkMemoryBarrier #%d src 0x%X dst 0x%X", i, vk_memory_barriers[i].srcAccessMask, vk_memory_barriers[i].dstAccessMask));
+	}
+
+	for (uint32_t i = 0; i < p_buffer_barriers.size(); i++) {
+		print_line(vformat(" VkBufferMemoryBarrier #%d src 0x%X dst 0x%X buffer 0x%X", i, vk_buffer_barriers[i].srcAccessMask, vk_buffer_barriers[i].dstAccessMask, uint64_t(vk_buffer_barriers[i].buffer)));
+	}
+
+	for (uint32_t i = 0; i < p_texture_barriers.size(); i++) {
+		print_line(vformat(" VkImageMemoryBarrier #%d src 0x%X dst 0x%X image 0x%X old %d new %d (%d %d %d %d)", i, vk_image_barriers[i].srcAccessMask, vk_image_barriers[i].dstAccessMask,
+				uint64_t(vk_image_barriers[i].image), vk_image_barriers[i].oldLayout, vk_image_barriers[i].newLayout, vk_image_barriers[i].subresourceRange.baseMipLevel, vk_image_barriers[i].subresourceRange.levelCount,
+				vk_image_barriers[i].subresourceRange.baseArrayLayer, vk_image_barriers[i].subresourceRange.layerCount));
+	}
+#endif
+
 	vkCmdPipelineBarrier(
 			(VkCommandBuffer)p_cmd_buffer.id,
 			(VkPipelineStageFlags)p_src_stages,
@@ -1225,6 +1256,14 @@ RDD::FramebufferID RenderingDeviceDriverVulkan::framebuffer_create(RenderPassID
 	VkResult err = vkCreateFramebuffer(vk_device, &framebuffer_create_info, nullptr, &vk_framebuffer);
 	ERR_FAIL_COND_V_MSG(err, FramebufferID(), "vkCreateFramebuffer failed with error " + itos(err) + ".");
 
+#if PRINT_NATIVE_COMMANDS
+	print_line(vformat("vkCreateFramebuffer 0x%X with %d attachments", uint64_t(vk_framebuffer), p_attachments.size()));
+	for (uint32_t i = 0; i < p_attachments.size(); i++) {
+		const TextureInfo *attachment_info = (const TextureInfo *)p_attachments[i].id;
+		print_line(vformat(" Attachment #%d: IMAGE 0x%X VIEW 0x%X", i, uint64_t(attachment_info->vk_view_create_info.image), uint64_t(attachment_info->vk_view)));
+	}
+#endif
+
 	return FramebufferID(vk_framebuffer);
 }
 
@@ -2467,10 +2506,18 @@ void RenderingDeviceDriverVulkan::command_begin_render_pass(CommandBufferID p_cm
 
 	VkSubpassContents vk_subpass_contents = p_cmd_buffer_type == COMMAND_BUFFER_TYPE_PRIMARY ? VK_SUBPASS_CONTENTS_INLINE : VK_SUBPASS_CONTENTS_SECONDARY_COMMAND_BUFFERS;
 	vkCmdBeginRenderPass((VkCommandBuffer)p_cmd_buffer.id, &render_pass_begin, vk_subpass_contents);
+
+#if PRINT_NATIVE_COMMANDS
+	print_line(vformat("vkCmdBeginRenderPass Pass 0x%X Framebuffer 0x%X", p_render_pass.id, p_framebuffer.id));
+#endif
 }
 
 void RenderingDeviceDriverVulkan::command_end_render_pass(CommandBufferID p_cmd_buffer) {
 	vkCmdEndRenderPass((VkCommandBuffer)p_cmd_buffer.id);
+
+#if PRINT_NATIVE_COMMANDS
+	print_line("vkCmdEndRenderPass");
+#endif
 }
 
 void RenderingDeviceDriverVulkan::command_next_render_subpass(CommandBufferID p_cmd_buffer, CommandBufferType p_cmd_buffer_type) {
diff --git a/servers/rendering/rendering_device_graph.cpp b/servers/rendering/rendering_device_graph.cpp
index 28526b8f580..904b439e652 100644
--- a/servers/rendering/rendering_device_graph.cpp
+++ b/servers/rendering/rendering_device_graph.cpp
@@ -33,6 +33,7 @@
 #define PRINT_RENDER_GRAPH 0
 #define FORCE_FULL_ACCESS_BITS 0
 #define PRINT_RESOURCE_TRACKER_TOTAL 0
+#define PRINT_COMMAND_RECORDING 0
 
 RenderingDeviceGraph::RenderingDeviceGraph() {
 	// Default initialization.
@@ -163,20 +164,35 @@ void RenderingDeviceGraph::_add_adjacent_command(int32_t p_previous_command_inde const uint32_t previous_command_data_offset = command_data_offsets[p_previous_command_index]; RecordedCommand &previous_command = *reinterpret_cast(&command_data[previous_command_data_offset]); previous_command.adjacent_command_list_index = _add_to_command_list(p_command_index, previous_command.adjacent_command_list_index); - r_command->src_stages = r_command->src_stages | previous_command.dst_stages; + previous_command.next_stages = previous_command.next_stages | r_command->self_stages; + r_command->previous_stages = r_command->previous_stages | previous_command.self_stages; } -int32_t RenderingDeviceGraph::_add_to_write_list(int32_t p_command_index, Rect2i suberesources, int32_t p_list_index) { +int32_t RenderingDeviceGraph::_add_to_slice_read_list(int32_t p_command_index, Rect2i p_subresources, int32_t p_list_index) { DEV_ASSERT(p_command_index < int32_t(command_count)); - DEV_ASSERT(p_list_index < int32_t(write_list_nodes.size())); + DEV_ASSERT(p_list_index < int32_t(read_slice_list_nodes.size())); - int32_t next_index = int32_t(write_list_nodes.size()); - write_list_nodes.resize(next_index + 1); + int32_t next_index = int32_t(read_slice_list_nodes.size()); + read_slice_list_nodes.resize(next_index + 1); - RecordedWriteListNode &new_node = write_list_nodes[next_index]; + RecordedSliceListNode &new_node = read_slice_list_nodes[next_index]; new_node.command_index = p_command_index; new_node.next_list_index = p_list_index; - new_node.subresources = suberesources; + new_node.subresources = p_subresources; + return next_index; +} + +int32_t RenderingDeviceGraph::_add_to_write_list(int32_t p_command_index, Rect2i p_subresources, int32_t p_list_index) { + DEV_ASSERT(p_command_index < int32_t(command_count)); + DEV_ASSERT(p_list_index < int32_t(write_slice_list_nodes.size())); + + int32_t next_index = int32_t(write_slice_list_nodes.size()); + 
write_slice_list_nodes.resize(next_index + 1); + + RecordedSliceListNode &new_node = write_slice_list_nodes[next_index]; + new_node.command_index = p_command_index; + new_node.next_list_index = p_list_index; + new_node.subresources = p_subresources; return next_index; } @@ -203,6 +219,9 @@ RenderingDeviceGraph::ComputeListInstruction *RenderingDeviceGraph::_allocate_co } void RenderingDeviceGraph::_add_command_to_graph(ResourceTracker **p_resource_trackers, ResourceUsage *p_resource_usages, uint32_t p_resource_count, int32_t p_command_index, RecordedCommand *r_command) { + // Assign the next stages derived from the stages the command requires first. + r_command->next_stages = r_command->self_stages; + if (command_label_index >= 0) { // If a label is active, tag the command with the label. r_command->label_index = command_label_index; @@ -242,6 +261,10 @@ void RenderingDeviceGraph::_add_command_to_graph(ResourceTracker **p_resource_tr resource_tracker->reset_if_outdated(tracking_frame); + const RDD::TextureSubresourceRange &subresources = resource_tracker->texture_subresources; + const Rect2i resource_tracker_rect(subresources.base_mipmap, subresources.base_layer, subresources.mipmap_count, subresources.layer_count); + Rect2i search_tracker_rect = resource_tracker_rect; + ResourceUsage new_resource_usage = p_resource_usages[i]; bool write_usage = _is_write_usage(new_resource_usage); BitField new_usage_access = _usage_to_access_bits(new_resource_usage); @@ -264,9 +287,14 @@ void RenderingDeviceGraph::_add_command_to_graph(ResourceTracker **p_resource_tr // If the parent hasn't been used yet, we assign the usage of the slice to the entire resource. resource_tracker->parent->usage = new_resource_usage; - // Also assign the usage to the slice and consider it a write operation. + // Also assign the usage to the slice and consider it a write operation. Consider the parent's current usage access as its own. 
resource_tracker->usage = new_resource_usage; + resource_tracker->usage_access = resource_tracker->parent->usage_access; write_usage = true; + + // Indicate the area that should be tracked is the entire resource. + const RDD::TextureSubresourceRange &parent_subresources = resource_tracker->parent->texture_subresources; + search_tracker_rect = Rect2i(parent_subresources.base_mipmap, parent_subresources.base_layer, parent_subresources.mipmap_count, parent_subresources.layer_count); } else if (resource_tracker->in_parent_dirty_list) { if (resource_tracker->parent->usage == new_resource_usage) { // The slice will be transitioned to the resource of the parent and can be deleted from the dirty list. @@ -274,6 +302,8 @@ void RenderingDeviceGraph::_add_command_to_graph(ResourceTracker **p_resource_tr ResourceTracker *current_tracker = resource_tracker->parent->dirty_shared_list; bool initialized_dirty_rect = false; while (current_tracker != nullptr) { + current_tracker->reset_if_outdated(tracking_frame); + if (current_tracker == resource_tracker) { current_tracker->in_parent_dirty_list = false; @@ -305,6 +335,8 @@ void RenderingDeviceGraph::_add_command_to_graph(ResourceTracker **p_resource_tr ResourceTracker *current_tracker = resource_tracker->parent->dirty_shared_list; bool initialized_dirty_rect = false; while (current_tracker != nullptr) { + current_tracker->reset_if_outdated(tracking_frame); + if (current_tracker->texture_slice_or_dirty_rect.intersects(resource_tracker->texture_slice_or_dirty_rect)) { if (current_tracker->command_frame == tracking_frame && current_tracker->texture_slice_command_index == p_command_index) { ERR_FAIL_MSG("Texture slices that overlap can't be used in the same command."); @@ -312,6 +344,10 @@ void RenderingDeviceGraph::_add_command_to_graph(ResourceTracker **p_resource_tr // Delete the slice from the dirty list and revert it to the usage of the parent. 
if (current_tracker->texture_driver_id != 0) { _add_texture_barrier_to_command(current_tracker->texture_driver_id, current_tracker->usage_access, new_usage_access, current_tracker->usage, resource_tracker->parent->usage, current_tracker->texture_subresources, command_normalization_barriers, r_command->normalization_barrier_index, r_command->normalization_barrier_count); + + // Merge the area of the slice with the current tracking area of the command and indicate it's a write usage as well. + search_tracker_rect = search_tracker_rect.merge(current_tracker->texture_slice_or_dirty_rect); + write_usage = true; } current_tracker->in_parent_dirty_list = false; @@ -339,8 +375,9 @@ void RenderingDeviceGraph::_add_command_to_graph(ResourceTracker **p_resource_tr } } - // If it wasn't in the list, assume the usage is the same as the parent. + // If it wasn't in the list, assume the usage is the same as the parent. Consider the parent's current usage access as its own. resource_tracker->usage = resource_tracker->parent->usage; + resource_tracker->usage_access = resource_tracker->parent->usage_access; if (resource_tracker->usage != new_resource_usage) { // Insert to the dirty list if the requested usage is different. @@ -355,27 +392,30 @@ void RenderingDeviceGraph::_add_command_to_graph(ResourceTracker **p_resource_tr } } } else { - if (resource_tracker->dirty_shared_list != nullptr) { + ResourceTracker *current_tracker = resource_tracker->dirty_shared_list; + if (current_tracker != nullptr) { // Consider the usage as write if we must transition any of the slices. write_usage = true; } - while (resource_tracker->dirty_shared_list != nullptr) { - if (resource_tracker->dirty_shared_list->texture_driver_id != 0) { + while (current_tracker != nullptr) { + current_tracker->reset_if_outdated(tracking_frame); + + if (current_tracker->texture_driver_id != 0) { // Transition all slices to the layout of the parent resource. 
- _add_texture_barrier_to_command(resource_tracker->dirty_shared_list->texture_driver_id, resource_tracker->dirty_shared_list->usage_access, new_usage_access, resource_tracker->dirty_shared_list->usage, resource_tracker->usage, resource_tracker->dirty_shared_list->texture_subresources, command_normalization_barriers, r_command->normalization_barrier_index, r_command->normalization_barrier_count); + _add_texture_barrier_to_command(current_tracker->texture_driver_id, current_tracker->usage_access, new_usage_access, current_tracker->usage, resource_tracker->usage, current_tracker->texture_subresources, command_normalization_barriers, r_command->normalization_barrier_index, r_command->normalization_barrier_count); } - resource_tracker->dirty_shared_list->in_parent_dirty_list = false; - resource_tracker->dirty_shared_list = resource_tracker->dirty_shared_list->next_shared; + current_tracker->in_parent_dirty_list = false; + current_tracker = current_tracker->next_shared; } + + resource_tracker->dirty_shared_list = nullptr; } // Use the resource's parent tracker directly for all search operations. bool resource_has_parent = resource_tracker->parent != nullptr; ResourceTracker *search_tracker = resource_has_parent ? 
resource_tracker->parent : resource_tracker; - const RDD::TextureSubresourceRange &subresources = resource_tracker->texture_subresources; - Rect2i resource_tracker_rect(subresources.base_mipmap, subresources.base_layer, subresources.mipmap_count, subresources.layer_count); bool different_usage = resource_tracker->usage != new_resource_usage; bool write_usage_after_write = (write_usage && search_tracker->write_command_or_list_index >= 0); if (different_usage || write_usage_after_write) { @@ -418,18 +458,18 @@ void RenderingDeviceGraph::_add_command_to_graph(ResourceTracker **p_resource_tr int32_t previous_write_list_index = -1; int32_t write_list_index = search_tracker->write_command_or_list_index; while (write_list_index >= 0) { - const RecordedWriteListNode &write_list_node = write_list_nodes[write_list_index]; - if (!resource_has_parent || resource_tracker_rect.intersects(write_list_node.subresources)) { + const RecordedSliceListNode &write_list_node = write_slice_list_nodes[write_list_index]; + if (!resource_has_parent || search_tracker_rect.intersects(write_list_node.subresources)) { if (write_list_node.command_index == p_command_index) { ERR_FAIL_COND_MSG(!resource_has_parent, "Command can't have itself as a dependency."); } else { // Command is dependent on this command. Add this command to the adjacency list of the write command. _add_adjacent_command(write_list_node.command_index, p_command_index, r_command); - if (resource_has_parent && write_usage && resource_tracker_rect.encloses(write_list_node.subresources)) { + if (resource_has_parent && write_usage && search_tracker_rect.encloses(write_list_node.subresources)) { // Eliminate redundant writes from the list. 
if (previous_write_list_index >= 0) { - RecordedWriteListNode &previous_list_node = write_list_nodes[previous_write_list_index]; + RecordedSliceListNode &previous_list_node = write_slice_list_nodes[previous_write_list_index]; previous_list_node.next_list_index = write_list_node.next_list_index; } else { search_tracker->write_command_or_list_index = write_list_node.next_list_index; @@ -463,47 +503,69 @@ void RenderingDeviceGraph::_add_command_to_graph(ResourceTracker **p_resource_tr search_tracker->write_command_or_list_index = _add_to_write_list(search_tracker->write_command_or_list_index, tracker_rect, -1); } - search_tracker->write_command_or_list_index = _add_to_write_list(p_command_index, resource_tracker_rect, search_tracker->write_command_or_list_index); + search_tracker->write_command_or_list_index = _add_to_write_list(p_command_index, search_tracker_rect, search_tracker->write_command_or_list_index); search_tracker->write_command_list_enabled = true; } else { search_tracker->write_command_or_list_index = p_command_index; search_tracker->write_command_list_enabled = false; } - // We add this command to the adjacency list of all commands that were reading from this resource. We clear the list in the process. - int32_t previous_command_list_index = -1; - int32_t read_command_list_index = search_tracker->read_command_list_index; - while (read_command_list_index >= 0) { - const RecordedCommandListNode &command_list_node = command_list_nodes[read_command_list_index]; + // We add this command to the adjacency list of all commands that were reading from the entire resource. 
+				int32_t read_full_command_list_index = search_tracker->read_full_command_list_index;
+				while (read_full_command_list_index >= 0) {
+					const RecordedCommandListNode command_list_node = command_list_nodes[read_full_command_list_index]; // Copied by value: _add_adjacent_command() below appends to the command list storage, which can reallocate command_list_nodes and invalidate references into it.
 					if (command_list_node.command_index == p_command_index) {
 						if (!resource_has_parent) {
-							// Slices are allowed to be in different usages in the same command as they are guaranteed to have no overlap in the same command.
+							// Only slices are allowed to be in different usages in the same command as they are guaranteed to have no overlap in the same command.
 							ERR_FAIL_MSG("Command can't have itself as a dependency.");
-						} else {
-							// Advance to the next element.
-							read_command_list_index = command_list_node.next_list_index;
-							previous_command_list_index = read_command_list_index;
 						}
 					} else {
-						if (previous_command_list_index >= 0) {
-							// Erase this element and connect the previous one to the next element.
-							command_list_nodes[previous_command_list_index].next_list_index = command_list_node.next_list_index;
-							read_command_list_index = command_list_node.next_list_index;
-							previous_command_list_index = read_command_list_index;
-						} else {
-							// Erase this element from the head of the list.
-							DEV_ASSERT(search_tracker->read_command_list_index == read_command_list_index);
-							read_command_list_index = command_list_node.next_list_index;
-							search_tracker->read_command_list_index = read_command_list_index;
-						}
-
 						// Add this command to the adjacency list of each command that was reading this resource.
 						_add_adjacent_command(command_list_node.command_index, p_command_index, r_command);
 					}
+
+					read_full_command_list_index = command_list_node.next_list_index;
 				}
+
+				if (!resource_has_parent) {
+					// Clear the full list if this resource is not a slice.
+					search_tracker->read_full_command_list_index = -1;
+				}
+
+				// We add this command to the adjacency list of all commands that were reading from resource slices.
+ int32_t previous_slice_command_list_index = -1; + int32_t read_slice_command_list_index = search_tracker->read_slice_command_list_index; + while (read_slice_command_list_index >= 0) { + const RecordedSliceListNode &read_list_node = read_slice_list_nodes[read_slice_command_list_index]; + if (!resource_has_parent || search_tracker_rect.encloses(read_list_node.subresources)) { + if (previous_slice_command_list_index >= 0) { + // Erase this element and connect the previous one to the next element. + read_slice_list_nodes[previous_slice_command_list_index].next_list_index = read_list_node.next_list_index; + } else { + // Erase this element from the head of the list. + DEV_ASSERT(search_tracker->read_slice_command_list_index == read_slice_command_list_index); + search_tracker->read_slice_command_list_index = read_list_node.next_list_index; + } + + // Advance to the next element. + read_slice_command_list_index = read_list_node.next_list_index; + } else { + previous_slice_command_list_index = read_slice_command_list_index; + read_slice_command_list_index = read_list_node.next_list_index; + } + + if (!resource_has_parent || search_tracker_rect.intersects(read_list_node.subresources)) { + // Add this command to the adjacency list of each command that was reading this resource. + // We only add the dependency if there's an intersection between slices or this resource isn't a slice. + _add_adjacent_command(read_list_node.command_index, p_command_index, r_command); + } + } + } else if (resource_has_parent) { + // We add a read dependency to the tracker to indicate this command reads from the resource slice. + search_tracker->read_slice_command_list_index = _add_to_slice_read_list(p_command_index, resource_tracker_rect, search_tracker->read_slice_command_list_index); } else { - // We add a read dependency to the tracker to indicate this command reads from the resource. 
- search_tracker->read_command_list_index = _add_to_command_list(p_command_index, search_tracker->read_command_list_index); + // We add a read dependency to the tracker to indicate this command reads from the entire resource. + search_tracker->read_full_command_list_index = _add_to_command_list(p_command_index, search_tracker->read_full_command_list_index); } } } @@ -913,9 +975,13 @@ void RenderingDeviceGraph::_group_barriers_for_render_commands(RDD::CommandBuffe const uint32_t command_data_offset = command_data_offsets[command_index]; const RecordedCommand *command = reinterpret_cast(&command_data[command_data_offset]); +#if PRINT_COMMAND_RECORDING + print_line(vformat("Grouping barriers for #%d", command_index)); +#endif + // Merge command's stage bits with the barrier group. - barrier_group.src_stages = barrier_group.src_stages | command->src_stages; - barrier_group.dst_stages = barrier_group.dst_stages | command->dst_stages; + barrier_group.src_stages = barrier_group.src_stages | command->previous_stages; + barrier_group.dst_stages = barrier_group.dst_stages | command->next_stages; // Merge command's memory barrier bits with the barrier group. 
barrier_group.memory_barrier.src_access = barrier_group.memory_barrier.src_access | command->memory_barrier.src_access; @@ -925,11 +991,17 @@ void RenderingDeviceGraph::_group_barriers_for_render_commands(RDD::CommandBuffe for (int32_t j = 0; j < command->normalization_barrier_count; j++) { const RDD::TextureBarrier &recorded_barrier = command_normalization_barriers[command->normalization_barrier_index + j]; barrier_group.normalization_barriers.push_back(recorded_barrier); +#if PRINT_COMMAND_RECORDING + print_line(vformat("Normalization Barrier #%d", barrier_group.normalization_barriers.size() - 1)); +#endif } for (int32_t j = 0; j < command->transition_barrier_count; j++) { const RDD::TextureBarrier &recorded_barrier = command_transition_barriers[command->transition_barrier_index + j]; barrier_group.transition_barriers.push_back(recorded_barrier); +#if PRINT_COMMAND_RECORDING + print_line(vformat("Transition Barrier #%d", barrier_group.transition_barriers.size() - 1)); +#endif } #if USE_BUFFER_BARRIERS @@ -1202,7 +1274,8 @@ void RenderingDeviceGraph::begin() { command_label_colors.clear(); command_label_offsets.clear(); command_list_nodes.clear(); - write_list_nodes.clear(); + read_slice_list_nodes.clear(); + write_slice_list_nodes.clear(); command_count = 0; command_label_count = 0; command_timestamp_index = -1; @@ -1225,7 +1298,7 @@ void RenderingDeviceGraph::add_buffer_clear(RDD::BufferID p_dst, ResourceTracker int32_t command_index; RecordedBufferClearCommand *command = static_cast(_allocate_command(sizeof(RecordedBufferClearCommand), command_index)); command->type = RecordedCommand::TYPE_BUFFER_CLEAR; - command->dst_stages = RDD::PIPELINE_STAGE_TRANSFER_BIT; + command->self_stages = RDD::PIPELINE_STAGE_TRANSFER_BIT; command->buffer = p_dst; command->offset = p_offset; command->size = p_size; @@ -1241,7 +1314,7 @@ void RenderingDeviceGraph::add_buffer_copy(RDD::BufferID p_src, ResourceTracker int32_t command_index; RecordedBufferCopyCommand *command = 
static_cast(_allocate_command(sizeof(RecordedBufferCopyCommand), command_index)); command->type = RecordedCommand::TYPE_BUFFER_COPY; - command->dst_stages = RDD::PIPELINE_STAGE_TRANSFER_BIT; + command->self_stages = RDD::PIPELINE_STAGE_TRANSFER_BIT; command->source = p_src; command->destination = p_dst; command->region = p_region; @@ -1256,7 +1329,7 @@ void RenderingDeviceGraph::add_buffer_get_data(RDD::BufferID p_src, ResourceTrac int32_t command_index; RecordedBufferGetDataCommand *command = static_cast(_allocate_command(sizeof(RecordedBufferGetDataCommand), command_index)); command->type = RecordedCommand::TYPE_BUFFER_GET_DATA; - command->dst_stages = RDD::PIPELINE_STAGE_TRANSFER_BIT; + command->self_stages = RDD::PIPELINE_STAGE_TRANSFER_BIT; command->source = p_src; command->destination = p_dst; command->region = p_region; @@ -1277,7 +1350,7 @@ void RenderingDeviceGraph::add_buffer_update(RDD::BufferID p_dst, ResourceTracke int32_t command_index; RecordedBufferUpdateCommand *command = static_cast(_allocate_command(command_size, command_index)); command->type = RecordedCommand::TYPE_BUFFER_UPDATE; - command->dst_stages = RDD::PIPELINE_STAGE_TRANSFER_BIT; + command->self_stages = RDD::PIPELINE_STAGE_TRANSFER_BIT; command->destination = p_dst; command->buffer_copies_count = p_buffer_copies.size(); @@ -1369,7 +1442,7 @@ void RenderingDeviceGraph::add_compute_list_end() { uint32_t command_size = sizeof(RecordedComputeListCommand) + instruction_data_size; RecordedComputeListCommand *command = static_cast(_allocate_command(command_size, command_index)); command->type = RecordedCommand::TYPE_COMPUTE_LIST; - command->dst_stages = compute_instruction_list.stages; + command->self_stages = compute_instruction_list.stages; command->instruction_data_size = instruction_data_size; memcpy(command->instruction_data(), compute_instruction_list.data.ptr(), instruction_data_size); _add_command_to_graph(compute_instruction_list.command_trackers.ptr(), 
compute_instruction_list.command_tracker_usages.ptr(), compute_instruction_list.command_trackers.size(), command_index, command); @@ -1579,7 +1652,7 @@ void RenderingDeviceGraph::add_draw_list_end() { uint32_t command_size = sizeof(RecordedDrawListCommand) + clear_values_size + instruction_data_size; RecordedDrawListCommand *command = static_cast(_allocate_command(command_size, command_index)); command->type = RecordedCommand::TYPE_DRAW_LIST; - command->dst_stages = draw_instruction_list.stages; + command->self_stages = draw_instruction_list.stages; command->instruction_data_size = instruction_data_size; command->render_pass = draw_instruction_list.render_pass; command->framebuffer = draw_instruction_list.framebuffer; @@ -1602,7 +1675,7 @@ void RenderingDeviceGraph::add_texture_clear(RDD::TextureID p_dst, ResourceTrack int32_t command_index; RecordedTextureClearCommand *command = static_cast(_allocate_command(sizeof(RecordedTextureClearCommand), command_index)); command->type = RecordedCommand::TYPE_TEXTURE_CLEAR; - command->dst_stages = RDD::PIPELINE_STAGE_TRANSFER_BIT; + command->self_stages = RDD::PIPELINE_STAGE_TRANSFER_BIT; command->texture = p_dst; command->color = p_color; command->range = p_range; @@ -1618,7 +1691,7 @@ void RenderingDeviceGraph::add_texture_copy(RDD::TextureID p_src, ResourceTracke int32_t command_index; RecordedTextureCopyCommand *command = static_cast(_allocate_command(sizeof(RecordedTextureCopyCommand), command_index)); command->type = RecordedCommand::TYPE_TEXTURE_COPY; - command->dst_stages = RDD::PIPELINE_STAGE_TRANSFER_BIT; + command->self_stages = RDD::PIPELINE_STAGE_TRANSFER_BIT; command->from_texture = p_src; command->to_texture = p_dst; command->region = p_region; @@ -1635,7 +1708,7 @@ void RenderingDeviceGraph::add_texture_get_data(RDD::TextureID p_src, ResourceTr uint64_t command_size = sizeof(RecordedTextureGetDataCommand) + p_buffer_texture_copy_regions.size() * sizeof(RDD::BufferTextureCopyRegion); 
RecordedTextureGetDataCommand *command = static_cast(_allocate_command(command_size, command_index)); command->type = RecordedCommand::TYPE_TEXTURE_GET_DATA; - command->dst_stages = RDD::PIPELINE_STAGE_TRANSFER_BIT; + command->self_stages = RDD::PIPELINE_STAGE_TRANSFER_BIT; command->from_texture = p_src; command->to_buffer = p_dst; command->buffer_texture_copy_regions_count = p_buffer_texture_copy_regions.size(); @@ -1656,7 +1729,7 @@ void RenderingDeviceGraph::add_texture_resolve(RDD::TextureID p_src, ResourceTra int32_t command_index; RecordedTextureResolveCommand *command = static_cast(_allocate_command(sizeof(RecordedTextureResolveCommand), command_index)); command->type = RecordedCommand::TYPE_TEXTURE_RESOLVE; - command->dst_stages = RDD::PIPELINE_STAGE_TRANSFER_BIT; + command->self_stages = RDD::PIPELINE_STAGE_TRANSFER_BIT; command->from_texture = p_src; command->to_texture = p_dst; command->src_layer = p_src_layer; @@ -1676,7 +1749,7 @@ void RenderingDeviceGraph::add_texture_update(RDD::TextureID p_dst, ResourceTrac uint64_t command_size = sizeof(RecordedTextureUpdateCommand) + p_buffer_copies.size() * sizeof(RecordedBufferToTextureCopy); RecordedTextureUpdateCommand *command = static_cast(_allocate_command(command_size, command_index)); command->type = RecordedCommand::TYPE_TEXTURE_UPDATE; - command->dst_stages = RDD::PIPELINE_STAGE_TRANSFER_BIT; + command->self_stages = RDD::PIPELINE_STAGE_TRANSFER_BIT; command->to_texture = p_dst; command->buffer_to_texture_copies_count = p_buffer_copies.size(); @@ -1693,7 +1766,7 @@ void RenderingDeviceGraph::add_capture_timestamp(RDD::QueryPoolID p_query_pool, int32_t command_index; RecordedCaptureTimestampCommand *command = static_cast(_allocate_command(sizeof(RecordedCaptureTimestampCommand), command_index)); command->type = RecordedCommand::TYPE_CAPTURE_TIMESTAMP; - command->dst_stages = 0; + command->self_stages = 0; command->pool = p_query_pool; command->index = p_index; _add_command_to_graph(nullptr, nullptr, 0, 
command_index, command); @@ -1852,6 +1925,10 @@ void RenderingDeviceGraph::end(RDD::CommandBufferID p_command_buffer, bool p_reo _print_render_commands(commands_sorted.ptr(), command_count); #endif +#if PRINT_COMMAND_RECORDING + print_line(vformat("Recording %d commands", command_count)); +#endif + uint32_t boosted_priority = 0; uint32_t current_level = commands_sorted[0].level; uint32_t current_level_start = 0; @@ -1884,6 +1961,10 @@ void RenderingDeviceGraph::end(RDD::CommandBufferID p_command_buffer, bool p_reo } _run_label_command_change(p_command_buffer, -1, -1, true, false, nullptr, 0, current_label_index, current_label_level); + +#if PRINT_COMMAND_RECORDING + print_line(vformat("Recorded %d commands", command_count)); +#endif } // Advance the frame counter. It's not necessary to do this if no commands are recorded because that means no secondary command buffers were used. diff --git a/servers/rendering/rendering_device_graph.h b/servers/rendering/rendering_device_graph.h index 84fbe020955..0d48f0491b2 100644 --- a/servers/rendering/rendering_device_graph.h +++ b/servers/rendering/rendering_device_graph.h @@ -112,8 +112,9 @@ public: int32_t buffer_barrier_count = 0; #endif int32_t label_index = -1; - BitField src_stages; - BitField dst_stages; + BitField previous_stages; + BitField next_stages; + BitField self_stages; }; struct RecordedBufferCopy { @@ -150,7 +151,8 @@ public: struct ResourceTracker { uint32_t reference_count = 0; int64_t command_frame = -1; - int32_t read_command_list_index = -1; + int32_t read_full_command_list_index = -1; + int32_t read_slice_command_list_index = -1; int32_t write_command_or_list_index = -1; int32_t draw_list_index = -1; int32_t compute_list_index = -1; @@ -171,7 +173,8 @@ public: if (new_command_frame != command_frame) { usage_access.clear(); command_frame = new_command_frame; - read_command_list_index = -1; + read_full_command_list_index = -1; + read_slice_command_list_index = -1; write_command_or_list_index = -1; 
draw_list_index = -1; compute_list_index = -1; @@ -237,7 +240,7 @@ private: int32_t next_list_index = -1; }; - struct RecordedWriteListNode { + struct RecordedSliceListNode { int32_t command_index = -1; int32_t next_list_index = -1; Rect2i subresources; @@ -572,7 +575,8 @@ private: uint32_t command_count = 0; uint32_t command_label_count = 0; LocalVector command_list_nodes; - LocalVector write_list_nodes; + LocalVector read_slice_list_nodes; + LocalVector write_slice_list_nodes; int32_t command_timestamp_index = -1; int32_t command_synchronization_index = -1; bool command_synchronization_pending = false; @@ -590,7 +594,8 @@ private: static RDD::BarrierAccessBits _usage_to_access_bits(ResourceUsage p_usage); int32_t _add_to_command_list(int32_t p_command_index, int32_t p_list_index); void _add_adjacent_command(int32_t p_previous_command_index, int32_t p_command_index, RecordedCommand *r_command); - int32_t _add_to_write_list(int32_t p_command_index, Rect2i suberesources, int32_t p_list_index); + int32_t _add_to_slice_read_list(int32_t p_command_index, Rect2i p_subresources, int32_t p_list_index); + int32_t _add_to_write_list(int32_t p_command_index, Rect2i p_subresources, int32_t p_list_index); RecordedCommand *_allocate_command(uint32_t p_command_size, int32_t &r_command_index); DrawListInstruction *_allocate_draw_list_instruction(uint32_t p_instruction_size); ComputeListInstruction *_allocate_compute_list_instruction(uint32_t p_instruction_size);