diff --git a/drivers/vulkan/rendering_device_vulkan.cpp b/drivers/vulkan/rendering_device_vulkan.cpp index 1ed6839dd7e..a55abe6a82b 100644 --- a/drivers/vulkan/rendering_device_vulkan.cpp +++ b/drivers/vulkan/rendering_device_vulkan.cpp @@ -5905,6 +5905,64 @@ void RenderingDeviceVulkan::uniform_set_set_invalidation_callback(RID p_uniform_ us->invalidated_callback_userdata = p_userdata; } +Error RenderingDeviceVulkan::buffer_copy(RID p_src_buffer, RID p_dst_buffer, uint32_t p_src_offset, uint32_t p_dst_offset, uint32_t p_size, BitField p_post_barrier) { + _THREAD_SAFE_METHOD_ + + ERR_FAIL_COND_V_MSG(draw_list, ERR_INVALID_PARAMETER, + "Copying buffers is forbidden during creation of a draw list"); + ERR_FAIL_COND_V_MSG(compute_list, ERR_INVALID_PARAMETER, + "Copying buffers is forbidden during creation of a compute list"); + + // This method assumes the barriers have been pushed prior to being called, therefore no barriers are pushed + // for the source or destination buffers before performing the copy. These masks are effectively ignored. + VkPipelineShaderStageCreateFlags src_stage_mask = 0; + VkAccessFlags src_access_mask = 0; + Buffer *src_buffer = _get_buffer_from_owner(p_src_buffer, src_stage_mask, src_access_mask, BARRIER_MASK_NO_BARRIER); + if (!src_buffer) { + ERR_FAIL_V_MSG(ERR_INVALID_PARAMETER, "Source buffer argument is not a valid buffer of any type."); + } + + VkPipelineStageFlags dst_stage_mask = 0; + VkAccessFlags dst_access = 0; + if (p_post_barrier.has_flag(BARRIER_MASK_TRANSFER)) { + // If the post barrier mask defines it, we indicate the destination buffer will require a barrier with these flags set + // after the copy command is queued. + dst_stage_mask = VK_PIPELINE_STAGE_TRANSFER_BIT; + dst_access = VK_ACCESS_TRANSFER_WRITE_BIT; + } + + Buffer *dst_buffer = _get_buffer_from_owner(p_dst_buffer, dst_stage_mask, dst_access, p_post_barrier); + if (!dst_buffer) { + ERR_FAIL_V_MSG(ERR_INVALID_PARAMETER, "Destination buffer argument is not a valid buffer of any type."); + } + + // Validate the copy's dimensions for both buffers. + ERR_FAIL_COND_V_MSG((p_size + p_src_offset) > src_buffer->size, ERR_INVALID_PARAMETER, "Size is larger than the source buffer."); + ERR_FAIL_COND_V_MSG((p_size + p_dst_offset) > dst_buffer->size, ERR_INVALID_PARAMETER, "Size is larger than the destination buffer."); + + // Perform the copy. + VkBufferCopy region; + region.srcOffset = p_src_offset; + region.dstOffset = p_dst_offset; + region.size = p_size; + vkCmdCopyBuffer(frames[frame].draw_command_buffer, src_buffer->buffer, dst_buffer->buffer, 1, ®ion); + +#ifdef FORCE_FULL_BARRIER + _full_barrier(true); +#else + if (dst_stage_mask == 0) { + dst_stage_mask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT; + } + + // As indicated by the post barrier mask, push a new barrier. + if (p_post_barrier != RD::BARRIER_MASK_NO_BARRIER) { + _buffer_memory_barrier(dst_buffer->buffer, p_dst_offset, p_size, VK_PIPELINE_STAGE_TRANSFER_BIT, dst_stage_mask, VK_ACCESS_TRANSFER_WRITE_BIT, dst_access, true); + } +#endif + + return OK; +} + Error RenderingDeviceVulkan::buffer_update(RID p_buffer, uint32_t p_offset, uint32_t p_size, const void *p_data, BitField p_post_barrier) { _THREAD_SAFE_METHOD_ diff --git a/drivers/vulkan/rendering_device_vulkan.h b/drivers/vulkan/rendering_device_vulkan.h index fd832312acb..edff19a70cd 100644 --- a/drivers/vulkan/rendering_device_vulkan.h +++ b/drivers/vulkan/rendering_device_vulkan.h @@ -1152,6 +1152,7 @@ public: virtual bool uniform_set_is_valid(RID p_uniform_set); virtual void uniform_set_set_invalidation_callback(RID p_uniform_set, InvalidationCallback p_callback, void *p_userdata); + virtual Error buffer_copy(RID p_src_buffer, RID p_dst_buffer, uint32_t p_src_offset, uint32_t p_dst_offset, uint32_t p_size, BitField p_post_barrier = BARRIER_MASK_ALL_BARRIERS); virtual Error buffer_update(RID p_buffer, uint32_t p_offset, uint32_t p_size, const void *p_data, BitField p_post_barrier = BARRIER_MASK_ALL_BARRIERS); // Works for any buffer. virtual Error buffer_clear(RID p_buffer, uint32_t p_offset, uint32_t p_size, BitField p_post_barrier = BARRIER_MASK_ALL_BARRIERS); virtual Vector buffer_get_data(RID p_buffer, uint32_t p_offset = 0, uint32_t p_size = 0); diff --git a/servers/rendering/renderer_rd/storage_rd/mesh_storage.cpp b/servers/rendering/renderer_rd/storage_rd/mesh_storage.cpp index 6888af7af91..56f2ea0b0c4 100644 --- a/servers/rendering/renderer_rd/storage_rd/mesh_storage.cpp +++ b/servers/rendering/renderer_rd/storage_rd/mesh_storage.cpp @@ -1309,13 +1309,10 @@ void MeshStorage::_multimesh_enable_motion_vectors(MultiMesh *multimesh) { RID new_buffer = RD::get_singleton()->storage_buffer_create(new_buffer_size); if (multimesh->buffer_set && multimesh->data_cache.is_empty()) { - // If the buffer was set but there's no data cached in the CPU, we must download it from the GPU and - // upload it because RD does not provide a way to copy the buffer directly yet. + // If the buffer was set but there's no data cached in the CPU, we copy the buffer directly on the GPU. RD::get_singleton()->barrier(); - Vector buffer_data = RD::get_singleton()->buffer_get_data(multimesh->buffer); - ERR_FAIL_COND(buffer_data.size() != int(buffer_size)); - RD::get_singleton()->buffer_update(new_buffer, 0, buffer_size, buffer_data.ptr(), RD::BARRIER_MASK_NO_BARRIER); - RD::get_singleton()->buffer_update(new_buffer, buffer_size, buffer_size, buffer_data.ptr()); + RD::get_singleton()->buffer_copy(multimesh->buffer, new_buffer, 0, 0, buffer_size, RD::BARRIER_MASK_NO_BARRIER); + RD::get_singleton()->buffer_copy(multimesh->buffer, new_buffer, 0, buffer_size, buffer_size); } else if (!multimesh->data_cache.is_empty()) { // Simply upload the data cached in the CPU, which should already be doubled in size. ERR_FAIL_COND(multimesh->data_cache.size() * sizeof(float) != size_t(new_buffer_size)); diff --git a/servers/rendering/rendering_device.h b/servers/rendering/rendering_device.h index 30d9b1c7b72..1ade1b25c4d 100644 --- a/servers/rendering/rendering_device.h +++ b/servers/rendering/rendering_device.h @@ -840,6 +840,7 @@ public: virtual bool uniform_set_is_valid(RID p_uniform_set) = 0; virtual void uniform_set_set_invalidation_callback(RID p_uniform_set, InvalidationCallback p_callback, void *p_userdata) = 0; + virtual Error buffer_copy(RID p_src_buffer, RID p_dst_buffer, uint32_t p_src_offset, uint32_t p_dst_offset, uint32_t p_size, BitField p_post_barrier = BARRIER_MASK_ALL_BARRIERS) = 0; virtual Error buffer_update(RID p_buffer, uint32_t p_offset, uint32_t p_size, const void *p_data, BitField p_post_barrier = BARRIER_MASK_ALL_BARRIERS) = 0; virtual Error buffer_clear(RID p_buffer, uint32_t p_offset, uint32_t p_size, BitField p_post_barrier = BARRIER_MASK_ALL_BARRIERS) = 0; virtual Vector buffer_get_data(RID p_buffer, uint32_t p_offset = 0, uint32_t p_size = 0) = 0; // This causes stall, only use to retrieve large buffers for saving.