Fixes and optimizations to mobile renderer

* Only apply final actions to attachments used in the last pass.
* Fixes to draw list final action (was using continue instead of read/drop).
* Profiling regions inside draw lists now properly throw errors.
* Ability to enable gpu profile printing from project settings. (used to debug).
This commit is contained in:
reduz 2021-08-10 21:50:28 -03:00
parent 18bd0fee5a
commit ca117910da
8 changed files with 140 additions and 67 deletions

View File

@ -423,6 +423,8 @@
<member name="debug/settings/stdout/print_fps" type="bool" setter="" getter="" default="false">
Print frames per second to standard output every second.
</member>
<member name="debug/settings/stdout/print_gpu_profile" type="bool" setter="" getter="" default="false">
</member>
<member name="debug/settings/stdout/verbose_stdout" type="bool" setter="" getter="" default="false">
Print more information to standard output when running. It displays information such as memory leaks, which scenes and resources are being loaded, etc.
</member>

View File

@ -3299,13 +3299,25 @@ VkRenderPass RenderingDeviceVulkan::_render_pass_create(const Vector<AttachmentF
// Also, each UNDEFINED will do an immediate layout transition (write), s.t. we must ensure execution synchronization vs.
// the read. If this is a performance issue, one could track the actual last accessor of each resource, adding only that
// stage
switch (is_depth ? p_initial_depth_action : p_initial_action) {
case INITIAL_ACTION_CLEAR_REGION:
case INITIAL_ACTION_CLEAR: {
description.loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR;
description.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_CLEAR;
description.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; //don't care what is there
dependency_from_external.srcStageMask |= reading_stages;
if (p_attachments[i].usage_flags & TEXTURE_USAGE_COLOR_ATTACHMENT_BIT) {
description.loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR;
description.initialLayout = is_sampled ? VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL : (is_storage ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL);
description.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
} else if (p_attachments[i].usage_flags & TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) {
description.loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR;
description.initialLayout = is_sampled ? VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL : (is_storage ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL);
description.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_CLEAR;
dependency_from_external.srcStageMask |= reading_stages;
} else {
description.loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
description.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
description.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; //don't care what is there
dependency_from_external.srcStageMask |= reading_stages;
}
} break;
case INITIAL_ACTION_KEEP: {
if (p_attachments[i].usage_flags & TEXTURE_USAGE_COLOR_ATTACHMENT_BIT) {
@ -3363,7 +3375,58 @@ VkRenderPass RenderingDeviceVulkan::_render_pass_create(const Vector<AttachmentF
}
}
switch (is_depth ? p_final_depth_action : p_final_action) {
bool used_last = false;
{
int last_pass = p_passes.size() - 1;
if (is_depth) {
//likely missing depth resolve?
if (p_passes[last_pass].depth_attachment == i) {
used_last = true;
}
} else {
if (p_passes[last_pass].resolve_attachments.size()) {
//if using resolve attachments, check resolve attachments
for (int j = 0; j < p_passes[last_pass].resolve_attachments.size(); j++) {
if (p_passes[last_pass].resolve_attachments[j] == i) {
used_last = true;
break;
}
}
} else {
for (int j = 0; j < p_passes[last_pass].color_attachments.size(); j++) {
if (p_passes[last_pass].color_attachments[j] == i) {
used_last = true;
break;
}
}
}
}
if (!used_last) {
for (int j = 0; j < p_passes[last_pass].preserve_attachments.size(); j++) {
if (p_passes[last_pass].preserve_attachments[j] == i) {
used_last = true;
break;
}
}
}
}
FinalAction final_action = p_final_action;
FinalAction final_depth_action = p_final_depth_action;
if (!used_last) {
if (is_depth) {
final_depth_action = FINAL_ACTION_DISCARD;
} else {
final_action = FINAL_ACTION_DISCARD;
}
}
switch (is_depth ? final_depth_action : final_action) {
case FINAL_ACTION_READ: {
if (p_attachments[i].usage_flags & TEXTURE_USAGE_COLOR_ATTACHMENT_BIT) {
description.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
@ -3516,21 +3579,6 @@ VkRenderPass RenderingDeviceVulkan::_render_pass_create(const Vector<AttachmentF
resolve_references.push_back(reference);
}
LocalVector<uint32_t> &preserve_references = preserve_reference_array[i];
for (int j = 0; j < pass->preserve_attachments.size(); j++) {
int32_t attachment = pass->preserve_attachments[j];
ERR_FAIL_COND_V_MSG(attachment == FramebufferPass::ATTACHMENT_UNUSED, VK_NULL_HANDLE, "Invalid framebuffer format attachment(" + itos(attachment) + "), in pass (" + itos(i) + "), preserve attachment (" + itos(j) + "). Preserve attachments can't be unused.");
ERR_FAIL_INDEX_V_MSG(attachment, p_attachments.size(), VK_NULL_HANDLE, "Invalid framebuffer format attachment(" + itos(attachment) + "), in pass (" + itos(i) + "), preserve attachment (" + itos(j) + ").");
ERR_FAIL_COND_V_MSG(attachment_last_pass[attachment] == i, VK_NULL_HANDLE, "Invalid framebuffer format attachment(" + itos(attachment) + "), in pass (" + itos(i) + "), it already was used for something else before in this pass.");
attachment_last_pass[attachment] = i;
preserve_references.push_back(attachment);
}
VkAttachmentReference &depth_stencil_reference = depth_reference_array[i];
if (pass->depth_attachment != FramebufferPass::ATTACHMENT_UNUSED) {
@ -3554,6 +3602,22 @@ VkRenderPass RenderingDeviceVulkan::_render_pass_create(const Vector<AttachmentF
depth_stencil_reference.layout = VK_IMAGE_LAYOUT_UNDEFINED;
}
LocalVector<uint32_t> &preserve_references = preserve_reference_array[i];
for (int j = 0; j < pass->preserve_attachments.size(); j++) {
int32_t attachment = pass->preserve_attachments[j];
ERR_FAIL_COND_V_MSG(attachment == FramebufferPass::ATTACHMENT_UNUSED, VK_NULL_HANDLE, "Invalid framebuffer format attachment(" + itos(attachment) + "), in pass (" + itos(i) + "), preserve attachment (" + itos(j) + "). Preserve attachments can't be unused.");
ERR_FAIL_INDEX_V_MSG(attachment, p_attachments.size(), VK_NULL_HANDLE, "Invalid framebuffer format attachment(" + itos(attachment) + "), in pass (" + itos(i) + "), preserve attachment (" + itos(j) + ").");
if (attachment_last_pass[attachment] != i) {
//preserve can still be used to keep depth or color from being discarded after use
attachment_last_pass[attachment] = i;
preserve_references.push_back(attachment);
}
}
VkSubpassDescription &subpass = subpasses[i];
subpass.flags = 0;
subpass.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS;
@ -8864,6 +8928,7 @@ void RenderingDeviceVulkan::_free_rids(T &p_owner, const char *p_type) {
}
void RenderingDeviceVulkan::capture_timestamp(const String &p_name) {
ERR_FAIL_COND_MSG(draw_list != nullptr, "Capturing timestamps during draw list creation is not allowed. Offending timestap was: " + p_name);
ERR_FAIL_COND(frames[frame].timestamp_count >= max_timestamp_query_elements);
//this should be optional for profiling, else it will slow things down

View File

@ -1352,6 +1352,7 @@ Error Main::setup(const char *execpath, int argc, char *argv[], bool p_second_ph
PROPERTY_HINT_RANGE, "0,1000,1"));
GLOBAL_DEF("debug/settings/stdout/print_fps", false);
GLOBAL_DEF("debug/settings/stdout/print_gpu_profile", false);
GLOBAL_DEF("debug/settings/stdout/verbose_stdout", false);
if (!OS::get_singleton()->_verbose_stdout) { // Not manually overridden.
@ -1591,7 +1592,7 @@ Error Main::setup2(Thread::ID p_main_tid_override) {
rendering_server->init();
rendering_server->set_render_loop_enabled(!disable_render_loop);
if (profile_gpu) {
if (profile_gpu || (!editor && bool(GLOBAL_GET("debug/settings/stdout/print_gpu_profile")))) {
rendering_server->set_print_gpu_profile(true);
}

View File

@ -519,7 +519,7 @@ void RenderForwardMobile::_render_scene(RenderDataRD *p_render_data, const Color
using_subpass_post_process = false;
}
if (scene_state.used_screen_texture || scene_state.used_depth_texture) {
if (using_ssr || using_sss || scene_state.used_screen_texture || scene_state.used_depth_texture) {
// can't use our last two subpasses
using_subpass_transparent = false;
using_subpass_post_process = false;
@ -679,17 +679,20 @@ void RenderForwardMobile::_render_scene(RenderDataRD *p_render_data, const Color
_setup_environment(p_render_data, p_render_data->reflection_probe.is_valid(), screen_size, !p_render_data->reflection_probe.is_valid(), p_default_bg_color, p_render_data->render_buffers.is_valid());
RENDER_TIMESTAMP("Render Opaque Subpass");
if (using_subpass_transparent && using_subpass_post_process) {
RENDER_TIMESTAMP("Render Opaque + Transparent + Tonemap");
} else if (using_subpass_transparent) {
RENDER_TIMESTAMP("Render Opaque + Transparent");
} else {
RENDER_TIMESTAMP("Render Opaque");
}
RID rp_uniform_set = _setup_render_pass_uniform_set(RENDER_LIST_OPAQUE, p_render_data, radiance_texture, true);
bool can_continue_color = !scene_state.used_screen_texture && !using_ssr && !using_sss;
bool can_continue_depth = !scene_state.used_depth_texture && !using_ssr && !using_sss;
bool can_continue_color = !using_subpass_transparent && !scene_state.used_screen_texture && !using_ssr && !using_sss;
bool can_continue_depth = !using_subpass_transparent && !scene_state.used_depth_texture && !using_ssr && !using_sss;
{
bool will_continue_color = (can_continue_color || draw_sky || draw_sky_fog_only);
bool will_continue_depth = (can_continue_depth || draw_sky || draw_sky_fog_only);
// regular forward for now
Vector<Color> c;
c.push_back(clear_color.to_linear()); // our render buffer
@ -709,11 +712,11 @@ void RenderForwardMobile::_render_scene(RenderDataRD *p_render_data, const Color
// secondary command buffers need more testing at this time
//multi threaded
thread_draw_lists.resize(RendererThreadPool::singleton->thread_work_pool.get_thread_count());
RD::get_singleton()->draw_list_begin_split(framebuffer, thread_draw_lists.size(), thread_draw_lists.ptr(), keep_color ? RD::INITIAL_ACTION_KEEP : RD::INITIAL_ACTION_CLEAR, will_continue_color ? RD::FINAL_ACTION_CONTINUE : RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_CLEAR, will_continue_depth ? RD::FINAL_ACTION_CONTINUE : RD::FINAL_ACTION_READ, c, 1.0, 0);
RD::get_singleton()->draw_list_begin_split(framebuffer, thread_draw_lists.size(), thread_draw_lists.ptr(), keep_color ? RD::INITIAL_ACTION_KEEP : RD::INITIAL_ACTION_CLEAR, can_continue_color ? RD::FINAL_ACTION_CONTINUE : RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_CLEAR, can_continue_depth ? RD::FINAL_ACTION_CONTINUE : RD::FINAL_ACTION_READ, c, 1.0, 0);
RendererThreadPool::singleton->thread_work_pool.do_work(thread_draw_lists.size(), this, &RenderForwardMobile::_render_list_thread_function, &render_list_params);
} else {
//single threaded
RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(framebuffer, keep_color ? RD::INITIAL_ACTION_KEEP : RD::INITIAL_ACTION_CLEAR, will_continue_color ? RD::FINAL_ACTION_CONTINUE : RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_CLEAR, will_continue_depth ? RD::FINAL_ACTION_CONTINUE : RD::FINAL_ACTION_READ, c, 1.0, 0);
RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(framebuffer, keep_color ? RD::INITIAL_ACTION_KEEP : RD::INITIAL_ACTION_CLEAR, can_continue_color ? RD::FINAL_ACTION_CONTINUE : RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_CLEAR, can_continue_depth ? RD::FINAL_ACTION_CONTINUE : RD::FINAL_ACTION_READ, c, 1.0, 0);
_render_list(draw_list, fb_format, &render_list_params, 0, render_list_params.element_count);
}
}
@ -721,8 +724,6 @@ void RenderForwardMobile::_render_scene(RenderDataRD *p_render_data, const Color
RD::get_singleton()->draw_command_end_label(); //Render Opaque Subpass
if (draw_sky || draw_sky_fog_only) {
RENDER_TIMESTAMP("Render Sky Subpass");
RD::get_singleton()->draw_command_begin_label("Draw Sky Subpass");
RD::DrawListID draw_list = RD::get_singleton()->draw_list_switch_to_next_pass();
@ -752,7 +753,6 @@ void RenderForwardMobile::_render_scene(RenderDataRD *p_render_data, const Color
}
// transparent pass
RENDER_TIMESTAMP("Render Transparent Subpass");
RD::get_singleton()->draw_command_begin_label("Render Transparent Subpass");
@ -789,6 +789,8 @@ void RenderForwardMobile::_render_scene(RenderDataRD *p_render_data, const Color
RD::get_singleton()->draw_list_end(RD::BARRIER_MASK_ALL);
} else {
RENDER_TIMESTAMP("Render Transparent");
framebuffer = render_buffer->color_fbs[FB_CONFIG_ONE_PASS];
// this may be needed if we re-introduced steps that change info, not sure which do so in the previous implementation

View File

@ -2010,7 +2010,6 @@ void RendererSceneRenderRD::_post_process_subpass(RID p_source_texture, RID p_fr
bool can_use_effects = rb->width >= 8 && rb->height >= 8;
RENDER_TIMESTAMP("Tonemap");
RD::DrawListID draw_list = RD::get_singleton()->draw_list_switch_to_next_pass();
EffectsRD::TonemapSettings tonemap;

View File

@ -3,7 +3,7 @@
#define LIGHT_BAKE_STATIC 2
struct LightData { //this structure needs to be as packed as possible
vec3 position;
highp vec3 position;
float inv_radius;
vec3 direction;
@ -17,8 +17,8 @@ struct LightData { //this structure needs to be as packed as possible
float specular_amount;
bool shadow_enabled;
vec4 atlas_rect; // rect in the shadow atlas
mat4 shadow_matrix;
highp vec4 atlas_rect; // rect in the shadow atlas
highp mat4 shadow_matrix;
float shadow_bias;
float shadow_normal_bias;
float transmittance_bias;
@ -27,7 +27,7 @@ struct LightData { //this structure needs to be as packed as possible
uint mask;
float shadow_volumetric_fog_fade;
uint bake_mode;
vec4 projector_rect; //projector rect in srgb decal atlas
highp vec4 projector_rect; //projector rect in srgb decal atlas
};
#define REFLECTION_AMBIENT_DISABLED 0
@ -69,13 +69,13 @@ struct DirectionalLightData {
vec4 shadow_bias;
vec4 shadow_normal_bias;
vec4 shadow_transmittance_bias;
vec4 shadow_z_range;
vec4 shadow_range_begin;
highp vec4 shadow_z_range;
highp vec4 shadow_range_begin;
vec4 shadow_split_offsets;
mat4 shadow_matrix1;
mat4 shadow_matrix2;
mat4 shadow_matrix3;
mat4 shadow_matrix4;
highp mat4 shadow_matrix1;
highp mat4 shadow_matrix2;
highp mat4 shadow_matrix3;
highp mat4 shadow_matrix4;
vec4 shadow_color1;
vec4 shadow_color2;
vec4 shadow_color3;

View File

@ -59,27 +59,27 @@ layout(location = 11) in vec4 weight_attrib;
/* Varyings */
layout(location = 0) out vec3 vertex_interp;
layout(location = 0) highp out vec3 vertex_interp;
#ifdef NORMAL_USED
layout(location = 1) out vec3 normal_interp;
layout(location = 1) mediump out vec3 normal_interp;
#endif
#if defined(COLOR_USED)
layout(location = 2) out vec4 color_interp;
layout(location = 2) mediump out vec4 color_interp;
#endif
#ifdef UV_USED
layout(location = 3) out vec2 uv_interp;
layout(location = 3) mediump out vec2 uv_interp;
#endif
#if defined(UV2_USED) || defined(USE_LIGHTMAP)
layout(location = 4) out vec2 uv2_interp;
layout(location = 4) mediump out vec2 uv2_interp;
#endif
#if defined(TANGENT_USED) || defined(NORMAL_MAP_USED) || defined(LIGHT_ANISOTROPY_USED)
layout(location = 5) out vec3 tangent_interp;
layout(location = 6) out vec3 binormal_interp;
layout(location = 5) mediump out vec3 tangent_interp;
layout(location = 6) mediump out vec3 binormal_interp;
#endif
#ifdef MATERIAL_UNIFORMS_USED
@ -370,6 +370,10 @@ void main() {
#VERSION_DEFINES
//use medium precision for floats on mobile.
precision mediump float;
/* Specialization Constants */
/* Specialization Constants (Toggles) */
@ -395,32 +399,32 @@ layout(constant_id = 11) const bool sc_projector_use_mipmaps = true;
/* Varyings */
layout(location = 0) in vec3 vertex_interp;
layout(location = 0) highp in vec3 vertex_interp;
#ifdef NORMAL_USED
layout(location = 1) in vec3 normal_interp;
layout(location = 1) mediump in vec3 normal_interp;
#endif
#if defined(COLOR_USED)
layout(location = 2) in vec4 color_interp;
layout(location = 2) mediump in vec4 color_interp;
#endif
#ifdef UV_USED
layout(location = 3) in vec2 uv_interp;
layout(location = 3) mediump in vec2 uv_interp;
#endif
#if defined(UV2_USED) || defined(USE_LIGHTMAP)
layout(location = 4) in vec2 uv2_interp;
layout(location = 4) mediump in vec2 uv2_interp;
#endif
#if defined(TANGENT_USED) || defined(NORMAL_MAP_USED) || defined(LIGHT_ANISOTROPY_USED)
layout(location = 5) in vec3 tangent_interp;
layout(location = 6) in vec3 binormal_interp;
layout(location = 5) mediump in vec3 tangent_interp;
layout(location = 6) mediump in vec3 binormal_interp;
#endif
#ifdef MODE_DUAL_PARABOLOID
layout(location = 8) in float dp_clip;
layout(location = 8) highp in float dp_clip;
#endif

View File

@ -16,12 +16,12 @@
/* don't exceed 128 bytes!! */
/* put instance data into our push content, not a array */
layout(push_constant, binding = 0, std430) uniform DrawCall {
mat4 transform; // 64 - 64
highp mat4 transform; // 64 - 64
uint flags; // 04 - 68
uint instance_uniforms_ofs; //base offset in global buffer for instance variables // 04 - 72
uint gi_offset; //GI information when using lightmapping (VCT or lightmap index) // 04 - 76
uint layer_mask; // 04 - 80
vec4 lightmap_uv_scale; // 16 - 96 doubles as uv_offset when needed
highp vec4 lightmap_uv_scale; // 16 - 96 doubles as uv_offset when needed
uvec2 reflection_probes; // 08 - 104
uvec2 omni_lights; // 08 - 112
@ -126,14 +126,14 @@ global_variables;
/* Set 1: Render Pass (changes per render pass) */
layout(set = 1, binding = 0, std140) uniform SceneData {
mat4 projection_matrix;
mat4 inv_projection_matrix;
mat4 camera_matrix;
mat4 inv_camera_matrix;
highp mat4 projection_matrix;
highp mat4 inv_projection_matrix;
highp mat4 camera_matrix;
highp mat4 inv_camera_matrix;
// only used for multiview
mat4 projection_matrix_view[MAX_VIEWS];
mat4 inv_projection_matrix_view[MAX_VIEWS];
highp mat4 projection_matrix_view[MAX_VIEWS];
highp mat4 inv_projection_matrix_view[MAX_VIEWS];
vec2 viewport_size;
vec2 screen_pixel_size;