Merge pull request #87388 from clayjohn/Vulkan-limit-add

Add `LIMIT_MAX_COMPUTE_SHARED_MEMORY_SIZE` support to `limit_get` in the Vulkan backend
This commit is contained in:
Thaddeus Crews 2025-03-05 12:08:00 -06:00
commit aef8ed2901
No known key found for this signature in database
GPG Key ID: 62181B86FE9E5D84
5 changed files with 31 additions and 7 deletions

View File

@ -6218,6 +6218,8 @@ uint64_t RenderingDeviceDriverD3D12::limit_get(Limit p_limit) {
return D3D12_CS_THREAD_GROUP_MAX_Y;
case LIMIT_MAX_COMPUTE_WORKGROUP_SIZE_Z:
return D3D12_CS_THREAD_GROUP_MAX_Z;
case LIMIT_MAX_COMPUTE_SHARED_MEMORY_SIZE:
return D3D12_CS_TGSM_REGISTER_COUNT * sizeof(float);
case LIMIT_SUBGROUP_SIZE:
// Note in min/max. Shader model 6.6 supports it (see https://microsoft.github.io/DirectX-Specs/d3d/HLSL_SM_6_6_WaveSize.html),
// but at this time I don't know the implications on the transpilation to DXIL, etc.

View File

@ -125,6 +125,7 @@ struct MetalLimits {
uint32_t maxVertexInputBindingStride;
uint32_t maxDrawIndexedIndexValue;
uint32_t maxShaderVaryings;
uint32_t maxThreadGroupMemoryAllocation;
double temporalScalerInputContentMinScale;
double temporalScalerInputContentMaxScale;

View File

@ -305,6 +305,14 @@ void MetalDeviceProperties::init_limits(id<MTLDevice> p_device) {
limits.maxVertexInputBindingStride = (2 * KIBI);
limits.maxShaderVaryings = 31; // Accurate on Apple4 and above. See: https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf
if ([p_device supportsFamily:MTLGPUFamilyApple4]) {
limits.maxThreadGroupMemoryAllocation = 32768;
} else if ([p_device supportsFamily:MTLGPUFamilyApple3]) {
limits.maxThreadGroupMemoryAllocation = 16384;
} else {
limits.maxThreadGroupMemoryAllocation = 16352;
}
#if TARGET_OS_IOS && !TARGET_OS_MACCATALYST
limits.minUniformBufferOffsetAlignment = 64;
#endif

View File

@ -3901,16 +3901,16 @@ uint64_t RenderingDeviceDriverMetal::get_lazily_memory_used() {
uint64_t RenderingDeviceDriverMetal::limit_get(Limit p_limit) {
MetalDeviceProperties const &props = (*device_properties);
MetalLimits const &limits = props.limits;
uint64_t safe_unbounded = ((uint64_t)1 << 30);
#if defined(DEV_ENABLED)
#define UNKNOWN(NAME) \
case NAME: \
WARN_PRINT_ONCE("Returning maximum value for unknown limit " #NAME "."); \
return (uint64_t)1 << 30;
return safe_unbounded;
#else
#define UNKNOWN(NAME) \
case NAME: \
return (uint64_t)1 << 30
return safe_unbounded
#endif
// clang-format off
@ -3983,6 +3983,8 @@ uint64_t RenderingDeviceDriverMetal::limit_get(Limit p_limit) {
return limits.maxThreadsPerThreadGroup.height;
case LIMIT_MAX_COMPUTE_WORKGROUP_SIZE_Z:
return limits.maxThreadsPerThreadGroup.depth;
case LIMIT_MAX_COMPUTE_SHARED_MEMORY_SIZE:
return limits.maxThreadGroupMemoryAllocation;
case LIMIT_MAX_VIEWPORT_DIMENSIONS_X:
return limits.maxViewportDimensionX;
case LIMIT_MAX_VIEWPORT_DIMENSIONS_Y:
@ -4008,8 +4010,12 @@ uint64_t RenderingDeviceDriverMetal::limit_get(Limit p_limit) {
UNKNOWN(LIMIT_VRS_TEXEL_HEIGHT);
UNKNOWN(LIMIT_VRS_MAX_FRAGMENT_WIDTH);
UNKNOWN(LIMIT_VRS_MAX_FRAGMENT_HEIGHT);
default:
ERR_FAIL_V(0);
default: {
#ifdef DEV_ENABLED
WARN_PRINT("Returning maximum value for unknown limit " + itos(p_limit) + ".");
#endif
return safe_unbounded;
}
}
// clang-format on
return 0;

View File

@ -5828,6 +5828,7 @@ uint64_t RenderingDeviceDriverVulkan::get_lazily_memory_used() {
uint64_t RenderingDeviceDriverVulkan::limit_get(Limit p_limit) {
const VkPhysicalDeviceLimits &limits = physical_device_properties.limits;
uint64_t safe_unbounded = ((uint64_t)1 << 30);
switch (p_limit) {
case LIMIT_MAX_BOUND_UNIFORM_SETS:
return limits.maxBoundDescriptorSets;
@ -5897,6 +5898,8 @@ uint64_t RenderingDeviceDriverVulkan::limit_get(Limit p_limit) {
return limits.maxComputeWorkGroupSize[1];
case LIMIT_MAX_COMPUTE_WORKGROUP_SIZE_Z:
return limits.maxComputeWorkGroupSize[2];
case LIMIT_MAX_COMPUTE_SHARED_MEMORY_SIZE:
return limits.maxComputeSharedMemorySize;
case LIMIT_MAX_VIEWPORT_DIMENSIONS_X:
return limits.maxViewportDimensions[0];
case LIMIT_MAX_VIEWPORT_DIMENSIONS_Y:
@ -5923,8 +5926,12 @@ uint64_t RenderingDeviceDriverVulkan::limit_get(Limit p_limit) {
// The Vulkan spec states that built in varyings like gl_FragCoord should count against this, but in
// practice, that doesn't seem to be the case. The validation layers don't even complain.
return MIN(limits.maxVertexOutputComponents / 4, limits.maxFragmentInputComponents / 4);
default:
ERR_FAIL_V(0);
default: {
#ifdef DEV_ENABLED
WARN_PRINT("Returning maximum value for unknown limit " + itos(p_limit) + ".");
#endif
return safe_unbounded;
}
}
}