From efd7ef212a8b3371030a81978af5c92a064674f1 Mon Sep 17 00:00:00 2001 From: Triang3l Date: Tue, 31 May 2022 23:23:10 +0300 Subject: [PATCH] [D3D12] 128 megatexel limit explanation based on the spec [ci skip] --- src/xenia/gpu/d3d12/d3d12_render_target_cache.cc | 5 +++-- src/xenia/gpu/d3d12/d3d12_shared_memory.h | 4 ++-- src/xenia/gpu/d3d12/d3d12_texture_cache.cc | 10 ++++++---- src/xenia/gpu/draw_util.h | 3 ++- src/xenia/gpu/dxbc_shader_translator_memexport.cc | 13 +++++++------ 5 files changed, 20 insertions(+), 15 deletions(-) diff --git a/src/xenia/gpu/d3d12/d3d12_render_target_cache.cc b/src/xenia/gpu/d3d12/d3d12_render_target_cache.cc index 8c541e531..8ac3f921c 100644 --- a/src/xenia/gpu/d3d12/d3d12_render_target_cache.cc +++ b/src/xenia/gpu/d3d12/d3d12_render_target_cache.cc @@ -1413,8 +1413,9 @@ bool D3D12RenderTargetCache::Resolve(const Memory& memory, if (copy_dest_committed) { // Write the descriptors and transition the resources. // Full shared memory without resolution scaling, range of the scaled - // resolve buffer with scaling because only 128 R32 elements can be - // addressed on Nvidia. + // resolve buffer with scaling because only at least 128 * 2^20 R32 + // elements must be addressable + // (D3D12_REQ_BUFFER_RESOURCE_TEXEL_COUNT_2_TO_EXP). ui::d3d12::util::DescriptorCpuGpuHandlePair descriptor_dest; ui::d3d12::util::DescriptorCpuGpuHandlePair descriptor_source; ui::d3d12::util::DescriptorCpuGpuHandlePair descriptors[2]; diff --git a/src/xenia/gpu/d3d12/d3d12_shared_memory.h b/src/xenia/gpu/d3d12/d3d12_shared_memory.h index dfd1e52c2..abf069447 100644 --- a/src/xenia/gpu/d3d12/d3d12_shared_memory.h +++ b/src/xenia/gpu/d3d12/d3d12_shared_memory.h @@ -76,8 +76,8 @@ class D3D12SharedMemory : public SharedMemory { void WriteRawSRVDescriptor(D3D12_CPU_DESCRIPTOR_HANDLE handle); void WriteRawUAVDescriptor(D3D12_CPU_DESCRIPTOR_HANDLE handle); - // Due to the Nvidia 128 megatexel limitation, the smallest supported formats - // are 32-bit. 
+ // Due to the D3D12_REQ_BUFFER_RESOURCE_TEXEL_COUNT_2_TO_EXP limitation, the + // smallest supported formats are 32-bit. void WriteUintPow2SRVDescriptor(D3D12_CPU_DESCRIPTOR_HANDLE handle, uint32_t element_size_bytes_pow2); void WriteUintPow2UAVDescriptor(D3D12_CPU_DESCRIPTOR_HANDLE handle, diff --git a/src/xenia/gpu/d3d12/d3d12_texture_cache.cc b/src/xenia/gpu/d3d12/d3d12_texture_cache.cc index ad9b320fc..74682680c 100644 --- a/src/xenia/gpu/d3d12/d3d12_texture_cache.cc +++ b/src/xenia/gpu/d3d12/d3d12_texture_cache.cc @@ -1715,9 +1715,10 @@ bool D3D12TextureCache::LoadTextureDataFromResidentMemoryImpl(Texture& texture, } // Begin loading. - // May use different buffers for scaled base and mips, and also can't address - // more than 128 megatexels directly on Nvidia - need two separate UAV - // descriptors for base and mips. + // May use different buffers for scaled base and mips, and also addressability + // of more than 128 * 2^20 (2^D3D12_REQ_BUFFER_RESOURCE_TEXEL_COUNT_2_TO_EXP) + // texels is not mandatory - need two separate UAV descriptors for base and + // mips. // Destination. uint32_t descriptor_count = 1; if (texture_resolution_scaled) { @@ -1820,7 +1821,8 @@ bool D3D12TextureCache::LoadTextureDataFromResidentMemoryImpl(Texture& texture, if (texture_resolution_scaled) { // Offset already applied in the buffer because more than 512 MB can't be - // directly addresses on Nvidia as R32. + // directly addressed as R32 on some hardware (above + // 2^D3D12_REQ_BUFFER_RESOURCE_TEXEL_COUNT_2_TO_EXP). load_constants.guest_offset = 0; } else { load_constants.guest_offset = guest_address; diff --git a/src/xenia/gpu/draw_util.h b/src/xenia/gpu/draw_util.h index 70f8a3bbb..883193074 100644 --- a/src/xenia/gpu/draw_util.h +++ b/src/xenia/gpu/draw_util.h @@ -377,7 +377,8 @@ struct ResolveCopyShaderInfo { // shader (at least 2). 
uint32_t source_bpe_log2; // Log2 of bytes per element of the type of the destination buffer bound to - // the shader (at least 2 because of Nvidia's 128 megatexel limit that + // the shader (at least 2 because of the 128 megatexel minimum requirement on + // Direct3D 10+ - D3D12_REQ_BUFFER_RESOURCE_TEXEL_COUNT_2_TO_EXP - that // prevents binding the entire shared memory buffer with smaller element // sizes). uint32_t dest_bpe_log2; diff --git a/src/xenia/gpu/dxbc_shader_translator_memexport.cc b/src/xenia/gpu/dxbc_shader_translator_memexport.cc index b345f12f4..8d1295ee7 100644 --- a/src/xenia/gpu/dxbc_shader_translator_memexport.cc +++ b/src/xenia/gpu/dxbc_shader_translator_memexport.cc @@ -18,12 +18,13 @@ namespace gpu { using namespace ucode; // TODO(Triang3l): Support sub-dword memexports (like k_8 in 58410B86). This -// would require four 128 MB R8_UINT UAVs due to the Nvidia addressing limit. -// Need to be careful with resource binding tiers, however. Resource binding -// tier 1 on feature level 11_0 allows only 8 UAVs _across all stages_. -// RWByteAddressBuffer + 4 typed buffers is 5 per stage already, would need 10 -// for both VS and PS, or even 11 with the eDRAM ROV. Need to drop draw commands -// doing memexport in both VS and PS on FL 11_0 resource binding tier 1. +// would require four 128 MB R8_UINT UAVs due to +// D3D12_REQ_BUFFER_RESOURCE_TEXEL_COUNT_2_TO_EXP. Need to be careful with +// resource binding tiers, however. Resource binding tier 1 on feature level +// 11_0 allows only 8 UAVs _across all stages_. RWByteAddressBuffer + 4 typed +// buffers is 5 per stage already, would need 10 for both VS and PS, or even 11 +// with the eDRAM ROV. Need to drop draw commands doing memexport in both VS and +// PS on FL 11_0 resource binding tier 1. void DxbcShaderTranslator::ExportToMemory_PackFixed32( const uint32_t* eM_temps, uint32_t eM_count, const uint32_t bits[4],