[D3D12] 128 megatexel limit explanation based on the spec [ci skip]

This commit is contained in:
Triang3l 2022-05-31 23:23:10 +03:00
parent 25594c918c
commit efd7ef212a
5 changed files with 20 additions and 15 deletions

View File

@@ -1413,8 +1413,9 @@ bool D3D12RenderTargetCache::Resolve(const Memory& memory,
if (copy_dest_committed) { if (copy_dest_committed) {
// Write the descriptors and transition the resources. // Write the descriptors and transition the resources.
// Full shared memory without resolution scaling, range of the scaled // Full shared memory without resolution scaling, range of the scaled
// resolve buffer with scaling because only 128 R32 elements can be // resolve buffer with scaling because only 128 * 2^20 R32 elements
// addressed on Nvidia. // are guaranteed to be addressable
// (D3D12_REQ_BUFFER_RESOURCE_TEXEL_COUNT_2_TO_EXP).
ui::d3d12::util::DescriptorCpuGpuHandlePair descriptor_dest; ui::d3d12::util::DescriptorCpuGpuHandlePair descriptor_dest;
ui::d3d12::util::DescriptorCpuGpuHandlePair descriptor_source; ui::d3d12::util::DescriptorCpuGpuHandlePair descriptor_source;
ui::d3d12::util::DescriptorCpuGpuHandlePair descriptors[2]; ui::d3d12::util::DescriptorCpuGpuHandlePair descriptors[2];

View File

@@ -76,8 +76,8 @@ class D3D12SharedMemory : public SharedMemory {
void WriteRawSRVDescriptor(D3D12_CPU_DESCRIPTOR_HANDLE handle); void WriteRawSRVDescriptor(D3D12_CPU_DESCRIPTOR_HANDLE handle);
void WriteRawUAVDescriptor(D3D12_CPU_DESCRIPTOR_HANDLE handle); void WriteRawUAVDescriptor(D3D12_CPU_DESCRIPTOR_HANDLE handle);
// Due to the Nvidia 128 megatexel limitation, the smallest supported formats // Due to the D3D12_REQ_BUFFER_RESOURCE_TEXEL_COUNT_2_TO_EXP limitation, the
// are 32-bit. // smallest supported formats are 32-bit.
void WriteUintPow2SRVDescriptor(D3D12_CPU_DESCRIPTOR_HANDLE handle, void WriteUintPow2SRVDescriptor(D3D12_CPU_DESCRIPTOR_HANDLE handle,
uint32_t element_size_bytes_pow2); uint32_t element_size_bytes_pow2);
void WriteUintPow2UAVDescriptor(D3D12_CPU_DESCRIPTOR_HANDLE handle, void WriteUintPow2UAVDescriptor(D3D12_CPU_DESCRIPTOR_HANDLE handle,

View File

@@ -1715,9 +1715,10 @@ bool D3D12TextureCache::LoadTextureDataFromResidentMemoryImpl(Texture& texture,
} }
// Begin loading. // Begin loading.
// May use different buffers for scaled base and mips, and also can't address // May use different buffers for scaled base and mips, and also addressability
// more than 128 megatexels directly on Nvidia - need two separate UAV // of more than 128 * 2^20 (2^D3D12_REQ_BUFFER_RESOURCE_TEXEL_COUNT_2_TO_EXP)
// descriptors for base and mips. // texels is not mandatory - need two separate UAV descriptors for base and
// mips.
// Destination. // Destination.
uint32_t descriptor_count = 1; uint32_t descriptor_count = 1;
if (texture_resolution_scaled) { if (texture_resolution_scaled) {
@@ -1820,7 +1821,8 @@ bool D3D12TextureCache::LoadTextureDataFromResidentMemoryImpl(Texture& texture,
if (texture_resolution_scaled) { if (texture_resolution_scaled) {
// Offset already applied in the buffer because more than 512 MB can't be // Offset already applied in the buffer because more than 512 MB can't be
// directly addressed on Nvidia as R32. // directly addressed as R32 on some hardware (above
// 2^D3D12_REQ_BUFFER_RESOURCE_TEXEL_COUNT_2_TO_EXP).
load_constants.guest_offset = 0; load_constants.guest_offset = 0;
} else { } else {
load_constants.guest_offset = guest_address; load_constants.guest_offset = guest_address;

View File

@@ -377,7 +377,8 @@ struct ResolveCopyShaderInfo {
// shader (at least 2). // shader (at least 2).
uint32_t source_bpe_log2; uint32_t source_bpe_log2;
// Log2 of bytes per element of the type of the destination buffer bound to // Log2 of bytes per element of the type of the destination buffer bound to
// the shader (at least 2 because of Nvidia's 128 megatexel limit that // the shader (at least 2 because of the 128 megatexel minimum requirement on
// Direct3D 10+ - D3D12_REQ_BUFFER_RESOURCE_TEXEL_COUNT_2_TO_EXP - that
// prevents binding the entire shared memory buffer with smaller element // prevents binding the entire shared memory buffer with smaller element
// sizes). // sizes).
uint32_t dest_bpe_log2; uint32_t dest_bpe_log2;

View File

@@ -18,12 +18,13 @@ namespace gpu {
using namespace ucode; using namespace ucode;
// TODO(Triang3l): Support sub-dword memexports (like k_8 in 58410B86). This // TODO(Triang3l): Support sub-dword memexports (like k_8 in 58410B86). This
// would require four 128 MB R8_UINT UAVs due to the Nvidia addressing limit. // would require four 128 MB R8_UINT UAVs due to
// Need to be careful with resource binding tiers, however. Resource binding // D3D12_REQ_BUFFER_RESOURCE_TEXEL_COUNT_2_TO_EXP. Need to be careful with
// tier 1 on feature level 11_0 allows only 8 UAVs _across all stages_. // resource binding tiers, however. Resource binding tier 1 on feature level
// RWByteAddressBuffer + 4 typed buffers is 5 per stage already, would need 10 // 11_0 allows only 8 UAVs _across all stages_. RWByteAddressBuffer + 4 typed
// for both VS and PS, or even 11 with the eDRAM ROV. Need to drop draw commands // buffers is 5 per stage already, would need 10 for both VS and PS, or even 11
// doing memexport in both VS and PS on FL 11_0 resource binding tier 1. // with the eDRAM ROV. Need to drop draw commands doing memexport in both VS and
// PS on FL 11_0 resource binding tier 1.
void DxbcShaderTranslator::ExportToMemory_PackFixed32( void DxbcShaderTranslator::ExportToMemory_PackFixed32(
const uint32_t* eM_temps, uint32_t eM_count, const uint32_t bits[4], const uint32_t* eM_temps, uint32_t eM_count, const uint32_t bits[4],