[D3D12] 128 megatexel limit explanation based on the spec [ci skip]
This commit is contained in:
parent
25594c918c
commit
efd7ef212a
|
@ -1413,8 +1413,9 @@ bool D3D12RenderTargetCache::Resolve(const Memory& memory,
|
||||||
if (copy_dest_committed) {
|
if (copy_dest_committed) {
|
||||||
// Write the descriptors and transition the resources.
|
// Write the descriptors and transition the resources.
|
||||||
// Full shared memory without resolution scaling, range of the scaled
|
// Full shared memory without resolution scaling, range of the scaled
|
||||||
// resolve buffer with scaling because only 128 R32 elements can be
|
// resolve buffer with scaling because only 128 * 2^20 R32
|
||||||
// addressed on Nvidia.
|
// elements must be addressable
|
||||||
|
// (D3D12_REQ_BUFFER_RESOURCE_TEXEL_COUNT_2_TO_EXP).
|
||||||
ui::d3d12::util::DescriptorCpuGpuHandlePair descriptor_dest;
|
ui::d3d12::util::DescriptorCpuGpuHandlePair descriptor_dest;
|
||||||
ui::d3d12::util::DescriptorCpuGpuHandlePair descriptor_source;
|
ui::d3d12::util::DescriptorCpuGpuHandlePair descriptor_source;
|
||||||
ui::d3d12::util::DescriptorCpuGpuHandlePair descriptors[2];
|
ui::d3d12::util::DescriptorCpuGpuHandlePair descriptors[2];
|
||||||
|
|
|
@ -76,8 +76,8 @@ class D3D12SharedMemory : public SharedMemory {
|
||||||
|
|
||||||
void WriteRawSRVDescriptor(D3D12_CPU_DESCRIPTOR_HANDLE handle);
|
void WriteRawSRVDescriptor(D3D12_CPU_DESCRIPTOR_HANDLE handle);
|
||||||
void WriteRawUAVDescriptor(D3D12_CPU_DESCRIPTOR_HANDLE handle);
|
void WriteRawUAVDescriptor(D3D12_CPU_DESCRIPTOR_HANDLE handle);
|
||||||
// Due to the Nvidia 128 megatexel limitation, the smallest supported formats
|
// Due to the D3D12_REQ_BUFFER_RESOURCE_TEXEL_COUNT_2_TO_EXP limitation, the
|
||||||
// are 32-bit.
|
// smallest supported formats are 32-bit.
|
||||||
void WriteUintPow2SRVDescriptor(D3D12_CPU_DESCRIPTOR_HANDLE handle,
|
void WriteUintPow2SRVDescriptor(D3D12_CPU_DESCRIPTOR_HANDLE handle,
|
||||||
uint32_t element_size_bytes_pow2);
|
uint32_t element_size_bytes_pow2);
|
||||||
void WriteUintPow2UAVDescriptor(D3D12_CPU_DESCRIPTOR_HANDLE handle,
|
void WriteUintPow2UAVDescriptor(D3D12_CPU_DESCRIPTOR_HANDLE handle,
|
||||||
|
|
|
@ -1715,9 +1715,10 @@ bool D3D12TextureCache::LoadTextureDataFromResidentMemoryImpl(Texture& texture,
|
||||||
}
|
}
|
||||||
|
|
||||||
// Begin loading.
|
// Begin loading.
|
||||||
// May use different buffers for scaled base and mips, and also can't address
|
// May use different buffers for scaled base and mips, and also addressability
|
||||||
// more than 128 megatexels directly on Nvidia - need two separate UAV
|
// of more than 128 * 2^20 (2^D3D12_REQ_BUFFER_RESOURCE_TEXEL_COUNT_2_TO_EXP)
|
||||||
// descriptors for base and mips.
|
// texels is not mandatory - need two separate UAV descriptors for base and
|
||||||
|
// mips.
|
||||||
// Destination.
|
// Destination.
|
||||||
uint32_t descriptor_count = 1;
|
uint32_t descriptor_count = 1;
|
||||||
if (texture_resolution_scaled) {
|
if (texture_resolution_scaled) {
|
||||||
|
@ -1820,7 +1821,8 @@ bool D3D12TextureCache::LoadTextureDataFromResidentMemoryImpl(Texture& texture,
|
||||||
|
|
||||||
if (texture_resolution_scaled) {
|
if (texture_resolution_scaled) {
|
||||||
// Offset already applied in the buffer because more than 512 MB can't be
|
// Offset already applied in the buffer because more than 512 MB can't be
|
||||||
// directly addresses on Nvidia as R32.
|
// directly addressed as R32 on some hardware (above
|
||||||
|
// 2^D3D12_REQ_BUFFER_RESOURCE_TEXEL_COUNT_2_TO_EXP).
|
||||||
load_constants.guest_offset = 0;
|
load_constants.guest_offset = 0;
|
||||||
} else {
|
} else {
|
||||||
load_constants.guest_offset = guest_address;
|
load_constants.guest_offset = guest_address;
|
||||||
|
|
|
@ -377,7 +377,8 @@ struct ResolveCopyShaderInfo {
|
||||||
// shader (at least 2).
|
// shader (at least 2).
|
||||||
uint32_t source_bpe_log2;
|
uint32_t source_bpe_log2;
|
||||||
// Log2 of bytes per element of the type of the destination buffer bound to
|
// Log2 of bytes per element of the type of the destination buffer bound to
|
||||||
// the shader (at least 2 because of Nvidia's 128 megatexel limit that
|
// the shader (at least 2 because of the 128 megatexel minimum requirement on
|
||||||
|
// Direct3D 10+ - D3D12_REQ_BUFFER_RESOURCE_TEXEL_COUNT_2_TO_EXP - that
|
||||||
// prevents binding the entire shared memory buffer with smaller element
|
// prevents binding the entire shared memory buffer with smaller element
|
||||||
// sizes).
|
// sizes).
|
||||||
uint32_t dest_bpe_log2;
|
uint32_t dest_bpe_log2;
|
||||||
|
|
|
@ -18,12 +18,13 @@ namespace gpu {
|
||||||
using namespace ucode;
|
using namespace ucode;
|
||||||
|
|
||||||
// TODO(Triang3l): Support sub-dword memexports (like k_8 in 58410B86). This
|
// TODO(Triang3l): Support sub-dword memexports (like k_8 in 58410B86). This
|
||||||
// would require four 128 MB R8_UINT UAVs due to the Nvidia addressing limit.
|
// would require four 128 MB R8_UINT UAVs due to
|
||||||
// Need to be careful with resource binding tiers, however. Resource binding
|
// D3D12_REQ_BUFFER_RESOURCE_TEXEL_COUNT_2_TO_EXP. Need to be careful with
|
||||||
// tier 1 on feature level 11_0 allows only 8 UAVs _across all stages_.
|
// resource binding tiers, however. Resource binding tier 1 on feature level
|
||||||
// RWByteAddressBuffer + 4 typed buffers is 5 per stage already, would need 10
|
// 11_0 allows only 8 UAVs _across all stages_. RWByteAddressBuffer + 4 typed
|
||||||
// for both VS and PS, or even 11 with the eDRAM ROV. Need to drop draw commands
|
// buffers is 5 per stage already, would need 10 for both VS and PS, or even 11
|
||||||
// doing memexport in both VS and PS on FL 11_0 resource binding tier 1.
|
// with the eDRAM ROV. Need to drop draw commands doing memexport in both VS and
|
||||||
|
// PS on FL 11_0 resource binding tier 1.
|
||||||
|
|
||||||
void DxbcShaderTranslator::ExportToMemory_PackFixed32(
|
void DxbcShaderTranslator::ExportToMemory_PackFixed32(
|
||||||
const uint32_t* eM_temps, uint32_t eM_count, const uint32_t bits[4],
|
const uint32_t* eM_temps, uint32_t eM_count, const uint32_t bits[4],
|
||||||
|
|
Loading…
Reference in New Issue