[D3D12] Depth untiling, update depth resolve documentation
This commit is contained in:
parent
da1be211eb
commit
ddc8f17fa5
|
@ -922,8 +922,11 @@ bool RenderTargetCache::ResolveCopy(SharedMemory* shared_memory,
|
||||||
}
|
}
|
||||||
assert_true(src_texture_format != TextureFormat::kUnknown);
|
assert_true(src_texture_format != TextureFormat::kUnknown);
|
||||||
src_texture_format = GetBaseFormat(src_texture_format);
|
src_texture_format = GetBaseFormat(src_texture_format);
|
||||||
|
// The destination format is specified as k_8_8_8_8 when resolving depth;
|
||||||
|
// apparently there's no format conversion.
|
||||||
TextureFormat dest_format =
|
TextureFormat dest_format =
|
||||||
GetBaseFormat(TextureFormat((dest_info >> 7) & 0x3F));
|
is_depth ? src_texture_format
|
||||||
|
: GetBaseFormat(TextureFormat((dest_info >> 7) & 0x3F));
|
||||||
|
|
||||||
// Get the destination location.
|
// Get the destination location.
|
||||||
uint32_t dest_address = regs[XE_GPU_REG_RB_COPY_DEST_BASE].u32 & 0x1FFFFFFF;
|
uint32_t dest_address = regs[XE_GPU_REG_RB_COPY_DEST_BASE].u32 & 0x1FFFFFFF;
|
||||||
|
@ -946,30 +949,25 @@ bool RenderTargetCache::ResolveCopy(SharedMemory* shared_memory,
|
||||||
|
|
||||||
// There are 3 paths for resolving in this function - they don't necessarily
|
// There are 3 paths for resolving in this function - they don't necessarily
|
||||||
// have to map directly to kRaw and kConvert CopyCommands.
|
// have to map directly to kRaw and kConvert CopyCommands.
|
||||||
|
// - Depth - tiling raw D24S8 or D24FS8 directly from the EDRAM buffer to the
|
||||||
|
// shared memory. Only 1 sample is resolved from a depth buffer, and it
|
||||||
|
// looks like format conversion can't be done when resolving depth buffers
|
||||||
|
// since k_8_8_8_8 is specified as the destination format, while the texture
|
||||||
|
// is being used as k_24_8 or k_24_8_FLOAT.
|
||||||
// - Raw color - when the source is single-sampled and has the same format as
|
// - Raw color - when the source is single-sampled and has the same format as
|
||||||
// the destination, and there's no need to apply exponent bias. A regular
|
// the destination, and there's no need to apply exponent bias. A regular
|
||||||
// EDRAM load is done to a buffer, and the buffer is then tiled to the
|
// EDRAM load is done to a buffer, and the buffer is then tiled to the
|
||||||
// shared memory. Because swapping red and blue is very common, this path
|
// shared memory. Because swapping red and blue is very common, this path
|
||||||
// supports swapping.
|
// supports swapping.
|
||||||
// - Depth to depth - when the source and the destination formats are
|
|
||||||
// renderable depth-stencil ones (D24S8 or D24FS8). A single sample is
|
|
||||||
// taken from the EDRAM buffer, converted between D24 and D24F if needed,
|
|
||||||
// and tiled directly to the shared memory buffer.
|
|
||||||
// - Conversion - when a simple copy is not enough. The EDRAM region is loaded
|
// - Conversion - when a simple copy is not enough. The EDRAM region is loaded
|
||||||
// to a render target resource, which is then used as a texture in a shader
|
// to a render target resource, which is then used as a texture in a shader
|
||||||
// performing the resolve (by sampling the texture on or between pixels with
|
// performing the resolve (by sampling the texture on or between pixels with
|
||||||
// bilinear filtering), applying exponent bias and swapping red and blue in
|
// bilinear filtering), applying exponent bias and swapping red and blue in
|
||||||
// a format-agnostic way, then the resulting color is written to a temporary
|
// a format-agnostic way, then the resulting color is written to a temporary
|
||||||
// RTV of the destination format. This also works for converting depth to
|
// RTV of the destination format.
|
||||||
// 16-bit or 32-bit.
|
if (is_depth) {
|
||||||
if (dest_format == TextureFormat::k_24_8 ||
|
// Depth.
|
||||||
dest_format == TextureFormat::k_24_8_FLOAT) {
|
// TODO(Triang3l): Resolve depth.
|
||||||
// Depth to depth.
|
|
||||||
XELOGGPU("Resolving to a depth texture");
|
|
||||||
if (!is_depth) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
// TODO(Triang3l): Depth to depth.
|
|
||||||
return false;
|
return false;
|
||||||
} else if (src_texture_format == dest_format &&
|
} else if (src_texture_format == dest_format &&
|
||||||
msaa_samples == MsaaSamples::k1X && dest_exp_bias == 0) {
|
msaa_samples == MsaaSamples::k1X && dest_exp_bias == 0) {
|
||||||
|
|
|
@ -0,0 +1,25 @@
|
||||||
|
#include "pixel_formats.hlsli"
|
||||||
|
#include "texture_copy.hlsli"
|
||||||
|
|
||||||
|
[numthreads(8, 32, 1)]
|
||||||
|
void main(uint3 xe_thread_id : SV_DispatchThreadID) {
|
||||||
|
// 1 thread = 4 depth texels (24-bit float depth converted to 32-bit, can't
|
||||||
|
// read stencil in shaders anyway because it would require a separate
|
||||||
|
// DXGI_FORMAT_X32_TYPELESS_G8X24_UINT SRV).
|
||||||
|
uint3 block_index = xe_thread_id;
|
||||||
|
block_index.x <<= 2u;
|
||||||
|
[branch] if (any(block_index >= xe_texture_copy_size_blocks)) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
uint4 block_offsets_guest =
|
||||||
|
XeTextureCopyGuestBlockOffsets(block_index, 4u, 2u);
|
||||||
|
uint4 blocks = uint4(xe_texture_copy_source.Load(block_offsets_guest.x),
|
||||||
|
xe_texture_copy_source.Load(block_offsets_guest.y),
|
||||||
|
xe_texture_copy_source.Load(block_offsets_guest.z),
|
||||||
|
xe_texture_copy_source.Load(block_offsets_guest.w));
|
||||||
|
blocks = XeByteSwap(blocks, xe_texture_copy_endianness);
|
||||||
|
uint block_offset_host = XeTextureHostLinearOffset(
|
||||||
|
block_index, xe_texture_copy_size_blocks.y, xe_texture_copy_host_pitch,
|
||||||
|
4u) + xe_texture_copy_host_base;
|
||||||
|
xe_texture_copy_dest.Store4(block_offset_host, XeFloat20e4To32(blocks >> 8u));
|
||||||
|
}
|
|
@ -0,0 +1,25 @@
|
||||||
|
#include "texture_copy.hlsli"
|
||||||
|
|
||||||
|
[numthreads(8, 32, 1)]
|
||||||
|
void main(uint3 xe_thread_id : SV_DispatchThreadID) {
|
||||||
|
// 1 thread = 4 depth texels (24-bit unorm depth converted to 32-bit, can't
|
||||||
|
// read stencil in shaders anyway because it would require a separate
|
||||||
|
// DXGI_FORMAT_X24_TYPELESS_G8_UINT SRV).
|
||||||
|
uint3 block_index = xe_thread_id;
|
||||||
|
block_index.x <<= 2u;
|
||||||
|
[branch] if (any(block_index >= xe_texture_copy_size_blocks)) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
uint4 block_offsets_guest =
|
||||||
|
XeTextureCopyGuestBlockOffsets(block_index, 4u, 2u);
|
||||||
|
uint4 blocks = uint4(xe_texture_copy_source.Load(block_offsets_guest.x),
|
||||||
|
xe_texture_copy_source.Load(block_offsets_guest.y),
|
||||||
|
xe_texture_copy_source.Load(block_offsets_guest.z),
|
||||||
|
xe_texture_copy_source.Load(block_offsets_guest.w));
|
||||||
|
blocks = XeByteSwap(blocks, xe_texture_copy_endianness);
|
||||||
|
uint block_offset_host = XeTextureHostLinearOffset(
|
||||||
|
block_index, xe_texture_copy_size_blocks.y, xe_texture_copy_host_pitch,
|
||||||
|
4u) + xe_texture_copy_host_base;
|
||||||
|
xe_texture_copy_dest.Store4(block_offset_host,
|
||||||
|
asuint(float4(blocks >> 8u) / 16777215.0));
|
||||||
|
}
|
|
@ -31,6 +31,8 @@ namespace d3d12 {
|
||||||
#include "xenia/gpu/d3d12/shaders/bin/texture_load_64bpb_cs.h"
|
#include "xenia/gpu/d3d12/shaders/bin/texture_load_64bpb_cs.h"
|
||||||
#include "xenia/gpu/d3d12/shaders/bin/texture_load_8bpb_cs.h"
|
#include "xenia/gpu/d3d12/shaders/bin/texture_load_8bpb_cs.h"
|
||||||
#include "xenia/gpu/d3d12/shaders/bin/texture_load_ctx1_cs.h"
|
#include "xenia/gpu/d3d12/shaders/bin/texture_load_ctx1_cs.h"
|
||||||
|
#include "xenia/gpu/d3d12/shaders/bin/texture_load_depth_float_cs.h"
|
||||||
|
#include "xenia/gpu/d3d12/shaders/bin/texture_load_depth_unorm_cs.h"
|
||||||
#include "xenia/gpu/d3d12/shaders/bin/texture_load_dxt3a_cs.h"
|
#include "xenia/gpu/d3d12/shaders/bin/texture_load_dxt3a_cs.h"
|
||||||
|
|
||||||
const TextureCache::HostFormat TextureCache::host_formats_[64] = {
|
const TextureCache::HostFormat TextureCache::host_formats_[64] = {
|
||||||
|
@ -56,8 +58,10 @@ const TextureCache::HostFormat TextureCache::host_formats_[64] = {
|
||||||
{DXGI_FORMAT_BC2_UNORM, CopyMode::k128bpb}, // k_DXT2_3
|
{DXGI_FORMAT_BC2_UNORM, CopyMode::k128bpb}, // k_DXT2_3
|
||||||
{DXGI_FORMAT_BC3_UNORM, CopyMode::k128bpb}, // k_DXT4_5
|
{DXGI_FORMAT_BC3_UNORM, CopyMode::k128bpb}, // k_DXT4_5
|
||||||
{DXGI_FORMAT_R16G16B16A16_UNORM, CopyMode::k64bpb}, // k_16_16_16_16_EDRAM
|
{DXGI_FORMAT_R16G16B16A16_UNORM, CopyMode::k64bpb}, // k_16_16_16_16_EDRAM
|
||||||
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_24_8
|
// R32_FLOAT for depth because shaders would require an additional SRV to
|
||||||
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_24_8_FLOAT
|
// sample stencil, which we don't provide.
|
||||||
|
{DXGI_FORMAT_R32_FLOAT, CopyMode::kDepthUnorm}, // k_24_8
|
||||||
|
{DXGI_FORMAT_R32_FLOAT, CopyMode::kDepthFloat}, // k_24_8_FLOAT
|
||||||
{DXGI_FORMAT_R16_UNORM, CopyMode::k16bpb}, // k_16
|
{DXGI_FORMAT_R16_UNORM, CopyMode::k16bpb}, // k_16
|
||||||
{DXGI_FORMAT_R16G16_UNORM, CopyMode::k32bpb}, // k_16_16
|
{DXGI_FORMAT_R16G16_UNORM, CopyMode::k32bpb}, // k_16_16
|
||||||
{DXGI_FORMAT_R16G16B16A16_UNORM, CopyMode::k64bpb}, // k_16_16_16_16
|
{DXGI_FORMAT_R16G16B16A16_UNORM, CopyMode::k64bpb}, // k_16_16_16_16
|
||||||
|
@ -112,6 +116,8 @@ const TextureCache::CopyModeInfo TextureCache::copy_mode_info_[] = {
|
||||||
{texture_load_128bpb_cs, sizeof(texture_load_128bpb_cs)},
|
{texture_load_128bpb_cs, sizeof(texture_load_128bpb_cs)},
|
||||||
{texture_load_dxt3a_cs, sizeof(texture_load_dxt3a_cs)},
|
{texture_load_dxt3a_cs, sizeof(texture_load_dxt3a_cs)},
|
||||||
{texture_load_ctx1_cs, sizeof(texture_load_ctx1_cs)},
|
{texture_load_ctx1_cs, sizeof(texture_load_ctx1_cs)},
|
||||||
|
{texture_load_depth_unorm_cs, sizeof(texture_load_depth_unorm_cs)},
|
||||||
|
{texture_load_depth_float_cs, sizeof(texture_load_depth_float_cs)},
|
||||||
};
|
};
|
||||||
|
|
||||||
TextureCache::TextureCache(D3D12CommandProcessor* command_processor,
|
TextureCache::TextureCache(D3D12CommandProcessor* command_processor,
|
||||||
|
|
|
@ -92,6 +92,8 @@ class TextureCache {
|
||||||
k128bpb,
|
k128bpb,
|
||||||
kDXT3A,
|
kDXT3A,
|
||||||
kCTX1,
|
kCTX1,
|
||||||
|
kDepthUnorm,
|
||||||
|
kDepthFloat,
|
||||||
|
|
||||||
kCount,
|
kCount,
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue