[D3D12] Depth untiling, update depth resolve documentation

This commit is contained in:
Triang3l 2018-08-21 23:05:41 +03:00
parent da1be211eb
commit ddc8f17fa5
5 changed files with 73 additions and 17 deletions

View File

@ -922,8 +922,11 @@ bool RenderTargetCache::ResolveCopy(SharedMemory* shared_memory,
}
assert_true(src_texture_format != TextureFormat::kUnknown);
src_texture_format = GetBaseFormat(src_texture_format);
// The destination format is specified as k_8_8_8_8 when resolving depth, so
// apparently no format conversion is performed.
TextureFormat dest_format =
GetBaseFormat(TextureFormat((dest_info >> 7) & 0x3F));
is_depth ? src_texture_format
: GetBaseFormat(TextureFormat((dest_info >> 7) & 0x3F));
// Get the destination location.
uint32_t dest_address = regs[XE_GPU_REG_RB_COPY_DEST_BASE].u32 & 0x1FFFFFFF;
@ -946,30 +949,25 @@ bool RenderTargetCache::ResolveCopy(SharedMemory* shared_memory,
// There are 3 paths for resolving in this function - they don't necessarily
// have to map directly to kRaw and kConvert CopyCommands.
// - Depth - tiling raw D24S8 or D24FS8 directly from the EDRAM buffer to the
// shared memory. Only 1 sample is resolved from a depth buffer, and it
// looks like format conversion can't be done when resolving depth buffers
// since k_8_8_8_8 is specified as the destination format, while the texture
// is being used as k_24_8 or k_24_8_FLOAT.
// - Raw color - when the source is single-sampled and has the same format as
// the destination, and there's no need to apply exponent bias. A regular
// EDRAM load is done to a buffer, and the buffer is then tiled to the
// shared memory. Because swapping red and blue is very common, this path
// supports swapping.
// - Depth to depth - when the source and the destination formats are
// renderable depth-stencil ones (D24S8 or D24FS8). A single sample is
// taken from the EDRAM buffer, converted between D24 and D24F if needed,
// and tiled directly to the shared memory buffer.
// - Conversion - when a simple copy is not enough. The EDRAM region is loaded
// to a render target resource, which is then used as a texture in a shader
// performing the resolve (by sampling the texture on or between pixels with
// bilinear filtering), applying exponent bias and swapping red and blue in
// a format-agnostic way, then the resulting color is written to a temporary
// RTV of the destination format. This also works for converting depth to
// 16-bit or 32-bit.
if (dest_format == TextureFormat::k_24_8 ||
dest_format == TextureFormat::k_24_8_FLOAT) {
// Depth to depth.
XELOGGPU("Resolving to a depth texture");
if (!is_depth) {
return false;
}
// TODO(Triang3l): Depth to depth.
// RTV of the destination format.
if (is_depth) {
// Depth.
// TODO(Triang3l): Resolve depth.
return false;
} else if (src_texture_format == dest_format &&
msaa_samples == MsaaSamples::k1X && dest_exp_bias == 0) {

View File

@ -0,0 +1,25 @@
#include "pixel_formats.hlsli"
#include "texture_copy.hlsli"
[numthreads(8, 32, 1)]
void main(uint3 xe_thread_id : SV_DispatchThreadID) {
  // Every thread handles a run of 4 depth texels along X. The 24-bit float
  // depth is widened to 32-bit; stencil is not produced because reading it in
  // shaders would need a separate DXGI_FORMAT_X32_TYPELESS_G8X24_UINT SRV
  // anyway.
  uint3 first_texel = uint3(xe_thread_id.x << 2u, xe_thread_id.yz);
  [branch] if (any(first_texel >= xe_texture_copy_size_blocks)) {
    // The run begins outside the region being copied - nothing to do.
    return;
  }

  // Guest offsets of the 4 texels of the run (4u and 2u are presumably the
  // texel size in bytes and its log2 - confirm against texture_copy.hlsli).
  uint4 guest_offsets = XeTextureCopyGuestBlockOffsets(first_texel, 4u, 2u);
  uint4 packed_texels;
  packed_texels.x = xe_texture_copy_source.Load(guest_offsets.x);
  packed_texels.y = xe_texture_copy_source.Load(guest_offsets.y);
  packed_texels.z = xe_texture_copy_source.Load(guest_offsets.z);
  packed_texels.w = xe_texture_copy_source.Load(guest_offsets.w);
  packed_texels = XeByteSwap(packed_texels, xe_texture_copy_endianness);

  // Shift out the low 8 bits so that only the 24-bit depth value is left for
  // the float conversion.
  uint4 depth24 = packed_texels >> 8u;

  // The 4 converted texels are written contiguously to the host buffer.
  uint host_offset =
      xe_texture_copy_host_base +
      XeTextureHostLinearOffset(first_texel, xe_texture_copy_size_blocks.y,
                                xe_texture_copy_host_pitch, 4u);
  xe_texture_copy_dest.Store4(host_offset, XeFloat20e4To32(depth24));
}

View File

@ -0,0 +1,25 @@
#include "texture_copy.hlsli"
[numthreads(8, 32, 1)]
void main(uint3 xe_thread_id : SV_DispatchThreadID) {
  // Every thread handles a run of 4 depth texels along X. The 24-bit unorm
  // depth is widened to 32-bit float; stencil is not produced because reading
  // it in shaders would need a separate DXGI_FORMAT_X24_TYPELESS_G8_UINT SRV
  // anyway.
  uint3 first_texel = uint3(xe_thread_id.x << 2u, xe_thread_id.yz);
  [branch] if (any(first_texel >= xe_texture_copy_size_blocks)) {
    // The run begins outside the region being copied - nothing to do.
    return;
  }

  // Guest offsets of the 4 texels of the run (4u and 2u are presumably the
  // texel size in bytes and its log2 - confirm against texture_copy.hlsli).
  uint4 guest_offsets = XeTextureCopyGuestBlockOffsets(first_texel, 4u, 2u);
  uint4 packed_texels;
  packed_texels.x = xe_texture_copy_source.Load(guest_offsets.x);
  packed_texels.y = xe_texture_copy_source.Load(guest_offsets.y);
  packed_texels.z = xe_texture_copy_source.Load(guest_offsets.z);
  packed_texels.w = xe_texture_copy_source.Load(guest_offsets.w);
  packed_texels = XeByteSwap(packed_texels, xe_texture_copy_endianness);

  // Shift out the low 8 bits, then normalize the remaining 24-bit value by
  // 2^24 - 1 to get depth in the 0...1 range.
  float4 depth = float4(packed_texels >> 8u) / 16777215.0;

  // The 4 converted texels are written contiguously to the host buffer as raw
  // float bits.
  uint host_offset =
      xe_texture_copy_host_base +
      XeTextureHostLinearOffset(first_texel, xe_texture_copy_size_blocks.y,
                                xe_texture_copy_host_pitch, 4u);
  xe_texture_copy_dest.Store4(host_offset, asuint(depth));
}

View File

@ -31,6 +31,8 @@ namespace d3d12 {
#include "xenia/gpu/d3d12/shaders/bin/texture_load_64bpb_cs.h"
#include "xenia/gpu/d3d12/shaders/bin/texture_load_8bpb_cs.h"
#include "xenia/gpu/d3d12/shaders/bin/texture_load_ctx1_cs.h"
#include "xenia/gpu/d3d12/shaders/bin/texture_load_depth_float_cs.h"
#include "xenia/gpu/d3d12/shaders/bin/texture_load_depth_unorm_cs.h"
#include "xenia/gpu/d3d12/shaders/bin/texture_load_dxt3a_cs.h"
const TextureCache::HostFormat TextureCache::host_formats_[64] = {
@ -56,8 +58,10 @@ const TextureCache::HostFormat TextureCache::host_formats_[64] = {
{DXGI_FORMAT_BC2_UNORM, CopyMode::k128bpb}, // k_DXT2_3
{DXGI_FORMAT_BC3_UNORM, CopyMode::k128bpb}, // k_DXT4_5
{DXGI_FORMAT_R16G16B16A16_UNORM, CopyMode::k64bpb}, // k_16_16_16_16_EDRAM
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_24_8
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_24_8_FLOAT
// R32_FLOAT for depth because shaders would require an additional SRV to
// sample stencil, which we don't provide.
{DXGI_FORMAT_R32_FLOAT, CopyMode::kDepthUnorm}, // k_24_8
{DXGI_FORMAT_R32_FLOAT, CopyMode::kDepthFloat}, // k_24_8_FLOAT
{DXGI_FORMAT_R16_UNORM, CopyMode::k16bpb}, // k_16
{DXGI_FORMAT_R16G16_UNORM, CopyMode::k32bpb}, // k_16_16
{DXGI_FORMAT_R16G16B16A16_UNORM, CopyMode::k64bpb}, // k_16_16_16_16
@ -112,6 +116,8 @@ const TextureCache::CopyModeInfo TextureCache::copy_mode_info_[] = {
{texture_load_128bpb_cs, sizeof(texture_load_128bpb_cs)},
{texture_load_dxt3a_cs, sizeof(texture_load_dxt3a_cs)},
{texture_load_ctx1_cs, sizeof(texture_load_ctx1_cs)},
{texture_load_depth_unorm_cs, sizeof(texture_load_depth_unorm_cs)},
{texture_load_depth_float_cs, sizeof(texture_load_depth_float_cs)},
};
TextureCache::TextureCache(D3D12CommandProcessor* command_processor,

View File

@ -92,6 +92,8 @@ class TextureCache {
k128bpb,
kDXT3A,
kCTX1,
kDepthUnorm,
kDepthFloat,
kCount,