From 2c6224ad3731666a74548f4dec64bda15714dd00 Mon Sep 17 00:00:00 2001 From: Triang3l Date: Sat, 25 Aug 2018 01:16:35 +0300 Subject: [PATCH] [D3D12] 32bpp tiling shader --- src/xenia/gpu/d3d12/render_target_cache.cc | 48 +- src/xenia/gpu/d3d12/render_target_cache.h | 4 + src/xenia/gpu/d3d12/shaders/byte_swap.hlsli | 12 +- src/xenia/gpu/d3d12/shaders/resolve.ps.hlsl | 2 +- .../gpu/d3d12/shaders/texture_copy.hlsli | 2 +- .../gpu/d3d12/shaders/texture_tile.hlsli | 26 + .../d3d12/shaders/texture_tile_32bpp.cs.hlsl | 31 ++ src/xenia/gpu/d3d12/texture_cache.cc | 453 +++++++++++++----- src/xenia/gpu/d3d12/texture_cache.h | 64 ++- 9 files changed, 492 insertions(+), 150 deletions(-) create mode 100644 src/xenia/gpu/d3d12/shaders/texture_tile.hlsli create mode 100644 src/xenia/gpu/d3d12/shaders/texture_tile_32bpp.cs.hlsl diff --git a/src/xenia/gpu/d3d12/render_target_cache.cc b/src/xenia/gpu/d3d12/render_target_cache.cc index 9bf9b9a0a..32e103fb1 100644 --- a/src/xenia/gpu/d3d12/render_target_cache.cc +++ b/src/xenia/gpu/d3d12/render_target_cache.cc @@ -1208,7 +1208,9 @@ bool RenderTargetCache::ResolveCopy(SharedMemory* shared_memory, D3D12_RESOURCE_STATES copy_buffer_state = D3D12_RESOURCE_STATE_UNORDERED_ACCESS; ID3D12Resource* copy_buffer = command_processor_->RequestScratchGPUBuffer( - render_target->copy_buffer_size, copy_buffer_state); + std::max(render_target->copy_buffer_size, + resolve_target->copy_buffer_size), + copy_buffer_state); if (copy_buffer == nullptr) { return false; } @@ -1289,10 +1291,6 @@ bool RenderTargetCache::ResolveCopy(SharedMemory* shared_memory, command_list->CopyTextureRegion(&location_dest, 0, 0, 0, &location_source, nullptr); - // Done with the copy buffer. - - command_processor_->ReleaseScratchGPUBuffer(copy_buffer, copy_buffer_state); - // Do the resolve. Render targets unbound already, safe to call // OMSetRenderTargets. 
@@ -1417,7 +1415,40 @@ bool RenderTargetCache::ResolveCopy(SharedMemory* shared_memory, command_list->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST); command_list->DrawInstanced(3, 1, 0, 0); - // TODO(Triang3l): Tile the resolve target in the texture cache. + // Copy the resolve target to the buffer. + + command_processor_->PushTransitionBarrier(resolve_target->resource, + resolve_target->state, + D3D12_RESOURCE_STATE_COPY_SOURCE); + resolve_target->state = D3D12_RESOURCE_STATE_COPY_SOURCE; + command_processor_->PushTransitionBarrier(copy_buffer, copy_buffer_state, + D3D12_RESOURCE_STATE_COPY_DEST); + copy_buffer_state = D3D12_RESOURCE_STATE_COPY_DEST; + command_processor_->SubmitBarriers(); + location_source.pResource = resolve_target->resource; + location_source.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; + location_source.SubresourceIndex = 0; + location_dest.pResource = copy_buffer; + location_dest.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; + location_dest.PlacedFootprint = resolve_target->footprint; + command_list->CopyTextureRegion(&location_dest, 0, 0, 0, &location_source, + nullptr); + + // Tile the resolved texture. The texture cache expects the buffer to be a + // non-pixel-shader SRV. + + command_processor_->PushTransitionBarrier( + copy_buffer, copy_buffer_state, + D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE); + copy_buffer_state = D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE; + texture_cache->TileResolvedTexture( + dest_format, dest_address, dest_pitch, dest_height, copy_width, + copy_height, dest_endian, copy_buffer, resolve_target->copy_buffer_size, + resolve_target->footprint); + + // Done with the copy buffer. 
+ + command_processor_->ReleaseScratchGPUBuffer(copy_buffer, copy_buffer_state); } return true; @@ -1566,6 +1597,11 @@ RenderTargetCache::ResolveTarget* RenderTargetCache::FindOrCreateResolveTarget( resolve_target->rtv_handle.ptr = rtv_handle.ptr; resolve_target->key.value = key.value; resolve_target->heap_page_first = min_heap_page_first; + UINT64 copy_buffer_size; + device->GetCopyableFootprints(&resource_desc, 0, 1, 0, + &resolve_target->footprint, nullptr, nullptr, + &copy_buffer_size); + resolve_target->copy_buffer_size = uint32_t(copy_buffer_size); resolve_targets_.insert(std::make_pair(key.value, resolve_target)); return resolve_target; diff --git a/src/xenia/gpu/d3d12/render_target_cache.h b/src/xenia/gpu/d3d12/render_target_cache.h index e3f524f40..dbd3a4b08 100644 --- a/src/xenia/gpu/d3d12/render_target_cache.h +++ b/src/xenia/gpu/d3d12/render_target_cache.h @@ -343,7 +343,11 @@ class RenderTargetCache { D3D12_RESOURCE_STATES state; D3D12_CPU_DESCRIPTOR_HANDLE rtv_handle; ResolveTargetKey key; + // The first 4 MB page in the heaps. uint32_t heap_page_first; + D3D12_PLACED_SUBRESOURCE_FOOTPRINT footprint; + // Buffer size needed to copy the resolve target to a linear buffer. uint32_t copy_buffer_size; }; void ClearBindings(); diff --git a/src/xenia/gpu/d3d12/shaders/byte_swap.hlsli b/src/xenia/gpu/d3d12/shaders/byte_swap.hlsli index 1cdd55ae1..d26fa5b09 100644 --- a/src/xenia/gpu/d3d12/shaders/byte_swap.hlsli +++ b/src/xenia/gpu/d3d12/shaders/byte_swap.hlsli @@ -32,18 +32,26 @@ XE_BYTE_SWAP_16_OVERLOAD(uint3) XE_BYTE_SWAP_16_OVERLOAD(uint4) uint2 XeByteSwap64(uint2 v, uint endian) { - if (endian & 4u) { + if ((endian & 4u) != 0u) { v = v.yx; endian = 2u; } return XeByteSwap(v, endian); } uint4 XeByteSwap64(uint4 v, uint endian) { - if (endian & 4u) { + if ((endian & 4u) != 0u) { v = v.yxwz; endian = 2u; } return XeByteSwap(v, endian); } +uint4 XeByteSwap128(uint4 v, uint endian) { + if ((endian & 4u) != 0u) { + v = ((endian & 1u) != 0u) ? 
v.wzyx /* 8in128 */ : v.yxwz /* 8in64 */; + endian = 2u; + } + return XeByteSwap(v, endian); +} + #endif // XENIA_GPU_D3D12_SHADERS_BYTE_SWAP_HLSLI_ diff --git a/src/xenia/gpu/d3d12/shaders/resolve.ps.hlsl b/src/xenia/gpu/d3d12/shaders/resolve.ps.hlsl index b4a79e16e..eb87264d3 100644 --- a/src/xenia/gpu/d3d12/shaders/resolve.ps.hlsl +++ b/src/xenia/gpu/d3d12/shaders/resolve.ps.hlsl @@ -1,4 +1,4 @@ -cbuffer XeResolveCbuffer : register(b0) { +cbuffer XeResolveConstants : register(b0) { // In samples. // Left and top in the lower 16 bits, width and height in the upper. uint2 xe_resolve_rect_samples; diff --git a/src/xenia/gpu/d3d12/shaders/texture_copy.hlsli b/src/xenia/gpu/d3d12/shaders/texture_copy.hlsli index 9908633f6..76194a2df 100644 --- a/src/xenia/gpu/d3d12/shaders/texture_copy.hlsli +++ b/src/xenia/gpu/d3d12/shaders/texture_copy.hlsli @@ -4,7 +4,7 @@ #include "byte_swap.hlsli" #include "texture_address.hlsli" -cbuffer xe_texture_copy_constants : register(b0) { +cbuffer XeTextureCopyConstants : register(b0) { uint xe_texture_copy_guest_base; // For linear textures - row byte pitch. uint xe_texture_copy_guest_pitch; diff --git a/src/xenia/gpu/d3d12/shaders/texture_tile.hlsli b/src/xenia/gpu/d3d12/shaders/texture_tile.hlsli new file mode 100644 index 000000000..260248676 --- /dev/null +++ b/src/xenia/gpu/d3d12/shaders/texture_tile.hlsli @@ -0,0 +1,26 @@ +#ifndef XENIA_GPU_D3D12_SHADERS_TEXTURE_TILE_HLSLI_ +#define XENIA_GPU_D3D12_SHADERS_TEXTURE_TILE_HLSLI_ + +#include "byte_swap.hlsli" +#include "texture_address.hlsli" + +cbuffer XeTextureTileConstants : register(b0) { + // Either from the start of the shared memory or from the start of the typed + // UAV, in bytes. + uint xe_texture_tile_guest_base; + // 0:2 - endianness (up to Xin128). + // 3:31 - actual guest texture width. + uint xe_texture_tile_endian_guest_pitch; + // Size to copy, texels with index bigger than this won't be written. + // Width in the lower 16 bits, height in the upper. 
+ uint xe_texture_tile_size; + // Byte offset to the first texel from the beginning of the source buffer. + uint xe_texture_tile_host_base; + // Row pitch of the source buffer. + uint xe_texture_tile_host_pitch; +} + +ByteAddressBuffer xe_texture_tile_source : register(t0); +// The target is u0, may be a raw UAV or a typed UAV depending on the format. + +#endif // XENIA_GPU_D3D12_SHADERS_TEXTURE_TILE_HLSLI_ diff --git a/src/xenia/gpu/d3d12/shaders/texture_tile_32bpp.cs.hlsl b/src/xenia/gpu/d3d12/shaders/texture_tile_32bpp.cs.hlsl new file mode 100644 index 000000000..890746dbc --- /dev/null +++ b/src/xenia/gpu/d3d12/shaders/texture_tile_32bpp.cs.hlsl @@ -0,0 +1,31 @@ +#include "texture_tile.hlsli" + +RWByteAddressBuffer xe_texture_tile_dest : register(u0); + +[numthreads(8, 32, 1)] +void main(uint3 xe_thread_id : SV_DispatchThreadID) { + // 1 thread = 4 texels. + uint2 texture_size = (xe_texture_tile_size >> uint2(0u, 16u)) & 0xFFFFu; + uint2 texel_index = xe_thread_id.xy; + texel_index.x <<= 2u; + [branch] if (any(texel_index >= texture_size)) { + return; + } + uint4 texels = xe_texture_tile_source.Load4( + xe_texture_tile_host_base + texel_index.y * xe_texture_tile_host_pitch + + texel_index.x * 4u); + texels = XeByteSwap(texels, xe_texture_tile_endian_guest_pitch & 7u); + uint4 texel_addresses = xe_texture_tile_guest_base + XeTextureTiledOffset2D( + texel_index, xe_texture_tile_endian_guest_pitch >> 3u, 2u); + xe_texture_tile_dest.Store(texel_addresses.x, texels.x); + bool3 texels_inside = uint3(1u, 2u, 3u) + texel_index.x < texture_size.x; + [branch] if (texels_inside.x) { + xe_texture_tile_dest.Store(texel_addresses.y, texels.y); + [branch] if (texels_inside.y) { + xe_texture_tile_dest.Store(texel_addresses.z, texels.z); + [branch] if (texels_inside.z) { + xe_texture_tile_dest.Store(texel_addresses.w, texels.w); + } + } + } +} diff --git a/src/xenia/gpu/d3d12/texture_cache.cc b/src/xenia/gpu/d3d12/texture_cache.cc index 0ba30d418..becc12a7f 100644 --- 
a/src/xenia/gpu/d3d12/texture_cache.cc +++ b/src/xenia/gpu/d3d12/texture_cache.cc @@ -34,81 +34,145 @@ namespace d3d12 { #include "xenia/gpu/d3d12/shaders/bin/texture_load_depth_float_cs.h" #include "xenia/gpu/d3d12/shaders/bin/texture_load_depth_unorm_cs.h" #include "xenia/gpu/d3d12/shaders/bin/texture_load_dxt3a_cs.h" +#include "xenia/gpu/d3d12/shaders/bin/texture_tile_32bpp_cs.h" const TextureCache::HostFormat TextureCache::host_formats_[64] = { - {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_1_REVERSE - {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_1 - {DXGI_FORMAT_R8_UNORM, CopyMode::k8bpb}, // k_8 - {DXGI_FORMAT_B5G5R5A1_UNORM, CopyMode::k16bpb}, // k_1_5_5_5 - {DXGI_FORMAT_B5G6R5_UNORM, CopyMode::k16bpb}, // k_5_6_5 - {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_6_5_5 - {DXGI_FORMAT_R8G8B8A8_UNORM, CopyMode::k32bpb}, // k_8_8_8_8 - {DXGI_FORMAT_R10G10B10A2_UNORM, CopyMode::k32bpb}, // k_2_10_10_10 - {DXGI_FORMAT_R8_UNORM, CopyMode::k8bpb}, // k_8_A - {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_8_B - {DXGI_FORMAT_R8G8_UNORM, CopyMode::k16bpb}, // k_8_8 - {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_Cr_Y1_Cb_Y0_REP - {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_Y1_Cr_Y0_Cb_REP - {DXGI_FORMAT_R16G16_UNORM, CopyMode::k32bpb}, // k_16_16_EDRAM - {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_8_8_8_8_A - {DXGI_FORMAT_B4G4R4A4_UNORM, CopyMode::k16bpb}, // k_4_4_4_4 - {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_10_11_11 - {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_11_11_10 - {DXGI_FORMAT_BC1_UNORM, CopyMode::k64bpb}, // k_DXT1 - {DXGI_FORMAT_BC2_UNORM, CopyMode::k128bpb}, // k_DXT2_3 - {DXGI_FORMAT_BC3_UNORM, CopyMode::k128bpb}, // k_DXT4_5 - {DXGI_FORMAT_R16G16B16A16_UNORM, CopyMode::k64bpb}, // k_16_16_16_16_EDRAM + // k_1_REVERSE + {DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, TileMode::kUnknown}, + // k_1 + {DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, TileMode::kUnknown}, + // k_8 + {DXGI_FORMAT_R8_UNORM, LoadMode::k8bpb, TileMode::kUnknown}, 
+ // k_1_5_5_5 + {DXGI_FORMAT_B5G5R5A1_UNORM, LoadMode::k16bpb, TileMode::kUnknown}, + // k_5_6_5 + {DXGI_FORMAT_B5G6R5_UNORM, LoadMode::k16bpb, TileMode::kUnknown}, + // k_6_5_5 + {DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, TileMode::kUnknown}, + // k_8_8_8_8 + {DXGI_FORMAT_R8G8B8A8_UNORM, LoadMode::k32bpb, TileMode::k32bpp}, + // k_2_10_10_10 + {DXGI_FORMAT_R10G10B10A2_UNORM, LoadMode::k32bpb, TileMode::kUnknown}, + // k_8_A + {DXGI_FORMAT_R8_UNORM, LoadMode::k8bpb, TileMode::kUnknown}, + // k_8_B + {DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, TileMode::kUnknown}, + // k_8_8 + {DXGI_FORMAT_R8G8_UNORM, LoadMode::k16bpb, TileMode::kUnknown}, + // k_Cr_Y1_Cb_Y0_REP + {DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, TileMode::kUnknown}, + // k_Y1_Cr_Y0_Cb_REP + {DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, TileMode::kUnknown}, + // k_16_16_EDRAM + {DXGI_FORMAT_R16G16_UNORM, LoadMode::k32bpb, TileMode::kUnknown}, + // k_8_8_8_8_A + {DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, TileMode::kUnknown}, + // k_4_4_4_4 + {DXGI_FORMAT_B4G4R4A4_UNORM, LoadMode::k16bpb, TileMode::kUnknown}, + // k_10_11_11 + {DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, TileMode::kUnknown}, + // k_11_11_10 + {DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, TileMode::kUnknown}, + // k_DXT1 + {DXGI_FORMAT_BC1_UNORM, LoadMode::k64bpb, TileMode::kUnknown}, + // k_DXT2_3 + {DXGI_FORMAT_BC2_UNORM, LoadMode::k128bpb, TileMode::kUnknown}, + // k_DXT4_5 + {DXGI_FORMAT_BC3_UNORM, LoadMode::k128bpb, TileMode::kUnknown}, + // k_16_16_16_16_EDRAM + {DXGI_FORMAT_R16G16B16A16_UNORM, LoadMode::k64bpb, TileMode::kUnknown}, // R32_FLOAT for depth because shaders would require an additional SRV to // sample stencil, which we don't provide. 
- {DXGI_FORMAT_R32_FLOAT, CopyMode::kDepthUnorm}, // k_24_8 - {DXGI_FORMAT_R32_FLOAT, CopyMode::kDepthFloat}, // k_24_8_FLOAT - {DXGI_FORMAT_R16_UNORM, CopyMode::k16bpb}, // k_16 - {DXGI_FORMAT_R16G16_UNORM, CopyMode::k32bpb}, // k_16_16 - {DXGI_FORMAT_R16G16B16A16_UNORM, CopyMode::k64bpb}, // k_16_16_16_16 - {DXGI_FORMAT_R16_FLOAT, CopyMode::k16bpb}, // k_16_EXPAND - {DXGI_FORMAT_R16G16_FLOAT, CopyMode::k32bpb}, // k_16_16_EXPAND - {DXGI_FORMAT_R16G16B16A16_FLOAT, CopyMode::k64bpb}, // k_16_16_16_16_EXPAND - {DXGI_FORMAT_R16_FLOAT, CopyMode::k16bpb}, // k_16_FLOAT - {DXGI_FORMAT_R16G16_FLOAT, CopyMode::k32bpb}, // k_16_16_FLOAT - {DXGI_FORMAT_R16G16B16A16_FLOAT, CopyMode::k64bpb}, // k_16_16_16_16_FLOAT - {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_32 - {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_32_32 - {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_32_32_32_32 - {DXGI_FORMAT_R32_FLOAT, CopyMode::k32bpb}, // k_32_FLOAT - {DXGI_FORMAT_R32G32_FLOAT, CopyMode::k64bpb}, // k_32_32_FLOAT - {DXGI_FORMAT_R32G32B32A32_FLOAT, CopyMode::k128bpb}, // k_32_32_32_32_FLOAT - {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_32_AS_8 - {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_32_AS_8_8 - {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_16_MPEG - {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_16_16_MPEG - {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_8_INTERLACED - {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_32_AS_8_INTERLACED - {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_32_AS_8_8_INTERLACED - {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_16_INTERLACED - {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_16_MPEG_INTERLACED - {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_16_16_MPEG_INTERLACED - {DXGI_FORMAT_BC5_UNORM, CopyMode::k128bpb}, // k_DXN - {DXGI_FORMAT_R8G8B8A8_UNORM, CopyMode::k32bpb}, // k_8_8_8_8_AS_16_16_16_16 - {DXGI_FORMAT_BC1_UNORM, CopyMode::k64bpb}, // k_DXT1_AS_16_16_16_16 - {DXGI_FORMAT_BC2_UNORM, CopyMode::k128bpb}, // 
k_DXT2_3_AS_16_16_16_16 - {DXGI_FORMAT_BC3_UNORM, CopyMode::k128bpb}, // k_DXT4_5_AS_16_16_16_16 - {DXGI_FORMAT_R10G10B10A2_UNORM, - CopyMode::k32bpb}, // k_2_10_10_10_AS_16_16_16_16 - {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_10_11_11_AS_16_16_16_16 - {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_11_11_10_AS_16_16_16_16 - {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_32_32_32_FLOAT - {DXGI_FORMAT_BC2_UNORM, CopyMode::kDXT3A}, // k_DXT3A - {DXGI_FORMAT_BC4_UNORM, CopyMode::k64bpb}, // k_DXT5A - {DXGI_FORMAT_R8G8_UNORM, CopyMode::kCTX1}, // k_CTX1 - {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_DXT3A_AS_1_1_1_1 - {DXGI_FORMAT_R8G8B8A8_UNORM, CopyMode::k32bpb}, // k_8_8_8_8_GAMMA - {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_2_10_10_10_FLOAT_EDRAM + // k_24_8 + {DXGI_FORMAT_R32_FLOAT, LoadMode::kDepthUnorm, TileMode::kUnknown}, + // k_24_8_FLOAT + {DXGI_FORMAT_R32_FLOAT, LoadMode::kDepthFloat, TileMode::kUnknown}, + // k_16 + {DXGI_FORMAT_R16_UNORM, LoadMode::k16bpb, TileMode::kUnknown}, + // k_16_16 + {DXGI_FORMAT_R16G16_UNORM, LoadMode::k32bpb, TileMode::kUnknown}, + // k_16_16_16_16 + {DXGI_FORMAT_R16G16B16A16_UNORM, LoadMode::k64bpb, TileMode::kUnknown}, + // k_16_EXPAND + {DXGI_FORMAT_R16_FLOAT, LoadMode::k16bpb, TileMode::kUnknown}, + // k_16_16_EXPAND + {DXGI_FORMAT_R16G16_FLOAT, LoadMode::k32bpb, TileMode::kUnknown}, + // k_16_16_16_16_EXPAND + {DXGI_FORMAT_R16G16B16A16_FLOAT, LoadMode::k64bpb, TileMode::kUnknown}, + // k_16_FLOAT + {DXGI_FORMAT_R16_FLOAT, LoadMode::k16bpb, TileMode::kUnknown}, + // k_16_16_FLOAT + {DXGI_FORMAT_R16G16_FLOAT, LoadMode::k32bpb, TileMode::kUnknown}, + // k_16_16_16_16_FLOAT + {DXGI_FORMAT_R16G16B16A16_FLOAT, LoadMode::k64bpb, TileMode::kUnknown}, + // k_32 + {DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, TileMode::kUnknown}, + // k_32_32 + {DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, TileMode::kUnknown}, + // k_32_32_32_32 + {DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, TileMode::kUnknown}, + // k_32_FLOAT + 
{DXGI_FORMAT_R32_FLOAT, LoadMode::k32bpb, TileMode::kUnknown}, + // k_32_32_FLOAT + {DXGI_FORMAT_R32G32_FLOAT, LoadMode::k64bpb, TileMode::kUnknown}, + // k_32_32_32_32_FLOAT + {DXGI_FORMAT_R32G32B32A32_FLOAT, LoadMode::k128bpb, TileMode::kUnknown}, + // k_32_AS_8 + {DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, TileMode::kUnknown}, + // k_32_AS_8_8 + {DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, TileMode::kUnknown}, + // k_16_MPEG + {DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, TileMode::kUnknown}, + // k_16_16_MPEG + {DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, TileMode::kUnknown}, + // k_8_INTERLACED + {DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, TileMode::kUnknown}, + // k_32_AS_8_INTERLACED + {DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, TileMode::kUnknown}, + // k_32_AS_8_8_INTERLACED + {DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, TileMode::kUnknown}, + // k_16_INTERLACED + {DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, TileMode::kUnknown}, + // k_16_MPEG_INTERLACED + {DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, TileMode::kUnknown}, + // k_16_16_MPEG_INTERLACED + {DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, TileMode::kUnknown}, + // k_DXN + {DXGI_FORMAT_BC5_UNORM, LoadMode::k128bpb, TileMode::kUnknown}, + // k_8_8_8_8_AS_16_16_16_16 + {DXGI_FORMAT_R8G8B8A8_UNORM, LoadMode::k32bpb, TileMode::kUnknown}, + // k_DXT1_AS_16_16_16_16 + {DXGI_FORMAT_BC1_UNORM, LoadMode::k64bpb, TileMode::kUnknown}, + // k_DXT2_3_AS_16_16_16_16 + {DXGI_FORMAT_BC2_UNORM, LoadMode::k128bpb, TileMode::kUnknown}, + // k_DXT4_5_AS_16_16_16_16 + {DXGI_FORMAT_BC3_UNORM, LoadMode::k128bpb, TileMode::kUnknown}, + // k_2_10_10_10_AS_16_16_16_16 + {DXGI_FORMAT_R10G10B10A2_UNORM, LoadMode::k32bpb, TileMode::kUnknown}, + // k_10_11_11_AS_16_16_16_16 + {DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, TileMode::kUnknown}, + // k_11_11_10_AS_16_16_16_16 + {DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, TileMode::kUnknown}, + // k_32_32_32_FLOAT + {DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, TileMode::kUnknown}, + // k_DXT3A + {DXGI_FORMAT_BC2_UNORM, 
LoadMode::kDXT3A, TileMode::kUnknown}, + // k_DXT5A + {DXGI_FORMAT_BC4_UNORM, LoadMode::k64bpb, TileMode::kUnknown}, + // k_CTX1 + {DXGI_FORMAT_R8G8_UNORM, LoadMode::kCTX1, TileMode::kUnknown}, + // k_DXT3A_AS_1_1_1_1 + {DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, TileMode::kUnknown}, + // k_8_8_8_8_GAMMA + {DXGI_FORMAT_R8G8B8A8_UNORM, LoadMode::k32bpb, TileMode::kUnknown}, + // k_2_10_10_10_FLOAT_EDRAM + {DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, TileMode::kUnknown}, }; const char* const TextureCache::dimension_names_[4] = {"1D", "2D", "3D", "cube"}; -const TextureCache::CopyModeInfo TextureCache::copy_mode_info_[] = { +const TextureCache::LoadModeInfo TextureCache::load_mode_info_[] = { {texture_load_8bpb_cs, sizeof(texture_load_8bpb_cs)}, {texture_load_16bpb_cs, sizeof(texture_load_16bpb_cs)}, {texture_load_32bpb_cs, sizeof(texture_load_32bpb_cs)}, @@ -120,6 +184,10 @@ const TextureCache::CopyModeInfo TextureCache::copy_mode_info_[] = { {texture_load_depth_float_cs, sizeof(texture_load_depth_float_cs)}, }; +const TextureCache::TileModeInfo TextureCache::tile_mode_info_[] = { + {texture_tile_32bpp_cs, sizeof(texture_tile_32bpp_cs)}, +}; + TextureCache::TextureCache(D3D12CommandProcessor* command_processor, RegisterFile* register_file, SharedMemory* shared_memory) @@ -133,7 +201,7 @@ bool TextureCache::Initialize() { auto device = command_processor_->GetD3D12Context()->GetD3D12Provider()->GetDevice(); - // Create the copying root signature. + // Create the loading root signature. D3D12_ROOT_PARAMETER root_parameters[2]; // Parameter 0 is constants (changed very often when untiling). 
root_parameters[0].ParameterType = D3D12_ROOT_PARAMETER_TYPE_CBV; @@ -167,7 +235,7 @@ bool TextureCache::Initialize() { if (FAILED(D3D12SerializeRootSignature( &root_signature_desc, D3D_ROOT_SIGNATURE_VERSION_1, &root_signature_blob, &root_signature_error_blob))) { - XELOGE("Failed to serialize the texture copying root signature"); + XELOGE("Failed to serialize the texture loading root signature"); if (root_signature_error_blob != nullptr) { XELOGE("%s", reinterpret_cast<const char*>( root_signature_error_blob->GetBufferPointer())); @@ -178,36 +246,80 @@ bool TextureCache::Initialize() { } if (root_signature_error_blob != nullptr) { root_signature_error_blob->Release(); + root_signature_error_blob = nullptr; } if (FAILED(device->CreateRootSignature( 0, root_signature_blob->GetBufferPointer(), root_signature_blob->GetBufferSize(), - IID_PPV_ARGS(&copy_root_signature_)))) { - XELOGE("Failed to create the texture copying root signature"); + IID_PPV_ARGS(&load_root_signature_)))) { + XELOGE("Failed to create the texture loading root signature"); + root_signature_blob->Release(); + Shutdown(); + return false; + } + root_signature_blob->Release(); + // Create the tiling root signature (almost the same, but with root constants + // in parameter 0). 
+ root_parameters[0].ParameterType = D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS; + root_parameters[0].Constants.ShaderRegister = 0; + root_parameters[0].Constants.RegisterSpace = 0; + root_parameters[0].Constants.Num32BitValues = + sizeof(TileConstants) / sizeof(uint32_t); + if (FAILED(D3D12SerializeRootSignature( + &root_signature_desc, D3D_ROOT_SIGNATURE_VERSION_1, + &root_signature_blob, &root_signature_error_blob))) { + XELOGE("Failed to serialize the texture tiling root signature"); + if (root_signature_error_blob != nullptr) { + XELOGE("%s", reinterpret_cast<const char*>( + root_signature_error_blob->GetBufferPointer())); + root_signature_error_blob->Release(); + } + Shutdown(); + return false; + } + if (root_signature_error_blob != nullptr) { + root_signature_error_blob->Release(); + root_signature_error_blob = nullptr; + } + if (FAILED(device->CreateRootSignature( + 0, root_signature_blob->GetBufferPointer(), + root_signature_blob->GetBufferSize(), + IID_PPV_ARGS(&tile_root_signature_)))) { + XELOGE("Failed to create the texture tiling root signature"); root_signature_blob->Release(); Shutdown(); return false; } root_signature_blob->Release(); - // Create the copying pipelines. + // Create the loading and tiling pipelines. 
D3D12_COMPUTE_PIPELINE_STATE_DESC pipeline_desc; - pipeline_desc.pRootSignature = copy_root_signature_; + pipeline_desc.pRootSignature = load_root_signature_; pipeline_desc.NodeMask = 0; pipeline_desc.CachedPSO.pCachedBlob = nullptr; pipeline_desc.CachedPSO.CachedBlobSizeInBytes = 0; pipeline_desc.Flags = D3D12_PIPELINE_STATE_FLAG_NONE; - for (uint32_t i = 0; i < uint32_t(CopyMode::kCount); ++i) { - const CopyModeInfo& mode_info = copy_mode_info_[i]; - if (mode_info.load_shader != nullptr) { - pipeline_desc.CS.pShaderBytecode = mode_info.load_shader; - pipeline_desc.CS.BytecodeLength = mode_info.load_shader_size; - if (FAILED(device->CreateComputePipelineState( - &pipeline_desc, IID_PPV_ARGS(&copy_load_pipelines_[i])))) { - XELOGE("Failed to create the texture copying pipeline for mode %u", i); - Shutdown(); - return false; - } + for (uint32_t i = 0; i < uint32_t(LoadMode::kCount); ++i) { + const LoadModeInfo& mode_info = load_mode_info_[i]; + pipeline_desc.CS.pShaderBytecode = mode_info.shader; + pipeline_desc.CS.BytecodeLength = mode_info.shader_size; + if (FAILED(device->CreateComputePipelineState( + &pipeline_desc, IID_PPV_ARGS(&load_pipelines_[i])))) { + XELOGE("Failed to create the texture loading pipeline for mode %u", i); + Shutdown(); + return false; + } + } + pipeline_desc.pRootSignature = tile_root_signature_; + for (uint32_t i = 0; i < uint32_t(TileMode::kCount); ++i) { + const TileModeInfo& mode_info = tile_mode_info_[i]; + pipeline_desc.CS.pShaderBytecode = mode_info.shader; + pipeline_desc.CS.BytecodeLength = mode_info.shader_size; + if (FAILED(device->CreateComputePipelineState( + &pipeline_desc, IID_PPV_ARGS(&tile_pipelines_[i])))) { + XELOGE("Failed to create the texture tiling pipeline for mode %u", i); + Shutdown(); + return false; } } @@ -217,15 +329,25 @@ bool TextureCache::Initialize() { void TextureCache::Shutdown() { ClearCache(); - for (uint32_t i = 0; i < uint32_t(CopyMode::kCount); ++i) { - if (copy_load_pipelines_[i] != nullptr) { - 
copy_load_pipelines_[i]->Release(); - copy_load_pipelines_[i] = nullptr; + for (uint32_t i = 0; i < uint32_t(TileMode::kCount); ++i) { + if (tile_pipelines_[i] != nullptr) { + tile_pipelines_[i]->Release(); + tile_pipelines_[i] = nullptr; } } - if (copy_root_signature_ != nullptr) { - copy_root_signature_->Release(); - copy_root_signature_ = nullptr; + if (tile_root_signature_ != nullptr) { + tile_root_signature_->Release(); + tile_root_signature_ = nullptr; + } + for (uint32_t i = 0; i < uint32_t(LoadMode::kCount); ++i) { + if (load_pipelines_[i] != nullptr) { + load_pipelines_[i]->Release(); + load_pipelines_[i] = nullptr; + } + } + if (load_root_signature_ != nullptr) { + load_root_signature_->Release(); + load_root_signature_ = nullptr; } } @@ -451,14 +573,87 @@ void TextureCache::WriteSampler(uint32_t fetch_constant, } DXGI_FORMAT TextureCache::GetResolveDXGIFormat(TextureFormat format) { - // TODO(Triang3l): Change this to a check whether there is a tiling pipeline. - switch (format) { - case TextureFormat::k_8_8_8_8: - return host_formats_[uint32_t(format)].dxgi_format; - default: - break; + const HostFormat& host_format = host_formats_[uint32_t(format)]; + return host_format.tile_mode != TileMode::kUnknown ? 
host_format.dxgi_format + : DXGI_FORMAT_UNKNOWN; +} + +bool TextureCache::TileResolvedTexture( + TextureFormat format, uint32_t texture_base, uint32_t texture_pitch, + uint32_t texture_height, uint32_t resolve_width, uint32_t resolve_height, + Endian128 endian, ID3D12Resource* buffer, uint32_t buffer_size, + const D3D12_PLACED_SUBRESOURCE_FOOTPRINT& footprint) { + TileMode tile_mode = host_formats_[uint32_t(format)].tile_mode; + if (tile_mode == TileMode::kUnknown) { + assert_always(); + return false; } - return DXGI_FORMAT_UNKNOWN; + + auto command_list = command_processor_->GetCurrentCommandList(); + if (command_list == nullptr) { + return false; + } + auto provider = command_processor_->GetD3D12Context()->GetD3D12Provider(); + auto device = provider->GetDevice(); + + texture_base &= 0x1FFFFFFF; + // TODO(Triang3l): Allow smaller alignment for 8- and 16-bit textures (but + // probably not really needed). + assert_false(texture_base & 0x3); + + // Calculate the texture size for memory operations and ensure we can write to + // the specified shared memory location. + uint32_t texture_size = texture_util::GetGuestMipStorageSize( + xe::align(texture_pitch, 32u), xe::align(texture_height, 32u), 1, true, + format, nullptr); + if (!shared_memory_->MakeTilesResident(texture_base, texture_size)) { + return false; + } + + // Tile the texture. + // TODO(Triang3l): Typed UAVs for 8- and 16-bit textures. 
+ D3D12_CPU_DESCRIPTOR_HANDLE descriptor_cpu_start; + D3D12_GPU_DESCRIPTOR_HANDLE descriptor_gpu_start; + if (command_processor_->RequestViewDescriptors(0, 2, 2, descriptor_cpu_start, + descriptor_gpu_start) == 0) { + return false; + } + shared_memory_->UseForWriting(); + command_processor_->SubmitBarriers(); + command_list->SetComputeRootSignature(tile_root_signature_); + TileConstants tile_constants; + tile_constants.guest_base = texture_base; + tile_constants.endian_guest_pitch = uint32_t(endian) | (texture_pitch << 3); + tile_constants.size = resolve_width | (resolve_height << 16); + tile_constants.host_base = uint32_t(footprint.Offset); + tile_constants.host_pitch = uint32_t(footprint.Footprint.RowPitch); + command_list->SetComputeRoot32BitConstants( + 0, sizeof(tile_constants) / sizeof(uint32_t), &tile_constants, 0); + D3D12_SHADER_RESOURCE_VIEW_DESC srv_desc; + srv_desc.Format = DXGI_FORMAT_R32_TYPELESS; + srv_desc.ViewDimension = D3D12_SRV_DIMENSION_BUFFER; + srv_desc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; + srv_desc.Buffer.FirstElement = 0; + srv_desc.Buffer.NumElements = buffer_size; + srv_desc.Buffer.StructureByteStride = 0; + srv_desc.Buffer.Flags = D3D12_BUFFER_SRV_FLAG_RAW; + device->CreateShaderResourceView(buffer, &srv_desc, descriptor_cpu_start); + D3D12_CPU_DESCRIPTOR_HANDLE uav_cpu_handle; + uav_cpu_handle.ptr = + descriptor_cpu_start.ptr + provider->GetDescriptorSizeView(); + shared_memory_->CreateRawUAV(uav_cpu_handle); + command_list->SetComputeRootDescriptorTable(1, descriptor_gpu_start); + command_processor_->SetComputePipeline(tile_pipelines_[uint32_t(tile_mode)]); + command_list->Dispatch((resolve_width + 31) >> 5, (resolve_height + 31) >> 5, + 1); + + // Commit the write. + command_processor_->PushUAVBarrier(shared_memory_->GetBuffer()); + + // Invalidate textures. 
+ shared_memory_->RangeWrittenByGPU(texture_base, texture_size); + + return true; } bool TextureCache::RequestSwapTexture(D3D12_CPU_DESCRIPTOR_HANDLE handle) { @@ -766,11 +961,11 @@ bool TextureCache::LoadTextureData(Texture* texture) { // Get the pipeline. const HostFormat& host_format = host_formats_[uint32_t(texture->key.format)]; - if (host_format.copy_mode == CopyMode::kUnknown) { + if (host_format.load_mode == LoadMode::kUnknown) { return false; } ID3D12PipelineState* pipeline = - copy_load_pipelines_[uint32_t(host_format.copy_mode)]; + load_pipelines_[uint32_t(host_format.load_mode)]; if (pipeline == nullptr) { return false; } @@ -839,7 +1034,7 @@ bool TextureCache::LoadTextureData(Texture* texture) { device->CreateUnorderedAccessView(copy_buffer, nullptr, &uav_desc, descriptor_cpu_uav); command_processor_->SetComputePipeline(pipeline); - command_list->SetComputeRootSignature(copy_root_signature_); + command_list->SetComputeRootSignature(load_root_signature_); command_list->SetComputeRootDescriptorTable(1, descriptor_gpu_start); // Submit commands. @@ -849,13 +1044,13 @@ bool TextureCache::LoadTextureData(Texture* texture) { uint32_t mip_first = base_in_sync ? 1 : 0; uint32_t mip_last = mips_in_sync ? 0 : resource_desc.MipLevels - 1; auto cbuffer_pool = command_processor_->GetConstantBufferPool(); - CopyConstants copy_constants; - copy_constants.is_3d = is_3d ? 1 : 0; - copy_constants.endianness = uint32_t(texture->key.endianness); + LoadConstants load_constants; + load_constants.is_3d = is_3d ? 
1 : 0; + load_constants.endianness = uint32_t(texture->key.endianness); if (!texture->key.packed_mips) { - copy_constants.guest_mip_offset[0] = 0; - copy_constants.guest_mip_offset[1] = 0; - copy_constants.guest_mip_offset[2] = 0; + load_constants.guest_mip_offset[0] = 0; + load_constants.guest_mip_offset[1] = 0; + load_constants.guest_mip_offset[2] = 0; } for (uint32_t i = 0; i < slice_count; ++i) { command_processor_->PushTransitionBarrier( @@ -863,48 +1058,48 @@ bool TextureCache::LoadTextureData(Texture* texture) { copy_buffer_state = D3D12_RESOURCE_STATE_UNORDERED_ACCESS; for (uint32_t j = mip_first; j <= mip_last; ++j) { if (j == 0) { - copy_constants.guest_base = + load_constants.guest_base = (texture->key.base_page << 12) + i * texture->base_slice_size; } else { - copy_constants.guest_base = + load_constants.guest_base = (texture->key.mip_page << 12) + i * texture->mip_slice_size; } - copy_constants.guest_base += texture->mip_offsets[j]; - copy_constants.guest_pitch = texture->key.tiled - ? CopyConstants::kGuestPitchTiled + load_constants.guest_base += texture->mip_offsets[j]; + load_constants.guest_pitch = texture->key.tiled + ? 
LoadConstants::kGuestPitchTiled : texture->mip_pitches[j]; - copy_constants.host_base = uint32_t(host_layouts[j].Offset); - copy_constants.host_pitch = host_layouts[j].Footprint.RowPitch; - copy_constants.size_texels[0] = std::max(width >> j, 1u); - copy_constants.size_texels[1] = std::max(height >> j, 1u); - copy_constants.size_texels[2] = std::max(depth >> j, 1u); - copy_constants.size_blocks[0] = - (copy_constants.size_texels[0] + (block_width - 1)) / block_width; - copy_constants.size_blocks[1] = - (copy_constants.size_texels[1] + (block_height - 1)) / block_height; - copy_constants.size_blocks[2] = copy_constants.size_texels[2]; + load_constants.host_base = uint32_t(host_layouts[j].Offset); + load_constants.host_pitch = host_layouts[j].Footprint.RowPitch; + load_constants.size_texels[0] = std::max(width >> j, 1u); + load_constants.size_texels[1] = std::max(height >> j, 1u); + load_constants.size_texels[2] = std::max(depth >> j, 1u); + load_constants.size_blocks[0] = + (load_constants.size_texels[0] + (block_width - 1)) / block_width; + load_constants.size_blocks[1] = + (load_constants.size_texels[1] + (block_height - 1)) / block_height; + load_constants.size_blocks[2] = load_constants.size_texels[2]; if (texture->key.packed_mips) { texture_util::GetPackedMipOffset(width, height, depth, guest_format, j, - copy_constants.guest_mip_offset[0], - copy_constants.guest_mip_offset[1], - copy_constants.guest_mip_offset[2]); + load_constants.guest_mip_offset[0], + load_constants.guest_mip_offset[1], + load_constants.guest_mip_offset[2]); } D3D12_GPU_VIRTUAL_ADDRESS cbuffer_gpu_address; uint8_t* cbuffer_mapping = cbuffer_pool->RequestFull( - xe::align(uint32_t(sizeof(copy_constants)), 256u), nullptr, nullptr, + xe::align(uint32_t(sizeof(load_constants)), 256u), nullptr, nullptr, &cbuffer_gpu_address); if (cbuffer_mapping == nullptr) { command_processor_->ReleaseScratchGPUBuffer(copy_buffer, copy_buffer_state); return false; } - std::memcpy(cbuffer_mapping, &copy_constants, 
sizeof(copy_constants)); + std::memcpy(cbuffer_mapping, &load_constants, sizeof(load_constants)); command_list->SetComputeRootConstantBufferView(0, cbuffer_gpu_address); command_processor_->SubmitBarriers(); // Each thread group processes 32x32x1 blocks. - command_list->Dispatch((copy_constants.size_blocks[0] + 31) >> 5, - (copy_constants.size_blocks[1] + 31) >> 5, - copy_constants.size_blocks[2]); + command_list->Dispatch((load_constants.size_blocks[0] + 31) >> 5, + (load_constants.size_blocks[1] + 31) >> 5, + load_constants.size_blocks[2]); } command_processor_->PushUAVBarrier(copy_buffer); command_processor_->PushTransitionBarrier(copy_buffer, copy_buffer_state, diff --git a/src/xenia/gpu/d3d12/texture_cache.h b/src/xenia/gpu/d3d12/texture_cache.h index aacf7d2f9..75544eea0 100644 --- a/src/xenia/gpu/d3d12/texture_cache.h +++ b/src/xenia/gpu/d3d12/texture_cache.h @@ -79,16 +79,18 @@ class TextureCache { D3D12_CPU_DESCRIPTOR_HANDLE handle); static DXGI_FORMAT GetResolveDXGIFormat(TextureFormat format); + // The source buffer must be in the non-pixel-shader SRV state. + bool TileResolvedTexture(TextureFormat format, uint32_t texture_base, + uint32_t texture_pitch, uint32_t texture_height, + uint32_t resolve_width, uint32_t resolve_height, + Endian128 endian, ID3D12Resource* buffer, + uint32_t buffer_size, + const D3D12_PLACED_SUBRESOURCE_FOOTPRINT& footprint); bool RequestSwapTexture(D3D12_CPU_DESCRIPTOR_HANDLE handle); private: - struct CopyModeInfo { - const void* load_shader; - size_t load_shader_size; - }; - - enum class CopyMode { + enum class LoadMode { k8bpb, k16bpb, k32bpb, @@ -104,9 +106,30 @@ class TextureCache { kUnknown = kCount }; + struct LoadModeInfo { + const void* shader; + size_t shader_size; + }; + + // Tiling modes for storing textures after resolving - needed only for the + // formats that can be resolved to. 
+ enum class TileMode { + k32bpp, + + kCount, + + kUnknown = kCount + }; + + struct TileModeInfo { + const void* shader; + size_t shader_size; + }; + struct HostFormat { DXGI_FORMAT dxgi_format; - CopyMode copy_mode; + LoadMode load_mode; + TileMode tile_mode; }; union TextureKey { @@ -200,7 +223,7 @@ class TextureCache { bool mips_in_sync; }; - struct CopyConstants { + struct LoadConstants { // vec4 0. uint32_t guest_base; // For linear textures - row byte pitch. @@ -223,6 +246,22 @@ class TextureCache { static constexpr uint32_t kGuestPitchTiled = UINT32_MAX; }; + struct TileConstants { + // Either from the start of the shared memory or from the start of the typed + // UAV, in bytes. + uint32_t guest_base; + // 0:2 - endianness (up to Xin128). + // 3:31 - actual guest texture width. + uint32_t endian_guest_pitch; + // Size to copy, texels with index bigger than this won't be written. + // Width in the lower 16 bits, height in the upper. + uint32_t size; + // Byte offset to the first texel from the beginning of the source buffer. + uint32_t host_base; + // Row pitch of the source buffer. + uint32_t host_pitch; + }; + struct TextureBinding { TextureKey key; uint32_t swizzle; @@ -264,9 +303,12 @@ class TextureCache { RegisterFile* register_file_; SharedMemory* shared_memory_; - static const CopyModeInfo copy_mode_info_[]; - ID3D12RootSignature* copy_root_signature_ = nullptr; - ID3D12PipelineState* copy_load_pipelines_[size_t(CopyMode::kCount)] = {}; + static const LoadModeInfo load_mode_info_[]; + ID3D12RootSignature* load_root_signature_ = nullptr; + ID3D12PipelineState* load_pipelines_[size_t(LoadMode::kCount)] = {}; + static const TileModeInfo tile_mode_info_[]; + ID3D12RootSignature* tile_root_signature_ = nullptr; + ID3D12PipelineState* tile_pipelines_[size_t(TileMode::kCount)] = {}; std::unordered_multimap textures_;