From cfd3821b83f164b17ca0ffcc5a26b960657491d8 Mon Sep 17 00:00:00 2001
From: Triang3l
Date: Mon, 6 Aug 2018 20:10:53 +0300
Subject: [PATCH] [D3D12] DXT1 untiling

---
 .../gpu/d3d12/d3d12_command_processor.cc      |  18 +-
 src/xenia/gpu/d3d12/d3d12_command_processor.h |   7 +
 src/xenia/gpu/d3d12/shaders/byte_swap.hlsli   |  19 +
 .../gpu/d3d12/shaders/texture_address.hlsli   |  52 ++
 .../gpu/d3d12/shaders/texture_copy.hlsli      |  28 +
 .../d3d12/shaders/texture_load_64bpb.cs.hlsl  |  38 ++
 src/xenia/gpu/d3d12/texture_cache.cc          | 495 +++++++++++++++---
 src/xenia/gpu/d3d12/texture_cache.h           |  57 +-
 src/xenia/gpu/texture_util.cc                 |  12 +-
 src/xenia/gpu/texture_util.h                  |   2 +-
 10 files changed, 625 insertions(+), 103 deletions(-)
 create mode 100644 src/xenia/gpu/d3d12/shaders/byte_swap.hlsli
 create mode 100644 src/xenia/gpu/d3d12/shaders/texture_address.hlsli
 create mode 100644 src/xenia/gpu/d3d12/shaders/texture_copy.hlsli
 create mode 100644 src/xenia/gpu/d3d12/shaders/texture_load_64bpb.cs.hlsl

diff --git a/src/xenia/gpu/d3d12/d3d12_command_processor.cc b/src/xenia/gpu/d3d12/d3d12_command_processor.cc
index 63cfafd0c..1cffd2a27 100644
--- a/src/xenia/gpu/d3d12/d3d12_command_processor.cc
+++ b/src/xenia/gpu/d3d12/d3d12_command_processor.cc
@@ -435,6 +435,13 @@ void D3D12CommandProcessor::ReleaseScratchGPUBuffer(
   }
 }
 
+void D3D12CommandProcessor::SetPipeline(ID3D12PipelineState* pipeline) {
+  if (current_pipeline_ != pipeline) {
+    GetCurrentCommandList()->SetPipelineState(pipeline);
+    current_pipeline_ = pipeline;
+  }
+}
+
 bool D3D12CommandProcessor::SetupContext() {
   if (!CommandProcessor::SetupContext()) {
     XELOGE("Failed to initialize base command processor context");
@@ -475,6 +482,10 @@ bool D3D12CommandProcessor::SetupContext() {

   texture_cache_ = std::make_unique<TextureCache>(this, register_file_,
                                                   shared_memory_.get());
+  if (!texture_cache_->Initialize()) {
+    XELOGE("Failed to initialize texture cache");
+    return false;
+  }

   return true;
 }
@@ -653,7 +664,7 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
   }

   bool new_frame = BeginFrame();
-  ID3D12GraphicsCommandList* command_list = GetCurrentCommandList();
+  auto command_list = GetCurrentCommandList();

   // Set the primitive topology.
   D3D_PRIMITIVE_TOPOLOGY primitive_topology;
@@ -698,10 +709,7 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
   UpdateFixedFunctionState(command_list);

   // Bind the pipeline.
-  if (current_pipeline_ != pipeline) {
-    current_pipeline_ = pipeline;
-    command_list->SetPipelineState(pipeline);
-  }
+  SetPipeline(pipeline);

   // Update system constants before uploading them.
   UpdateSystemConstantValues(indexed ? index_buffer_info->endianness
diff --git a/src/xenia/gpu/d3d12/d3d12_command_processor.h b/src/xenia/gpu/d3d12/d3d12_command_processor.h
index 53ce142e7..ae62911d3 100644
--- a/src/xenia/gpu/d3d12/d3d12_command_processor.h
+++ b/src/xenia/gpu/d3d12/d3d12_command_processor.h
@@ -50,6 +50,9 @@ class D3D12CommandProcessor : public CommandProcessor {
   ID3D12RootSignature* GetRootSignature(const D3D12Shader* vertex_shader,
                                         const D3D12Shader* pixel_shader);

+  ui::d3d12::UploadBufferPool* GetConstantBufferPool() const {
+    return constant_buffer_pool_.get();
+  }
   // Request and automatically rebind descriptors on the draw command list.
   // Refer to DescriptorHeapPool::Request for partial/full update explanation.
   uint64_t RequestViewDescriptors(uint64_t previous_full_update,
@@ -73,6 +76,10 @@ class D3D12CommandProcessor : public CommandProcessor {
   void ReleaseScratchGPUBuffer(ID3D12Resource* buffer,
                                D3D12_RESOURCE_STATES new_state);

+  // Sets the current pipeline state - may be called internally or externally.
+  // This is for cache invalidation primarily. A frame must be open.
+  void SetPipeline(ID3D12PipelineState* pipeline);
+
  protected:
   bool SetupContext() override;
   void ShutdownContext() override;
diff --git a/src/xenia/gpu/d3d12/shaders/byte_swap.hlsli b/src/xenia/gpu/d3d12/shaders/byte_swap.hlsli
new file mode 100644
index 000000000..2ea24f204
--- /dev/null
+++ b/src/xenia/gpu/d3d12/shaders/byte_swap.hlsli
@@ -0,0 +1,19 @@
+#ifndef XENIA_GPU_D3D12_SHADERS_BYTE_SWAP_HLSLI_
+#define XENIA_GPU_D3D12_SHADERS_BYTE_SWAP_HLSLI_
+
+#define XE_BYTE_SWAP_OVERLOAD(XeByteSwapType) \
+XeByteSwapType XeByteSwap(XeByteSwapType v, uint endian) { \
+  [flatten] if (((endian ^ (endian >> 1u)) & 1u) != 0u) { \
+    v = ((v & 0x00FF00FFu) << 8u) | ((v & 0xFF00FF00u) >> 8u); \
+  } \
+  [flatten] if ((endian & 2u) != 0u) { \
+    v = (v << 16u) | (v >> 16u); \
+  } \
+  return v; \
+}
+XE_BYTE_SWAP_OVERLOAD(uint)
+XE_BYTE_SWAP_OVERLOAD(uint2)
+XE_BYTE_SWAP_OVERLOAD(uint3)
+XE_BYTE_SWAP_OVERLOAD(uint4)
+
+#endif  // XENIA_GPU_D3D12_SHADERS_BYTE_SWAP_HLSLI_
diff --git a/src/xenia/gpu/d3d12/shaders/texture_address.hlsli b/src/xenia/gpu/d3d12/shaders/texture_address.hlsli
new file mode 100644
index 000000000..ed6cf945c
--- /dev/null
+++ b/src/xenia/gpu/d3d12/shaders/texture_address.hlsli
@@ -0,0 +1,52 @@
+#ifndef XENIA_GPU_D3D12_SHADERS_TEXTURE_ADDRESS_HLSLI_
+#define XENIA_GPU_D3D12_SHADERS_TEXTURE_ADDRESS_HLSLI_
+
+// XeTextureTiledOffset functions take x/y in blocks and return byte offsets
+// for 4 consecutive blocks along X.
+
+// https://github.com/gildor2/UModel/blob/de8fbd3bc922427ea056b7340202dcdcc19ccff5/Unreal/UnTexture.cpp#L495
+uint4 XeTextureTiledOffset2D(uint2 p, uint width, uint log2_bpb) {
+  uint4 x4 = uint4(0u, 1u, 2u, 3u) + p.xxxx;
+  // Top bits of coordinates.
+  uint4 macro =
+      ((x4 >> 5u) + (p.y >> 5u) * ((width + 31u) >> 5u)) << (log2_bpb + 7u);
+  // Lower bits of coordinates (result is 6-bit value).
+  uint4 micro = ((x4 & 7u) + ((p.y & 0xEu) << 2u)) << log2_bpb;
+  // Mix micro/macro + add few remaining x/y bits.
+  uint4 offset =
+      macro + ((micro & ~0xFu) << 1u) + (micro & 0xFu) + ((p.y & 1u) << 4u);
+  // Mix bits again.
+  return ((offset & ~0x1FFu) << 3u) +  // Upper bits (offset bits [*-9]).
+         ((p.y & 16u) << 7u) +  // Next 1 bit.
+         ((offset & 0x1C0u) << 2u) +  // Next 3 bits (offset bits [8-6]).
+         ((((x4 >> 3u) + ((p.y & 8u) >> 2u)) & 3u) << 6u) +  // Next 2 bits.
+         (offset & 0x3Fu);  // Lower 6 bits (offset bits [5-0]).
+}
+
+// Reverse-engineered from an executable.
+// The base/micro/macro names were chosen pretty much at random and don't have
+// the same meaning as in TiledOffset2D.
+uint4 XeTextureTiledOffset3D(uint3 p, uint2 width_height, uint log2_bpb) {
+  uint4 x4 = uint4(0u, 1u, 2u, 3u) + p.xxxx;
+  uint2 aligned_size = (width_height + 31u) & ~31u;
+  uint base = ((p.z >> 2u) * ((aligned_size.x * aligned_size.y) >> 4u) +
+               (p.y >> 4u)) * (aligned_size.x >> 5u);
+  uint4 micro = (((p.z >> 2u) + (p.y >> 3u)) & 1u).xxxx;
+  micro += (((micro << 1u) + (x4 >> 3u)) & 3u) << 1u;
+  uint4 macro = (((x4 & 7u) + ((p.y & 6u) << 2u)) << (log2_bpb + 6u)) >> 6u;
+  macro = (((((((x4 >> 5u) + base) << (log2_bpb + 6u)) & 0xFFFFFFFu) << 1u) +
+            (macro & ~15u)) << 1u) + (macro & 15u) +
+          ((p.z & 3u) << (log2_bpb + 6u)) + ((p.y & 1u) << 4u);
+  return ((((((((macro >> 6u) & 7u) + ((micro & 1u) << 3u)) << 3u) +
+         (micro & ~1u)) << 2u) + (macro & ~511u)) << 3u) + (macro & 63u);
+}
+
+uint XeTextureGuestLinearOffset(uint3 p, uint height, uint pitch, uint bpb) {
+  return p.x * bpb + ((p.z * ((height + 31u) & ~31u) + p.y) * pitch);
+}
+
+uint XeTextureHostLinearOffset(uint3 p, uint height, uint pitch, uint bpb) {
+  return p.x * bpb + ((p.z * height + p.y) * pitch);
+}
+
+#endif  // XENIA_GPU_D3D12_SHADERS_TEXTURE_ADDRESS_HLSLI_
diff --git a/src/xenia/gpu/d3d12/shaders/texture_copy.hlsli b/src/xenia/gpu/d3d12/shaders/texture_copy.hlsli
new file mode 100644
index 000000000..131872f2d
--- /dev/null
+++ b/src/xenia/gpu/d3d12/shaders/texture_copy.hlsli
@@ -0,0 +1,28 @@
+#ifndef XENIA_GPU_D3D12_SHADERS_TEXTURE_COPY_HLSLI_
+#define XENIA_GPU_D3D12_SHADERS_TEXTURE_COPY_HLSLI_
+
+#include "byte_swap.hlsli"
+#include "texture_address.hlsli"
+
+cbuffer xe_texture_copy_constants : register(b0) {
+  uint xe_texture_copy_guest_base;
+  // For linear textures - row byte pitch.
+  uint xe_texture_copy_guest_pitch;
+  uint xe_texture_copy_host_base;
+  uint xe_texture_copy_host_pitch;
+
+  // Size in blocks.
+  uint3 xe_texture_copy_size;
+  bool xe_texture_copy_is_3d;
+
+  // Offset within the packed mip for small mips.
+  uint3 xe_texture_copy_guest_mip_offset;
+  uint xe_texture_copy_endianness;
+};
+
+#define XeTextureCopyGuestPitchTiled 0xFFFFFFFFu
+
+ByteAddressBuffer xe_texture_copy_source : register(t0);
+RWByteAddressBuffer xe_texture_copy_dest : register(u0);
+
+#endif  // XENIA_GPU_D3D12_SHADERS_TEXTURE_COPY_HLSLI_
diff --git a/src/xenia/gpu/d3d12/shaders/texture_load_64bpb.cs.hlsl b/src/xenia/gpu/d3d12/shaders/texture_load_64bpb.cs.hlsl
new file mode 100644
index 000000000..1d2d17443
--- /dev/null
+++ b/src/xenia/gpu/d3d12/shaders/texture_load_64bpb.cs.hlsl
@@ -0,0 +1,38 @@
+#include "texture_copy.hlsli"
+
+[numthreads(8, 32, 1)]
+void main(uint3 xe_thread_id : SV_DispatchThreadID) {
+  // 1 thread = 4 uint2 blocks.
+  uint3 block_index = xe_thread_id;
+  block_index.x <<= 2u;
+  [branch] if (any(block_index >= xe_texture_copy_size)) {
+    return;
+  }
+  uint3 block_index_guest = block_index + xe_texture_copy_guest_mip_offset;
+  uint4 block_offsets_guest;
+  [branch] if (xe_texture_copy_guest_pitch == XeTextureCopyGuestPitchTiled) {
+    [branch] if (xe_texture_copy_is_3d) {
+      block_offsets_guest = XeTextureTiledOffset3D(
+          block_index_guest, xe_texture_copy_size.xy, 3u);
+    } else {
+      block_offsets_guest = XeTextureTiledOffset2D(
+          block_index_guest.xy, xe_texture_copy_size.x, 3u);
+    }
+  } else {
+    block_offsets_guest = uint4(0u, 8u, 16u, 24u) + XeTextureGuestLinearOffset(
+        block_index_guest, xe_texture_copy_size.y, xe_texture_copy_guest_pitch,
+        8u);
+  }
+  block_offsets_guest += xe_texture_copy_guest_base;
+  uint4 blocks_01 = uint4(xe_texture_copy_source.Load2(block_offsets_guest.x),
+                          xe_texture_copy_source.Load2(block_offsets_guest.y));
+  uint4 blocks_23 = uint4(xe_texture_copy_source.Load2(block_offsets_guest.z),
+                          xe_texture_copy_source.Load2(block_offsets_guest.w));
+  blocks_01 = XeByteSwap(blocks_01, xe_texture_copy_endianness);
+  blocks_23 = XeByteSwap(blocks_23, xe_texture_copy_endianness);
+  uint block_offset_host = XeTextureHostLinearOffset(
+      block_index, xe_texture_copy_size.y, xe_texture_copy_host_pitch, 8u) +
+      xe_texture_copy_host_base;
+  xe_texture_copy_dest.Store4(block_offset_host, blocks_01);
+  xe_texture_copy_dest.Store4(block_offset_host + 16u, blocks_23);
+}
diff --git a/src/xenia/gpu/d3d12/texture_cache.cc b/src/xenia/gpu/d3d12/texture_cache.cc
index 4595febcc..922ae0ec0 100644
--- a/src/xenia/gpu/d3d12/texture_cache.cc
+++ b/src/xenia/gpu/d3d12/texture_cache.cc
@@ -14,81 +14,90 @@

 #include "xenia/base/assert.h"
 #include "xenia/base/logging.h"
+#include "xenia/base/math.h"
 #include "xenia/gpu/d3d12/d3d12_command_processor.h"
+#include "xenia/gpu/texture_info.h"
 #include "xenia/gpu/texture_util.h"

 namespace xe {
 namespace gpu {
 namespace d3d12 {

-TextureCache::HostFormat TextureCache::host_formats_[64] = {
-    {DXGI_FORMAT_UNKNOWN},    // k_1_REVERSE
-    {DXGI_FORMAT_UNKNOWN},    // k_1
-    {DXGI_FORMAT_UNKNOWN},    // k_8
-    {DXGI_FORMAT_UNKNOWN},    // k_1_5_5_5
-    {DXGI_FORMAT_UNKNOWN},    // k_5_6_5
-    {DXGI_FORMAT_UNKNOWN},    // k_6_5_5
-    {DXGI_FORMAT_UNKNOWN},    // k_8_8_8_8
-    {DXGI_FORMAT_UNKNOWN},    // k_2_10_10_10
-    {DXGI_FORMAT_UNKNOWN},    // k_8_A
-    {DXGI_FORMAT_UNKNOWN},    // k_8_B
-    {DXGI_FORMAT_UNKNOWN},    // k_8_8
-    {DXGI_FORMAT_UNKNOWN},    // k_Cr_Y1_Cb_Y0
-    {DXGI_FORMAT_UNKNOWN},    // k_Y1_Cr_Y0_Cb
-    {DXGI_FORMAT_UNKNOWN},    // k_Shadow
-    {DXGI_FORMAT_UNKNOWN},    // k_8_8_8_8_A
-    {DXGI_FORMAT_UNKNOWN},    // k_4_4_4_4
-    {DXGI_FORMAT_UNKNOWN},    // k_10_11_11
-    {DXGI_FORMAT_UNKNOWN},    // k_11_11_10
-    {DXGI_FORMAT_BC1_UNORM},  // k_DXT1
-    {DXGI_FORMAT_UNKNOWN},    // k_DXT2_3
-    {DXGI_FORMAT_UNKNOWN},    // k_DXT4_5
-    {DXGI_FORMAT_UNKNOWN},    // k_DXV
-    {DXGI_FORMAT_UNKNOWN},    // k_24_8
-    {DXGI_FORMAT_UNKNOWN},    // k_24_8_FLOAT
-    {DXGI_FORMAT_UNKNOWN},    // k_16
-    {DXGI_FORMAT_UNKNOWN},    // k_16_16
-    {DXGI_FORMAT_UNKNOWN},    // k_16_16_16_16
-    {DXGI_FORMAT_UNKNOWN},    // k_16_EXPAND
-    {DXGI_FORMAT_UNKNOWN},    // k_16_16_EXPAND
-    {DXGI_FORMAT_UNKNOWN},    // k_16_16_16_16_EXPAND
-    {DXGI_FORMAT_UNKNOWN},    // k_16_FLOAT
-    {DXGI_FORMAT_UNKNOWN},    // k_16_16_FLOAT
-    {DXGI_FORMAT_UNKNOWN},    // k_16_16_16_16_FLOAT
-    {DXGI_FORMAT_UNKNOWN},    // k_32
-    {DXGI_FORMAT_UNKNOWN},    // k_32_32
-    {DXGI_FORMAT_UNKNOWN},    // k_32_32_32_32
-    {DXGI_FORMAT_UNKNOWN},    // k_32_FLOAT
-    {DXGI_FORMAT_UNKNOWN},    // k_32_32_FLOAT
-    {DXGI_FORMAT_UNKNOWN},    // k_32_32_32_32_FLOAT
-    {DXGI_FORMAT_UNKNOWN},    // k_32_AS_8
-    {DXGI_FORMAT_UNKNOWN},    // k_32_AS_8_8
-    {DXGI_FORMAT_UNKNOWN},    // k_16_MPEG
-    {DXGI_FORMAT_UNKNOWN},    // k_16_16_MPEG
-    {DXGI_FORMAT_UNKNOWN},    // k_8_INTERLACED
-    {DXGI_FORMAT_UNKNOWN},    // k_32_AS_8_INTERLACED
-    {DXGI_FORMAT_UNKNOWN},    // k_32_AS_8_8_INTERLACED
-    {DXGI_FORMAT_UNKNOWN},    // k_16_INTERLACED
-    {DXGI_FORMAT_UNKNOWN},    // k_16_MPEG_INTERLACED
-    {DXGI_FORMAT_UNKNOWN},    // k_16_16_MPEG_INTERLACED
-    {DXGI_FORMAT_UNKNOWN},    // k_DXN
-    {DXGI_FORMAT_UNKNOWN},    // k_8_8_8_8_AS_16_16_16_16
-    {DXGI_FORMAT_BC1_UNORM},  // k_DXT1_AS_16_16_16_16
-    {DXGI_FORMAT_UNKNOWN},    // k_DXT2_3_AS_16_16_16_16
-    {DXGI_FORMAT_UNKNOWN},    // k_DXT4_5_AS_16_16_16_16
-    {DXGI_FORMAT_UNKNOWN},    // k_2_10_10_10_AS_16_16_16_16
-    {DXGI_FORMAT_UNKNOWN},    // k_10_11_11_AS_16_16_16_16
-    {DXGI_FORMAT_UNKNOWN},    // k_11_11_10_AS_16_16_16_16
-    {DXGI_FORMAT_UNKNOWN},    // k_32_32_32_FLOAT
-    {DXGI_FORMAT_UNKNOWN},    // k_DXT3A
-    {DXGI_FORMAT_UNKNOWN},    // k_DXT5A
-    {DXGI_FORMAT_UNKNOWN},    // k_CTX1
-    {DXGI_FORMAT_UNKNOWN},    // k_DXT3A_AS_1_1_1_1
-    {DXGI_FORMAT_UNKNOWN},    // k_8_8_8_8_GAMMA
-    {DXGI_FORMAT_UNKNOWN},    // k_2_10_10_10_FLOAT
+// Generated with `xb buildhlsl`.
+#include "xenia/gpu/d3d12/shaders/bin/texture_load_64bpb_cs.h"
+
+const TextureCache::HostFormat TextureCache::host_formats_[64] = {
+    {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown},  // k_1_REVERSE
+    {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown},  // k_1
+    {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown},  // k_8
+    {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown},  // k_1_5_5_5
+    {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown},  // k_5_6_5
+    {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown},  // k_6_5_5
+    {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown},  // k_8_8_8_8
+    {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown},  // k_2_10_10_10
+    {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown},  // k_8_A
+    {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown},  // k_8_B
+    {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown},  // k_8_8
+    {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown},  // k_Cr_Y1_Cb_Y0
+    {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown},  // k_Y1_Cr_Y0_Cb
+    {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown},  // k_Shadow
+    {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown},  // k_8_8_8_8_A
+    {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown},  // k_4_4_4_4
+    {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown},  // k_10_11_11
+    {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown},  // k_11_11_10
+    {DXGI_FORMAT_BC1_UNORM, CopyMode::k64Bpb},  // k_DXT1
+    {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown},  // k_DXT2_3
+    {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown},  // k_DXT4_5
+    {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown},  // k_DXV
+    {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown},  // k_24_8
+    {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown},  // k_24_8_FLOAT
+    {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown},  // k_16
+    {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown},  // k_16_16
+    {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown},  // k_16_16_16_16
+    {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown},  // k_16_EXPAND
+    {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown},  // k_16_16_EXPAND
+    {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown},  // k_16_16_16_16_EXPAND
+    {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown},  // k_16_FLOAT
+    {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown},  // k_16_16_FLOAT
+    {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown},  // k_16_16_16_16_FLOAT
+    {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown},  // k_32
+    {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown},  // k_32_32
+    {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown},  // k_32_32_32_32
+    {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown},  // k_32_FLOAT
+    {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown},  // k_32_32_FLOAT
+    {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown},  // k_32_32_32_32_FLOAT
+    {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown},  // k_32_AS_8
+    {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown},  // k_32_AS_8_8
+    {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown},  // k_16_MPEG
+    {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown},  // k_16_16_MPEG
+    {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown},  // k_8_INTERLACED
+    {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown},  // k_32_AS_8_INTERLACED
+    {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown},  // k_32_AS_8_8_INTERLACED
+    {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown},  // k_16_INTERLACED
+    {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown},  // k_16_MPEG_INTERLACED
+    {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown},  // k_16_16_MPEG_INTERLACED
+    {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown},  // k_DXN
+    {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown},  // k_8_8_8_8_AS_16_16_16_16
+    {DXGI_FORMAT_BC1_UNORM, CopyMode::k64Bpb},  // k_DXT1_AS_16_16_16_16
+    {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown},  // k_DXT2_3_AS_16_16_16_16
+    {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown},  // k_DXT4_5_AS_16_16_16_16
+    {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown},  // k_2_10_10_10_AS_16_16_16_16
+    {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown},  // k_10_11_11_AS_16_16_16_16
+    {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown},  // k_11_11_10_AS_16_16_16_16
+    {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown},  // k_32_32_32_FLOAT
+    {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown},  // k_DXT3A
+    {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown},  // k_DXT5A
+    {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown},  // k_CTX1
+    {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown},  // k_DXT3A_AS_1_1_1_1
+    {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown},  // k_8_8_8_8_GAMMA
+    {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown},  // k_2_10_10_10_FLOAT
 };

-const char* TextureCache::dimension_names_[4] = {"1D", "2D", "3D", "cube"};
+const char* const TextureCache::dimension_names_[4] = {"1D", "2D", "3D",
+                                                       "cube"};
+
+const TextureCache::CopyModeInfo TextureCache::copy_mode_info_[] = {
+    {texture_load_64bpb_cs, sizeof(texture_load_64bpb_cs)}};

 TextureCache::TextureCache(D3D12CommandProcessor* command_processor,
                            RegisterFile* register_file,
@@ -100,11 +109,105 @@ TextureCache::TextureCache(D3D12CommandProcessor* command_processor,
 TextureCache::~TextureCache() { Shutdown(); }

 bool TextureCache::Initialize() {
+  auto device =
+      command_processor_->GetD3D12Context()->GetD3D12Provider()->GetDevice();
+
+  // Create the copying root signature.
+  D3D12_ROOT_PARAMETER root_parameters[2];
+  // Parameter 0 is constants (changed very often when untiling).
+  root_parameters[0].ParameterType = D3D12_ROOT_PARAMETER_TYPE_CBV;
+  root_parameters[0].Descriptor.ShaderRegister = 0;
+  root_parameters[0].Descriptor.RegisterSpace = 0;
+  root_parameters[0].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
+  // Parameter 1 is source and target.
+  D3D12_DESCRIPTOR_RANGE root_copy_ranges[2];
+  root_copy_ranges[0].RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV;
+  root_copy_ranges[0].NumDescriptors = 1;
+  root_copy_ranges[0].BaseShaderRegister = 0;
+  root_copy_ranges[0].RegisterSpace = 0;
+  root_copy_ranges[0].OffsetInDescriptorsFromTableStart = 0;
+  root_copy_ranges[1].RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_UAV;
+  root_copy_ranges[1].NumDescriptors = 1;
+  root_copy_ranges[1].BaseShaderRegister = 0;
+  root_copy_ranges[1].RegisterSpace = 0;
+  root_copy_ranges[1].OffsetInDescriptorsFromTableStart = 1;
+  root_parameters[1].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
+  root_parameters[1].DescriptorTable.NumDescriptorRanges = 2;
+  root_parameters[1].DescriptorTable.pDescriptorRanges = root_copy_ranges;
+  root_parameters[1].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
+  D3D12_ROOT_SIGNATURE_DESC root_signature_desc;
+  root_signature_desc.NumParameters = UINT(xe::countof(root_parameters));
+  root_signature_desc.pParameters = root_parameters;
+  root_signature_desc.NumStaticSamplers = 0;
+  root_signature_desc.pStaticSamplers = nullptr;
+  root_signature_desc.Flags = D3D12_ROOT_SIGNATURE_FLAG_NONE;
+  ID3DBlob* root_signature_blob;
+  ID3DBlob* root_signature_error_blob = nullptr;
+  if (FAILED(D3D12SerializeRootSignature(
+          &root_signature_desc, D3D_ROOT_SIGNATURE_VERSION_1,
+          &root_signature_blob, &root_signature_error_blob))) {
+    XELOGE("Failed to serialize the texture copying root signature");
+    if (root_signature_error_blob != nullptr) {
+      XELOGE("%s", reinterpret_cast<const char*>(
+                       root_signature_error_blob->GetBufferPointer()));
+      root_signature_error_blob->Release();
+    }
+    Shutdown();
+    return false;
+  }
+  if (root_signature_error_blob != nullptr) {
+    root_signature_error_blob->Release();
+  }
+  if (FAILED(device->CreateRootSignature(
+          0, root_signature_blob->GetBufferPointer(),
+          root_signature_blob->GetBufferSize(),
+          IID_PPV_ARGS(&copy_root_signature_)))) {
+    XELOGE("Failed to create the texture copying root signature");
+    root_signature_blob->Release();
+    Shutdown();
+    return false;
+  }
+  root_signature_blob->Release();
+
+  // Create the copying pipelines.
+  D3D12_COMPUTE_PIPELINE_STATE_DESC pipeline_desc;
+  pipeline_desc.pRootSignature = copy_root_signature_;
+  pipeline_desc.NodeMask = 0;
+  pipeline_desc.CachedPSO.pCachedBlob = nullptr;
+  pipeline_desc.CachedPSO.CachedBlobSizeInBytes = 0;
+  pipeline_desc.Flags = D3D12_PIPELINE_STATE_FLAG_NONE;
+  for (uint32_t i = 0; i < uint32_t(CopyMode::kCount); ++i) {
+    const CopyModeInfo& mode_info = copy_mode_info_[i];
+    if (mode_info.load_shader != nullptr) {
+      pipeline_desc.CS.pShaderBytecode = mode_info.load_shader;
+      pipeline_desc.CS.BytecodeLength = mode_info.load_shader_size;
+      if (FAILED(device->CreateComputePipelineState(
+              &pipeline_desc, IID_PPV_ARGS(&copy_load_pipelines_[i])))) {
+        XELOGE("Failed to create the texture copying pipeline for mode %u", i);
+        Shutdown();
+        return false;
+      }
+    }
+  }
+
   ClearBindings();
   return true;
 }

-void TextureCache::Shutdown() { ClearCache(); }
+void TextureCache::Shutdown() {
+  ClearCache();
+
+  for (uint32_t i = 0; i < uint32_t(CopyMode::kCount); ++i) {
+    if (copy_load_pipelines_[i] != nullptr) {
+      copy_load_pipelines_[i]->Release();
+      copy_load_pipelines_[i] = nullptr;
+    }
+  }
+  if (copy_root_signature_ != nullptr) {
+    copy_root_signature_->Release();
+    copy_root_signature_ = nullptr;
+  }
+}

 void TextureCache::TextureFetchConstantWritten(uint32_t index) {
   texture_keys_in_sync_ &= ~(1u << index);
@@ -120,7 +223,6 @@ void TextureCache::BeginFrame() {
 void TextureCache::RequestTextures(uint32_t used_vertex_texture_mask,
                                    uint32_t used_pixel_texture_mask) {
   auto command_list = command_processor_->GetCurrentCommandList();
-  assert_not_null(command_list);
   if (command_list == nullptr) {
     return;
   }
@@ -154,7 +256,7 @@ void TextureCache::RequestTextures(uint32_t used_vertex_texture_mask,
       continue;
     }

-    // TODO(Triang3l): Untile the texture.
+ LoadTextureData(binding.texture); } // Transition the textures to the needed usage. @@ -369,7 +471,7 @@ void TextureCache::TextureKeyFromFetchConstant( key_out.mip_max_level = mip_max_level; key_out.tiled = fetch.tiled; key_out.packed_mips = fetch.packed_mips; - key_out.format = TextureFormat(fetch.format); + key_out.format = GetBaseFormat(TextureFormat(fetch.format)); key_out.endianness = Endian(fetch.endianness); // Get rid of 6 and 7 values (to prevent device losses if the game has // something broken) the quick and dirty way - by changing them to 4 and 5. @@ -380,26 +482,26 @@ void TextureCache::TextureKeyFromFetchConstant( void TextureCache::LogTextureKeyAction(TextureKey key, const char* action) { XELOGGPU( "%s %s %ux%ux%u %s %s texture with %u %spacked mip level%s, " - "base at 0x%.8X, mips at 0x%.8X", action, key.tiled ? "tiled" : "linear", - key.width, key.height, key.depth, + "base at 0x%.8X, mips at 0x%.8X", + action, key.tiled ? "tiled" : "linear", key.width, key.height, key.depth, dimension_names_[uint32_t(key.dimension)], FormatInfo::Get(key.format)->name, key.mip_max_level + 1, key.packed_mips ? "" : "un", key.mip_max_level != 0 ? "s" : "", key.base_page << 12, key.mip_page << 12); } -void TextureCache::LogTextureAction(const Texture& texture, +void TextureCache::LogTextureAction(const Texture* texture, const char* action) { XELOGGPU( "%s %s %ux%ux%u %s %s texture with %u %spacked mip level%s, " - "base at 0x%.8X (size %u), mips at 0x%.8X (size %u)", action, - texture.key.tiled ? "tiled" : "linear", texture.key.width, - texture.key.height, texture.key.depth, - dimension_names_[uint32_t(texture.key.dimension)], - FormatInfo::Get(texture.key.format)->name, - texture.key.mip_max_level + 1, texture.key.packed_mips ? "" : "un", - texture.key.mip_max_level != 0 ? "s" : "", texture.key.base_page << 12, - texture.base_size, texture.key.mip_page << 12, texture.mip_size); + "base at 0x%.8X (size %u), mips at 0x%.8X (size %u)", + action, texture->key.tiled ? 
"tiled" : "linear", texture->key.width, + texture->key.height, texture->key.depth, + dimension_names_[uint32_t(texture->key.dimension)], + FormatInfo::Get(texture->key.format)->name, + texture->key.mip_max_level + 1, texture->key.packed_mips ? "" : "un", + texture->key.mip_max_level != 0 ? "s" : "", texture->key.base_page << 12, + texture->base_size, texture->key.mip_page << 12, texture->mip_size); } TextureCache::Texture* TextureCache::FindOrCreateTexture(TextureKey key) { @@ -457,19 +559,23 @@ TextureCache::Texture* TextureCache::FindOrCreateTexture(TextureKey key) { texture->key = key; texture->resource = resource; texture->state = state; + texture->mip_offsets[0] = 0; uint32_t width_blocks, height_blocks, depth_blocks; if (key.base_page != 0) { texture_util::GetGuestMipBlocks(key.dimension, key.width, key.height, key.depth, key.format, 0, width_blocks, height_blocks, depth_blocks); - texture->base_size = texture_util::GetGuestMipStorageSize( + texture->base_slice_size = texture_util::GetGuestMipStorageSize( width_blocks, height_blocks, depth_blocks, key.tiled, key.format, - nullptr); + texture->mip_pitches[0]); texture->base_in_sync = false; } else { + texture->base_slice_size = 0; + texture->mip_pitches[0] = 0; // Never try to upload the base level if there is none. 
texture->base_in_sync = true; } + texture->mip_slice_size = 0; if (key.mip_page != 0) { uint32_t mip_max_storage_level = key.mip_max_level; if (key.packed_mips) { @@ -477,26 +583,241 @@ TextureCache::Texture* TextureCache::FindOrCreateTexture(TextureKey key) { std::min(mip_max_storage_level, texture_util::GetPackedMipLevel(key.width, key.height)); } - texture->mip_size = 0; for (uint32_t i = 1; i <= mip_max_storage_level; ++i) { texture_util::GetGuestMipBlocks(key.dimension, key.width, key.height, key.depth, key.format, i, width_blocks, height_blocks, depth_blocks); - texture->mip_size += texture_util::GetGuestMipStorageSize( + texture->mip_offsets[i] = texture->mip_slice_size; + texture->mip_slice_size += texture_util::GetGuestMipStorageSize( width_blocks, height_blocks, depth_blocks, key.tiled, key.format, - nullptr); + texture->mip_pitches[i]); + } + // The rest are either packed levels or don't exist at all. + for (uint32_t i = mip_max_storage_level + 1; + i < xe::countof(texture->mip_offsets); ++i) { + texture->mip_offsets[i] = texture->mip_offsets[mip_max_storage_level]; + texture->mip_pitches[i] = texture->mip_pitches[mip_max_storage_level]; } texture->mips_in_sync = false; } else { + std::memset(&texture->mip_offsets[1], 0, + (xe::countof(texture->mip_offsets) - 1) * sizeof(uint32_t)); + std::memset(&texture->mip_pitches[1], 0, + (xe::countof(texture->mip_pitches) - 1) * sizeof(uint32_t)); // Never try to upload the mipmaps if there are none. 
texture->mips_in_sync = true; } + texture->base_size = texture->base_slice_size; + texture->mip_size = texture->mip_slice_size; + if (key.dimension != Dimension::k3D) { + texture->base_size *= key.depth; + texture->mip_size *= key.depth; + } textures_.insert(std::make_pair(map_key, texture)); - LogTextureAction(*texture, "Created"); + LogTextureAction(texture, "Created"); return texture; } +bool TextureCache::LoadTextureData(Texture* texture) { + if (texture->base_in_sync && texture->mips_in_sync) { + return true; + } + + auto command_list = command_processor_->GetCurrentCommandList(); + if (command_list == nullptr) { + return false; + } + auto provider = command_processor_->GetD3D12Context()->GetD3D12Provider(); + auto device = provider->GetDevice(); + + // Get the pipeline. + const HostFormat& host_format = host_formats_[uint32_t(texture->key.format)]; + if (host_format.copy_mode == CopyMode::kUnknown) { + return false; + } + ID3D12PipelineState* pipeline = + copy_load_pipelines_[uint32_t(host_format.copy_mode)]; + if (pipeline == nullptr) { + return false; + } + + // Request uploading of the texture data to the shared memory. + if (!texture->base_in_sync) { + if (!shared_memory_->UseRange(texture->key.base_page << 12, + texture->base_size)) { + return false; + } + } + if (!texture->mips_in_sync) { + if (!shared_memory_->UseRange(texture->key.mip_page << 12, + texture->mip_size)) { + return false; + } + } + + // Get the guest layout. + bool is_3d = texture->key.dimension == Dimension::k3D; + uint32_t width = texture->key.width; + uint32_t height = texture->key.height; + uint32_t depth = is_3d ? texture->key.depth : 1; + uint32_t slice_count = is_3d ? 1 : texture->key.depth; + TextureFormat guest_format = texture->key.format; + const FormatInfo* guest_format_info = FormatInfo::Get(guest_format); + uint32_t block_width = guest_format_info->block_width; + uint32_t block_height = guest_format_info->block_height; + + // Get the host layout and the buffer. 
+ D3D12_RESOURCE_DESC resource_desc = texture->resource->GetDesc(); + D3D12_PLACED_SUBRESOURCE_FOOTPRINT host_layouts[D3D12_REQ_MIP_LEVELS]; + UINT64 host_slice_size; + device->GetCopyableFootprints(&resource_desc, 0, resource_desc.MipLevels, 0, + host_layouts, nullptr, nullptr, + &host_slice_size); + D3D12_RESOURCE_STATES copy_buffer_state = + D3D12_RESOURCE_STATE_UNORDERED_ACCESS; + ID3D12Resource* copy_buffer = command_processor_->RequestScratchGPUBuffer( + uint32_t(host_slice_size), copy_buffer_state); + if (copy_buffer == nullptr) { + return false; + } + + // Begin loading. + D3D12_CPU_DESCRIPTOR_HANDLE descriptor_cpu_start; + D3D12_GPU_DESCRIPTOR_HANDLE descriptor_gpu_start; + if (command_processor_->RequestViewDescriptors(0, 2, 2, descriptor_cpu_start, + descriptor_gpu_start) == 0) { + command_processor_->ReleaseScratchGPUBuffer(copy_buffer, copy_buffer_state); + return false; + } + shared_memory_->UseForReading(command_list); + shared_memory_->CreateSRV(descriptor_cpu_start); + D3D12_UNORDERED_ACCESS_VIEW_DESC uav_desc; + uav_desc.Format = DXGI_FORMAT_R32_TYPELESS; + uav_desc.ViewDimension = D3D12_UAV_DIMENSION_BUFFER; + uav_desc.Buffer.FirstElement = 0; + uav_desc.Buffer.NumElements = UINT(host_slice_size >> 2); + uav_desc.Buffer.StructureByteStride = 0; + uav_desc.Buffer.CounterOffsetInBytes = 0; + uav_desc.Buffer.Flags = D3D12_BUFFER_UAV_FLAG_RAW; + D3D12_CPU_DESCRIPTOR_HANDLE descriptor_cpu_uav; + descriptor_cpu_uav.ptr = + descriptor_cpu_start.ptr + provider->GetDescriptorSizeView(); + device->CreateUnorderedAccessView(copy_buffer, nullptr, &uav_desc, + descriptor_cpu_uav); + command_processor_->SetPipeline(pipeline); + command_list->SetComputeRootSignature(copy_root_signature_); + command_list->SetComputeRootDescriptorTable(1, descriptor_gpu_start); + + // Submit commands. 
+ D3D12_RESOURCE_BARRIER barriers[2]; + barriers[0].Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE; + barriers[1].Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE; + if (texture->state != D3D12_RESOURCE_STATE_COPY_DEST) { + barriers[0].Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; + barriers[0].Transition.pResource = texture->resource; + barriers[0].Transition.Subresource = + D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; + barriers[0].Transition.StateBefore = texture->state; + barriers[0].Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_DEST; + command_list->ResourceBarrier(1, barriers); + texture->state = D3D12_RESOURCE_STATE_COPY_DEST; + } + uint32_t mip_first = texture->base_in_sync ? 1 : 0; + uint32_t mip_last = texture->mips_in_sync ? 0 : resource_desc.MipLevels - 1; + auto cbuffer_pool = command_processor_->GetConstantBufferPool(); + CopyConstants copy_constants; + copy_constants.is_3d = is_3d ? 1 : 0; + copy_constants.endianness = uint32_t(texture->key.endianness); + if (!texture->key.packed_mips) { + copy_constants.guest_mip_offset[0] = 0; + copy_constants.guest_mip_offset[1] = 0; + copy_constants.guest_mip_offset[2] = 0; + } + for (uint32_t i = 0; i < slice_count; ++i) { + if (copy_buffer_state != D3D12_RESOURCE_STATE_UNORDERED_ACCESS) { + barriers[0].Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; + barriers[0].Transition.pResource = copy_buffer; + barriers[0].Transition.Subresource = 0; + barriers[0].Transition.StateBefore = copy_buffer_state; + barriers[0].Transition.StateAfter = D3D12_RESOURCE_STATE_UNORDERED_ACCESS; + command_list->ResourceBarrier(1, barriers); + copy_buffer_state = D3D12_RESOURCE_STATE_UNORDERED_ACCESS; + } + for (uint32_t j = mip_first; j <= mip_last; ++j) { + if (j == 0) { + copy_constants.guest_base = + (texture->key.base_page << 12) + i * texture->base_slice_size; + } else { + copy_constants.guest_base = + (texture->key.mip_page << 12) + i * texture->mip_slice_size; + } + copy_constants.guest_base += texture->mip_offsets[j]; + 
copy_constants.guest_pitch = texture->key.tiled + ? CopyConstants::kGuestPitchTiled + : texture->mip_pitches[j]; + copy_constants.host_base = uint32_t(host_layouts[j].Offset); + copy_constants.host_pitch = host_layouts[j].Footprint.RowPitch; + copy_constants.size[0] = + (std::max(width >> j, 1u) + (block_width - 1)) / block_width; + copy_constants.size[1] = + (std::max(height >> j, 1u) + (block_height - 1)) / block_height; + copy_constants.size[2] = std::max(depth >> j, 1u); + if (texture->key.packed_mips) { + texture_util::GetPackedMipOffset(width, height, depth, guest_format, j, + copy_constants.guest_mip_offset[0], + copy_constants.guest_mip_offset[1], + copy_constants.guest_mip_offset[2]); + } + D3D12_GPU_VIRTUAL_ADDRESS cbuffer_gpu_address; + uint8_t* cbuffer_mapping = cbuffer_pool->RequestFull( + xe::align(uint32_t(sizeof(copy_constants)), 256u), nullptr, nullptr, + &cbuffer_gpu_address); + if (cbuffer_mapping == nullptr) { + command_processor_->ReleaseScratchGPUBuffer(copy_buffer, + copy_buffer_state); + return false; + } + std::memcpy(cbuffer_mapping, &copy_constants, sizeof(copy_constants)); + command_list->SetComputeRootConstantBufferView(0, cbuffer_gpu_address); + // Each thread group processes 32x32x1 blocks.
+ command_list->Dispatch((copy_constants.size[0] + 31) >> 5, + (copy_constants.size[1] + 31) >> 5, + copy_constants.size[2]); + } + barriers[0].Type = D3D12_RESOURCE_BARRIER_TYPE_UAV; + barriers[0].UAV.pResource = copy_buffer; + barriers[1].Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; + barriers[1].Transition.pResource = copy_buffer; + barriers[1].Transition.Subresource = 0; + barriers[1].Transition.StateBefore = D3D12_RESOURCE_STATE_UNORDERED_ACCESS; + barriers[1].Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_SOURCE; + command_list->ResourceBarrier(2, barriers); + copy_buffer_state = D3D12_RESOURCE_STATE_COPY_SOURCE; + UINT slice_first_subresource = i * resource_desc.MipLevels; + for (uint32_t j = mip_first; j <= mip_last; ++j) { + D3D12_TEXTURE_COPY_LOCATION location_source, location_dest; + location_source.pResource = copy_buffer; + location_source.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; + location_source.PlacedFootprint = host_layouts[j]; + location_dest.pResource = texture->resource; + location_dest.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; + location_dest.SubresourceIndex = slice_first_subresource + j; + command_list->CopyTextureRegion(&location_dest, 0, 0, 0, &location_source, + nullptr); + } + } + + command_processor_->ReleaseScratchGPUBuffer(copy_buffer, copy_buffer_state); + + // TODO(Triang3l): Uncomment when done testing untiling shaders. 
+ /* texture->base_in_sync = true; + texture->mips_in_sync = true; */ + + LogTextureAction(texture, "Loaded"); + return true; +} + void TextureCache::ClearBindings() { std::memset(texture_bindings_, 0, sizeof(texture_bindings_)); texture_keys_in_sync_ = 0; diff --git a/src/xenia/gpu/d3d12/texture_cache.h b/src/xenia/gpu/d3d12/texture_cache.h index b2b822fcb..a05336341 100644 --- a/src/xenia/gpu/d3d12/texture_cache.h +++ b/src/xenia/gpu/d3d12/texture_cache.h @@ -80,8 +80,22 @@ class TextureCache { D3D12_CPU_DESCRIPTOR_HANDLE handle); private: + struct CopyModeInfo { + const void* load_shader; + size_t load_shader_size; + }; + + enum class CopyMode { + k64Bpb, + + kCount, + + kUnknown = kCount + }; + struct HostFormat { DXGI_FORMAT dxgi_format; + CopyMode copy_mode; }; union TextureKey { @@ -150,16 +164,45 @@ class TextureCache { TextureKey key; ID3D12Resource* resource; D3D12_RESOURCE_STATES state; + // Byte size of one array slice of the top guest mip level. + uint32_t base_slice_size; // Byte size of the top guest mip level. uint32_t base_size; + // Byte size of one array slice of mips between 1 and key.mip_max_level. + uint32_t mip_slice_size; // Byte size of mips between 1 and key.mip_max_level. uint32_t mip_size; + // Byte offsets of each mipmap within one slice. + uint32_t mip_offsets[14]; + // Byte pitches of each mipmap within one slice (for linear layout mainly). + uint32_t mip_pitches[14]; // Whether the recent base level data has been loaded from the memory. bool base_in_sync; // Whether the recent mip data has been loaded from the memory. bool mips_in_sync; }; + struct CopyConstants { + // vec4 0. + uint32_t guest_base; + // For linear textures - row byte pitch. + uint32_t guest_pitch; + uint32_t host_base; + uint32_t host_pitch; + + // vec4 1. + // Size in blocks. + uint32_t size[3]; + uint32_t is_3d; + + // vec4 2. + // Offset within the packed mip for small mips. 
+ uint32_t guest_mip_offset[3]; + uint32_t endianness; + + static constexpr uint32_t kGuestPitchTiled = UINT32_MAX; + }; + struct TextureBinding { TextureKey key; uint32_t swizzle; @@ -173,26 +216,34 @@ class TextureCache { uint32_t& swizzle_out); static void LogTextureKeyAction(TextureKey key, const char* action); - static void LogTextureAction(const Texture& texture, const char* action); + static void LogTextureAction(const Texture* texture, const char* action); // Returns nullptr if the key is not supported, but also if couldn't create // the texture - if it's nullptr, occasionally a recreation attempt should be // made. Texture* FindOrCreateTexture(TextureKey key); + // Writes data from the shared memory to the texture. This binds pipelines and + // allocates descriptors! + bool LoadTextureData(Texture* texture); + // Makes all bindings invalid. Also requesting textures after calling this // will cause another attempt to create a texture or to untile it if there was // an error. void ClearBindings(); - static HostFormat host_formats_[64]; + static const HostFormat host_formats_[64]; - static const char* dimension_names_[4]; + static const char* const dimension_names_[4]; D3D12CommandProcessor* command_processor_; RegisterFile* register_file_; SharedMemory* shared_memory_; + static const CopyModeInfo copy_mode_info_[]; + ID3D12RootSignature* copy_root_signature_ = nullptr; + ID3D12PipelineState* copy_load_pipelines_[size_t(CopyMode::kCount)] = {}; + std::unordered_multimap textures_; TextureBinding texture_bindings_[32] = {}; diff --git a/src/xenia/gpu/texture_util.cc b/src/xenia/gpu/texture_util.cc index 6647980bc..0fa2e8bdc 100644 --- a/src/xenia/gpu/texture_util.cc +++ b/src/xenia/gpu/texture_util.cc @@ -56,17 +56,15 @@ void GetGuestMipBlocks(Dimension dimension, uint32_t width, uint32_t height, uint32_t GetGuestMipStorageSize(uint32_t width_blocks, uint32_t height_blocks, uint32_t depth_blocks, bool is_tiled, - TextureFormat format, uint32_t* row_pitch_out) 
{ + TextureFormat format, uint32_t& row_pitch_out) { const FormatInfo* format_info = FormatInfo::Get(format); - uint32_t row_pitch = - width_blocks * format_info->block_width * format_info->block_height * - format_info->bits_per_pixel / 8; + uint32_t row_pitch = width_blocks * format_info->block_width * + format_info->block_height * format_info->bits_per_pixel / + 8; if (!is_tiled) { row_pitch = xe::align(row_pitch, 256u); } - if (row_pitch_out != nullptr) { - *row_pitch_out = row_pitch; - } + row_pitch_out = row_pitch; return xe::align(row_pitch * height_blocks * depth_blocks, 4096u); } diff --git a/src/xenia/gpu/texture_util.h b/src/xenia/gpu/texture_util.h index a9c6674f7..63e0394a9 100644 --- a/src/xenia/gpu/texture_util.h +++ b/src/xenia/gpu/texture_util.h @@ -33,7 +33,7 @@ void GetGuestMipBlocks(Dimension dimension, uint32_t width, uint32_t height, // height and depth must be obtained via GetGuestMipExtent. uint32_t GetGuestMipStorageSize(uint32_t width_blocks, uint32_t height_blocks, uint32_t depth_blocks, bool is_tiled, - TextureFormat format, uint32_t* row_pitch_out); + TextureFormat format, uint32_t& row_pitch_out); // Gets the number of the mipmap level where the packed mips are stored. inline uint32_t GetPackedMipLevel(uint32_t width, uint32_t height) {