From 75c185e759775aafce37da56a01c43c2ea25a3d5 Mon Sep 17 00:00:00 2001 From: Triang3l Date: Tue, 24 May 2022 22:24:33 +0300 Subject: [PATCH 1/3] [GPU] Move texture load shader info to common --- src/xenia/gpu/d3d12/d3d12_texture_cache.cc | 706 +++++++++++---------- src/xenia/gpu/d3d12/d3d12_texture_cache.h | 93 +-- src/xenia/gpu/texture_cache.cc | 74 +++ src/xenia/gpu/texture_cache.h | 70 ++ 4 files changed, 546 insertions(+), 397 deletions(-) diff --git a/src/xenia/gpu/d3d12/d3d12_texture_cache.cc b/src/xenia/gpu/d3d12/d3d12_texture_cache.cc index 6f1500f97..86d116494 100644 --- a/src/xenia/gpu/d3d12/d3d12_texture_cache.cc +++ b/src/xenia/gpu/d3d12/d3d12_texture_cache.cc @@ -80,386 +80,311 @@ namespace shaders { const D3D12TextureCache::HostFormat D3D12TextureCache::host_formats_[64] = { // k_1_REVERSE - {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, - DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, false, DXGI_FORMAT_UNKNOWN, - LoadMode::kUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR}, + {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, + DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN, + kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR}, // k_1 - {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, - DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, false, DXGI_FORMAT_UNKNOWN, - LoadMode::kUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR}, + {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, + DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN, + kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR}, // k_8 - {DXGI_FORMAT_R8_TYPELESS, DXGI_FORMAT_R8_UNORM, LoadMode::k8bpb, - DXGI_FORMAT_R8_SNORM, LoadMode::kUnknown, false, DXGI_FORMAT_UNKNOWN, - LoadMode::kUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR}, + {DXGI_FORMAT_R8_TYPELESS, DXGI_FORMAT_R8_UNORM, kLoadShaderIndex8bpb, + DXGI_FORMAT_R8_SNORM, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN, + 
kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR}, // k_1_5_5_5 // Red and blue swapped in the load shader for simplicity. {DXGI_FORMAT_B5G5R5A1_UNORM, DXGI_FORMAT_B5G5R5A1_UNORM, - LoadMode::kR5G5B5A1ToB5G5R5A1, DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, - false, DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, - xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA}, + kLoadShaderIndexR5G5B5A1ToB5G5R5A1, DXGI_FORMAT_UNKNOWN, + kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN, + kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA}, // k_5_6_5 // Red and blue swapped in the load shader for simplicity. {DXGI_FORMAT_B5G6R5_UNORM, DXGI_FORMAT_B5G6R5_UNORM, - LoadMode::kR5G6B5ToB5G6R5, DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, false, - DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, - xenos::XE_GPU_TEXTURE_SWIZZLE_RGBB}, + kLoadShaderIndexR5G6B5ToB5G6R5, DXGI_FORMAT_UNKNOWN, + kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN, + kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBB}, // k_6_5_5 // On the host, green bits in blue, blue bits in green. 
{DXGI_FORMAT_B5G6R5_UNORM, DXGI_FORMAT_B5G6R5_UNORM, - LoadMode::kR5G5B6ToB5G6R5WithRBGASwizzle, DXGI_FORMAT_UNKNOWN, - LoadMode::kUnknown, false, DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, - XE_GPU_MAKE_TEXTURE_SWIZZLE(R, B, G, G)}, + kLoadShaderIndexR5G5B6ToB5G6R5WithRBGASwizzle, DXGI_FORMAT_UNKNOWN, + kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN, + kLoadShaderIndexUnknown, XE_GPU_MAKE_TEXTURE_SWIZZLE(R, B, G, G)}, // k_8_8_8_8 {DXGI_FORMAT_R8G8B8A8_TYPELESS, DXGI_FORMAT_R8G8B8A8_UNORM, - LoadMode::k32bpb, DXGI_FORMAT_R8G8B8A8_SNORM, LoadMode::kUnknown, false, - DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, + kLoadShaderIndex32bpb, DXGI_FORMAT_R8G8B8A8_SNORM, kLoadShaderIndexUnknown, + false, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA}, // k_2_10_10_10 {DXGI_FORMAT_R10G10B10A2_TYPELESS, DXGI_FORMAT_R10G10B10A2_UNORM, - LoadMode::k32bpb, DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, false, - DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, + kLoadShaderIndex32bpb, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, false, + DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA}, // k_8_A - {DXGI_FORMAT_R8_TYPELESS, DXGI_FORMAT_R8_UNORM, LoadMode::k8bpb, - DXGI_FORMAT_R8_SNORM, LoadMode::kUnknown, false, DXGI_FORMAT_UNKNOWN, - LoadMode::kUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR}, + {DXGI_FORMAT_R8_TYPELESS, DXGI_FORMAT_R8_UNORM, kLoadShaderIndex8bpb, + DXGI_FORMAT_R8_SNORM, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN, + kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR}, // k_8_B - {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, - DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, false, DXGI_FORMAT_UNKNOWN, - LoadMode::kUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR}, + {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, + DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN, + kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR}, // k_8_8 - 
{DXGI_FORMAT_R8G8_TYPELESS, DXGI_FORMAT_R8G8_UNORM, LoadMode::k16bpb, - DXGI_FORMAT_R8G8_SNORM, LoadMode::kUnknown, false, DXGI_FORMAT_UNKNOWN, - LoadMode::kUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGGG}, + {DXGI_FORMAT_R8G8_TYPELESS, DXGI_FORMAT_R8G8_UNORM, kLoadShaderIndex16bpb, + DXGI_FORMAT_R8G8_SNORM, kLoadShaderIndexUnknown, false, + DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, + xenos::XE_GPU_TEXTURE_SWIZZLE_RGGG}, // k_Cr_Y1_Cb_Y0_REP // Red and blue swapped in the load shader for simplicity. // TODO(Triang3l): The DXGI_FORMAT_R8G8B8A8_U/SNORM conversion is usable for - // the signed version, separate unsigned and signed load modes completely + // the signed version, separate unsigned and signed load shaders completely // (as one doesn't need decompression for this format, while another does). {DXGI_FORMAT_G8R8_G8B8_UNORM, DXGI_FORMAT_G8R8_G8B8_UNORM, - LoadMode::kGBGR8ToGRGB8, DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, true, - DXGI_FORMAT_R8G8B8A8_UNORM, LoadMode::kGBGR8ToRGB8, + kLoadShaderIndexGBGR8ToGRGB8, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, + true, DXGI_FORMAT_R8G8B8A8_UNORM, kLoadShaderIndexGBGR8ToRGB8, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBB}, // k_Y1_Cr_Y0_Cb_REP // Red and blue swapped in the load shader for simplicity. // TODO(Triang3l): The DXGI_FORMAT_R8G8B8A8_U/SNORM conversion is usable for - // the signed version, separate unsigned and signed load modes completely + // the signed version, separate unsigned and signed load shaders completely // (as one doesn't need decompression for this format, while another does). 
{DXGI_FORMAT_R8G8_B8G8_UNORM, DXGI_FORMAT_R8G8_B8G8_UNORM, - LoadMode::kBGRG8ToRGBG8, DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, true, - DXGI_FORMAT_R8G8B8A8_UNORM, LoadMode::kBGRG8ToRGB8, + kLoadShaderIndexBGRG8ToRGBG8, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, + true, DXGI_FORMAT_R8G8B8A8_UNORM, kLoadShaderIndexBGRG8ToRGB8, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBB}, // k_16_16_EDRAM // Not usable as a texture, also has -32...32 range. - {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, - DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, false, DXGI_FORMAT_UNKNOWN, - LoadMode::kUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGGG}, + {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, + DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN, + kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGGG}, // k_8_8_8_8_A - {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, - DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, false, DXGI_FORMAT_UNKNOWN, - LoadMode::kUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA}, + {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, + DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN, + kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA}, // k_4_4_4_4 // Red and blue swapped in the load shader for simplicity. 
{DXGI_FORMAT_B4G4R4A4_UNORM, DXGI_FORMAT_B4G4R4A4_UNORM, - LoadMode::kR4G4B4A4ToB4G4R4A4, DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, - false, DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, + kLoadShaderIndexRGBA4ToBGRA4, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, + false, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA}, // k_10_11_11 {DXGI_FORMAT_R16G16B16A16_TYPELESS, DXGI_FORMAT_R16G16B16A16_UNORM, - LoadMode::kR11G11B10ToRGBA16, DXGI_FORMAT_R16G16B16A16_SNORM, - LoadMode::kR11G11B10ToRGBA16SNorm, false, DXGI_FORMAT_UNKNOWN, - LoadMode::kUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBB}, + kLoadShaderIndexR11G11B10ToRGBA16, DXGI_FORMAT_R16G16B16A16_SNORM, + kLoadShaderIndexR11G11B10ToRGBA16SNorm, false, DXGI_FORMAT_UNKNOWN, + kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBB}, // k_11_11_10 {DXGI_FORMAT_R16G16B16A16_TYPELESS, DXGI_FORMAT_R16G16B16A16_UNORM, - LoadMode::kR10G11B11ToRGBA16, DXGI_FORMAT_R16G16B16A16_SNORM, - LoadMode::kR10G11B11ToRGBA16SNorm, false, DXGI_FORMAT_UNKNOWN, - LoadMode::kUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBB}, + kLoadShaderIndexR10G11B11ToRGBA16, DXGI_FORMAT_R16G16B16A16_SNORM, + kLoadShaderIndexR10G11B11ToRGBA16SNorm, false, DXGI_FORMAT_UNKNOWN, + kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBB}, // k_DXT1 - {DXGI_FORMAT_BC1_UNORM, DXGI_FORMAT_BC1_UNORM, LoadMode::k64bpb, - DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, true, DXGI_FORMAT_R8G8B8A8_UNORM, - LoadMode::kDXT1ToRGBA8, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA}, + {DXGI_FORMAT_BC1_UNORM, DXGI_FORMAT_BC1_UNORM, kLoadShaderIndex64bpb, + DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, true, + DXGI_FORMAT_R8G8B8A8_UNORM, kLoadShaderIndexDXT1ToRGBA8, + xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA}, // k_DXT2_3 - {DXGI_FORMAT_BC2_UNORM, DXGI_FORMAT_BC2_UNORM, LoadMode::k128bpb, - DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, true, DXGI_FORMAT_R8G8B8A8_UNORM, - LoadMode::kDXT3ToRGBA8, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA}, + {DXGI_FORMAT_BC2_UNORM, 
DXGI_FORMAT_BC2_UNORM, kLoadShaderIndex128bpb, + DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, true, + DXGI_FORMAT_R8G8B8A8_UNORM, kLoadShaderIndexDXT3ToRGBA8, + xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA}, // k_DXT4_5 - {DXGI_FORMAT_BC3_UNORM, DXGI_FORMAT_BC3_UNORM, LoadMode::k128bpb, - DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, true, DXGI_FORMAT_R8G8B8A8_UNORM, - LoadMode::kDXT5ToRGBA8, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA}, + {DXGI_FORMAT_BC3_UNORM, DXGI_FORMAT_BC3_UNORM, kLoadShaderIndex128bpb, + DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, true, + DXGI_FORMAT_R8G8B8A8_UNORM, kLoadShaderIndexDXT5ToRGBA8, + xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA}, // k_16_16_16_16_EDRAM // Not usable as a texture, also has -32...32 range. - {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, - DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, false, DXGI_FORMAT_UNKNOWN, - LoadMode::kUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA}, + {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, + DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN, + kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA}, // R32_FLOAT for depth because shaders would require an additional SRV to // sample stencil, which we don't provide. 
// k_24_8 - {DXGI_FORMAT_R32_FLOAT, DXGI_FORMAT_R32_FLOAT, LoadMode::kDepthUnorm, - DXGI_FORMAT_R32_FLOAT, LoadMode::kUnknown, false, DXGI_FORMAT_UNKNOWN, - LoadMode::kUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR}, + {DXGI_FORMAT_R32_FLOAT, DXGI_FORMAT_R32_FLOAT, kLoadShaderIndexDepthUnorm, + DXGI_FORMAT_R32_FLOAT, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN, + kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR}, // k_24_8_FLOAT - {DXGI_FORMAT_R32_FLOAT, DXGI_FORMAT_R32_FLOAT, LoadMode::kDepthFloat, - DXGI_FORMAT_R32_FLOAT, LoadMode::kUnknown, false, DXGI_FORMAT_UNKNOWN, - LoadMode::kUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR}, + {DXGI_FORMAT_R32_FLOAT, DXGI_FORMAT_R32_FLOAT, kLoadShaderIndexDepthFloat, + DXGI_FORMAT_R32_FLOAT, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN, + kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR}, // k_16 - {DXGI_FORMAT_R16_TYPELESS, DXGI_FORMAT_R16_UNORM, LoadMode::k16bpb, - DXGI_FORMAT_R16_SNORM, LoadMode::kUnknown, false, DXGI_FORMAT_UNKNOWN, - LoadMode::kUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR}, + {DXGI_FORMAT_R16_TYPELESS, DXGI_FORMAT_R16_UNORM, kLoadShaderIndex16bpb, + DXGI_FORMAT_R16_SNORM, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN, + kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR}, // k_16_16 - {DXGI_FORMAT_R16G16_TYPELESS, DXGI_FORMAT_R16G16_UNORM, LoadMode::k32bpb, - DXGI_FORMAT_R16G16_SNORM, LoadMode::kUnknown, false, DXGI_FORMAT_UNKNOWN, - LoadMode::kUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGGG}, + {DXGI_FORMAT_R16G16_TYPELESS, DXGI_FORMAT_R16G16_UNORM, + kLoadShaderIndex32bpb, DXGI_FORMAT_R16G16_SNORM, kLoadShaderIndexUnknown, + false, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, + xenos::XE_GPU_TEXTURE_SWIZZLE_RGGG}, // k_16_16_16_16 {DXGI_FORMAT_R16G16B16A16_TYPELESS, DXGI_FORMAT_R16G16B16A16_UNORM, - LoadMode::k64bpb, DXGI_FORMAT_R16G16B16A16_SNORM, LoadMode::kUnknown, - false, DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, - xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA}, + 
kLoadShaderIndex64bpb, DXGI_FORMAT_R16G16B16A16_SNORM, + kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN, + kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA}, // k_16_EXPAND - {DXGI_FORMAT_R16_FLOAT, DXGI_FORMAT_R16_FLOAT, LoadMode::k16bpb, - DXGI_FORMAT_R16_FLOAT, LoadMode::kUnknown, false, DXGI_FORMAT_UNKNOWN, - LoadMode::kUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR}, + {DXGI_FORMAT_R16_FLOAT, DXGI_FORMAT_R16_FLOAT, kLoadShaderIndex16bpb, + DXGI_FORMAT_R16_FLOAT, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN, + kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR}, // k_16_16_EXPAND - {DXGI_FORMAT_R16G16_FLOAT, DXGI_FORMAT_R16G16_FLOAT, LoadMode::k32bpb, - DXGI_FORMAT_R16G16_FLOAT, LoadMode::kUnknown, false, DXGI_FORMAT_UNKNOWN, - LoadMode::kUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGGG}, + {DXGI_FORMAT_R16G16_FLOAT, DXGI_FORMAT_R16G16_FLOAT, kLoadShaderIndex32bpb, + DXGI_FORMAT_R16G16_FLOAT, kLoadShaderIndexUnknown, false, + DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, + xenos::XE_GPU_TEXTURE_SWIZZLE_RGGG}, // k_16_16_16_16_EXPAND {DXGI_FORMAT_R16G16B16A16_FLOAT, DXGI_FORMAT_R16G16B16A16_FLOAT, - LoadMode::k64bpb, DXGI_FORMAT_R16G16B16A16_FLOAT, LoadMode::kUnknown, - false, DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, - xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA}, + kLoadShaderIndex64bpb, DXGI_FORMAT_R16G16B16A16_FLOAT, + kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN, + kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA}, // k_16_FLOAT - {DXGI_FORMAT_R16_FLOAT, DXGI_FORMAT_R16_FLOAT, LoadMode::k16bpb, - DXGI_FORMAT_R16_FLOAT, LoadMode::kUnknown, false, DXGI_FORMAT_UNKNOWN, - LoadMode::kUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR}, + {DXGI_FORMAT_R16_FLOAT, DXGI_FORMAT_R16_FLOAT, kLoadShaderIndex16bpb, + DXGI_FORMAT_R16_FLOAT, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN, + kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR}, // k_16_16_FLOAT - {DXGI_FORMAT_R16G16_FLOAT, DXGI_FORMAT_R16G16_FLOAT, LoadMode::k32bpb, - 
DXGI_FORMAT_R16G16_FLOAT, LoadMode::kUnknown, false, DXGI_FORMAT_UNKNOWN, - LoadMode::kUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGGG}, + {DXGI_FORMAT_R16G16_FLOAT, DXGI_FORMAT_R16G16_FLOAT, kLoadShaderIndex32bpb, + DXGI_FORMAT_R16G16_FLOAT, kLoadShaderIndexUnknown, false, + DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, + xenos::XE_GPU_TEXTURE_SWIZZLE_RGGG}, // k_16_16_16_16_FLOAT {DXGI_FORMAT_R16G16B16A16_FLOAT, DXGI_FORMAT_R16G16B16A16_FLOAT, - LoadMode::k64bpb, DXGI_FORMAT_R16G16B16A16_FLOAT, LoadMode::kUnknown, - false, DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, - xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA}, + kLoadShaderIndex64bpb, DXGI_FORMAT_R16G16B16A16_FLOAT, + kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN, + kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA}, // k_32 - {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, - DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, false, DXGI_FORMAT_UNKNOWN, - LoadMode::kUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR}, + {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, + DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN, + kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR}, // k_32_32 - {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, - DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, false, DXGI_FORMAT_UNKNOWN, - LoadMode::kUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGGG}, + {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, + DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN, + kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGGG}, // k_32_32_32_32 - {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, - DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, false, DXGI_FORMAT_UNKNOWN, - LoadMode::kUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA}, + {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, + DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN, + kLoadShaderIndexUnknown, 
xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA}, // k_32_FLOAT - {DXGI_FORMAT_R32_FLOAT, DXGI_FORMAT_R32_FLOAT, LoadMode::k32bpb, - DXGI_FORMAT_R32_FLOAT, LoadMode::kUnknown, false, DXGI_FORMAT_UNKNOWN, - LoadMode::kUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR}, + {DXGI_FORMAT_R32_FLOAT, DXGI_FORMAT_R32_FLOAT, kLoadShaderIndex32bpb, + DXGI_FORMAT_R32_FLOAT, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN, + kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR}, // k_32_32_FLOAT - {DXGI_FORMAT_R32G32_FLOAT, DXGI_FORMAT_R32G32_FLOAT, LoadMode::k64bpb, - DXGI_FORMAT_R32G32_FLOAT, LoadMode::kUnknown, false, DXGI_FORMAT_UNKNOWN, - LoadMode::kUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGGG}, + {DXGI_FORMAT_R32G32_FLOAT, DXGI_FORMAT_R32G32_FLOAT, kLoadShaderIndex64bpb, + DXGI_FORMAT_R32G32_FLOAT, kLoadShaderIndexUnknown, false, + DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, + xenos::XE_GPU_TEXTURE_SWIZZLE_RGGG}, // k_32_32_32_32_FLOAT {DXGI_FORMAT_R32G32B32A32_FLOAT, DXGI_FORMAT_R32G32B32A32_FLOAT, - LoadMode::k128bpb, DXGI_FORMAT_R32G32B32A32_FLOAT, LoadMode::kUnknown, - false, DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, - xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA}, + kLoadShaderIndex128bpb, DXGI_FORMAT_R32G32B32A32_FLOAT, + kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN, + kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA}, // k_32_AS_8 - {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, - DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, false, DXGI_FORMAT_UNKNOWN, - LoadMode::kUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR}, + {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, + DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN, + kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR}, // k_32_AS_8_8 - {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, - DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, false, DXGI_FORMAT_UNKNOWN, - LoadMode::kUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGGG}, + {DXGI_FORMAT_UNKNOWN, 
DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, + DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN, + kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGGG}, // k_16_MPEG - {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, - DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, false, DXGI_FORMAT_UNKNOWN, - LoadMode::kUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR}, + {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, + DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN, + kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR}, // k_16_16_MPEG - {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, - DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, false, DXGI_FORMAT_UNKNOWN, - LoadMode::kUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGGG}, + {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, + DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN, + kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGGG}, // k_8_INTERLACED - {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, - DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, false, DXGI_FORMAT_UNKNOWN, - LoadMode::kUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR}, + {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, + DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN, + kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR}, // k_32_AS_8_INTERLACED - {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, - DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, false, DXGI_FORMAT_UNKNOWN, - LoadMode::kUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR}, + {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, + DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN, + kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR}, // k_32_AS_8_8_INTERLACED - {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, - DXGI_FORMAT_UNKNOWN, 
LoadMode::kUnknown, false, DXGI_FORMAT_UNKNOWN, - LoadMode::kUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGGG}, + {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, + DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN, + kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGGG}, // k_16_INTERLACED - {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, - DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, false, DXGI_FORMAT_UNKNOWN, - LoadMode::kUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR}, + {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, + DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN, + kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR}, // k_16_MPEG_INTERLACED - {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, - DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, false, DXGI_FORMAT_UNKNOWN, - LoadMode::kUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR}, + {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, + DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN, + kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR}, // k_16_16_MPEG_INTERLACED - {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, - DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, false, DXGI_FORMAT_UNKNOWN, - LoadMode::kUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGGG}, + {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, + DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN, + kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGGG}, // k_DXN - {DXGI_FORMAT_BC5_UNORM, DXGI_FORMAT_BC5_UNORM, LoadMode::k128bpb, - DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, true, DXGI_FORMAT_R8G8_UNORM, - LoadMode::kDXNToRG8, xenos::XE_GPU_TEXTURE_SWIZZLE_RGGG}, + {DXGI_FORMAT_BC5_UNORM, DXGI_FORMAT_BC5_UNORM, kLoadShaderIndex128bpb, + DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, true, DXGI_FORMAT_R8G8_UNORM, + kLoadShaderIndexDXNToRG8, 
xenos::XE_GPU_TEXTURE_SWIZZLE_RGGG}, // k_8_8_8_8_AS_16_16_16_16 {DXGI_FORMAT_R8G8B8A8_TYPELESS, DXGI_FORMAT_R8G8B8A8_UNORM, - LoadMode::k32bpb, DXGI_FORMAT_R8G8B8A8_SNORM, LoadMode::kUnknown, false, - DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, + kLoadShaderIndex32bpb, DXGI_FORMAT_R8G8B8A8_SNORM, kLoadShaderIndexUnknown, + false, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA}, // k_DXT1_AS_16_16_16_16 - {DXGI_FORMAT_BC1_UNORM, DXGI_FORMAT_BC1_UNORM, LoadMode::k64bpb, - DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, true, DXGI_FORMAT_R8G8B8A8_UNORM, - LoadMode::kDXT1ToRGBA8, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA}, + {DXGI_FORMAT_BC1_UNORM, DXGI_FORMAT_BC1_UNORM, kLoadShaderIndex64bpb, + DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, true, + DXGI_FORMAT_R8G8B8A8_UNORM, kLoadShaderIndexDXT1ToRGBA8, + xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA}, // k_DXT2_3_AS_16_16_16_16 - {DXGI_FORMAT_BC2_UNORM, DXGI_FORMAT_BC2_UNORM, LoadMode::k128bpb, - DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, true, DXGI_FORMAT_R8G8B8A8_UNORM, - LoadMode::kDXT3ToRGBA8, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA}, + {DXGI_FORMAT_BC2_UNORM, DXGI_FORMAT_BC2_UNORM, kLoadShaderIndex128bpb, + DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, true, + DXGI_FORMAT_R8G8B8A8_UNORM, kLoadShaderIndexDXT3ToRGBA8, + xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA}, // k_DXT4_5_AS_16_16_16_16 - {DXGI_FORMAT_BC3_UNORM, DXGI_FORMAT_BC3_UNORM, LoadMode::k128bpb, - DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, true, DXGI_FORMAT_R8G8B8A8_UNORM, - LoadMode::kDXT5ToRGBA8, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA}, + {DXGI_FORMAT_BC3_UNORM, DXGI_FORMAT_BC3_UNORM, kLoadShaderIndex128bpb, + DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, true, + DXGI_FORMAT_R8G8B8A8_UNORM, kLoadShaderIndexDXT5ToRGBA8, + xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA}, // k_2_10_10_10_AS_16_16_16_16 {DXGI_FORMAT_R10G10B10A2_UNORM, DXGI_FORMAT_R10G10B10A2_UNORM, - LoadMode::k32bpb, DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, false, - DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, + 
kLoadShaderIndex32bpb, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, false, + DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA}, // k_10_11_11_AS_16_16_16_16 {DXGI_FORMAT_R16G16B16A16_TYPELESS, DXGI_FORMAT_R16G16B16A16_UNORM, - LoadMode::kR11G11B10ToRGBA16, DXGI_FORMAT_R16G16B16A16_SNORM, - LoadMode::kR11G11B10ToRGBA16SNorm, false, DXGI_FORMAT_UNKNOWN, - LoadMode::kUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBB}, + kLoadShaderIndexR11G11B10ToRGBA16, DXGI_FORMAT_R16G16B16A16_SNORM, + kLoadShaderIndexR11G11B10ToRGBA16SNorm, false, DXGI_FORMAT_UNKNOWN, + kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBB}, // k_11_11_10_AS_16_16_16_16 {DXGI_FORMAT_R16G16B16A16_TYPELESS, DXGI_FORMAT_R16G16B16A16_UNORM, - LoadMode::kR10G11B11ToRGBA16, DXGI_FORMAT_R16G16B16A16_SNORM, - LoadMode::kR10G11B11ToRGBA16SNorm, false, DXGI_FORMAT_UNKNOWN, - LoadMode::kUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBB}, + kLoadShaderIndexR10G11B11ToRGBA16, DXGI_FORMAT_R16G16B16A16_SNORM, + kLoadShaderIndexR10G11B11ToRGBA16SNorm, false, DXGI_FORMAT_UNKNOWN, + kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBB}, // k_32_32_32_FLOAT - {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, - DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, false, DXGI_FORMAT_UNKNOWN, - LoadMode::kUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBB}, + {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, + DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN, + kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBB}, // k_DXT3A // R8_UNORM has the same size as BC2, but doesn't have the 4x4 size // alignment requirement. 
- {DXGI_FORMAT_R8_UNORM, DXGI_FORMAT_R8_UNORM, LoadMode::kDXT3A, - DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, false, DXGI_FORMAT_UNKNOWN, - LoadMode::kUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR}, + {DXGI_FORMAT_R8_UNORM, DXGI_FORMAT_R8_UNORM, kLoadShaderIndexDXT3A, + DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN, + kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR}, // k_DXT5A - {DXGI_FORMAT_BC4_UNORM, DXGI_FORMAT_BC4_UNORM, LoadMode::k64bpb, - DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, true, DXGI_FORMAT_R8_UNORM, - LoadMode::kDXT5AToR8, xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR}, + {DXGI_FORMAT_BC4_UNORM, DXGI_FORMAT_BC4_UNORM, kLoadShaderIndex64bpb, + DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, true, DXGI_FORMAT_R8_UNORM, + kLoadShaderIndexDXT5AToR8, xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR}, // k_CTX1 - {DXGI_FORMAT_R8G8_UNORM, DXGI_FORMAT_R8G8_UNORM, LoadMode::kCTX1, - DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, false, DXGI_FORMAT_UNKNOWN, - LoadMode::kUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGGG}, + {DXGI_FORMAT_R8G8_UNORM, DXGI_FORMAT_R8G8_UNORM, kLoadShaderIndexCTX1, + DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN, + kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGGG}, // k_DXT3A_AS_1_1_1_1 {DXGI_FORMAT_B4G4R4A4_UNORM, DXGI_FORMAT_B4G4R4A4_UNORM, - LoadMode::kDXT3AAs1111ToBGRA4, DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, - false, DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, - xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA}, + kLoadShaderIndexDXT3AAs1111ToBGRA4, DXGI_FORMAT_UNKNOWN, + kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN, + kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA}, // k_8_8_8_8_GAMMA_EDRAM // Not usable as a texture. 
- {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, - DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, false, DXGI_FORMAT_UNKNOWN, - LoadMode::kUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA}, + {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, + DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN, + kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA}, // k_2_10_10_10_FLOAT_EDRAM // Not usable as a texture. - {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, - DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, false, DXGI_FORMAT_UNKNOWN, - LoadMode::kUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA}, -}; - -const D3D12TextureCache::LoadModeInfo D3D12TextureCache::load_mode_info_[] = { - {shaders::texture_load_8bpb_cs, sizeof(shaders::texture_load_8bpb_cs), - shaders::texture_load_8bpb_scaled_cs, - sizeof(shaders::texture_load_8bpb_scaled_cs), 3, 4, 1, 4}, - {shaders::texture_load_16bpb_cs, sizeof(shaders::texture_load_16bpb_cs), - shaders::texture_load_16bpb_scaled_cs, - sizeof(shaders::texture_load_16bpb_scaled_cs), 4, 4, 2, 4}, - {shaders::texture_load_32bpb_cs, sizeof(shaders::texture_load_32bpb_cs), - shaders::texture_load_32bpb_scaled_cs, - sizeof(shaders::texture_load_32bpb_scaled_cs), 4, 4, 4, 3}, - {shaders::texture_load_64bpb_cs, sizeof(shaders::texture_load_64bpb_cs), - shaders::texture_load_64bpb_scaled_cs, - sizeof(shaders::texture_load_64bpb_scaled_cs), 4, 4, 8, 2}, - {shaders::texture_load_128bpb_cs, sizeof(shaders::texture_load_128bpb_cs), - shaders::texture_load_128bpb_scaled_cs, - sizeof(shaders::texture_load_128bpb_scaled_cs), 4, 4, 16, 1}, - {shaders::texture_load_r5g5b5a1_b5g5r5a1_cs, - sizeof(shaders::texture_load_r5g5b5a1_b5g5r5a1_cs), - shaders::texture_load_r5g5b5a1_b5g5r5a1_scaled_cs, - sizeof(shaders::texture_load_r5g5b5a1_b5g5r5a1_scaled_cs), 4, 4, 2, 4}, - {shaders::texture_load_r5g6b5_b5g6r5_cs, - sizeof(shaders::texture_load_r5g6b5_b5g6r5_cs), - 
shaders::texture_load_r5g6b5_b5g6r5_scaled_cs, - sizeof(shaders::texture_load_r5g6b5_b5g6r5_scaled_cs), 4, 4, 2, 4}, - {shaders::texture_load_r5g5b6_b5g6r5_swizzle_rbga_cs, - sizeof(shaders::texture_load_r5g5b6_b5g6r5_swizzle_rbga_cs), - shaders::texture_load_r5g5b6_b5g6r5_swizzle_rbga_scaled_cs, - sizeof(shaders::texture_load_r5g5b6_b5g6r5_swizzle_rbga_scaled_cs), 4, 4, - 2, 4}, - {shaders::texture_load_r4g4b4a4_b4g4r4a4_cs, - sizeof(shaders::texture_load_r4g4b4a4_b4g4r4a4_cs), - shaders::texture_load_r4g4b4a4_b4g4r4a4_scaled_cs, - sizeof(shaders::texture_load_r4g4b4a4_b4g4r4a4_scaled_cs), 4, 4, 2, 4}, - {shaders::texture_load_gbgr8_grgb8_cs, - sizeof(shaders::texture_load_gbgr8_grgb8_cs), nullptr, 0, 4, 4, 4, 3}, - {shaders::texture_load_gbgr8_rgb8_cs, - sizeof(shaders::texture_load_gbgr8_rgb8_cs), nullptr, 0, 4, 4, 8, 3}, - {shaders::texture_load_bgrg8_rgbg8_cs, - sizeof(shaders::texture_load_bgrg8_rgbg8_cs), nullptr, 0, 4, 4, 4, 3}, - {shaders::texture_load_bgrg8_rgb8_cs, - sizeof(shaders::texture_load_bgrg8_rgb8_cs), nullptr, 0, 4, 4, 8, 3}, - {shaders::texture_load_r10g11b11_rgba16_cs, - sizeof(shaders::texture_load_r10g11b11_rgba16_cs), - shaders::texture_load_r10g11b11_rgba16_scaled_cs, - sizeof(shaders::texture_load_r10g11b11_rgba16_scaled_cs), 4, 4, 8, 3}, - {shaders::texture_load_r10g11b11_rgba16_snorm_cs, - sizeof(shaders::texture_load_r10g11b11_rgba16_snorm_cs), - shaders::texture_load_r10g11b11_rgba16_snorm_scaled_cs, - sizeof(shaders::texture_load_r10g11b11_rgba16_snorm_scaled_cs), 4, 4, 8, - 3}, - {shaders::texture_load_r11g11b10_rgba16_cs, - sizeof(shaders::texture_load_r11g11b10_rgba16_cs), - shaders::texture_load_r11g11b10_rgba16_scaled_cs, - sizeof(shaders::texture_load_r11g11b10_rgba16_scaled_cs), 4, 4, 8, 3}, - {shaders::texture_load_r11g11b10_rgba16_snorm_cs, - sizeof(shaders::texture_load_r11g11b10_rgba16_snorm_cs), - shaders::texture_load_r11g11b10_rgba16_snorm_scaled_cs, - sizeof(shaders::texture_load_r11g11b10_rgba16_snorm_scaled_cs), 4, 
4, 8, - 3}, - {shaders::texture_load_dxt1_rgba8_cs, - sizeof(shaders::texture_load_dxt1_rgba8_cs), nullptr, 0, 4, 4, 4, 2}, - {shaders::texture_load_dxt3_rgba8_cs, - sizeof(shaders::texture_load_dxt3_rgba8_cs), nullptr, 0, 4, 4, 4, 1}, - {shaders::texture_load_dxt5_rgba8_cs, - sizeof(shaders::texture_load_dxt5_rgba8_cs), nullptr, 0, 4, 4, 4, 1}, - {shaders::texture_load_dxn_rg8_cs, sizeof(shaders::texture_load_dxn_rg8_cs), - nullptr, 0, 4, 4, 2, 1}, - {shaders::texture_load_dxt3a_cs, sizeof(shaders::texture_load_dxt3a_cs), - nullptr, 0, 4, 4, 1, 2}, - {shaders::texture_load_dxt3aas1111_bgra4_cs, - sizeof(shaders::texture_load_dxt3aas1111_bgra4_cs), nullptr, 0, 4, 4, 2, - 2}, - {shaders::texture_load_dxt5a_r8_cs, - sizeof(shaders::texture_load_dxt5a_r8_cs), nullptr, 0, 4, 4, 1, 2}, - {shaders::texture_load_ctx1_cs, sizeof(shaders::texture_load_ctx1_cs), - nullptr, 0, 4, 4, 2, 2}, - {shaders::texture_load_depth_unorm_cs, - sizeof(shaders::texture_load_depth_unorm_cs), - shaders::texture_load_depth_unorm_scaled_cs, - sizeof(shaders::texture_load_depth_unorm_scaled_cs), 4, 4, 4, 3}, - {shaders::texture_load_depth_float_cs, - sizeof(shaders::texture_load_depth_float_cs), - shaders::texture_load_depth_float_scaled_cs, - sizeof(shaders::texture_load_depth_float_scaled_cs), 4, 4, 4, 3}, + {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, + DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN, + kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA}, }; D3D12TextureCache::D3D12TextureCache(const RegisterFile& register_file, @@ -551,31 +476,176 @@ bool D3D12TextureCache::Initialize() { return false; } + // Specify the load shader code. 
+ D3D12_SHADER_BYTECODE load_shader_code[kLoadShaderCount] = {}; + load_shader_code[kLoadShaderIndex8bpb] = D3D12_SHADER_BYTECODE{ + shaders::texture_load_8bpb_cs, sizeof(shaders::texture_load_8bpb_cs)}; + load_shader_code[kLoadShaderIndex16bpb] = D3D12_SHADER_BYTECODE{ + shaders::texture_load_16bpb_cs, sizeof(shaders::texture_load_16bpb_cs)}; + load_shader_code[kLoadShaderIndex32bpb] = D3D12_SHADER_BYTECODE{ + shaders::texture_load_32bpb_cs, sizeof(shaders::texture_load_32bpb_cs)}; + load_shader_code[kLoadShaderIndex64bpb] = D3D12_SHADER_BYTECODE{ + shaders::texture_load_64bpb_cs, sizeof(shaders::texture_load_64bpb_cs)}; + load_shader_code[kLoadShaderIndex128bpb] = D3D12_SHADER_BYTECODE{ + shaders::texture_load_128bpb_cs, sizeof(shaders::texture_load_128bpb_cs)}; + load_shader_code[kLoadShaderIndexR5G5B5A1ToB5G5R5A1] = + D3D12_SHADER_BYTECODE{shaders::texture_load_r5g5b5a1_b5g5r5a1_cs, + sizeof(shaders::texture_load_r5g5b5a1_b5g5r5a1_cs)}; + load_shader_code[kLoadShaderIndexR5G6B5ToB5G6R5] = + D3D12_SHADER_BYTECODE{shaders::texture_load_r5g6b5_b5g6r5_cs, + sizeof(shaders::texture_load_r5g6b5_b5g6r5_cs)}; + load_shader_code[kLoadShaderIndexR5G5B6ToB5G6R5WithRBGASwizzle] = + D3D12_SHADER_BYTECODE{ + shaders::texture_load_r5g5b6_b5g6r5_swizzle_rbga_cs, + sizeof(shaders::texture_load_r5g5b6_b5g6r5_swizzle_rbga_cs)}; + load_shader_code[kLoadShaderIndexRGBA4ToBGRA4] = + D3D12_SHADER_BYTECODE{shaders::texture_load_r4g4b4a4_b4g4r4a4_cs, + sizeof(shaders::texture_load_r4g4b4a4_b4g4r4a4_cs)}; + load_shader_code[kLoadShaderIndexGBGR8ToGRGB8] = + D3D12_SHADER_BYTECODE{shaders::texture_load_gbgr8_grgb8_cs, + sizeof(shaders::texture_load_gbgr8_grgb8_cs)}; + load_shader_code[kLoadShaderIndexGBGR8ToRGB8] = + D3D12_SHADER_BYTECODE{shaders::texture_load_gbgr8_rgb8_cs, + sizeof(shaders::texture_load_gbgr8_rgb8_cs)}; + load_shader_code[kLoadShaderIndexBGRG8ToRGBG8] = + D3D12_SHADER_BYTECODE{shaders::texture_load_bgrg8_rgbg8_cs, + sizeof(shaders::texture_load_bgrg8_rgbg8_cs)}; + 
load_shader_code[kLoadShaderIndexBGRG8ToRGB8] = + D3D12_SHADER_BYTECODE{shaders::texture_load_bgrg8_rgb8_cs, + sizeof(shaders::texture_load_bgrg8_rgb8_cs)}; + load_shader_code[kLoadShaderIndexR10G11B11ToRGBA16] = + D3D12_SHADER_BYTECODE{shaders::texture_load_r10g11b11_rgba16_cs, + sizeof(shaders::texture_load_r10g11b11_rgba16_cs)}; + load_shader_code[kLoadShaderIndexR10G11B11ToRGBA16SNorm] = + D3D12_SHADER_BYTECODE{ + shaders::texture_load_r10g11b11_rgba16_snorm_cs, + sizeof(shaders::texture_load_r10g11b11_rgba16_snorm_cs)}; + load_shader_code[kLoadShaderIndexR11G11B10ToRGBA16] = + D3D12_SHADER_BYTECODE{shaders::texture_load_r11g11b10_rgba16_cs, + sizeof(shaders::texture_load_r11g11b10_rgba16_cs)}; + load_shader_code[kLoadShaderIndexR11G11B10ToRGBA16SNorm] = + D3D12_SHADER_BYTECODE{ + shaders::texture_load_r11g11b10_rgba16_snorm_cs, + sizeof(shaders::texture_load_r11g11b10_rgba16_snorm_cs)}; + load_shader_code[kLoadShaderIndexDXT1ToRGBA8] = + D3D12_SHADER_BYTECODE{shaders::texture_load_dxt1_rgba8_cs, + sizeof(shaders::texture_load_dxt1_rgba8_cs)}; + load_shader_code[kLoadShaderIndexDXT3ToRGBA8] = + D3D12_SHADER_BYTECODE{shaders::texture_load_dxt3_rgba8_cs, + sizeof(shaders::texture_load_dxt3_rgba8_cs)}; + load_shader_code[kLoadShaderIndexDXT5ToRGBA8] = + D3D12_SHADER_BYTECODE{shaders::texture_load_dxt5_rgba8_cs, + sizeof(shaders::texture_load_dxt5_rgba8_cs)}; + load_shader_code[kLoadShaderIndexDXNToRG8] = + D3D12_SHADER_BYTECODE{shaders::texture_load_dxn_rg8_cs, + sizeof(shaders::texture_load_dxn_rg8_cs)}; + load_shader_code[kLoadShaderIndexDXT3A] = D3D12_SHADER_BYTECODE{ + shaders::texture_load_dxt3a_cs, sizeof(shaders::texture_load_dxt3a_cs)}; + load_shader_code[kLoadShaderIndexDXT3AAs1111ToBGRA4] = + D3D12_SHADER_BYTECODE{shaders::texture_load_dxt3aas1111_bgra4_cs, + sizeof(shaders::texture_load_dxt3aas1111_bgra4_cs)}; + load_shader_code[kLoadShaderIndexDXT5AToR8] = + D3D12_SHADER_BYTECODE{shaders::texture_load_dxt5a_r8_cs, + 
sizeof(shaders::texture_load_dxt5a_r8_cs)}; + load_shader_code[kLoadShaderIndexCTX1] = D3D12_SHADER_BYTECODE{ + shaders::texture_load_ctx1_cs, sizeof(shaders::texture_load_ctx1_cs)}; + load_shader_code[kLoadShaderIndexDepthUnorm] = + D3D12_SHADER_BYTECODE{shaders::texture_load_depth_unorm_cs, + sizeof(shaders::texture_load_depth_unorm_cs)}; + load_shader_code[kLoadShaderIndexDepthFloat] = + D3D12_SHADER_BYTECODE{shaders::texture_load_depth_float_cs, + sizeof(shaders::texture_load_depth_float_cs)}; + D3D12_SHADER_BYTECODE load_shader_code_scaled[kLoadShaderCount] = {}; + if (IsDrawResolutionScaled()) { + load_shader_code_scaled[kLoadShaderIndex8bpb] = + D3D12_SHADER_BYTECODE{shaders::texture_load_8bpb_scaled_cs, + sizeof(shaders::texture_load_8bpb_scaled_cs)}; + load_shader_code_scaled[kLoadShaderIndex16bpb] = + D3D12_SHADER_BYTECODE{shaders::texture_load_16bpb_scaled_cs, + sizeof(shaders::texture_load_16bpb_scaled_cs)}; + load_shader_code_scaled[kLoadShaderIndex32bpb] = + D3D12_SHADER_BYTECODE{shaders::texture_load_32bpb_scaled_cs, + sizeof(shaders::texture_load_32bpb_scaled_cs)}; + load_shader_code_scaled[kLoadShaderIndex64bpb] = + D3D12_SHADER_BYTECODE{shaders::texture_load_64bpb_scaled_cs, + sizeof(shaders::texture_load_64bpb_scaled_cs)}; + load_shader_code_scaled[kLoadShaderIndex128bpb] = + D3D12_SHADER_BYTECODE{shaders::texture_load_128bpb_scaled_cs, + sizeof(shaders::texture_load_128bpb_scaled_cs)}; + load_shader_code_scaled[kLoadShaderIndexR5G5B5A1ToB5G5R5A1] = + D3D12_SHADER_BYTECODE{ + shaders::texture_load_r5g5b5a1_b5g5r5a1_scaled_cs, + sizeof(shaders::texture_load_r5g5b5a1_b5g5r5a1_scaled_cs)}; + load_shader_code_scaled[kLoadShaderIndexR5G6B5ToB5G6R5] = + D3D12_SHADER_BYTECODE{ + shaders::texture_load_r5g6b5_b5g6r5_scaled_cs, + sizeof(shaders::texture_load_r5g6b5_b5g6r5_scaled_cs)}; + load_shader_code_scaled[kLoadShaderIndexR5G5B6ToB5G6R5WithRBGASwizzle] = + D3D12_SHADER_BYTECODE{ + shaders::texture_load_r5g5b6_b5g6r5_swizzle_rbga_scaled_cs, + 
sizeof(shaders::texture_load_r5g5b6_b5g6r5_swizzle_rbga_scaled_cs)}; + load_shader_code_scaled[kLoadShaderIndexRGBA4ToBGRA4] = + D3D12_SHADER_BYTECODE{ + shaders::texture_load_r4g4b4a4_b4g4r4a4_scaled_cs, + sizeof(shaders::texture_load_r4g4b4a4_b4g4r4a4_scaled_cs)}; + load_shader_code_scaled[kLoadShaderIndexR10G11B11ToRGBA16] = + D3D12_SHADER_BYTECODE{ + shaders::texture_load_r10g11b11_rgba16_scaled_cs, + sizeof(shaders::texture_load_r10g11b11_rgba16_scaled_cs)}; + load_shader_code_scaled[kLoadShaderIndexR10G11B11ToRGBA16SNorm] = + D3D12_SHADER_BYTECODE{ + shaders::texture_load_r10g11b11_rgba16_snorm_scaled_cs, + sizeof(shaders::texture_load_r10g11b11_rgba16_snorm_scaled_cs)}; + load_shader_code_scaled[kLoadShaderIndexR11G11B10ToRGBA16] = + D3D12_SHADER_BYTECODE{ + shaders::texture_load_r11g11b10_rgba16_scaled_cs, + sizeof(shaders::texture_load_r11g11b10_rgba16_scaled_cs)}; + load_shader_code_scaled[kLoadShaderIndexR11G11B10ToRGBA16SNorm] = + D3D12_SHADER_BYTECODE{ + shaders::texture_load_r11g11b10_rgba16_snorm_scaled_cs, + sizeof(shaders::texture_load_r11g11b10_rgba16_snorm_scaled_cs)}; + load_shader_code_scaled[kLoadShaderIndexDepthUnorm] = D3D12_SHADER_BYTECODE{ + shaders::texture_load_depth_unorm_scaled_cs, + sizeof(shaders::texture_load_depth_unorm_scaled_cs)}; + load_shader_code_scaled[kLoadShaderIndexDepthFloat] = D3D12_SHADER_BYTECODE{ + shaders::texture_load_depth_float_scaled_cs, + sizeof(shaders::texture_load_depth_float_scaled_cs)}; + } + // Create the loading pipelines. 
- for (uint32_t i = 0; i < uint32_t(LoadMode::kCount); ++i) { - const LoadModeInfo& load_mode_info = load_mode_info_[i]; + for (size_t i = 0; i < kLoadShaderCount; ++i) { + const D3D12_SHADER_BYTECODE& current_load_shader_code = load_shader_code[i]; + if (!current_load_shader_code.pShaderBytecode) { + continue; + } *(load_pipelines_[i].ReleaseAndGetAddressOf()) = - ui::d3d12::util::CreateComputePipeline(device, load_mode_info.shader, - load_mode_info.shader_size, - load_root_signature_.Get()); + ui::d3d12::util::CreateComputePipeline( + device, current_load_shader_code.pShaderBytecode, + current_load_shader_code.BytecodeLength, + load_root_signature_.Get()); if (!load_pipelines_[i]) { XELOGE( "D3D12TextureCache: Failed to create the texture loading pipeline " - "for mode {}", + "for shader {}", i); return false; } - if (IsDrawResolutionScaled() && load_mode_info.shader_scaled) { - *(load_pipelines_scaled_[i].ReleaseAndGetAddressOf()) = - ui::d3d12::util::CreateComputePipeline( - device, load_mode_info.shader_scaled, - load_mode_info.shader_scaled_size, load_root_signature_.Get()); - if (!load_pipelines_scaled_[i]) { - XELOGE( - "D3D12TextureCache: Failed to create the resolution-scaled texture " - "loading pipeline for mode {}", - i); - return false; + if (IsDrawResolutionScaled()) { + const D3D12_SHADER_BYTECODE& current_load_shader_code_scaled = + load_shader_code_scaled[i]; + if (current_load_shader_code_scaled.pShaderBytecode) { + *(load_pipelines_scaled_[i].ReleaseAndGetAddressOf()) = + ui::d3d12::util::CreateComputePipeline( + device, current_load_shader_code_scaled.pShaderBytecode, + current_load_shader_code_scaled.BytecodeLength, + load_root_signature_.Get()); + if (!load_pipelines_scaled_[i]) { + XELOGE( + "D3D12TextureCache: Failed to create the resolution-scaled " + "texture loading pipeline for shader {}", + i); + return false; + } } } } @@ -689,8 +759,8 @@ void D3D12TextureCache::EndFrame() { } XELOGE("* {}{}{}{}", 
FormatInfo::Get(xenos::TextureFormat(i))->name, unsupported_features & kUnsupportedResourceBit ? " resource" : "", - unsupported_features & kUnsupportedUnormBit ? " unorm" : "", - unsupported_features & kUnsupportedSnormBit ? " snorm" : ""); + unsupported_features & kUnsupportedUnormBit ? " unsigned" : "", + unsupported_features & kUnsupportedSnormBit ? " signed" : ""); unsupported_format_features_used_[i] = 0; } } @@ -1383,28 +1453,29 @@ bool D3D12TextureCache::IsDecompressionNeeded(xenos::TextureFormat format, (height & (format_info->block_height - 1)) != 0; } -D3D12TextureCache::LoadMode D3D12TextureCache::GetLoadMode(TextureKey key) { +TextureCache::LoadShaderIndex D3D12TextureCache::GetLoadShaderIndex( + TextureKey key) { const HostFormat& host_format = host_formats_[uint32_t(key.format)]; if (key.signed_separate) { - return host_format.load_mode_snorm; + return host_format.load_shader_signed; } if (IsDecompressionNeeded(key.format, key.GetWidth(), key.GetHeight())) { - return host_format.decompress_mode; + return host_format.load_shader_decompress; } - return host_format.load_mode; + return host_format.load_shader; } bool D3D12TextureCache::IsSignedVersionSeparateForFormat(TextureKey key) const { const HostFormat& host_format = host_formats_[uint32_t(key.format)]; - return host_format.load_mode_snorm != LoadMode::kUnknown && - host_format.load_mode_snorm != host_format.load_mode; + return host_format.load_shader_signed != kLoadShaderIndexUnknown && + host_format.load_shader_signed != host_format.load_shader; } bool D3D12TextureCache::IsScaledResolveSupportedForFormat( TextureKey key) const { - LoadMode load_mode = GetLoadMode(key); - return load_mode != LoadMode::kUnknown && - load_pipelines_scaled_[uint32_t(load_mode)] != nullptr; + LoadShaderIndex load_shader = GetLoadShaderIndex(key); + return load_shader != kLoadShaderIndexUnknown && + load_pipelines_scaled_[load_shader] != nullptr; } uint32_t D3D12TextureCache::GetHostFormatSwizzle(TextureKey key) const 
{ @@ -1502,19 +1573,18 @@ bool D3D12TextureCache::LoadTextureDataFromResidentMemoryImpl(Texture& texture, ID3D12Device* device = command_processor_.GetD3D12Provider().GetDevice(); // Get the pipeline. - LoadMode load_mode = GetLoadMode(texture_key); - if (load_mode == LoadMode::kUnknown) { + LoadShaderIndex load_shader = GetLoadShaderIndex(texture_key); + if (load_shader == kLoadShaderIndexUnknown) { return false; } bool texture_resolution_scaled = texture_key.scaled_resolve; ID3D12PipelineState* pipeline = - texture_resolution_scaled - ? load_pipelines_scaled_[uint32_t(load_mode)].Get() - : load_pipelines_[uint32_t(load_mode)].Get(); + texture_resolution_scaled ? load_pipelines_scaled_[load_shader].Get() + : load_pipelines_[load_shader].Get(); if (pipeline == nullptr) { return false; } - const LoadModeInfo& load_mode_info = load_mode_info_[uint32_t(load_mode)]; + const LoadShaderInfo& load_shader_info = GetLoadShaderInfo(load_shader); // Get the guest layout. const texture_util::TextureGuestLayout& guest_layout = @@ -1549,7 +1619,7 @@ bool D3D12TextureCache::LoadTextureDataFromResidentMemoryImpl(Texture& texture, uint32_t host_block_width = host_block_compressed ? block_width : 1; uint32_t host_block_height = host_block_compressed ? block_height : 1; uint32_t host_x_blocks_per_thread = - UINT32_C(1) << load_mode_info.guest_x_blocks_per_thread_log2; + UINT32_C(1) << load_shader_info.guest_x_blocks_per_thread_log2; if (!host_block_compressed) { // Decompressing guest blocks. 
host_x_blocks_per_thread *= block_width; @@ -1598,7 +1668,7 @@ bool D3D12TextureCache::LoadTextureDataFromResidentMemoryImpl(Texture& texture, xe::align(xe::round_up(host_slice_layout_base.Footprint.Width / host_block_width, host_x_blocks_per_thread) * - load_mode_info.bytes_per_host_block, + load_shader_info.bytes_per_host_block, uint32_t(D3D12_TEXTURE_DATA_PITCH_ALIGNMENT)); host_slice_size_base = xe::align( UINT64(host_slice_layout_base.Footprint.RowPitch) * @@ -1643,7 +1713,7 @@ bool D3D12TextureCache::LoadTextureDataFromResidentMemoryImpl(Texture& texture, xe::align(xe::round_up(host_slice_layout_mip.Footprint.Width / host_block_width, host_x_blocks_per_thread) * - load_mode_info.bytes_per_host_block, + load_shader_info.bytes_per_host_block, uint32_t(D3D12_TEXTURE_DATA_PITCH_ALIGNMENT)); UINT64 host_slice_sizes_mip = xe::align( UINT64(host_slice_layout_mip.Footprint.RowPitch) * @@ -1693,8 +1763,8 @@ bool D3D12TextureCache::LoadTextureDataFromResidentMemoryImpl(Texture& texture, descriptors_allocated[descriptor_write_index++]; ui::d3d12::util::CreateBufferTypedUAV( device, descriptor_dest.first, copy_buffer, - ui::d3d12::util::GetUintPow2DXGIFormat(load_mode_info.uav_bpe_log2), - uint32_t(copy_buffer_size) >> load_mode_info.uav_bpe_log2); + ui::d3d12::util::GetUintPow2DXGIFormat(load_shader_info.dest_bpe_log2), + uint32_t(copy_buffer_size) >> load_shader_info.dest_bpe_log2); command_list.D3DSetComputeRootDescriptorTable(2, descriptor_dest.second); // Set up the unscaled source descriptor (scaled needs two descriptors that // depend on the buffer being current, so they will be set later - for mips, @@ -1707,13 +1777,13 @@ bool D3D12TextureCache::LoadTextureDataFromResidentMemoryImpl(Texture& texture, if (bindless_resources_used_) { descriptor_unscaled_source = command_processor_.GetSharedMemoryUintPow2BindlessSRVHandlePair( - load_mode_info.srv_bpe_log2); + load_shader_info.source_bpe_log2); } else { assert_true(descriptor_write_index < descriptor_count); 
descriptor_unscaled_source = descriptors_allocated[descriptor_write_index++]; d3d12_shared_memory.WriteUintPow2SRVDescriptor( - descriptor_unscaled_source.first, load_mode_info.srv_bpe_log2); + descriptor_unscaled_source.first, load_shader_info.source_bpe_log2); } command_list.D3DSetComputeRootDescriptorTable( 1, descriptor_unscaled_source.second); @@ -1752,7 +1822,7 @@ bool D3D12TextureCache::LoadTextureDataFromResidentMemoryImpl(Texture& texture, // address is required - which may be different for base and mips. bool scaled_mips_source_set_up = false; uint32_t guest_x_blocks_per_group_log2 = - load_mode_info.GetGuestXBlocksPerGroupLog2(); + load_shader_info.GetGuestXBlocksPerGroupLog2(); for (uint32_t loop_level = loop_level_first; loop_level <= loop_level_last; ++loop_level) { bool is_base = loop_level == 0; @@ -1776,8 +1846,8 @@ bool D3D12TextureCache::LoadTextureDataFromResidentMemoryImpl(Texture& texture, assert_true(descriptor_write_index < descriptor_count); ui::d3d12::util::DescriptorCpuGpuHandlePair descriptor_scaled_source = descriptors_allocated[descriptor_write_index++]; - CreateCurrentScaledResolveRangeUintPow2SRV(descriptor_scaled_source.first, - load_mode_info.srv_bpe_log2); + CreateCurrentScaledResolveRangeUintPow2SRV( + descriptor_scaled_source.first, load_shader_info.source_bpe_log2); command_list.D3DSetComputeRootDescriptorTable( 1, descriptor_scaled_source.second); if (!is_base) { @@ -2003,7 +2073,7 @@ uint32_t D3D12TextureCache::FindOrCreateTextureDescriptor( if (is_signed) { // Not supporting signed compressed textures - hopefully DXN and DXT5A are // not used as signed. 
- desc.Format = host_formats_[uint32_t(format)].dxgi_format_snorm; + desc.Format = host_formats_[uint32_t(format)].dxgi_format_signed; } else { desc.Format = GetDXGIUnormFormat(texture_key); } diff --git a/src/xenia/gpu/d3d12/d3d12_texture_cache.h b/src/xenia/gpu/d3d12/d3d12_texture_cache.h index 9fb7bcf13..cf51ca905 100644 --- a/src/xenia/gpu/d3d12/d3d12_texture_cache.h +++ b/src/xenia/gpu/d3d12/d3d12_texture_cache.h @@ -179,85 +179,23 @@ class D3D12TextureCache final : public TextureCache { static constexpr uint32_t kLoadGuestXThreadsPerGroupLog2 = 2; static constexpr uint32_t kLoadGuestYBlocksPerGroupLog2 = 5; - enum class LoadMode { - k8bpb, - k16bpb, - k32bpb, - k64bpb, - k128bpb, - kR5G5B5A1ToB5G5R5A1, - kR5G6B5ToB5G6R5, - kR5G5B6ToB5G6R5WithRBGASwizzle, - kR4G4B4A4ToB4G4R4A4, - kGBGR8ToGRGB8, - kGBGR8ToRGB8, - kBGRG8ToRGBG8, - kBGRG8ToRGB8, - kR10G11B11ToRGBA16, - kR10G11B11ToRGBA16SNorm, - kR11G11B10ToRGBA16, - kR11G11B10ToRGBA16SNorm, - kDXT1ToRGBA8, - kDXT3ToRGBA8, - kDXT5ToRGBA8, - kDXNToRG8, - kDXT3A, - kDXT3AAs1111ToBGRA4, - kDXT5AToR8, - kCTX1, - kDepthUnorm, - kDepthFloat, - - kCount, - - kUnknown = kCount - }; - - struct LoadModeInfo { - // Shader without resolution scaling. - const void* shader; - size_t shader_size; - // Shader with resolution scaling, if available. These shaders are separate - // so the majority of the textures are not affected by the code needed for - // resolution scale support, and also to check if the format allows - // resolution scaling. - const void* shader_scaled; - size_t shader_scaled_size; - // Log2 of the sizes, in bytes, of the source (guest) SRV and the - // destination (host) UAV accessed by the copying shader, since the shader - // may copy multiple blocks per one invocation. - uint32_t srv_bpe_log2; - uint32_t uav_bpe_log2; - // Number of bytes in a host resolution-scaled block (corresponding to a - // guest block if not decompressing, or a host texel if decompressing) - // written by the shader. 
- uint32_t bytes_per_host_block; - // Log2 of the number of guest resolution-scaled blocks along the X axis - // loaded by a single thread shader group. - uint32_t guest_x_blocks_per_thread_log2; - - uint32_t GetGuestXBlocksPerGroupLog2() const { - return kLoadGuestXThreadsPerGroupLog2 + guest_x_blocks_per_thread_log2; - } - }; - struct HostFormat { // Format info for the regular case. // DXGI format (typeless when different signedness or number representation // is used) for the texture resource. DXGI_FORMAT dxgi_format_resource; // DXGI format for unsigned normalized or unsigned/signed float SRV. - DXGI_FORMAT dxgi_format_unorm; - // The regular load mode, used when special modes (like signed-specific or - // decompressing) aren't needed. - LoadMode load_mode; + DXGI_FORMAT dxgi_format_unsigned; + // The regular load shader, used when special load shaders (like + // signed-specific or decompressing) aren't needed. + LoadShaderIndex load_shader; // DXGI format for signed normalized or unsigned/signed float SRV. - DXGI_FORMAT dxgi_format_snorm; + DXGI_FORMAT dxgi_format_signed; // If the signed version needs a different bit representation on the host, - // this is the load mode for the signed version. Otherwise the regular - // load_mode will be used for the signed version, and a single copy will be - // created if both unsigned and signed are used. - LoadMode load_mode_snorm; + // this is the load shader for the signed version. Otherwise the regular + // load_shader will be used for the signed version, and a single copy will + // be created if both unsigned and signed are used. + LoadShaderIndex load_shader_signed; // Do NOT add integer DXGI formats to this - they are not filterable, can // only be read with Load, not Sample! If any game is seen using num_format @@ -276,7 +214,7 @@ class D3D12TextureCache final : public TextureCache { // supports unsigned normalized formats - let's hope GPUSIGN_SIGNED was not // used for DXN and DXT5A. 
DXGI_FORMAT dxgi_format_uncompressed; - LoadMode decompress_mode; + LoadShaderIndex load_shader_decompress; // Mapping of Xenos swizzle components to DXGI format components. uint32_t swizzle; @@ -440,13 +378,13 @@ class D3D12TextureCache final : public TextureCache { const HostFormat& host_format = host_formats_[uint32_t(format)]; return IsDecompressionNeeded(format, width, height) ? host_format.dxgi_format_uncompressed - : host_format.dxgi_format_unorm; + : host_format.dxgi_format_unsigned; } static DXGI_FORMAT GetDXGIUnormFormat(TextureKey key) { return GetDXGIUnormFormat(key.format, key.GetWidth(), key.GetHeight()); } - static LoadMode GetLoadMode(TextureKey key); + static LoadShaderIndex GetLoadShaderIndex(TextureKey key); static constexpr bool AreDimensionsCompatible( xenos::FetchOpDimension binding_dimension, @@ -528,14 +466,11 @@ class D3D12TextureCache final : public TextureCache { D3D12CommandProcessor& command_processor_; bool bindless_resources_used_; - static const LoadModeInfo load_mode_info_[]; Microsoft::WRL::ComPtr load_root_signature_; - std::array, - size_t(LoadMode::kCount)> + std::array, kLoadShaderCount> load_pipelines_; // Load pipelines for resolution-scaled resolve targets. 
- std::array, - size_t(LoadMode::kCount)> + std::array, kLoadShaderCount> load_pipelines_scaled_; std::vector srv_descriptor_cache_; diff --git a/src/xenia/gpu/texture_cache.cc b/src/xenia/gpu/texture_cache.cc index ebe503ce3..b09f6ad46 100644 --- a/src/xenia/gpu/texture_cache.cc +++ b/src/xenia/gpu/texture_cache.cc @@ -73,6 +73,80 @@ DEFINE_uint32( namespace xe { namespace gpu { +const TextureCache::LoadShaderInfo + TextureCache::load_shader_info_[kLoadShaderCount] = { + // k8bpb + {3, 4, 1, 4}, + // k16bpb + {4, 4, 2, 4}, + // k32bpb + {4, 4, 4, 3}, + // k64bpb + {4, 4, 8, 2}, + // k128bpb + {4, 4, 16, 1}, + // kR5G5B5A1ToB5G5R5A1 + {4, 4, 2, 4}, + // kR5G6B5ToB5G6R5 + {4, 4, 2, 4}, + // kR5G5B6ToB5G6R5WithRBGASwizzle + {4, 4, 2, 4}, + // kRGBA4ToBGRA4 + {4, 4, 2, 4}, + // kRGBA4ToARGB4 + {4, 4, 2, 4}, + // kGBGR8ToGRGB8 + {4, 4, 4, 3}, + // kGBGR8ToRGB8 + {4, 4, 8, 3}, + // kBGRG8ToRGBG8 + {4, 4, 4, 3}, + // kBGRG8ToRGB8 + {4, 4, 8, 3}, + // kR10G11B11ToRGBA16 + {4, 4, 8, 3}, + // kR10G11B11ToRGBA16SNorm + {4, 4, 8, 3}, + // kR11G11B10ToRGBA16 + {4, 4, 8, 3}, + // kR11G11B10ToRGBA16SNorm + {4, 4, 8, 3}, + // kR16UNormToFloat + {4, 4, 2, 4}, + // kR16SNormToFloat + {4, 4, 2, 4}, + // kRG16UNormToFloat + {4, 4, 4, 3}, + // kRG16SNormToFloat + {4, 4, 4, 3}, + // kRGBA16UNormToFloat + {4, 4, 8, 2}, + // kRGBA16SNormToFloat + {4, 4, 8, 2}, + // kDXT1ToRGBA8 + {4, 4, 4, 2}, + // kDXT3ToRGBA8 + {4, 4, 4, 1}, + // kDXT5ToRGBA8 + {4, 4, 4, 1}, + // kDXNToRG8 + {4, 4, 2, 1}, + // kDXT3A + {4, 4, 1, 2}, + // kDXT3AAs1111ToBGRA4 + {4, 4, 2, 2}, + // kDXT3AAs1111ToARGB4 + {4, 4, 2, 2}, + // kDXT5AToR8 + {4, 4, 1, 2}, + // kCTX1 + {4, 4, 2, 2}, + // kDepthUnorm + {4, 4, 4, 3}, + // kDepthFloat + {4, 4, 4, 3}, +}; + TextureCache::TextureCache(const RegisterFile& register_file, SharedMemory& shared_memory, uint32_t draw_resolution_scale_x, diff --git a/src/xenia/gpu/texture_cache.h b/src/xenia/gpu/texture_cache.h index 1802eaaca..b2c5b1d60 100644 --- 
a/src/xenia/gpu/texture_cache.h +++ b/src/xenia/gpu/texture_cache.h @@ -395,6 +395,69 @@ class TextureCache { uint32_t height_texels; }; + static constexpr uint32_t kLoadGuestXThreadsPerGroupLog2 = 2; + static constexpr uint32_t kLoadGuestYBlocksPerGroupLog2 = 5; + + enum LoadShaderIndex { + kLoadShaderIndex8bpb, + kLoadShaderIndex16bpb, + kLoadShaderIndex32bpb, + kLoadShaderIndex64bpb, + kLoadShaderIndex128bpb, + kLoadShaderIndexR5G5B5A1ToB5G5R5A1, + kLoadShaderIndexR5G6B5ToB5G6R5, + kLoadShaderIndexR5G5B6ToB5G6R5WithRBGASwizzle, + kLoadShaderIndexRGBA4ToBGRA4, + kLoadShaderIndexRGBA4ToARGB4, + kLoadShaderIndexGBGR8ToGRGB8, + kLoadShaderIndexGBGR8ToRGB8, + kLoadShaderIndexBGRG8ToRGBG8, + kLoadShaderIndexBGRG8ToRGB8, + kLoadShaderIndexR10G11B11ToRGBA16, + kLoadShaderIndexR10G11B11ToRGBA16SNorm, + kLoadShaderIndexR11G11B10ToRGBA16, + kLoadShaderIndexR11G11B10ToRGBA16SNorm, + kLoadShaderIndexR16UNormToFloat, + kLoadShaderIndexR16SNormToFloat, + kLoadShaderIndexRG16UNormToFloat, + kLoadShaderIndexRG16SNormToFloat, + kLoadShaderIndexRGBA16UNormToFloat, + kLoadShaderIndexRGBA16SNormToFloat, + kLoadShaderIndexDXT1ToRGBA8, + kLoadShaderIndexDXT3ToRGBA8, + kLoadShaderIndexDXT5ToRGBA8, + kLoadShaderIndexDXNToRG8, + kLoadShaderIndexDXT3A, + kLoadShaderIndexDXT3AAs1111ToBGRA4, + kLoadShaderIndexDXT3AAs1111ToARGB4, + kLoadShaderIndexDXT5AToR8, + kLoadShaderIndexCTX1, + kLoadShaderIndexDepthUnorm, + kLoadShaderIndexDepthFloat, + + kLoadShaderCount, + kLoadShaderIndexUnknown = kLoadShaderCount, + }; + + struct LoadShaderInfo { + // Log2 of the sizes, in bytes, of the elements in the source (guest) and + // the destination (host) buffer bindings accessed by the copying shader, + // since the shader may copy multiple blocks per one invocation. + uint32_t source_bpe_log2; + uint32_t dest_bpe_log2; + // Number of bytes in a host resolution-scaled block (corresponding to a + // guest block if not decompressing, or a host texel if decompressing) + // written by the shader. 
+ uint32_t bytes_per_host_block; + // Log2 of the number of guest resolution-scaled blocks along the X axis + // loaded by a single thread shader group. + uint32_t guest_x_blocks_per_thread_log2; + + uint32_t GetGuestXBlocksPerGroupLog2() const { + return kLoadGuestXThreadsPerGroupLog2 + guest_x_blocks_per_thread_log2; + } + }; + static constexpr uint8_t kSwizzledSignsUnsigned = uint8_t(xenos::TextureSign::kUnsigned) * uint8_t(0b01010101); @@ -472,6 +535,11 @@ class TextureCache { // should be made. Texture* FindOrCreateTexture(TextureKey key); + static const LoadShaderInfo& GetLoadShaderInfo( + LoadShaderIndex load_shader_index) { + assert_true(load_shader_index < kLoadShaderCount); + return load_shader_info_[load_shader_index]; + } bool LoadTextureData(Texture& texture); // Writes the texture data (for base, mips or both - but not neither) from the // shared memory or the scaled resolve memory. The shared memory management is @@ -527,6 +595,8 @@ class TextureCache { uint32_t draw_resolution_scale_x_; uint32_t draw_resolution_scale_y_; + static const LoadShaderInfo load_shader_info_[kLoadShaderCount]; + xe::global_critical_region global_critical_region_; // Bit vector storing whether each 4 KB physical memory page contains scaled // resolve data. uint32_t rather than uint64_t because parts of it can be sent From 8701c9f24ee9cd208898d0d0cb6e67b78ba70412 Mon Sep 17 00:00:00 2001 From: Triang3l Date: Tue, 24 May 2022 22:28:42 +0300 Subject: [PATCH 2/3] [D3D12] Texture load code cleanup and resolution scaling fixes The resolution scale is now taken into account when copying from the mip tail. 
--- src/xenia/gpu/d3d12/d3d12_command_processor.h | 2 +- src/xenia/gpu/d3d12/d3d12_texture_cache.cc | 214 ++++++++---------- src/xenia/gpu/texture_util.h | 4 +- src/xenia/gpu/xenos.h | 4 + 4 files changed, 98 insertions(+), 126 deletions(-) diff --git a/src/xenia/gpu/d3d12/d3d12_command_processor.h b/src/xenia/gpu/d3d12/d3d12_command_processor.h index 24d23cce9..6162b4683 100644 --- a/src/xenia/gpu/d3d12/d3d12_command_processor.h +++ b/src/xenia/gpu/d3d12/d3d12_command_processor.h @@ -562,7 +562,7 @@ class D3D12CommandProcessor : public CommandProcessor { // Unsubmitted barrier batch. std::vector barriers_; - // , sorted by the submission number. + // , sorted by the submission number. std::deque> resources_for_deletion_; static constexpr uint32_t kScratchBufferSizeIncrement = 16 * 1024 * 1024; diff --git a/src/xenia/gpu/d3d12/d3d12_texture_cache.cc b/src/xenia/gpu/d3d12/d3d12_texture_cache.cc index 86d116494..ad9b320fc 100644 --- a/src/xenia/gpu/d3d12/d3d12_texture_cache.cc +++ b/src/xenia/gpu/d3d12/d3d12_texture_cache.cc @@ -1612,6 +1612,25 @@ bool D3D12TextureCache::LoadTextureDataFromResidentMemoryImpl(Texture& texture, uint32_t texture_resolution_scale_y = texture_resolution_scaled ? draw_resolution_scale_y() : 1; + // The loop counter can mean two things depending on whether the packed mip + // tail is stored as mip 0, because in this case, it would be ambiguous since + // both the base and the mips would be on "level 0", but stored in separate + // places. + uint32_t loop_level_first, loop_level_last; + if (level_packed == 0) { + // Packed mip tail is the level 0 - may need to load mip tails for the base, + // the mips, or both. + // Loop iteration 0 - base packed mip tail. + // Loop iteration 1 - mips packed mip tail. + loop_level_first = uint32_t(level_first != 0); + loop_level_last = uint32_t(level_last != 0); + } else { + // Packed mip tail is not the level 0. + // Loop iteration is the actual level being loaded. 
+ loop_level_first = level_stored_first; + loop_level_last = level_stored_last; + } + // Get the host layout and the buffer. bool host_block_compressed = host_formats_[uint32_t(guest_format)].is_block_compressed && @@ -1631,99 +1650,61 @@ bool D3D12TextureCache::LoadTextureDataFromResidentMemoryImpl(Texture& texture, // 1...min(level_last, level_packed) if level_packed is not 0, or only 0 if // level_packed == 0. D3D12_PLACED_SUBRESOURCE_FOOTPRINT - host_slice_layouts_mips[xenos::kTexture2DCubeMaxWidthHeightLog2 + 1]; - UINT64 host_slice_sizes_mips[xenos::kTexture2DCubeMaxWidthHeightLog2 + 1]; - { - // Using custom calculations instead of GetCopyableFootprints because - // shaders may unconditionally copy multiple blocks along X per thread for - // simplicity, to make sure all rows (also including the last one - - // GetCopyableFootprints aligns row offsets, but not the total size) are - // properly padded to the number of blocks copied in an invocation without - // implicit assumptions about D3D12_TEXTURE_DATA_PITCH_ALIGNMENT. - DXGI_FORMAT host_copy_format = - GetDXGIResourceFormat(guest_format, width, height); - if (!level_first) { - host_slice_layout_base.Offset = copy_buffer_size; - host_slice_layout_base.Footprint.Format = host_copy_format; - if (!level_packed) { - // Loading the packed tail for the base - load the whole tail to copy - // regions out of it. 
- host_slice_layout_base.Footprint.Width = - guest_layout.base.x_extent_blocks * block_width; - host_slice_layout_base.Footprint.Height = - guest_layout.base.y_extent_blocks * block_height; - host_slice_layout_base.Footprint.Depth = guest_layout.base.z_extent; - } else { - host_slice_layout_base.Footprint.Width = width; - host_slice_layout_base.Footprint.Height = height; - host_slice_layout_base.Footprint.Depth = depth; - } - host_slice_layout_base.Footprint.Width = xe::round_up( - host_slice_layout_base.Footprint.Width * texture_resolution_scale_x, - UINT(host_block_width)); - host_slice_layout_base.Footprint.Height = xe::round_up( - host_slice_layout_base.Footprint.Height * texture_resolution_scale_y, - UINT(host_block_height)); - host_slice_layout_base.Footprint.RowPitch = - xe::align(xe::round_up(host_slice_layout_base.Footprint.Width / - host_block_width, - host_x_blocks_per_thread) * - load_shader_info.bytes_per_host_block, - uint32_t(D3D12_TEXTURE_DATA_PITCH_ALIGNMENT)); - host_slice_size_base = xe::align( - UINT64(host_slice_layout_base.Footprint.RowPitch) * - (host_slice_layout_base.Footprint.Height / host_block_height) * - host_slice_layout_base.Footprint.Depth, - UINT64(D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT)); - copy_buffer_size += host_slice_size_base * array_size; - } - if (level_last) { - for (uint32_t level = level_stored_first; level <= level_stored_last; - ++level) { - D3D12_PLACED_SUBRESOURCE_FOOTPRINT& host_slice_layout_mip = - host_slice_layouts_mips[level]; - host_slice_layout_mip.Offset = copy_buffer_size; - host_slice_layout_mip.Footprint.Format = host_copy_format; - if (level == level_packed) { - // Loading the packed tail for the mips - load the whole tail to copy - // regions out of it. 
- const texture_util::TextureGuestLayout::Level& - guest_layout_packed_mips = guest_layout.mips[level]; - host_slice_layout_mip.Footprint.Width = - guest_layout_packed_mips.x_extent_blocks * block_width; - host_slice_layout_mip.Footprint.Height = - guest_layout_packed_mips.y_extent_blocks * block_height; - host_slice_layout_mip.Footprint.Depth = - guest_layout_packed_mips.z_extent; - } else { - host_slice_layout_mip.Footprint.Width = - std::max(width >> level, uint32_t(1)); - host_slice_layout_mip.Footprint.Height = - std::max(height >> level, uint32_t(1)); - host_slice_layout_mip.Footprint.Depth = - std::max(depth >> level, uint32_t(1)); - } - host_slice_layout_mip.Footprint.Width = xe::round_up( - host_slice_layout_mip.Footprint.Width * texture_resolution_scale_x, - UINT(host_block_width)); - host_slice_layout_mip.Footprint.Height = xe::round_up( - host_slice_layout_mip.Footprint.Height * texture_resolution_scale_y, - UINT(host_block_height)); - host_slice_layout_mip.Footprint.RowPitch = - xe::align(xe::round_up(host_slice_layout_mip.Footprint.Width / - host_block_width, - host_x_blocks_per_thread) * - load_shader_info.bytes_per_host_block, - uint32_t(D3D12_TEXTURE_DATA_PITCH_ALIGNMENT)); - UINT64 host_slice_sizes_mip = xe::align( - UINT64(host_slice_layout_mip.Footprint.RowPitch) * - (host_slice_layout_mip.Footprint.Height / host_block_height) * - host_slice_layout_mip.Footprint.Depth, - UINT64(D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT)); - host_slice_sizes_mips[level] = host_slice_sizes_mip; - copy_buffer_size += host_slice_sizes_mip * array_size; - } + host_slice_layouts_mips[xenos::kTextureMaxMips]; + UINT64 host_slice_sizes_mips[xenos::kTextureMaxMips]; + // Using custom calculations instead of GetCopyableFootprints because + // shaders may unconditionally copy multiple blocks along X per thread for + // simplicity, to make sure all rows (also including the last one - + // GetCopyableFootprints aligns row offsets, but not the total size) are + // properly 
padded to the number of blocks copied in an invocation without + // implicit assumptions about D3D12_TEXTURE_DATA_PITCH_ALIGNMENT. + DXGI_FORMAT host_copy_format = + GetDXGIResourceFormat(guest_format, width, height); + for (uint32_t loop_level = loop_level_first; loop_level <= loop_level_last; + ++loop_level) { + bool is_base = loop_level == 0; + uint32_t level = (level_packed == 0) ? 0 : loop_level; + D3D12_PLACED_SUBRESOURCE_FOOTPRINT& level_host_slice_layout = + is_base ? host_slice_layout_base : host_slice_layouts_mips[level]; + level_host_slice_layout.Offset = copy_buffer_size; + level_host_slice_layout.Footprint.Format = host_copy_format; + if (level == level_packed) { + // Loading the packed tail for the base or the mips - load the whole tail + // to copy regions out of it. + const texture_util::TextureGuestLayout::Level& guest_layout_packed = + is_base ? guest_layout.base : guest_layout.mips[level]; + level_host_slice_layout.Footprint.Width = + guest_layout_packed.x_extent_blocks * block_width; + level_host_slice_layout.Footprint.Height = + guest_layout_packed.y_extent_blocks * block_height; + level_host_slice_layout.Footprint.Depth = guest_layout_packed.z_extent; + } else { + level_host_slice_layout.Footprint.Width = + std::max(width >> level, uint32_t(1)); + level_host_slice_layout.Footprint.Height = + std::max(height >> level, uint32_t(1)); + level_host_slice_layout.Footprint.Depth = + std::max(depth >> level, uint32_t(1)); } + level_host_slice_layout.Footprint.Width = xe::round_up( + level_host_slice_layout.Footprint.Width * texture_resolution_scale_x, + UINT(host_block_width)); + level_host_slice_layout.Footprint.Height = xe::round_up( + level_host_slice_layout.Footprint.Height * texture_resolution_scale_y, + UINT(host_block_height)); + level_host_slice_layout.Footprint.RowPitch = xe::align( + xe::round_up(level_host_slice_layout.Footprint.Width / host_block_width, + host_x_blocks_per_thread) * + load_shader_info.bytes_per_host_block, + 
uint32_t(D3D12_TEXTURE_DATA_PITCH_ALIGNMENT)); + UINT64 level_host_slice_size = xe::align( + UINT64(level_host_slice_layout.Footprint.RowPitch) * + (level_host_slice_layout.Footprint.Height / host_block_height) * + level_host_slice_layout.Footprint.Depth, + UINT64(D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT)); + (is_base ? host_slice_size_base : host_slice_sizes_mips[level]) = + level_host_slice_size; + copy_buffer_size += level_host_slice_size * array_size; } D3D12_RESOURCE_STATES copy_buffer_state = D3D12_RESOURCE_STATE_UNORDERED_ACCESS; @@ -1771,7 +1752,7 @@ bool D3D12TextureCache::LoadTextureDataFromResidentMemoryImpl(Texture& texture, // after loading the base is done). if (!texture_resolution_scaled) { D3D12SharedMemory& d3d12_shared_memory = - reinterpret_cast<D3D12SharedMemory&>(shared_memory()); + static_cast<D3D12SharedMemory&>(shared_memory()); d3d12_shared_memory.UseForReading(); ui::d3d12::util::DescriptorCpuGpuHandlePair descriptor_unscaled_source; if (bindless_resources_used_) { @@ -1798,24 +1779,6 @@ bool D3D12TextureCache::LoadTextureDataFromResidentMemoryImpl(Texture& texture, (uint32_t(texture_key.endianness) << 2) | (texture_resolution_scale_x << 4) | (texture_resolution_scale_y << 6); - // The loop counter can mean two things depending on whether the packed mip - // tail is stored as mip 0, because in this case, it would be ambiguous since - // both the base and the mips would be on "level 0", but stored in separate - // places. - uint32_t loop_level_first, loop_level_last; - if (level_packed == 0) { - // Packed mip tail is the level 0 - may need to load mip tails for the base, - // the mips, or both. - // Loop iteration 0 - base packed mip tail. - // Loop iteration 1 - mips packed mip tail. - loop_level_first = uint32_t(level_first != 0); - loop_level_last = uint32_t(level_last != 0); - } else { - // Packed mip tail is not the level 0. - // Loop iteration is the actual level being loaded.
- loop_level_first = level_stored_first; - loop_level_last = level_stored_last; - } // The loop is slices within levels because the base and the levels may need // different portions of the scaled resolve virtual address space to be // available through buffers, and to create a descriptor, the buffer start @@ -1902,8 +1865,6 @@ bool D3D12TextureCache::LoadTextureDataFromResidentMemoryImpl(Texture& texture, load_constants.size_blocks[2] = level_depth; load_constants.height_texels = level_height; - // Each thread group processes 32x32x1 source blocks (resolution-scaled, but - // still compressed if the host needs decompression). uint32_t group_count_x = (load_constants.size_blocks[0] + ((UINT32_C(1) << guest_x_blocks_per_group_log2) - 1)) >> @@ -1913,13 +1874,16 @@ bool D3D12TextureCache::LoadTextureDataFromResidentMemoryImpl(Texture& texture, ((UINT32_C(1) << kLoadGuestYBlocksPerGroupLog2) - 1)) >> kLoadGuestYBlocksPerGroupLog2; - const D3D12_PLACED_SUBRESOURCE_FOOTPRINT& host_slice_layout = + const D3D12_PLACED_SUBRESOURCE_FOOTPRINT& level_host_slice_layout = is_base ? host_slice_layout_base : host_slice_layouts_mips[level]; uint32_t host_slice_size = uint32_t(is_base ? 
host_slice_size_base : host_slice_sizes_mips[level]); - load_constants.host_offset = uint32_t(host_slice_layout.Offset); - load_constants.host_pitch = host_slice_layout.Footprint.RowPitch; + load_constants.host_offset = uint32_t(level_host_slice_layout.Offset); + load_constants.host_pitch = level_host_slice_layout.Footprint.RowPitch; + uint32_t level_array_slice_stride_bytes_scaled = + level_guest_layout.array_slice_stride_bytes * + (texture_resolution_scale_x * texture_resolution_scale_y); for (uint32_t slice = 0; slice < array_size; ++slice) { D3D12_GPU_VIRTUAL_ADDRESS cbuffer_gpu_address; uint8_t* cbuffer_mapping = cbuffer_pool.Request( @@ -1937,9 +1901,7 @@ bool D3D12TextureCache::LoadTextureDataFromResidentMemoryImpl(Texture& texture, command_processor_.SubmitBarriers(); command_list.D3DDispatch(group_count_x, group_count_y, load_constants.size_blocks[2]); - load_constants.guest_offset += - level_guest_layout.array_slice_stride_bytes * - (texture_resolution_scale_x * texture_resolution_scale_y); + load_constants.guest_offset += level_array_slice_stride_bytes_scaled; load_constants.host_offset += host_slice_size; } } @@ -1977,15 +1939,21 @@ bool D3D12TextureCache::LoadTextureDataFromResidentMemoryImpl(Texture& texture, texture_util::GetPackedMipOffset(width, height, depth, guest_format, level, level_offset_blocks_x, level_offset_blocks_y, level_offset_z); - source_box.left = level_offset_blocks_x * block_width; - source_box.top = level_offset_blocks_y * block_height; + source_box.left = + level_offset_blocks_x * block_width * texture_resolution_scale_x; + source_box.top = + level_offset_blocks_y * block_height * texture_resolution_scale_y; source_box.front = level_offset_z; source_box.right = source_box.left + - xe::align(std::max(width >> level, uint32_t(1)), host_block_width); + xe::align(std::max((width * texture_resolution_scale_x) >> level, + uint32_t(1)), + host_block_width); source_box.bottom = source_box.top + - xe::align(std::max(height >> level, 
uint32_t(1)), host_block_height); + xe::align(std::max((height * texture_resolution_scale_y) >> level, + uint32_t(1)), + host_block_height); source_box.back = source_box.front + std::max(depth >> level, uint32_t(1)); source_box_ptr = &source_box; diff --git a/src/xenia/gpu/texture_util.h b/src/xenia/gpu/texture_util.h index 1988ed690..7e20ab76f 100644 --- a/src/xenia/gpu/texture_util.h +++ b/src/xenia/gpu/texture_util.h @@ -173,8 +173,8 @@ struct TextureGuestLayout { // If mip_max_level specified at calculation time is at least 1, the stored // mips are min(1, packed_mip_level) through min(mip_max_level, // packed_mip_level). - Level mips[xenos::kTexture2DCubeMaxWidthHeightLog2 + 1]; - uint32_t mip_offsets_bytes[xenos::kTexture2DCubeMaxWidthHeightLog2 + 1]; + Level mips[xenos::kTextureMaxMips]; + uint32_t mip_offsets_bytes[xenos::kTextureMaxMips]; uint32_t mips_total_extent_bytes; uint32_t max_level; // UINT32_MAX if there's no packed mip tail. diff --git a/src/xenia/gpu/xenos.h b/src/xenia/gpu/xenos.h index 87dbe2b59..e9865946c 100644 --- a/src/xenia/gpu/xenos.h +++ b/src/xenia/gpu/xenos.h @@ -1045,6 +1045,10 @@ constexpr uint32_t kTexture3DMaxWidthHeight = 1 << kTexture3DMaxWidthHeightLog2; constexpr uint32_t kTexture3DMaxDepthLog2 = 10; constexpr uint32_t kTexture3DMaxDepth = 1 << kTexture3DMaxDepthLog2; +constexpr uint32_t kTextureMaxMips = + std::max(kTexture2DCubeMaxWidthHeightLog2, kTexture3DMaxWidthHeightLog2) + + 1; + // Tiled texture sizes are in 32x32 increments for 2D, 32x32x4 for 3D. 
// 2DTiledOffset(X * 32 + x, Y * 32 + y) == // 2DTiledOffset(X * 32, Y * 32) + 2DTiledOffset(x, y) From a4840e1992667946830c0af11166885fbd534cae Mon Sep 17 00:00:00 2001 From: Triang3l Date: Tue, 24 May 2022 22:33:27 +0300 Subject: [PATCH 3/3] [GPU] FIXME comment for 1bpb/2bpb texture tiled extent --- src/xenia/gpu/texture_util.cc | 6 ++++++ src/xenia/gpu/texture_util.h | 5 +++++ 2 files changed, 11 insertions(+) diff --git a/src/xenia/gpu/texture_util.cc b/src/xenia/gpu/texture_util.cc index 849aac8e1..218ad1133 100644 --- a/src/xenia/gpu/texture_util.cc +++ b/src/xenia/gpu/texture_util.cc @@ -391,6 +391,12 @@ TextureGuestLayout GetGuestTextureLayout( // 2D 32x32-block tiles are laid out linearly in the texture. // Calculate the extent as ((all rows except for the last * pitch in // tiles + last row length in tiles) * bytes per tile). + // FIXME(Triang3l): This is wrong for 1bpb and 2bpb. At 1bpb (32x32 is + // 1024 bytes), offset for X + 32 minus offset for X is 512, not 1024, + // but offset for X + 128 minus offset for X + 96 is 2560. Also, for + // XY = 0...31, the extent of the addresses is 2560, not 1024. At 2bpb, + // addressing repeats every 64x64, and the extent for XY = 0...31 is + // 3072, not 2048. level_layout.array_slice_data_extent_bytes = (level_layout.y_extent_blocks - xenos::kTextureTileWidthHeight) * level_layout.row_pitch_bytes + diff --git a/src/xenia/gpu/texture_util.h b/src/xenia/gpu/texture_util.h index 7e20ab76f..e1f849b42 100644 --- a/src/xenia/gpu/texture_util.h +++ b/src/xenia/gpu/texture_util.h @@ -207,6 +207,11 @@ void GetTextureTotalSize(xenos::DataDimension dimension, // Offset3D(X * 32, Y * 32, Z * 8) + Offset3D(x, y, z) // (true for negative offsets too). // - 2D 32x32 tiles are laid out linearly. +// FIXME(Triang3l): This is wrong for 1bpb and 2bpb. At 1bpb (32x32 is 1024 +// bytes), offset for X + 32 minus offset for X is 512, not 1024, but offset for +// X + 128 minus offset for X + 96 is 2560. 
Also, for XY = 0...31, the extent of +// the addresses is 2560, not 1024. At 2bpb, addressing repeats every 64x64, and +// the extent for XY = 0...31 is 3072, not 2048. // - 3D tiled texture slices 0:3 and 4:7 are stored separately in memory, in // non-overlapping ranges, but addressing in 4:7 is different than in 0:3. // - Addressing of blocks that are contiguous along X (for tiling/untiling of