From 1cec143810281a1ad7222339e07d602e790c2f38 Mon Sep 17 00:00:00 2001
From: Triang3l
Date: Wed, 15 Aug 2018 16:59:43 +0300
Subject: [PATCH] [D3D12] DXT3A textures

---
 .../d3d12/shaders/texture_load_dxt3a.cs.hlsl | 29 +++++++++++++++++++
 src/xenia/gpu/d3d12/texture_cache.cc         | 24 ++++++++++++---
 src/xenia/gpu/d3d12/texture_cache.h          |  1 +
 3 files changed, 50 insertions(+), 4 deletions(-)
 create mode 100644 src/xenia/gpu/d3d12/shaders/texture_load_dxt3a.cs.hlsl

diff --git a/src/xenia/gpu/d3d12/shaders/texture_load_dxt3a.cs.hlsl b/src/xenia/gpu/d3d12/shaders/texture_load_dxt3a.cs.hlsl
new file mode 100644
index 000000000..fe3672591
--- /dev/null
+++ b/src/xenia/gpu/d3d12/shaders/texture_load_dxt3a.cs.hlsl
@@ -0,0 +1,29 @@
+#include "texture_copy.hlsli"
+
+[numthreads(8, 32, 1)]
+void main(uint3 xe_thread_id : SV_DispatchThreadID) {
+  // 1 thread = 4 64-bit DXT3A blocks to 4 128-bit DXT3 blocks with zero color.
+  uint3 block_index = xe_thread_id;
+  block_index.x <<= 2u;  // Each thread covers 4 blocks along X.
+  [branch] if (any(block_index >= xe_texture_copy_size_blocks)) {
+    return;  // Nothing to copy: the thread's first block is out of bounds.
+  }
+  uint4 block_offsets_guest =
+      XeTextureCopyGuestBlockOffsets(block_index, 8u, 3u);
+  uint4 blocks_01 = uint4(xe_texture_copy_source.Load2(block_offsets_guest.x),
+                          xe_texture_copy_source.Load2(block_offsets_guest.y));
+  uint4 blocks_23 = uint4(xe_texture_copy_source.Load2(block_offsets_guest.z),
+                          xe_texture_copy_source.Load2(block_offsets_guest.w));
+  blocks_01 = XeByteSwap(blocks_01, xe_texture_copy_endianness);
+  blocks_23 = XeByteSwap(blocks_23, xe_texture_copy_endianness);
+  uint block_offset_host = XeTextureHostLinearOffset(
+      block_index, xe_texture_copy_size_blocks.y, xe_texture_copy_host_pitch,
+      16u) + xe_texture_copy_host_base;
+  xe_texture_copy_dest.Store4(block_offset_host, uint4(blocks_01.xy, 0u, 0u));
+  xe_texture_copy_dest.Store4(block_offset_host + 16u,
+                              uint4(blocks_01.zw, 0u, 0u));
+  xe_texture_copy_dest.Store4(block_offset_host + 32u,
+                              uint4(blocks_23.xy, 0u, 0u));
+  xe_texture_copy_dest.Store4(block_offset_host + 48u,
+                              uint4(blocks_23.zw, 0u, 0u));
+}
diff --git a/src/xenia/gpu/d3d12/texture_cache.cc b/src/xenia/gpu/d3d12/texture_cache.cc
index 307f0e519..7af3e72bf 100644
--- a/src/xenia/gpu/d3d12/texture_cache.cc
+++ b/src/xenia/gpu/d3d12/texture_cache.cc
@@ -30,6 +30,7 @@ namespace d3d12 {
 #include "xenia/gpu/d3d12/shaders/bin/texture_load_64bpb_cs.h"
 #include "xenia/gpu/d3d12/shaders/bin/texture_load_8bpb_cs.h"
 #include "xenia/gpu/d3d12/shaders/bin/texture_load_ctx1_cs.h"
+#include "xenia/gpu/d3d12/shaders/bin/texture_load_dxt3a_cs.h"
 
 const TextureCache::HostFormat TextureCache::host_formats_[64] = {
     {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown},  // k_1_REVERSE
@@ -91,7 +92,7 @@ const TextureCache::HostFormat TextureCache::host_formats_[64] = {
     {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown},  // k_10_11_11_AS_16_16_16_16
     {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown},  // k_11_11_10_AS_16_16_16_16
     {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown},  // k_32_32_32_FLOAT
-    {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown},  // k_DXT3A
+    {DXGI_FORMAT_BC2_UNORM, CopyMode::kDXT3A},  // k_DXT3A
     {DXGI_FORMAT_BC4_UNORM, CopyMode::k64bpb},  // k_DXT5A
     {DXGI_FORMAT_R8G8_UNORM, CopyMode::kCTX1},  // k_CTX1
     {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown},  // k_DXT3A_AS_1_1_1_1
@@ -108,6 +109,7 @@ const TextureCache::CopyModeInfo TextureCache::copy_mode_info_[] = {
     {texture_load_32bpb_cs, sizeof(texture_load_32bpb_cs)},
     {texture_load_64bpb_cs, sizeof(texture_load_64bpb_cs)},
     {texture_load_128bpb_cs, sizeof(texture_load_128bpb_cs)},
+    {texture_load_dxt3a_cs, sizeof(texture_load_dxt3a_cs)},
     {texture_load_ctx1_cs, sizeof(texture_load_ctx1_cs)},
 };
 
@@ -525,6 +527,8 @@ void TextureCache::TextureKeyFromFetchConstant(
     return;
   }
 
+  TextureFormat format = GetBaseFormat(TextureFormat(fetch.format));
+
   key_out.base_page = base_page;
   key_out.mip_page = mip_page;
   key_out.dimension = dimension;
@@ -534,12 +538,24 @@ void TextureCache::TextureKeyFromFetchConstant(
   key_out.mip_max_level = mip_max_level;
   key_out.tiled = fetch.tiled;
   key_out.packed_mips = fetch.packed_mips;
-  key_out.format = GetBaseFormat(TextureFormat(fetch.format));
+  key_out.format = format;
   key_out.endianness = Endian(fetch.endianness);
+
+  uint32_t swizzle = fetch.swizzle;
   // Get rid of 6 and 7 values (to prevent device losses if the game has
   // something broken) the quick and dirty way - by changing them to 4 and 5.
-  swizzle_out = fetch.swizzle &
-                ~((fetch.swizzle & (4 | (4 << 3) | (4 << 6) | (4 << 9))) >> 1);
+  swizzle &= ~((swizzle & (4 | (4 << 3) | (4 << 6) | (4 << 9))) >> 1);
+  // Remap the swizzle according to the texture format.
+  if (format == TextureFormat::k_DXT3A) {
+    // DXT3A is emulated as DXT3 with zero color, but the alpha should be
+    // replicated into all channels.
+    // http://fileadmin.cs.lth.se/cs/Personal/Michael_Doggett/talks/unc-xenos-doggett.pdf
+    // Any 3-bit selector with high bit clear (not 0.0/1.0) becomes 3 (alpha).
+    uint32_t swizzle_not_constant =
+        ~swizzle & (4 | (4 << 3) | (4 << 6) | (4 << 9));
+    swizzle |= (swizzle_not_constant >> 1) | (swizzle_not_constant >> 2);
+  }
+  swizzle_out = swizzle;
 }
 
 void TextureCache::LogTextureKeyAction(TextureKey key, const char* action) {
diff --git a/src/xenia/gpu/d3d12/texture_cache.h b/src/xenia/gpu/d3d12/texture_cache.h
index f43e6806b..946f19cff 100644
--- a/src/xenia/gpu/d3d12/texture_cache.h
+++ b/src/xenia/gpu/d3d12/texture_cache.h
@@ -90,6 +90,7 @@ class TextureCache {
     k32bpb,
     k64bpb,
     k128bpb,
+    kDXT3A,
     kCTX1,
 
     kCount,