diff --git a/src/xenia/gpu/d3d12/shaders/texture_address.hlsli b/src/xenia/gpu/d3d12/shaders/texture_address.hlsli index ed6cf945c..3ded3e965 100644 --- a/src/xenia/gpu/d3d12/shaders/texture_address.hlsli +++ b/src/xenia/gpu/d3d12/shaders/texture_address.hlsli @@ -5,13 +5,13 @@ // consecutive blocks along X. // https://github.com/gildor2/UModel/blob/de8fbd3bc922427ea056b7340202dcdcc19ccff5/Unreal/UnTexture.cpp#L495 -uint4 XeTextureTiledOffset2D(uint2 p, uint width, uint log2_bpb) { +uint4 XeTextureTiledOffset2D(uint2 p, uint width, uint bpb_log2) { uint4 x4 = uint4(0u, 1u, 2u, 3u) + p.xxxx; // Top bits of coordinates. uint4 macro = - ((x4 >> 5u) + (p.y >> 5u) * ((width + 31u) >> 5u)) << (log2_bpb + 7u); + ((x4 >> 5u) + (p.y >> 5u) * ((width + 31u) >> 5u)) << (bpb_log2 + 7u); // Lower bits of coordinates (result is 6-bit value). - uint4 micro = ((x4 & 7u) + ((p.y & 0xEu) << 2u)) << log2_bpb; + uint4 micro = ((x4 & 7u) + ((p.y & 0xEu) << 2u)) << bpb_log2; // Mix micro/macro + add few remaining x/y bits. uint4 offset = macro + ((micro & ~0xFu) << 1u) + (micro & 0xFu) + ((p.y & 1u) << 4u); @@ -26,27 +26,27 @@ uint4 XeTextureTiledOffset2D(uint2 p, uint width, uint log2_bpb) { // Reverse-engineered from an executable. // The base/micro/macro names were chosen pretty much at random and don't have // the same meaning as in TiledOffset2D. 
-uint4 XeTextureTiledOffset3D(uint3 p, uint2 width_height, uint log2_bpb) { +uint4 XeTextureTiledOffset3D(uint3 p, uint2 width_height, uint bpb_log2) { uint4 x4 = uint4(0u, 1u, 2u, 3u) + p.xxxx; uint2 aligned_size = (width_height + 31u) & ~31u; - uint base = ((p.z >> 2u) * ((aligned_size.x * aligned_size.y) >> 4u) + - (p.y >> 4u)) * (aligned_size.x >> 5u); + uint base = ((p.z >> 2u) * (aligned_size.y >> 4u) + (p.y >> 4u)) * + (aligned_size.x >> 5u); uint4 micro = (((p.z >> 2u) + (p.y >> 3u)) & 1u).xxxx; micro += (((micro << 1u) + (x4 >> 3u)) & 3u) << 1u; - uint4 macro = (((x4 & 7u) + ((p.y & 6u) << 2u)) << (log2_bpb + 6u)) >> 6u; - macro = (((((((x4 >> 5u) + base) << (log2_bpb + 6u)) & 0xFFFFFFFu) << 1u) + + uint4 macro = (((x4 & 7u) + ((p.y & 6u) << 2u)) << (bpb_log2 + 6u)) >> 6u; + macro = (((((((x4 >> 5u) + base) << (bpb_log2 + 6u)) & 0xFFFFFFFu) << 1u) + (macro & ~15u)) << 1u) + (macro & 15u) + - ((p.z & 3u) << (log2_bpb + 6u)) + ((p.y & 1u) << 4u); + ((p.z & 3u) << (bpb_log2 + 6u)) + ((p.y & 1u) << 4u); return ((((((((macro >> 6u) & 7u) + ((micro & 1u) << 3u)) << 3u) + (micro & ~1u)) << 2u) + (macro & ~511u)) << 3u) + (macro & 63u); } uint XeTextureGuestLinearOffset(uint3 p, uint height, uint pitch, uint bpb) { - return p.x * bpb + ((p.z * ((height + 31u) & ~31u) + pitch) * p.y); + return p.x * bpb + (p.z * ((height + 31u) & ~31u) + p.y) * pitch; } uint XeTextureHostLinearOffset(uint3 p, uint height, uint pitch, uint bpb) { - return p.x * bpb + ((p.z * height + pitch) * p.y); + return p.x * bpb + (p.z * height + p.y) * pitch; } #endif // XENIA_GPU_D3D12_SHADERS_TEXTURE_ADDRESS_HLSLI_ diff --git a/src/xenia/gpu/d3d12/shaders/texture_copy.hlsli b/src/xenia/gpu/d3d12/shaders/texture_copy.hlsli index 131872f2d..ee7aa1ef5 100644 --- a/src/xenia/gpu/d3d12/shaders/texture_copy.hlsli +++ b/src/xenia/gpu/d3d12/shaders/texture_copy.hlsli @@ -25,4 +25,26 @@ cbuffer xe_texture_copy_constants : register(b0) { ByteAddressBuffer xe_texture_copy_source : register(t0); 
RWByteAddressBuffer xe_texture_copy_dest : register(u0); +// bpb and bpb_log2 are separate because bpb may not be a power of 2 (like 96). +uint4 XeTextureCopyGuestBlockOffsets(uint3 block_index, uint bpb, + uint bpb_log2) { + uint3 block_index_guest = block_index + xe_texture_copy_guest_mip_offset; + uint4 block_offsets_guest; + [branch] if (xe_texture_copy_guest_pitch == XeTextureCopyGuestPitchTiled) { + [branch] if (xe_texture_copy_is_3d) { + block_offsets_guest = XeTextureTiledOffset3D( + block_index_guest, xe_texture_copy_size.xy, bpb_log2); + } else { + block_offsets_guest = XeTextureTiledOffset2D( + block_index_guest.xy, xe_texture_copy_size.x, bpb_log2); + } + } else { + block_offsets_guest = + uint4(0u, 1u, 2u, 3u) * bpb + XeTextureGuestLinearOffset( + block_index_guest, xe_texture_copy_size.y, + xe_texture_copy_guest_pitch, bpb); + } + return block_offsets_guest + xe_texture_copy_guest_base; +} + #endif // XENIA_GPU_D3D12_SHADERS_TEXTURE_COPY_HLSLI_ diff --git a/src/xenia/gpu/d3d12/shaders/texture_load_128bpb.cs.hlsl b/src/xenia/gpu/d3d12/shaders/texture_load_128bpb.cs.hlsl new file mode 100644 index 000000000..c574dc446 --- /dev/null +++ b/src/xenia/gpu/d3d12/shaders/texture_load_128bpb.cs.hlsl @@ -0,0 +1,29 @@ +#include "texture_copy.hlsli" + +[numthreads(8, 32, 1)] +void main(uint3 xe_thread_id : SV_DispatchThreadID) { + // 1 thread = 4 uint4 blocks. 
+ uint3 block_index = xe_thread_id; + block_index.x <<= 2u; + [branch] if (any(block_index >= xe_texture_copy_size)) { + return; + } + uint4 block_offsets_guest = + XeTextureCopyGuestBlockOffsets(block_index, 16u, 4u); + uint4 block_0 = xe_texture_copy_source.Load4(block_offsets_guest.x); + uint4 block_1 = xe_texture_copy_source.Load4(block_offsets_guest.y); + uint4 block_2 = xe_texture_copy_source.Load4(block_offsets_guest.z); + uint4 block_3 = xe_texture_copy_source.Load4(block_offsets_guest.w); + block_0 = XeByteSwap(block_0, xe_texture_copy_endianness); + block_1 = XeByteSwap(block_1, xe_texture_copy_endianness); + block_2 = XeByteSwap(block_2, xe_texture_copy_endianness); + block_3 = XeByteSwap(block_3, xe_texture_copy_endianness); + uint block_offset_host = XeTextureHostLinearOffset( + block_index, xe_texture_copy_size.y, xe_texture_copy_host_pitch, 16u) + + xe_texture_copy_host_base; + uint4 block_offsets_host = uint4(0u, 16u, 32u, 48u) + block_offset_host; + xe_texture_copy_dest.Store4(block_offsets_host.x, block_0); + xe_texture_copy_dest.Store4(block_offsets_host.y, block_1); + xe_texture_copy_dest.Store4(block_offsets_host.z, block_2); + xe_texture_copy_dest.Store4(block_offsets_host.w, block_3); +} diff --git a/src/xenia/gpu/d3d12/shaders/texture_load_32bpb.cs.hlsl b/src/xenia/gpu/d3d12/shaders/texture_load_32bpb.cs.hlsl new file mode 100644 index 000000000..bfef0d9c3 --- /dev/null +++ b/src/xenia/gpu/d3d12/shaders/texture_load_32bpb.cs.hlsl @@ -0,0 +1,22 @@ +#include "texture_copy.hlsli" + +[numthreads(8, 32, 1)] +void main(uint3 xe_thread_id : SV_DispatchThreadID) { + // 1 thread = 4 uint blocks. 
+ uint3 block_index = xe_thread_id; + block_index.x <<= 2u; + [branch] if (any(block_index >= xe_texture_copy_size)) { + return; + } + uint4 block_offsets_guest = + XeTextureCopyGuestBlockOffsets(block_index, 4u, 2u); + uint4 blocks = uint4(xe_texture_copy_source.Load(block_offsets_guest.x), + xe_texture_copy_source.Load(block_offsets_guest.y), + xe_texture_copy_source.Load(block_offsets_guest.z), + xe_texture_copy_source.Load(block_offsets_guest.w)); + blocks = XeByteSwap(blocks, xe_texture_copy_endianness); + uint block_offset_host = XeTextureHostLinearOffset( + block_index, xe_texture_copy_size.y, xe_texture_copy_host_pitch, 4u) + + xe_texture_copy_host_base; + xe_texture_copy_dest.Store4(block_offset_host, blocks); +} diff --git a/src/xenia/gpu/d3d12/shaders/texture_load_64bpb.cs.hlsl b/src/xenia/gpu/d3d12/shaders/texture_load_64bpb.cs.hlsl index 1d2d17443..f886237aa 100644 --- a/src/xenia/gpu/d3d12/shaders/texture_load_64bpb.cs.hlsl +++ b/src/xenia/gpu/d3d12/shaders/texture_load_64bpb.cs.hlsl @@ -8,22 +8,8 @@ void main(uint3 xe_thread_id : SV_DispatchThreadID) { [branch] if (any(block_index >= xe_texture_copy_size)) { return; } - uint3 block_index_guest = block_index + xe_texture_copy_guest_mip_offset; - uint4 block_offsets_guest; - [branch] if (xe_texture_copy_guest_pitch == XeTextureCopyGuestPitchTiled) { - [branch] if (xe_texture_copy_is_3d) { - block_offsets_guest = XeTextureTiledOffset3D( - block_index_guest, xe_texture_copy_size.xy, 3u); - } else { - block_offsets_guest = XeTextureTiledOffset2D( - block_index_guest.xy, xe_texture_copy_size.x, 3u); - } - } else { - block_offsets_guest = uint4(0u, 8u, 16u, 24u) + XeTextureGuestLinearOffset( - block_index_guest, xe_texture_copy_size.y, xe_texture_copy_guest_pitch, - 8u); - } - block_offsets_guest += xe_texture_copy_guest_base; + uint4 block_offsets_guest = + XeTextureCopyGuestBlockOffsets(block_index, 8u, 3u); uint4 blocks_01 = uint4(xe_texture_copy_source.Load2(block_offsets_guest.x), 
xe_texture_copy_source.Load2(block_offsets_guest.y)); uint4 blocks_23 = uint4(xe_texture_copy_source.Load2(block_offsets_guest.z), diff --git a/src/xenia/gpu/d3d12/texture_cache.cc b/src/xenia/gpu/d3d12/texture_cache.cc index 922ae0ec0..bf9227abb 100644 --- a/src/xenia/gpu/d3d12/texture_cache.cc +++ b/src/xenia/gpu/d3d12/texture_cache.cc @@ -24,64 +24,67 @@ namespace gpu { namespace d3d12 { // Generated with `xb buildhlsl`. +#include "xenia/gpu/d3d12/shaders/bin/texture_load_128bpb_cs.h" +#include "xenia/gpu/d3d12/shaders/bin/texture_load_32bpb_cs.h" #include "xenia/gpu/d3d12/shaders/bin/texture_load_64bpb_cs.h" const TextureCache::HostFormat TextureCache::host_formats_[64] = { - {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_1_REVERSE - {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_1 - {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_8 - {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_1_5_5_5 - {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_5_6_5 - {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_6_5_5 - {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_8_8_8_8 - {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_2_10_10_10 - {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_8_A - {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_8_B - {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_8_8 - {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_Cr_Y1_Cb_Y0 - {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_Y1_Cr_Y0_Cb - {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_Shadow - {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_8_8_8_8_A - {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_4_4_4_4 - {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_10_11_11 - {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_11_11_10 - {DXGI_FORMAT_BC1_UNORM, CopyMode::k64Bpb}, // k_DXT1 - {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_DXT2_3 - {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_DXT4_5 - {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_DXV - {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_24_8 - 
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_24_8_FLOAT - {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_16 - {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_16_16 - {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_16_16_16_16 - {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_16_EXPAND - {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_16_16_EXPAND - {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_16_16_16_16_EXPAND - {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_16_FLOAT - {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_16_16_FLOAT - {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_16_16_16_16_FLOAT - {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_32 - {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_32_32 - {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_32_32_32_32 - {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_32_FLOAT - {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_32_32_FLOAT - {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_32_32_32_32_FLOAT - {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_32_AS_8 - {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_32_AS_8_8 - {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_16_MPEG - {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_16_16_MPEG - {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_8_INTERLACED - {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_32_AS_8_INTERLACED - {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_32_AS_8_8_INTERLACED - {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_16_INTERLACED - {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_16_MPEG_INTERLACED - {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_16_16_MPEG_INTERLACED - {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_DXN - {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_8_8_8_8_AS_16_16_16_16 - {DXGI_FORMAT_BC1_UNORM, CopyMode::k64Bpb}, // k_DXT1_AS_16_16_16_16 - {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_DXT2_3_AS_16_16_16_16 - {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_DXT4_5_AS_16_16_16_16 - {DXGI_FORMAT_UNKNOWN, 
CopyMode::kUnknown}, // k_2_10_10_10_AS_16_16_16_16 + {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_1_REVERSE + {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_1 + {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_8 + {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_1_5_5_5 + {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_5_6_5 + {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_6_5_5 + {DXGI_FORMAT_R8G8B8A8_UNORM, CopyMode::k32bpb}, // k_8_8_8_8 + {DXGI_FORMAT_R10G10B10A2_UNORM, CopyMode::k32bpb}, // k_2_10_10_10 + {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_8_A + {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_8_B + {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_8_8 + {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_Cr_Y1_Cb_Y0 + {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_Y1_Cr_Y0_Cb + {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_Shadow + {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_8_8_8_8_A + {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_4_4_4_4 + {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_10_11_11 + {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_11_11_10 + {DXGI_FORMAT_BC1_UNORM, CopyMode::k64bpb}, // k_DXT1 + {DXGI_FORMAT_BC2_UNORM, CopyMode::k128bpb}, // k_DXT2_3 + {DXGI_FORMAT_BC3_UNORM, CopyMode::k128bpb}, // k_DXT4_5 + {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_DXV + {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_24_8 + {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_24_8_FLOAT + {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_16 + {DXGI_FORMAT_R16G16_UNORM, CopyMode::k32bpb}, // k_16_16 + {DXGI_FORMAT_R16G16B16A16_UNORM, CopyMode::k64bpb}, // k_16_16_16_16 + {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_16_EXPAND + {DXGI_FORMAT_R16G16_UNORM, CopyMode::k32bpb}, // k_16_16_EXPAND + {DXGI_FORMAT_R16G16B16A16_UNORM, CopyMode::k64bpb}, // k_16_16_16_16_EXPAND + {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_16_FLOAT + {DXGI_FORMAT_R16G16_FLOAT, CopyMode::k32bpb}, // k_16_16_FLOAT + {DXGI_FORMAT_R16G16B16A16_FLOAT, 
CopyMode::k64bpb}, // k_16_16_16_16_FLOAT + {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_32 + {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_32_32 + {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_32_32_32_32 + {DXGI_FORMAT_R32_FLOAT, CopyMode::k32bpb}, // k_32_FLOAT + {DXGI_FORMAT_R32G32_FLOAT, CopyMode::k64bpb}, // k_32_32_FLOAT + {DXGI_FORMAT_R32G32B32A32_FLOAT, CopyMode::k128bpb}, // k_32_32_32_32_FLOAT + {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_32_AS_8 + {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_32_AS_8_8 + {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_16_MPEG + {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_16_16_MPEG + {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_8_INTERLACED + {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_32_AS_8_INTERLACED + {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_32_AS_8_8_INTERLACED + {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_16_INTERLACED + {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_16_MPEG_INTERLACED + {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_16_16_MPEG_INTERLACED + {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_DXN + {DXGI_FORMAT_R8G8B8A8_UNORM, CopyMode::k32bpb}, // k_8_8_8_8_AS_16_16_16_16 + {DXGI_FORMAT_BC1_UNORM, CopyMode::k64bpb}, // k_DXT1_AS_16_16_16_16 + {DXGI_FORMAT_BC2_UNORM, CopyMode::k128bpb}, // k_DXT2_3_AS_16_16_16_16 + {DXGI_FORMAT_BC3_UNORM, CopyMode::k128bpb}, // k_DXT4_5_AS_16_16_16_16 + {DXGI_FORMAT_R10G10B10A2_UNORM, + CopyMode::k32bpb}, // k_2_10_10_10_AS_16_16_16_16 {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_10_11_11_AS_16_16_16_16 {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_11_11_10_AS_16_16_16_16 {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_32_32_32_FLOAT @@ -89,15 +92,18 @@ const TextureCache::HostFormat TextureCache::host_formats_[64] = { {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_DXT5A {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_CTX1 {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_DXT3A_AS_1_1_1_1 - {DXGI_FORMAT_UNKNOWN, 
CopyMode::kUnknown}, // k_8_8_8_8_GAMMA - {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_2_10_10_10_FLOAT + {DXGI_FORMAT_R8G8B8A8_UNORM, CopyMode::k32bpb}, // k_8_8_8_8_GAMMA + {DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_2_10_10_10_FLOAT }; const char* const TextureCache::dimension_names_[4] = {"1D", "2D", "3D", "cube"}; const TextureCache::CopyModeInfo TextureCache::copy_mode_info_[] = { - {texture_load_64bpb_cs, sizeof(texture_load_64bpb_cs)}}; + {texture_load_32bpb_cs, sizeof(texture_load_32bpb_cs)}, + {texture_load_64bpb_cs, sizeof(texture_load_64bpb_cs)}, + {texture_load_128bpb_cs, sizeof(texture_load_128bpb_cs)}, +}; TextureCache::TextureCache(D3D12CommandProcessor* command_processor, RegisterFile* register_file, @@ -810,9 +816,8 @@ bool TextureCache::LoadTextureData(Texture* texture) { command_processor_->ReleaseScratchGPUBuffer(copy_buffer, copy_buffer_state); - // TODO(Triang3l): Uncomment when done testing untiling shaders. - /* texture->base_in_sync = true; - texture->mips_in_sync = true; */ + texture->base_in_sync = true; + texture->mips_in_sync = true; LogTextureAction(texture, "Loaded"); return true; diff --git a/src/xenia/gpu/d3d12/texture_cache.h b/src/xenia/gpu/d3d12/texture_cache.h index a05336341..e67086bb3 100644 --- a/src/xenia/gpu/d3d12/texture_cache.h +++ b/src/xenia/gpu/d3d12/texture_cache.h @@ -86,7 +86,9 @@ class TextureCache { }; enum class CopyMode { - k64Bpb, + k32bpb, + k64bpb, + k128bpb, kCount,