[D3D12] 32bpp tiling shader

This commit is contained in:
Triang3l 2018-08-25 01:16:35 +03:00
parent 4a747b3b81
commit 2c6224ad37
9 changed files with 492 additions and 150 deletions

View File

@ -1208,7 +1208,9 @@ bool RenderTargetCache::ResolveCopy(SharedMemory* shared_memory,
D3D12_RESOURCE_STATES copy_buffer_state =
D3D12_RESOURCE_STATE_UNORDERED_ACCESS;
ID3D12Resource* copy_buffer = command_processor_->RequestScratchGPUBuffer(
render_target->copy_buffer_size, copy_buffer_state);
std::max(render_target->copy_buffer_size,
resolve_target->copy_buffer_size),
copy_buffer_state);
if (copy_buffer == nullptr) {
return false;
}
@ -1289,10 +1291,6 @@ bool RenderTargetCache::ResolveCopy(SharedMemory* shared_memory,
command_list->CopyTextureRegion(&location_dest, 0, 0, 0, &location_source,
nullptr);
// Done with the copy buffer.
command_processor_->ReleaseScratchGPUBuffer(copy_buffer, copy_buffer_state);
// Do the resolve. Render targets unbound already, safe to call
// OMSetRenderTargets.
@ -1417,7 +1415,40 @@ bool RenderTargetCache::ResolveCopy(SharedMemory* shared_memory,
command_list->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
command_list->DrawInstanced(3, 1, 0, 0);
// TODO(Triang3l): Tile the resolve target in the texture cache.
// Copy the resolve target to the buffer.
command_processor_->PushTransitionBarrier(resolve_target->resource,
resolve_target->state,
D3D12_RESOURCE_STATE_COPY_SOURCE);
resolve_target->state = D3D12_RESOURCE_STATE_COPY_SOURCE;
command_processor_->PushTransitionBarrier(copy_buffer, copy_buffer_state,
D3D12_RESOURCE_STATE_COPY_DEST);
copy_buffer_state = D3D12_RESOURCE_STATE_COPY_DEST;
command_processor_->SubmitBarriers();
location_source.pResource = resolve_target->resource;
location_source.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX;
location_source.SubresourceIndex = 0;
location_dest.pResource = copy_buffer;
location_dest.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT;
location_dest.PlacedFootprint = resolve_target->footprint;
command_list->CopyTextureRegion(&location_dest, 0, 0, 0, &location_source,
nullptr);
// Tile the resolved texture. The texture cache expects the buffer to be a
// non-pixel-shader SRV.
command_processor_->PushTransitionBarrier(
copy_buffer, copy_buffer_state,
D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE);
copy_buffer_state = D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE;
texture_cache->TileResolvedTexture(
dest_format, dest_address, dest_pitch, dest_height, copy_width,
copy_height, dest_endian, copy_buffer, resolve_target->copy_buffer_size,
resolve_target->footprint);
// Done with the copy buffer.
command_processor_->ReleaseScratchGPUBuffer(copy_buffer, copy_buffer_state);
}
return true;
@ -1566,6 +1597,11 @@ RenderTargetCache::ResolveTarget* RenderTargetCache::FindOrCreateResolveTarget(
resolve_target->rtv_handle.ptr = rtv_handle.ptr;
resolve_target->key.value = key.value;
resolve_target->heap_page_first = min_heap_page_first;
UINT64 copy_buffer_size;
device->GetCopyableFootprints(&resource_desc, 0, 1, 0,
&resolve_target->footprint, nullptr, nullptr,
&copy_buffer_size);
resolve_target->copy_buffer_size = uint32_t(copy_buffer_size);
resolve_targets_.insert(std::make_pair(key.value, resolve_target));
return resolve_target;

View File

@ -343,7 +343,11 @@ class RenderTargetCache {
D3D12_RESOURCE_STATES state;
D3D12_CPU_DESCRIPTOR_HANDLE rtv_handle;
ResolveTargetKey key;
// The first 4 MB page in the heaps.
uint32_t heap_page_first;
D3D12_PLACED_SUBRESOURCE_FOOTPRINT footprint;
// Buffer size needed to copy the resolve target to a linear buffer.
uint32_t copy_buffer_size;
};
void ClearBindings();

View File

@ -32,18 +32,26 @@ XE_BYTE_SWAP_16_OVERLOAD(uint3)
XE_BYTE_SWAP_16_OVERLOAD(uint4)
uint2 XeByteSwap64(uint2 v, uint endian) {
if (endian & 4u) {
if ((endian & 4u) != 0u) {
v = v.yx;
endian = 2u;
}
return XeByteSwap(v, endian);
}
uint4 XeByteSwap64(uint4 v, uint endian) {
if (endian & 4u) {
if ((endian & 4u) != 0u) {
v = v.yxwz;
endian = 2u;
}
return XeByteSwap(v, endian);
}
// Endian-swaps four dwords treated as one 128-bit value.
// If bit 2 of `endian` is clear, `endian` is forwarded to XeByteSwap unchanged.
// Otherwise the dwords are reordered first - fully reversed (8in128) when bit 0
// is also set, exchanged within each 64-bit half (8in64) when it is not - and
// XeByteSwap is then applied with mode 2.
uint4 XeByteSwap128(uint4 v, uint endian) {
  if ((endian & 4u) == 0u) {
    // No dword reordering requested.
    return XeByteSwap(v, endian);
  }
  uint4 reordered;
  if ((endian & 1u) != 0u) {
    // 8in128
    reordered = v.wzyx;
  } else {
    // 8in64
    reordered = v.yxwz;
  }
  return XeByteSwap(reordered, 2u);
}
#endif // XENIA_GPU_D3D12_SHADERS_BYTE_SWAP_HLSLI_

View File

@ -1,4 +1,4 @@
cbuffer XeResolveCbuffer : register(b0) {
cbuffer XeResolveConstants : register(b0) {
// In samples.
// Left and top in the lower 16 bits, width and height in the upper.
uint2 xe_resolve_rect_samples;

View File

@ -4,7 +4,7 @@
#include "byte_swap.hlsli"
#include "texture_address.hlsli"
cbuffer xe_texture_copy_constants : register(b0) {
cbuffer XeTextureCopyConstants : register(b0) {
uint xe_texture_copy_guest_base;
// For linear textures - row byte pitch.
uint xe_texture_copy_guest_pitch;

View File

@ -0,0 +1,26 @@
#ifndef XENIA_GPU_D3D12_SHADERS_TEXTURE_TILE_HLSLI_
#define XENIA_GPU_D3D12_SHADERS_TEXTURE_TILE_HLSLI_
#include "byte_swap.hlsli"
#include "texture_address.hlsli"
// Constants for the texture tiling compute shaders (bound to b0).
cbuffer XeTextureTileConstants : register(b0) {
// Offset of the destination texture, in bytes, either from the start of the
// shared memory or from the start of the typed UAV.
uint xe_texture_tile_guest_base;
// 0:2 - endianness (up to Xin128).
// 3:31 - actual guest texture width (the pitch used for computing the tiled
// address of each texel).
uint xe_texture_tile_endian_guest_pitch;
// Size to copy - texels with an X or Y index equal to or greater than this
// won't be written.
// Width in the lower 16 bits, height in the upper.
uint xe_texture_tile_size;
// Byte offset to the first texel from the beginning of the source buffer.
uint xe_texture_tile_host_base;
// Row pitch of the source buffer, in bytes.
uint xe_texture_tile_host_pitch;
}
ByteAddressBuffer xe_texture_tile_source : register(t0);
// The target is u0, may be a raw UAV or a typed UAV depending on the format.
#endif // XENIA_GPU_D3D12_SHADERS_TEXTURE_TILE_HLSLI_

View File

@ -0,0 +1,31 @@
#include "texture_tile.hlsli"
RWByteAddressBuffer xe_texture_tile_dest : register(u0);
// Copies 32bpp texels from the linear source buffer to their tiled locations in
// the raw destination buffer, byte-swapping them on the way.
// Each thread handles a horizontal run of 4 texels, so one 8x32 thread group
// covers 32x32 texels.
[numthreads(8, 32, 1)]
void main(uint3 xe_thread_id : SV_DispatchThreadID) {
  // Index of the first texel of this thread's run.
  uint2 run_start = uint2(xe_thread_id.x << 2u, xe_thread_id.y);
  // Width in the lower 16 bits of the constant, height in the upper.
  uint2 copy_size =
      uint2(xe_texture_tile_size & 0xFFFFu, xe_texture_tile_size >> 16u);
  [branch] if (run_start.x >= copy_size.x || run_start.y >= copy_size.y) {
    return;
  }

  // Fetch the whole run from the linear source and swap its bytes.
  uint source_offset = xe_texture_tile_host_base +
                       run_start.y * xe_texture_tile_host_pitch +
                       run_start.x * 4u;
  uint endian = xe_texture_tile_endian_guest_pitch & 7u;
  uint4 run = XeByteSwap(xe_texture_tile_source.Load4(source_offset), endian);

  // Tiled destination address of each of the 4 texels.
  uint guest_pitch = xe_texture_tile_endian_guest_pitch >> 3u;
  uint4 dest_offsets = xe_texture_tile_guest_base +
                       XeTextureTiledOffset2D(run_start, guest_pitch, 2u);

  // The first texel is known to be inside the copy area; stop at the first
  // texel of the run that falls outside the width.
  xe_texture_tile_dest.Store(dest_offsets.x, run.x);
  [branch] if (run_start.x + 1u >= copy_size.x) {
    return;
  }
  xe_texture_tile_dest.Store(dest_offsets.y, run.y);
  [branch] if (run_start.x + 2u >= copy_size.x) {
    return;
  }
  xe_texture_tile_dest.Store(dest_offsets.z, run.z);
  [branch] if (run_start.x + 3u >= copy_size.x) {
    return;
  }
  xe_texture_tile_dest.Store(dest_offsets.w, run.w);
}

View File

@ -34,81 +34,145 @@ namespace d3d12 {
#include "xenia/gpu/d3d12/shaders/bin/texture_load_depth_float_cs.h"
#include "xenia/gpu/d3d12/shaders/bin/texture_load_depth_unorm_cs.h"
#include "xenia/gpu/d3d12/shaders/bin/texture_load_dxt3a_cs.h"
#include "xenia/gpu/d3d12/shaders/bin/texture_tile_32bpp_cs.h"
const TextureCache::HostFormat TextureCache::host_formats_[64] = {
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_1_REVERSE
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_1
{DXGI_FORMAT_R8_UNORM, CopyMode::k8bpb}, // k_8
{DXGI_FORMAT_B5G5R5A1_UNORM, CopyMode::k16bpb}, // k_1_5_5_5
{DXGI_FORMAT_B5G6R5_UNORM, CopyMode::k16bpb}, // k_5_6_5
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_6_5_5
{DXGI_FORMAT_R8G8B8A8_UNORM, CopyMode::k32bpb}, // k_8_8_8_8
{DXGI_FORMAT_R10G10B10A2_UNORM, CopyMode::k32bpb}, // k_2_10_10_10
{DXGI_FORMAT_R8_UNORM, CopyMode::k8bpb}, // k_8_A
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_8_B
{DXGI_FORMAT_R8G8_UNORM, CopyMode::k16bpb}, // k_8_8
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_Cr_Y1_Cb_Y0_REP
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_Y1_Cr_Y0_Cb_REP
{DXGI_FORMAT_R16G16_UNORM, CopyMode::k32bpb}, // k_16_16_EDRAM
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_8_8_8_8_A
{DXGI_FORMAT_B4G4R4A4_UNORM, CopyMode::k16bpb}, // k_4_4_4_4
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_10_11_11
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_11_11_10
{DXGI_FORMAT_BC1_UNORM, CopyMode::k64bpb}, // k_DXT1
{DXGI_FORMAT_BC2_UNORM, CopyMode::k128bpb}, // k_DXT2_3
{DXGI_FORMAT_BC3_UNORM, CopyMode::k128bpb}, // k_DXT4_5
{DXGI_FORMAT_R16G16B16A16_UNORM, CopyMode::k64bpb}, // k_16_16_16_16_EDRAM
// k_1_REVERSE
{DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, TileMode::kUnknown},
// k_1
{DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, TileMode::kUnknown},
// k_8
{DXGI_FORMAT_R8_UNORM, LoadMode::k8bpb, TileMode::kUnknown},
// k_1_5_5_5
{DXGI_FORMAT_B5G5R5A1_UNORM, LoadMode::k16bpb, TileMode::kUnknown},
// k_5_6_5
{DXGI_FORMAT_B5G6R5_UNORM, LoadMode::k16bpb, TileMode::kUnknown},
// k_6_5_5
{DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, TileMode::kUnknown},
// k_8_8_8_8
{DXGI_FORMAT_R8G8B8A8_UNORM, LoadMode::k32bpb, TileMode::k32bpp},
// k_2_10_10_10
{DXGI_FORMAT_R10G10B10A2_UNORM, LoadMode::k32bpb, TileMode::kUnknown},
// k_8_A
{DXGI_FORMAT_R8_UNORM, LoadMode::k8bpb, TileMode::kUnknown},
// k_8_B
{DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, TileMode::kUnknown},
// k_8_8
{DXGI_FORMAT_R8G8_UNORM, LoadMode::k16bpb, TileMode::kUnknown},
// k_Cr_Y1_Cb_Y0_REP
{DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, TileMode::kUnknown},
// k_Y1_Cr_Y0_Cb_REP
{DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, TileMode::kUnknown},
// k_16_16_EDRAM
{DXGI_FORMAT_R16G16_UNORM, LoadMode::k32bpb, TileMode::kUnknown},
// k_8_8_8_8_A
{DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, TileMode::kUnknown},
// k_4_4_4_4
{DXGI_FORMAT_B4G4R4A4_UNORM, LoadMode::k16bpb, TileMode::kUnknown},
// k_10_11_11
{DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, TileMode::kUnknown},
// k_11_11_10
{DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, TileMode::kUnknown},
// k_DXT1
{DXGI_FORMAT_BC1_UNORM, LoadMode::k64bpb, TileMode::kUnknown},
// k_DXT2_3
{DXGI_FORMAT_BC2_UNORM, LoadMode::k128bpb, TileMode::kUnknown},
// k_DXT4_5
{DXGI_FORMAT_BC3_UNORM, LoadMode::k128bpb, TileMode::kUnknown},
// k_16_16_16_16_EDRAM
{DXGI_FORMAT_R16G16B16A16_UNORM, LoadMode::k64bpb, TileMode::kUnknown},
// R32_FLOAT for depth because shaders would require an additional SRV to
// sample stencil, which we don't provide.
{DXGI_FORMAT_R32_FLOAT, CopyMode::kDepthUnorm}, // k_24_8
{DXGI_FORMAT_R32_FLOAT, CopyMode::kDepthFloat}, // k_24_8_FLOAT
{DXGI_FORMAT_R16_UNORM, CopyMode::k16bpb}, // k_16
{DXGI_FORMAT_R16G16_UNORM, CopyMode::k32bpb}, // k_16_16
{DXGI_FORMAT_R16G16B16A16_UNORM, CopyMode::k64bpb}, // k_16_16_16_16
{DXGI_FORMAT_R16_FLOAT, CopyMode::k16bpb}, // k_16_EXPAND
{DXGI_FORMAT_R16G16_FLOAT, CopyMode::k32bpb}, // k_16_16_EXPAND
{DXGI_FORMAT_R16G16B16A16_FLOAT, CopyMode::k64bpb}, // k_16_16_16_16_EXPAND
{DXGI_FORMAT_R16_FLOAT, CopyMode::k16bpb}, // k_16_FLOAT
{DXGI_FORMAT_R16G16_FLOAT, CopyMode::k32bpb}, // k_16_16_FLOAT
{DXGI_FORMAT_R16G16B16A16_FLOAT, CopyMode::k64bpb}, // k_16_16_16_16_FLOAT
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_32
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_32_32
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_32_32_32_32
{DXGI_FORMAT_R32_FLOAT, CopyMode::k32bpb}, // k_32_FLOAT
{DXGI_FORMAT_R32G32_FLOAT, CopyMode::k64bpb}, // k_32_32_FLOAT
{DXGI_FORMAT_R32G32B32A32_FLOAT, CopyMode::k128bpb}, // k_32_32_32_32_FLOAT
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_32_AS_8
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_32_AS_8_8
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_16_MPEG
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_16_16_MPEG
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_8_INTERLACED
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_32_AS_8_INTERLACED
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_32_AS_8_8_INTERLACED
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_16_INTERLACED
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_16_MPEG_INTERLACED
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_16_16_MPEG_INTERLACED
{DXGI_FORMAT_BC5_UNORM, CopyMode::k128bpb}, // k_DXN
{DXGI_FORMAT_R8G8B8A8_UNORM, CopyMode::k32bpb}, // k_8_8_8_8_AS_16_16_16_16
{DXGI_FORMAT_BC1_UNORM, CopyMode::k64bpb}, // k_DXT1_AS_16_16_16_16
{DXGI_FORMAT_BC2_UNORM, CopyMode::k128bpb}, // k_DXT2_3_AS_16_16_16_16
{DXGI_FORMAT_BC3_UNORM, CopyMode::k128bpb}, // k_DXT4_5_AS_16_16_16_16
{DXGI_FORMAT_R10G10B10A2_UNORM,
CopyMode::k32bpb}, // k_2_10_10_10_AS_16_16_16_16
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_10_11_11_AS_16_16_16_16
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_11_11_10_AS_16_16_16_16
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_32_32_32_FLOAT
{DXGI_FORMAT_BC2_UNORM, CopyMode::kDXT3A}, // k_DXT3A
{DXGI_FORMAT_BC4_UNORM, CopyMode::k64bpb}, // k_DXT5A
{DXGI_FORMAT_R8G8_UNORM, CopyMode::kCTX1}, // k_CTX1
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_DXT3A_AS_1_1_1_1
{DXGI_FORMAT_R8G8B8A8_UNORM, CopyMode::k32bpb}, // k_8_8_8_8_GAMMA
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_2_10_10_10_FLOAT_EDRAM
// k_24_8
{DXGI_FORMAT_R32_FLOAT, LoadMode::kDepthUnorm, TileMode::kUnknown},
// k_24_8_FLOAT
{DXGI_FORMAT_R32_FLOAT, LoadMode::kDepthFloat, TileMode::kUnknown},
// k_16
{DXGI_FORMAT_R16_UNORM, LoadMode::k16bpb, TileMode::kUnknown},
// k_16_16
{DXGI_FORMAT_R16G16_UNORM, LoadMode::k32bpb, TileMode::kUnknown},
// k_16_16_16_16
{DXGI_FORMAT_R16G16B16A16_UNORM, LoadMode::k64bpb, TileMode::kUnknown},
// k_16_EXPAND
{DXGI_FORMAT_R16_FLOAT, LoadMode::k16bpb, TileMode::kUnknown},
// k_16_16_EXPAND
{DXGI_FORMAT_R16G16_FLOAT, LoadMode::k32bpb, TileMode::kUnknown},
// k_16_16_16_16_EXPAND
{DXGI_FORMAT_R16G16B16A16_FLOAT, LoadMode::k64bpb, TileMode::kUnknown},
// k_16_FLOAT
{DXGI_FORMAT_R16_FLOAT, LoadMode::k16bpb, TileMode::kUnknown},
// k_16_16_FLOAT
{DXGI_FORMAT_R16G16_FLOAT, LoadMode::k32bpb, TileMode::kUnknown},
// k_16_16_16_16_FLOAT
{DXGI_FORMAT_R16G16B16A16_FLOAT, LoadMode::k64bpb, TileMode::kUnknown},
// k_32
{DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, TileMode::kUnknown},
// k_32_32
{DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, TileMode::kUnknown},
// k_32_32_32_32
{DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, TileMode::kUnknown},
// k_32_FLOAT
{DXGI_FORMAT_R32_FLOAT, LoadMode::k32bpb, TileMode::kUnknown},
// k_32_32_FLOAT
{DXGI_FORMAT_R32G32_FLOAT, LoadMode::k64bpb, TileMode::kUnknown},
// k_32_32_32_32_FLOAT
{DXGI_FORMAT_R32G32B32A32_FLOAT, LoadMode::k128bpb, TileMode::kUnknown},
// k_32_AS_8
{DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, TileMode::kUnknown},
// k_32_AS_8_8
{DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, TileMode::kUnknown},
// k_16_MPEG
{DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, TileMode::kUnknown},
// k_16_16_MPEG
{DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, TileMode::kUnknown},
// k_8_INTERLACED
{DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, TileMode::kUnknown},
// k_32_AS_8_INTERLACED
{DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, TileMode::kUnknown},
// k_32_AS_8_8_INTERLACED
{DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, TileMode::kUnknown},
// k_16_INTERLACED
{DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, TileMode::kUnknown},
// k_16_MPEG_INTERLACED
{DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, TileMode::kUnknown},
// k_16_16_MPEG_INTERLACED
{DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, TileMode::kUnknown},
// k_DXN
{DXGI_FORMAT_BC5_UNORM, LoadMode::k128bpb, TileMode::kUnknown},
// k_8_8_8_8_AS_16_16_16_16
{DXGI_FORMAT_R8G8B8A8_UNORM, LoadMode::k32bpb, TileMode::kUnknown},
// k_DXT1_AS_16_16_16_16
{DXGI_FORMAT_BC1_UNORM, LoadMode::k64bpb, TileMode::kUnknown},
// k_DXT2_3_AS_16_16_16_16
{DXGI_FORMAT_BC2_UNORM, LoadMode::k128bpb, TileMode::kUnknown},
// k_DXT4_5_AS_16_16_16_16
{DXGI_FORMAT_BC3_UNORM, LoadMode::k128bpb, TileMode::kUnknown},
// k_2_10_10_10_AS_16_16_16_16
{DXGI_FORMAT_R10G10B10A2_UNORM, LoadMode::k32bpb, TileMode::kUnknown},
// k_10_11_11_AS_16_16_16_16
{DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, TileMode::kUnknown},
// k_11_11_10_AS_16_16_16_16
{DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, TileMode::kUnknown},
// k_32_32_32_FLOAT
{DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, TileMode::kUnknown},
// k_DXT3A
{DXGI_FORMAT_BC2_UNORM, LoadMode::kDXT3A, TileMode::kUnknown},
// k_DXT5A
{DXGI_FORMAT_BC4_UNORM, LoadMode::k64bpb, TileMode::kUnknown},
// k_CTX1
{DXGI_FORMAT_R8G8_UNORM, LoadMode::kCTX1, TileMode::kUnknown},
// k_DXT3A_AS_1_1_1_1
{DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, TileMode::kUnknown},
// k_8_8_8_8_GAMMA
{DXGI_FORMAT_R8G8B8A8_UNORM, LoadMode::k32bpb, TileMode::kUnknown},
// k_2_10_10_10_FLOAT_EDRAM
{DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, TileMode::kUnknown},
};
const char* const TextureCache::dimension_names_[4] = {"1D", "2D", "3D",
"cube"};
const TextureCache::CopyModeInfo TextureCache::copy_mode_info_[] = {
const TextureCache::LoadModeInfo TextureCache::load_mode_info_[] = {
{texture_load_8bpb_cs, sizeof(texture_load_8bpb_cs)},
{texture_load_16bpb_cs, sizeof(texture_load_16bpb_cs)},
{texture_load_32bpb_cs, sizeof(texture_load_32bpb_cs)},
@ -120,6 +184,10 @@ const TextureCache::CopyModeInfo TextureCache::copy_mode_info_[] = {
{texture_load_depth_float_cs, sizeof(texture_load_depth_float_cs)},
};
// Compiled compute shader for each tiling mode. Entries are looked up by
// TileMode value when the tiling pipelines are created, so the order here must
// match the order of the TileMode enumerators.
const TextureCache::TileModeInfo TextureCache::tile_mode_info_[] = {
{texture_tile_32bpp_cs, sizeof(texture_tile_32bpp_cs)},
};
TextureCache::TextureCache(D3D12CommandProcessor* command_processor,
RegisterFile* register_file,
SharedMemory* shared_memory)
@ -133,7 +201,7 @@ bool TextureCache::Initialize() {
auto device =
command_processor_->GetD3D12Context()->GetD3D12Provider()->GetDevice();
// Create the copying root signature.
// Create the loading root signature.
D3D12_ROOT_PARAMETER root_parameters[2];
// Parameter 0 is constants (changed very often when untiling).
root_parameters[0].ParameterType = D3D12_ROOT_PARAMETER_TYPE_CBV;
@ -167,7 +235,7 @@ bool TextureCache::Initialize() {
if (FAILED(D3D12SerializeRootSignature(
&root_signature_desc, D3D_ROOT_SIGNATURE_VERSION_1,
&root_signature_blob, &root_signature_error_blob))) {
XELOGE("Failed to serialize the texture copying root signature");
XELOGE("Failed to serialize the texture loading root signature");
if (root_signature_error_blob != nullptr) {
XELOGE("%s", reinterpret_cast<const char*>(
root_signature_error_blob->GetBufferPointer()));
@ -178,37 +246,81 @@ bool TextureCache::Initialize() {
}
if (root_signature_error_blob != nullptr) {
root_signature_error_blob->Release();
root_signature_error_blob = nullptr;
}
if (FAILED(device->CreateRootSignature(
0, root_signature_blob->GetBufferPointer(),
root_signature_blob->GetBufferSize(),
IID_PPV_ARGS(&copy_root_signature_)))) {
XELOGE("Failed to create the texture copying root signature");
IID_PPV_ARGS(&load_root_signature_)))) {
XELOGE("Failed to create the texture loading root signature");
root_signature_blob->Release();
Shutdown();
return false;
}
root_signature_blob->Release();
// Create the tiling root signature (almost the same, but with root constants
// in parameter 0).
root_parameters[0].ParameterType = D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS;
root_parameters[0].Constants.ShaderRegister = 0;
root_parameters[0].Constants.RegisterSpace = 0;
root_parameters[0].Constants.Num32BitValues =
sizeof(TileConstants) / sizeof(uint32_t);
if (FAILED(D3D12SerializeRootSignature(
&root_signature_desc, D3D_ROOT_SIGNATURE_VERSION_1,
&root_signature_blob, &root_signature_error_blob))) {
XELOGE("Failed to serialize the texture tiling root signature");
if (root_signature_error_blob != nullptr) {
XELOGE("%s", reinterpret_cast<const char*>(
root_signature_error_blob->GetBufferPointer()));
root_signature_error_blob->Release();
}
Shutdown();
return false;
}
if (root_signature_error_blob != nullptr) {
root_signature_error_blob->Release();
root_signature_error_blob = nullptr;
}
if (FAILED(device->CreateRootSignature(
0, root_signature_blob->GetBufferPointer(),
root_signature_blob->GetBufferSize(),
IID_PPV_ARGS(&tile_root_signature_)))) {
XELOGE("Failed to create the texture tiling root signature");
root_signature_blob->Release();
Shutdown();
return false;
}
root_signature_blob->Release();
// Create the copying pipelines.
// Create the loading and tiling pipelines.
D3D12_COMPUTE_PIPELINE_STATE_DESC pipeline_desc;
pipeline_desc.pRootSignature = copy_root_signature_;
pipeline_desc.pRootSignature = load_root_signature_;
pipeline_desc.NodeMask = 0;
pipeline_desc.CachedPSO.pCachedBlob = nullptr;
pipeline_desc.CachedPSO.CachedBlobSizeInBytes = 0;
pipeline_desc.Flags = D3D12_PIPELINE_STATE_FLAG_NONE;
for (uint32_t i = 0; i < uint32_t(CopyMode::kCount); ++i) {
const CopyModeInfo& mode_info = copy_mode_info_[i];
if (mode_info.load_shader != nullptr) {
pipeline_desc.CS.pShaderBytecode = mode_info.load_shader;
pipeline_desc.CS.BytecodeLength = mode_info.load_shader_size;
for (uint32_t i = 0; i < uint32_t(LoadMode::kCount); ++i) {
const LoadModeInfo& mode_info = load_mode_info_[i];
pipeline_desc.CS.pShaderBytecode = mode_info.shader;
pipeline_desc.CS.BytecodeLength = mode_info.shader_size;
if (FAILED(device->CreateComputePipelineState(
&pipeline_desc, IID_PPV_ARGS(&copy_load_pipelines_[i])))) {
XELOGE("Failed to create the texture copying pipeline for mode %u", i);
&pipeline_desc, IID_PPV_ARGS(&load_pipelines_[i])))) {
XELOGE("Failed to create the texture loading pipeline for mode %u", i);
Shutdown();
return false;
}
}
pipeline_desc.pRootSignature = tile_root_signature_;
for (uint32_t i = 0; i < uint32_t(TileMode::kCount); ++i) {
const TileModeInfo& mode_info = tile_mode_info_[i];
pipeline_desc.CS.pShaderBytecode = mode_info.shader;
pipeline_desc.CS.BytecodeLength = mode_info.shader_size;
if (FAILED(device->CreateComputePipelineState(
&pipeline_desc, IID_PPV_ARGS(&tile_pipelines_[i])))) {
XELOGE("Failed to create the texture tiling pipeline for mode %u", i);
Shutdown();
return false;
}
}
return true;
@ -217,15 +329,25 @@ bool TextureCache::Initialize() {
void TextureCache::Shutdown() {
ClearCache();
for (uint32_t i = 0; i < uint32_t(CopyMode::kCount); ++i) {
if (copy_load_pipelines_[i] != nullptr) {
copy_load_pipelines_[i]->Release();
copy_load_pipelines_[i] = nullptr;
for (uint32_t i = 0; i < uint32_t(TileMode::kCount); ++i) {
if (tile_pipelines_[i] != nullptr) {
tile_pipelines_[i]->Release();
tile_pipelines_[i] = nullptr;
}
}
if (copy_root_signature_ != nullptr) {
copy_root_signature_->Release();
copy_root_signature_ = nullptr;
if (tile_root_signature_ != nullptr) {
tile_root_signature_->Release();
tile_root_signature_ = nullptr;
}
for (uint32_t i = 0; i < uint32_t(LoadMode::kCount); ++i) {
if (load_pipelines_[i] != nullptr) {
load_pipelines_[i]->Release();
load_pipelines_[i] = nullptr;
}
}
if (load_root_signature_ != nullptr) {
load_root_signature_->Release();
load_root_signature_ = nullptr;
}
}
@ -451,14 +573,87 @@ void TextureCache::WriteSampler(uint32_t fetch_constant,
}
DXGI_FORMAT TextureCache::GetResolveDXGIFormat(TextureFormat format) {
// TODO(Triang3l): Change this to a check whether there is a tiling pipeline.
switch (format) {
case TextureFormat::k_8_8_8_8:
return host_formats_[uint32_t(format)].dxgi_format;
default:
break;
const HostFormat& host_format = host_formats_[uint32_t(format)];
return host_format.tile_mode != TileMode::kUnknown ? host_format.dxgi_format
: DXGI_FORMAT_UNKNOWN;
}
return DXGI_FORMAT_UNKNOWN;
// Tiles a resolved texture stored linearly in `buffer` and writes it to the
// shared memory at the guest address `texture_base`.
//
// format - guest texture format; must have a tiling mode in host_formats_.
// texture_base - guest address of the destination (masked to 29 bits, must be
//                4-byte-aligned).
// texture_pitch, texture_height - guest texture dimensions; aligned to 32 for
//                                 calculating the written memory range.
// resolve_width, resolve_height - size of the area to actually write.
// endian - byte swap mode applied by the tiling shader.
// buffer - linear source; must be in the non-pixel-shader SRV state.
// buffer_size - size of `buffer` in bytes.
// footprint - placement of the texture data within `buffer`.
//
// Returns false if the format can't be tiled, there is no current command
// list, the destination memory can't be made resident, or view descriptors
// can't be obtained.
bool TextureCache::TileResolvedTexture(
    TextureFormat format, uint32_t texture_base, uint32_t texture_pitch,
    uint32_t texture_height, uint32_t resolve_width, uint32_t resolve_height,
    Endian128 endian, ID3D12Resource* buffer, uint32_t buffer_size,
    const D3D12_PLACED_SUBRESOURCE_FOOTPRINT& footprint) {
  TileMode tile_mode = host_formats_[uint32_t(format)].tile_mode;
  if (tile_mode == TileMode::kUnknown) {
    assert_always();
    return false;
  }

  auto command_list = command_processor_->GetCurrentCommandList();
  if (command_list == nullptr) {
    return false;
  }
  auto provider = command_processor_->GetD3D12Context()->GetD3D12Provider();
  auto device = provider->GetDevice();

  texture_base &= 0x1FFFFFFF;
  // TODO(Triang3l): Allow smaller alignment for 8- and 16-bit textures (but
  // probably not really needed).
  assert_false(texture_base & 0x3);

  // Calculate the texture size for memory operations and ensure we can write to
  // the specified shared memory location.
  uint32_t texture_size = texture_util::GetGuestMipStorageSize(
      xe::align(texture_pitch, 32u), xe::align(texture_height, 32u), 1, true,
      format, nullptr);
  if (!shared_memory_->MakeTilesResident(texture_base, texture_size)) {
    return false;
  }

  // Tile the texture.
  // TODO(Triang3l): Typed UAVs for 8- and 16-bit textures.
  D3D12_CPU_DESCRIPTOR_HANDLE descriptor_cpu_start;
  D3D12_GPU_DESCRIPTOR_HANDLE descriptor_gpu_start;
  if (command_processor_->RequestViewDescriptors(0, 2, 2, descriptor_cpu_start,
                                                 descriptor_gpu_start) == 0) {
    return false;
  }
  shared_memory_->UseForWriting();
  command_processor_->SubmitBarriers();
  command_list->SetComputeRootSignature(tile_root_signature_);

  // Root constants - the layout must match what the tiling shaders expect.
  TileConstants tile_constants;
  tile_constants.guest_base = texture_base;
  tile_constants.endian_guest_pitch = uint32_t(endian) | (texture_pitch << 3);
  tile_constants.size = resolve_width | (resolve_height << 16);
  tile_constants.host_base = uint32_t(footprint.Offset);
  tile_constants.host_pitch = uint32_t(footprint.Footprint.RowPitch);
  command_list->SetComputeRoot32BitConstants(
      0, sizeof(tile_constants) / sizeof(uint32_t), &tile_constants, 0);

  // Descriptor 0 - raw SRV of the linear source buffer.
  D3D12_SHADER_RESOURCE_VIEW_DESC srv_desc;
  srv_desc.Format = DXGI_FORMAT_R32_TYPELESS;
  srv_desc.ViewDimension = D3D12_SRV_DIMENSION_BUFFER;
  srv_desc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING;
  srv_desc.Buffer.FirstElement = 0;
  // Elements of a raw (R32_TYPELESS) view are 4 bytes each, while buffer_size
  // is in bytes - passing the byte count would make the view extend past the
  // end of the data.
  srv_desc.Buffer.NumElements = buffer_size >> 2;
  srv_desc.Buffer.StructureByteStride = 0;
  srv_desc.Buffer.Flags = D3D12_BUFFER_SRV_FLAG_RAW;
  device->CreateShaderResourceView(buffer, &srv_desc, descriptor_cpu_start);

  // Descriptor 1 - raw UAV of the shared memory (the destination).
  D3D12_CPU_DESCRIPTOR_HANDLE uav_cpu_handle;
  uav_cpu_handle.ptr =
      descriptor_cpu_start.ptr + provider->GetDescriptorSizeView();
  shared_memory_->CreateRawUAV(uav_cpu_handle);
  command_list->SetComputeRootDescriptorTable(1, descriptor_gpu_start);

  command_processor_->SetComputePipeline(tile_pipelines_[uint32_t(tile_mode)]);
  // Each thread group processes 32x32 texels.
  command_list->Dispatch((resolve_width + 31) >> 5, (resolve_height + 31) >> 5,
                         1);

  // Commit the write.
  command_processor_->PushUAVBarrier(shared_memory_->GetBuffer());

  // Invalidate textures.
  shared_memory_->RangeWrittenByGPU(texture_base, texture_size);

  return true;
}
bool TextureCache::RequestSwapTexture(D3D12_CPU_DESCRIPTOR_HANDLE handle) {
@ -766,11 +961,11 @@ bool TextureCache::LoadTextureData(Texture* texture) {
// Get the pipeline.
const HostFormat& host_format = host_formats_[uint32_t(texture->key.format)];
if (host_format.copy_mode == CopyMode::kUnknown) {
if (host_format.load_mode == LoadMode::kUnknown) {
return false;
}
ID3D12PipelineState* pipeline =
copy_load_pipelines_[uint32_t(host_format.copy_mode)];
load_pipelines_[uint32_t(host_format.load_mode)];
if (pipeline == nullptr) {
return false;
}
@ -839,7 +1034,7 @@ bool TextureCache::LoadTextureData(Texture* texture) {
device->CreateUnorderedAccessView(copy_buffer, nullptr, &uav_desc,
descriptor_cpu_uav);
command_processor_->SetComputePipeline(pipeline);
command_list->SetComputeRootSignature(copy_root_signature_);
command_list->SetComputeRootSignature(load_root_signature_);
command_list->SetComputeRootDescriptorTable(1, descriptor_gpu_start);
// Submit commands.
@ -849,13 +1044,13 @@ bool TextureCache::LoadTextureData(Texture* texture) {
uint32_t mip_first = base_in_sync ? 1 : 0;
uint32_t mip_last = mips_in_sync ? 0 : resource_desc.MipLevels - 1;
auto cbuffer_pool = command_processor_->GetConstantBufferPool();
CopyConstants copy_constants;
copy_constants.is_3d = is_3d ? 1 : 0;
copy_constants.endianness = uint32_t(texture->key.endianness);
LoadConstants load_constants;
load_constants.is_3d = is_3d ? 1 : 0;
load_constants.endianness = uint32_t(texture->key.endianness);
if (!texture->key.packed_mips) {
copy_constants.guest_mip_offset[0] = 0;
copy_constants.guest_mip_offset[1] = 0;
copy_constants.guest_mip_offset[2] = 0;
load_constants.guest_mip_offset[0] = 0;
load_constants.guest_mip_offset[1] = 0;
load_constants.guest_mip_offset[2] = 0;
}
for (uint32_t i = 0; i < slice_count; ++i) {
command_processor_->PushTransitionBarrier(
@ -863,48 +1058,48 @@ bool TextureCache::LoadTextureData(Texture* texture) {
copy_buffer_state = D3D12_RESOURCE_STATE_UNORDERED_ACCESS;
for (uint32_t j = mip_first; j <= mip_last; ++j) {
if (j == 0) {
copy_constants.guest_base =
load_constants.guest_base =
(texture->key.base_page << 12) + i * texture->base_slice_size;
} else {
copy_constants.guest_base =
load_constants.guest_base =
(texture->key.mip_page << 12) + i * texture->mip_slice_size;
}
copy_constants.guest_base += texture->mip_offsets[j];
copy_constants.guest_pitch = texture->key.tiled
? CopyConstants::kGuestPitchTiled
load_constants.guest_base += texture->mip_offsets[j];
load_constants.guest_pitch = texture->key.tiled
? LoadConstants::kGuestPitchTiled
: texture->mip_pitches[j];
copy_constants.host_base = uint32_t(host_layouts[j].Offset);
copy_constants.host_pitch = host_layouts[j].Footprint.RowPitch;
copy_constants.size_texels[0] = std::max(width >> j, 1u);
copy_constants.size_texels[1] = std::max(height >> j, 1u);
copy_constants.size_texels[2] = std::max(depth >> j, 1u);
copy_constants.size_blocks[0] =
(copy_constants.size_texels[0] + (block_width - 1)) / block_width;
copy_constants.size_blocks[1] =
(copy_constants.size_texels[1] + (block_height - 1)) / block_height;
copy_constants.size_blocks[2] = copy_constants.size_texels[2];
load_constants.host_base = uint32_t(host_layouts[j].Offset);
load_constants.host_pitch = host_layouts[j].Footprint.RowPitch;
load_constants.size_texels[0] = std::max(width >> j, 1u);
load_constants.size_texels[1] = std::max(height >> j, 1u);
load_constants.size_texels[2] = std::max(depth >> j, 1u);
load_constants.size_blocks[0] =
(load_constants.size_texels[0] + (block_width - 1)) / block_width;
load_constants.size_blocks[1] =
(load_constants.size_texels[1] + (block_height - 1)) / block_height;
load_constants.size_blocks[2] = load_constants.size_texels[2];
if (texture->key.packed_mips) {
texture_util::GetPackedMipOffset(width, height, depth, guest_format, j,
copy_constants.guest_mip_offset[0],
copy_constants.guest_mip_offset[1],
copy_constants.guest_mip_offset[2]);
load_constants.guest_mip_offset[0],
load_constants.guest_mip_offset[1],
load_constants.guest_mip_offset[2]);
}
D3D12_GPU_VIRTUAL_ADDRESS cbuffer_gpu_address;
uint8_t* cbuffer_mapping = cbuffer_pool->RequestFull(
xe::align(uint32_t(sizeof(copy_constants)), 256u), nullptr, nullptr,
xe::align(uint32_t(sizeof(load_constants)), 256u), nullptr, nullptr,
&cbuffer_gpu_address);
if (cbuffer_mapping == nullptr) {
command_processor_->ReleaseScratchGPUBuffer(copy_buffer,
copy_buffer_state);
return false;
}
std::memcpy(cbuffer_mapping, &copy_constants, sizeof(copy_constants));
std::memcpy(cbuffer_mapping, &load_constants, sizeof(load_constants));
command_list->SetComputeRootConstantBufferView(0, cbuffer_gpu_address);
command_processor_->SubmitBarriers();
// Each thread group processes 32x32x1 blocks.
command_list->Dispatch((copy_constants.size_blocks[0] + 31) >> 5,
(copy_constants.size_blocks[1] + 31) >> 5,
copy_constants.size_blocks[2]);
command_list->Dispatch((load_constants.size_blocks[0] + 31) >> 5,
(load_constants.size_blocks[1] + 31) >> 5,
load_constants.size_blocks[2]);
}
command_processor_->PushUAVBarrier(copy_buffer);
command_processor_->PushTransitionBarrier(copy_buffer, copy_buffer_state,

View File

@ -79,16 +79,18 @@ class TextureCache {
D3D12_CPU_DESCRIPTOR_HANDLE handle);
static DXGI_FORMAT GetResolveDXGIFormat(TextureFormat format);
// The source buffer must be in the non-pixel-shader SRV state.
bool TileResolvedTexture(TextureFormat format, uint32_t texture_base,
uint32_t texture_pitch, uint32_t texture_height,
uint32_t resolve_width, uint32_t resolve_height,
Endian128 endian, ID3D12Resource* buffer,
uint32_t buffer_size,
const D3D12_PLACED_SUBRESOURCE_FOOTPRINT& footprint);
bool RequestSwapTexture(D3D12_CPU_DESCRIPTOR_HANDLE handle);
private:
struct CopyModeInfo {
const void* load_shader;
size_t load_shader_size;
};
enum class CopyMode {
enum class LoadMode {
k8bpb,
k16bpb,
k32bpb,
@ -104,9 +106,30 @@ class TextureCache {
kUnknown = kCount
};
// Compute shader used to create the loading pipeline for one LoadMode.
struct LoadModeInfo {
// Compiled shader bytecode.
const void* shader;
// Size of the bytecode in bytes.
size_t shader_size;
};
// Tiling modes for storing textures after resolving - needed only for the
// formats that can be resolved to.
// The values are used as indices into tile_mode_info_ and tile_pipelines_.
enum class TileMode {
k32bpp,
// Number of the actual tiling modes (the size of the per-mode arrays).
kCount,
// Assigned to the formats that have no tiling shader (can't be resolved to).
kUnknown = kCount
};
// Compute shader used to create the tiling pipeline for one TileMode.
struct TileModeInfo {
// Compiled shader bytecode.
const void* shader;
// Size of the bytecode in bytes.
size_t shader_size;
};
struct HostFormat {
DXGI_FORMAT dxgi_format;
CopyMode copy_mode;
LoadMode load_mode;
TileMode tile_mode;
};
union TextureKey {
@ -200,7 +223,7 @@ class TextureCache {
bool mips_in_sync;
};
struct CopyConstants {
struct LoadConstants {
// vec4 0.
uint32_t guest_base;
// For linear textures - row byte pitch.
@ -223,6 +246,22 @@ class TextureCache {
static constexpr uint32_t kGuestPitchTiled = UINT32_MAX;
};
// Constants passed to the tiling shaders as 32-bit root constants. The order
// and the size of the fields must match the XeTextureTileConstants cbuffer in
// the shaders.
struct TileConstants {
// Either from the start of the shared memory or from the start of the typed
// UAV, in bytes.
uint32_t guest_base;
// 0:2 - endianness (up to Xin128).
// 3:31 - actual guest texture width.
uint32_t endian_guest_pitch;
// Size to copy - texels with an X or Y index equal to or greater than this
// won't be written.
// Width in the lower 16 bits, height in the upper.
uint32_t size;
// Byte offset to the first texel from the beginning of the source buffer.
uint32_t host_base;
// Row pitch of the source buffer, in bytes.
uint32_t host_pitch;
};
struct TextureBinding {
TextureKey key;
uint32_t swizzle;
@ -264,9 +303,12 @@ class TextureCache {
RegisterFile* register_file_;
SharedMemory* shared_memory_;
static const CopyModeInfo copy_mode_info_[];
ID3D12RootSignature* copy_root_signature_ = nullptr;
ID3D12PipelineState* copy_load_pipelines_[size_t(CopyMode::kCount)] = {};
static const LoadModeInfo load_mode_info_[];
ID3D12RootSignature* load_root_signature_ = nullptr;
ID3D12PipelineState* load_pipelines_[size_t(LoadMode::kCount)] = {};
static const TileModeInfo tile_mode_info_[];
ID3D12RootSignature* tile_root_signature_ = nullptr;
ID3D12PipelineState* tile_pipelines_[size_t(TileMode::kCount)] = {};
std::unordered_multimap<uint64_t, Texture*> textures_;