[D3D12] DXT1 untiling
This commit is contained in:
parent
265d6eb9df
commit
cfd3821b83
|
@ -435,6 +435,13 @@ void D3D12CommandProcessor::ReleaseScratchGPUBuffer(
|
|||
}
|
||||
}
|
||||
|
||||
// Makes `pipeline` the pipeline state of the current command list and records
// it in current_pipeline_. When it already equals current_pipeline_, no
// command is issued.
void D3D12CommandProcessor::SetPipeline(ID3D12PipelineState* pipeline) {
  if (current_pipeline_ == pipeline) {
    // Same pipeline as the one recorded last time - skip the redundant call.
    return;
  }
  auto command_list = GetCurrentCommandList();
  command_list->SetPipelineState(pipeline);
  current_pipeline_ = pipeline;
}
|
||||
|
||||
bool D3D12CommandProcessor::SetupContext() {
|
||||
if (!CommandProcessor::SetupContext()) {
|
||||
XELOGE("Failed to initialize base command processor context");
|
||||
|
@ -475,6 +482,10 @@ bool D3D12CommandProcessor::SetupContext() {
|
|||
|
||||
texture_cache_ = std::make_unique<TextureCache>(this, register_file_,
|
||||
shared_memory_.get());
|
||||
if (!texture_cache_->Initialize()) {
|
||||
XELOGE("Failed to initialize texture cache");
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
@ -653,7 +664,7 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
|
|||
}
|
||||
|
||||
bool new_frame = BeginFrame();
|
||||
ID3D12GraphicsCommandList* command_list = GetCurrentCommandList();
|
||||
auto command_list = GetCurrentCommandList();
|
||||
|
||||
// Set the primitive topology.
|
||||
D3D_PRIMITIVE_TOPOLOGY primitive_topology;
|
||||
|
@ -698,10 +709,7 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
|
|||
UpdateFixedFunctionState(command_list);
|
||||
|
||||
// Bind the pipeline.
|
||||
if (current_pipeline_ != pipeline) {
|
||||
current_pipeline_ = pipeline;
|
||||
command_list->SetPipelineState(pipeline);
|
||||
}
|
||||
SetPipeline(pipeline);
|
||||
|
||||
// Update system constants before uploading them.
|
||||
UpdateSystemConstantValues(indexed ? index_buffer_info->endianness
|
||||
|
|
|
@ -50,6 +50,9 @@ class D3D12CommandProcessor : public CommandProcessor {
|
|||
ID3D12RootSignature* GetRootSignature(const D3D12Shader* vertex_shader,
|
||||
const D3D12Shader* pixel_shader);
|
||||
|
||||
ui::d3d12::UploadBufferPool* GetConstantBufferPool() const {
|
||||
return constant_buffer_pool_.get();
|
||||
}
|
||||
// Request and automatically rebind descriptors on the draw command list.
|
||||
// Refer to DescriptorHeapPool::Request for partial/full update explanation.
|
||||
uint64_t RequestViewDescriptors(uint64_t previous_full_update,
|
||||
|
@ -73,6 +76,10 @@ class D3D12CommandProcessor : public CommandProcessor {
|
|||
void ReleaseScratchGPUBuffer(ID3D12Resource* buffer,
|
||||
D3D12_RESOURCE_STATES new_state);
|
||||
|
||||
// Sets the current pipeline state - may be called internally or externally.
|
||||
// This is for cache invalidation primarily. A frame must be open.
|
||||
void SetPipeline(ID3D12PipelineState* pipeline);
|
||||
|
||||
protected:
|
||||
bool SetupContext() override;
|
||||
void ShutdownContext() override;
|
||||
|
|
|
@ -0,0 +1,19 @@
|
|||
#ifndef XENIA_GPU_D3D12_SHADERS_BYTE_SWAP_HLSLI_
|
||||
#define XENIA_GPU_D3D12_SHADERS_BYTE_SWAP_HLSLI_
|
||||
|
||||
// Defines an XeByteSwap overload for one unsigned integer (vector) type.
// `endian` is interpreted as a 2-bit mode:
// - when bit 0 and bit 1 differ (values 1 and 2), the two bytes inside each
//   16-bit half of every 32-bit component are exchanged;
// - when bit 1 is set (values 2 and 3), the two 16-bit halves of every 32-bit
//   component are exchanged.
// Both steps are applied in that order, so 0 leaves `v` unchanged.
#define XE_BYTE_SWAP_OVERLOAD(XeByteSwapType) \
XeByteSwapType XeByteSwap(XeByteSwapType v, uint endian) { \
  const bool xe_swap_bytes = ((endian ^ (endian >> 1u)) & 1u) != 0u; \
  const bool xe_swap_halves = (endian & 2u) != 0u; \
  [flatten] if (xe_swap_bytes) { \
    v = ((v << 8u) & 0xFF00FF00u) | ((v >> 8u) & 0x00FF00FFu); \
  } \
  [flatten] if (xe_swap_halves) { \
    v = (v >> 16u) | (v << 16u); \
  } \
  return v; \
}
|
||||
XE_BYTE_SWAP_OVERLOAD(uint)
|
||||
XE_BYTE_SWAP_OVERLOAD(uint2)
|
||||
XE_BYTE_SWAP_OVERLOAD(uint3)
|
||||
XE_BYTE_SWAP_OVERLOAD(uint4)
|
||||
|
||||
#endif // XENIA_GPU_D3D12_SHADERS_BYTE_SWAP_HLSLI_
|
|
@ -0,0 +1,52 @@
|
|||
#ifndef XENIA_GPU_D3D12_SHADERS_TEXTURE_ADDRESS_HLSLI_
|
||||
#define XENIA_GPU_D3D12_SHADERS_TEXTURE_ADDRESS_HLSLI_
|
||||
|
||||
// XeTextureTiledOffset functions take x/y in blocks and return byte offsets for 4
|
||||
// consecutive blocks along X.
|
||||
|
||||
// https://github.com/gildor2/UModel/blob/de8fbd3bc922427ea056b7340202dcdcc19ccff5/Unreal/UnTexture.cpp#L495
|
||||
// Tiled byte offsets of 4 consecutive blocks along X in a 2D texture.
// p - coordinates (in blocks) of the first of the 4 blocks; the same p.y is
//     used for all of them.
// width - texture width in blocks; rounded up to a multiple of 32 here when
//         computing the stride between rows of 32x32-block groups.
// log2_bpb - used as a shift applied to block indices (presumably log2 of the
//            block size in bytes - confirm against callers).
// Returns one offset per block: .x for p.x, .y for p.x + 1, and so on.
uint4 XeTextureTiledOffset2D(uint2 p, uint width, uint log2_bpb) {
|
||||
// X coordinates of the 4 blocks handled by this call.
uint4 x4 = uint4(0u, 1u, 2u, 3u) + p.xxxx;
|
||||
// Top bits of coordinates.
|
||||
uint4 macro =
|
||||
((x4 >> 5u) + (p.y >> 5u) * ((width + 31u) >> 5u)) << (log2_bpb + 7u);
|
||||
// Lower bits of coordinates (result is 6-bit value).
|
||||
uint4 micro = ((x4 & 7u) + ((p.y & 0xEu) << 2u)) << log2_bpb;
|
||||
// Mix micro/macro + add few remaining x/y bits.
|
||||
uint4 offset =
|
||||
macro + ((micro & ~0xFu) << 1u) + (micro & 0xFu) + ((p.y & 1u) << 4u);
|
||||
// Mix bits again.
|
||||
return ((offset & ~0x1FFu) << 3u) + // Upper bits (offset bits [*-9]).
|
||||
((p.y & 16u) << 7u) + // Next 1 bit.
|
||||
((offset & 0x1C0u) << 2u) + // Next 3 bits (offset bits [8-6]).
|
||||
((((x4 >> 3u) + ((p.y & 8u) >> 2u)) & 3u) << 6u) + // Next 2 bits.
|
||||
(offset & 0x3Fu); // Lower 6 bits (offset bits [5-0]).
|
||||
}
|
||||
|
||||
// Reverse-engineered from an executable.
|
||||
// The base/micro/macro names were chosen pretty much at random and don't have
|
||||
// the same meaning as in TiledOffset2D.
|
||||
// Tiled byte offsets of 4 consecutive blocks along X in a 3D texture.
// p - coordinates (in blocks) of the first of the 4 blocks; p.y and p.z are
//     shared by all of them.
// width_height - texture width and height in blocks; both are rounded up to a
//                multiple of 32 before use.
// log2_bpb - used as a shift applied to block indices (presumably log2 of the
//            block size in bytes - confirm against callers).
// Returns one offset per block: .x for p.x, .y for p.x + 1, and so on.
uint4 XeTextureTiledOffset3D(uint3 p, uint2 width_height, uint log2_bpb) {
|
||||
// X coordinates of the 4 blocks handled by this call.
uint4 x4 = uint4(0u, 1u, 2u, 3u) + p.xxxx;
|
||||
uint2 aligned_size = (width_height + 31u) & ~31u;
|
||||
// Scalar part of the address that depends only on Y, Z and the aligned size.
uint base = ((p.z >> 2u) * ((aligned_size.x * aligned_size.y) >> 4u) +
|
||||
(p.y >> 4u)) * (aligned_size.x >> 5u);
|
||||
uint4 micro = (((p.z >> 2u) + (p.y >> 3u)) & 1u).xxxx;
|
||||
micro += (((micro << 1u) + (x4 >> 3u)) & 3u) << 1u;
|
||||
uint4 macro = (((x4 & 7u) + ((p.y & 6u) << 2u)) << (log2_bpb + 6u)) >> 6u;
|
||||
macro = (((((((x4 >> 5u) + base) << (log2_bpb + 6u)) & 0xFFFFFFFu) << 1u) +
|
||||
(macro & ~15u)) << 1u) + (macro & 15u) +
|
||||
((p.z & 3u) << (log2_bpb + 6u)) + ((p.y & 1u) << 4u);
|
||||
// Final interleave of the macro and micro bits into the byte offset.
return ((((((((macro >> 6u) & 7u) + ((micro & 1u) << 3u)) << 3u) +
|
||||
(micro & ~1u)) << 2u) + (macro & ~511u)) << 3u) + (macro & 63u);
|
||||
}
|
||||
|
||||
// Byte offset of block `p` within a linear (untiled) guest texture.
// p - block coordinates (x, y, z).
// height - texture height in blocks; consecutive Z slices are spaced by this
//          height rounded up to a multiple of 32 rows.
// pitch - size of one row of blocks in bytes.
// bpb - size of one block in bytes.
uint XeTextureGuestLinearOffset(uint3 p, uint height, uint pitch, uint bpb) {
  // Index of the row counting through all preceding slices. The row index is
  // what gets scaled by the pitch - adding the pitch to the row count and
  // multiplying by p.y only gives the right answer for p.z == 0.
  uint row = p.z * ((height + 31u) & ~31u) + p.y;
  return p.x * bpb + row * pitch;
}
|
||||
|
||||
// Byte offset of block `p` within a linear host-side copy of a texture.
// p - block coordinates (x, y, z).
// height - texture height in blocks; consecutive Z slices are exactly this
//          many rows apart (no alignment is applied here).
// pitch - size of one row of blocks in bytes.
// bpb - size of one block in bytes.
uint XeTextureHostLinearOffset(uint3 p, uint height, uint pitch, uint bpb) {
  // Index of the row counting through all preceding slices. The row index is
  // what gets scaled by the pitch - adding the pitch to the row count and
  // multiplying by p.y only gives the right answer for p.z == 0.
  uint row = p.z * height + p.y;
  return p.x * bpb + row * pitch;
}
|
||||
|
||||
#endif // XENIA_GPU_D3D12_SHADERS_TEXTURE_ADDRESS_HLSLI_
|
|
@ -0,0 +1,28 @@
|
|||
#ifndef XENIA_GPU_D3D12_SHADERS_TEXTURE_COPY_HLSLI_
|
||||
#define XENIA_GPU_D3D12_SHADERS_TEXTURE_COPY_HLSLI_
|
||||
|
||||
#include "byte_swap.hlsli"
|
||||
#include "texture_address.hlsli"
|
||||
|
||||
// Constants shared by the texture copying compute shaders (bound at b0).
cbuffer xe_texture_copy_constants : register(b0) {
|
||||
// Added to every computed guest block offset before reading the source
// buffer.
uint xe_texture_copy_guest_base;
|
||||
// For linear textures - row byte pitch.
// The load shader treats XeTextureCopyGuestPitchTiled here as "the guest
// texture is tiled" and uses the tiled address functions instead.
|
||||
uint xe_texture_copy_guest_pitch;
|
||||
// Added to the computed host linear offset before writing the destination
// buffer.
uint xe_texture_copy_host_base;
|
||||
// Passed as the row pitch when computing host linear offsets.
uint xe_texture_copy_host_pitch;
|
||||
|
||||
// Size in blocks.
// Threads whose block index is outside of this size do nothing.
|
||||
uint3 xe_texture_copy_size;
|
||||
// Selects the 3D tiled address function instead of the 2D one for tiled
// guest textures.
bool xe_texture_copy_is_3d;
|
||||
|
||||
// Offset within the packed mip for small mips.
// Added to the block index to get the guest block coordinates.
|
||||
uint3 xe_texture_copy_guest_mip_offset;
|
||||
// Passed to XeByteSwap as the endianness mode for the loaded data.
uint xe_texture_copy_endianness;
|
||||
};
|
||||
|
||||
#define XeTextureCopyGuestPitchTiled 0xFFFFFFFFu
|
||||
|
||||
ByteAddressBuffer xe_texture_copy_source : register(t0);
|
||||
RWByteAddressBuffer xe_texture_copy_dest : register(u0);
|
||||
|
||||
#endif // XENIA_GPU_D3D12_SHADERS_TEXTURE_COPY_HLSLI_
|
|
@ -0,0 +1,38 @@
|
|||
#include "texture_copy.hlsli"
|
||||
|
||||
// Copies 64-bit (8-byte) blocks from the guest texture in the source buffer
// to a linear layout in the destination buffer, byte-swapping on the way.
[numthreads(8, 32, 1)]
void main(uint3 xe_thread_id : SV_DispatchThreadID) {
  // Every thread handles a run of 4 uint2 blocks along X.
  uint3 host_block = uint3(xe_thread_id.x << 2u, xe_thread_id.yz);
  [branch] if (any(host_block >= xe_texture_copy_size)) {
    return;
  }

  // Find where the 4 blocks are in the guest memory.
  uint3 guest_block = host_block + xe_texture_copy_guest_mip_offset;
  uint4 guest_offsets;
  [branch] if (xe_texture_copy_guest_pitch != XeTextureCopyGuestPitchTiled) {
    // Linear - the 4 blocks are adjacent, 8 bytes apart.
    guest_offsets =
        XeTextureGuestLinearOffset(guest_block, xe_texture_copy_size.y,
                                   xe_texture_copy_guest_pitch, 8u) +
        uint4(0u, 8u, 16u, 24u);
  } else {
    // Tiled - 3u is log2 of the 8-byte block size.
    [branch] if (xe_texture_copy_is_3d) {
      guest_offsets = XeTextureTiledOffset3D(guest_block,
                                             xe_texture_copy_size.xy, 3u);
    } else {
      guest_offsets = XeTextureTiledOffset2D(guest_block.xy,
                                             xe_texture_copy_size.x, 3u);
    }
  }
  guest_offsets += xe_texture_copy_guest_base;

  // Load two blocks per uint4 and convert the endianness.
  uint4 first_pair = uint4(xe_texture_copy_source.Load2(guest_offsets.x),
                           xe_texture_copy_source.Load2(guest_offsets.y));
  uint4 second_pair = uint4(xe_texture_copy_source.Load2(guest_offsets.z),
                            xe_texture_copy_source.Load2(guest_offsets.w));
  first_pair = XeByteSwap(first_pair, xe_texture_copy_endianness);
  second_pair = XeByteSwap(second_pair, xe_texture_copy_endianness);

  // Write the 4 blocks (32 bytes) contiguously to the host buffer.
  uint host_offset =
      XeTextureHostLinearOffset(host_block, xe_texture_copy_size.y,
                                xe_texture_copy_host_pitch, 8u) +
      xe_texture_copy_host_base;
  xe_texture_copy_dest.Store4(host_offset, first_pair);
  xe_texture_copy_dest.Store4(host_offset + 16u, second_pair);
}
|
|
@ -14,81 +14,90 @@
|
|||
|
||||
#include "xenia/base/assert.h"
|
||||
#include "xenia/base/logging.h"
|
||||
#include "xenia/base/math.h"
|
||||
#include "xenia/gpu/d3d12/d3d12_command_processor.h"
|
||||
#include "xenia/gpu/texture_info.h"
|
||||
#include "xenia/gpu/texture_util.h"
|
||||
|
||||
namespace xe {
|
||||
namespace gpu {
|
||||
namespace d3d12 {
|
||||
|
||||
TextureCache::HostFormat TextureCache::host_formats_[64] = {
|
||||
{DXGI_FORMAT_UNKNOWN}, // k_1_REVERSE
|
||||
{DXGI_FORMAT_UNKNOWN}, // k_1
|
||||
{DXGI_FORMAT_UNKNOWN}, // k_8
|
||||
{DXGI_FORMAT_UNKNOWN}, // k_1_5_5_5
|
||||
{DXGI_FORMAT_UNKNOWN}, // k_5_6_5
|
||||
{DXGI_FORMAT_UNKNOWN}, // k_6_5_5
|
||||
{DXGI_FORMAT_UNKNOWN}, // k_8_8_8_8
|
||||
{DXGI_FORMAT_UNKNOWN}, // k_2_10_10_10
|
||||
{DXGI_FORMAT_UNKNOWN}, // k_8_A
|
||||
{DXGI_FORMAT_UNKNOWN}, // k_8_B
|
||||
{DXGI_FORMAT_UNKNOWN}, // k_8_8
|
||||
{DXGI_FORMAT_UNKNOWN}, // k_Cr_Y1_Cb_Y0
|
||||
{DXGI_FORMAT_UNKNOWN}, // k_Y1_Cr_Y0_Cb
|
||||
{DXGI_FORMAT_UNKNOWN}, // k_Shadow
|
||||
{DXGI_FORMAT_UNKNOWN}, // k_8_8_8_8_A
|
||||
{DXGI_FORMAT_UNKNOWN}, // k_4_4_4_4
|
||||
{DXGI_FORMAT_UNKNOWN}, // k_10_11_11
|
||||
{DXGI_FORMAT_UNKNOWN}, // k_11_11_10
|
||||
{DXGI_FORMAT_BC1_UNORM}, // k_DXT1
|
||||
{DXGI_FORMAT_UNKNOWN}, // k_DXT2_3
|
||||
{DXGI_FORMAT_UNKNOWN}, // k_DXT4_5
|
||||
{DXGI_FORMAT_UNKNOWN}, // k_DXV
|
||||
{DXGI_FORMAT_UNKNOWN}, // k_24_8
|
||||
{DXGI_FORMAT_UNKNOWN}, // k_24_8_FLOAT
|
||||
{DXGI_FORMAT_UNKNOWN}, // k_16
|
||||
{DXGI_FORMAT_UNKNOWN}, // k_16_16
|
||||
{DXGI_FORMAT_UNKNOWN}, // k_16_16_16_16
|
||||
{DXGI_FORMAT_UNKNOWN}, // k_16_EXPAND
|
||||
{DXGI_FORMAT_UNKNOWN}, // k_16_16_EXPAND
|
||||
{DXGI_FORMAT_UNKNOWN}, // k_16_16_16_16_EXPAND
|
||||
{DXGI_FORMAT_UNKNOWN}, // k_16_FLOAT
|
||||
{DXGI_FORMAT_UNKNOWN}, // k_16_16_FLOAT
|
||||
{DXGI_FORMAT_UNKNOWN}, // k_16_16_16_16_FLOAT
|
||||
{DXGI_FORMAT_UNKNOWN}, // k_32
|
||||
{DXGI_FORMAT_UNKNOWN}, // k_32_32
|
||||
{DXGI_FORMAT_UNKNOWN}, // k_32_32_32_32
|
||||
{DXGI_FORMAT_UNKNOWN}, // k_32_FLOAT
|
||||
{DXGI_FORMAT_UNKNOWN}, // k_32_32_FLOAT
|
||||
{DXGI_FORMAT_UNKNOWN}, // k_32_32_32_32_FLOAT
|
||||
{DXGI_FORMAT_UNKNOWN}, // k_32_AS_8
|
||||
{DXGI_FORMAT_UNKNOWN}, // k_32_AS_8_8
|
||||
{DXGI_FORMAT_UNKNOWN}, // k_16_MPEG
|
||||
{DXGI_FORMAT_UNKNOWN}, // k_16_16_MPEG
|
||||
{DXGI_FORMAT_UNKNOWN}, // k_8_INTERLACED
|
||||
{DXGI_FORMAT_UNKNOWN}, // k_32_AS_8_INTERLACED
|
||||
{DXGI_FORMAT_UNKNOWN}, // k_32_AS_8_8_INTERLACED
|
||||
{DXGI_FORMAT_UNKNOWN}, // k_16_INTERLACED
|
||||
{DXGI_FORMAT_UNKNOWN}, // k_16_MPEG_INTERLACED
|
||||
{DXGI_FORMAT_UNKNOWN}, // k_16_16_MPEG_INTERLACED
|
||||
{DXGI_FORMAT_UNKNOWN}, // k_DXN
|
||||
{DXGI_FORMAT_UNKNOWN}, // k_8_8_8_8_AS_16_16_16_16
|
||||
{DXGI_FORMAT_BC1_UNORM}, // k_DXT1_AS_16_16_16_16
|
||||
{DXGI_FORMAT_UNKNOWN}, // k_DXT2_3_AS_16_16_16_16
|
||||
{DXGI_FORMAT_UNKNOWN}, // k_DXT4_5_AS_16_16_16_16
|
||||
{DXGI_FORMAT_UNKNOWN}, // k_2_10_10_10_AS_16_16_16_16
|
||||
{DXGI_FORMAT_UNKNOWN}, // k_10_11_11_AS_16_16_16_16
|
||||
{DXGI_FORMAT_UNKNOWN}, // k_11_11_10_AS_16_16_16_16
|
||||
{DXGI_FORMAT_UNKNOWN}, // k_32_32_32_FLOAT
|
||||
{DXGI_FORMAT_UNKNOWN}, // k_DXT3A
|
||||
{DXGI_FORMAT_UNKNOWN}, // k_DXT5A
|
||||
{DXGI_FORMAT_UNKNOWN}, // k_CTX1
|
||||
{DXGI_FORMAT_UNKNOWN}, // k_DXT3A_AS_1_1_1_1
|
||||
{DXGI_FORMAT_UNKNOWN}, // k_8_8_8_8_GAMMA
|
||||
{DXGI_FORMAT_UNKNOWN}, // k_2_10_10_10_FLOAT
|
||||
// Generated with `xb buildhlsl`.
|
||||
#include "xenia/gpu/d3d12/shaders/bin/texture_load_64bpb_cs.h"
|
||||
|
||||
const TextureCache::HostFormat TextureCache::host_formats_[64] = {
|
||||
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_1_REVERSE
|
||||
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_1
|
||||
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_8
|
||||
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_1_5_5_5
|
||||
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_5_6_5
|
||||
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_6_5_5
|
||||
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_8_8_8_8
|
||||
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_2_10_10_10
|
||||
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_8_A
|
||||
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_8_B
|
||||
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_8_8
|
||||
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_Cr_Y1_Cb_Y0
|
||||
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_Y1_Cr_Y0_Cb
|
||||
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_Shadow
|
||||
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_8_8_8_8_A
|
||||
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_4_4_4_4
|
||||
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_10_11_11
|
||||
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_11_11_10
|
||||
{DXGI_FORMAT_BC1_UNORM, CopyMode::k64Bpb}, // k_DXT1
|
||||
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_DXT2_3
|
||||
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_DXT4_5
|
||||
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_DXV
|
||||
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_24_8
|
||||
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_24_8_FLOAT
|
||||
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_16
|
||||
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_16_16
|
||||
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_16_16_16_16
|
||||
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_16_EXPAND
|
||||
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_16_16_EXPAND
|
||||
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_16_16_16_16_EXPAND
|
||||
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_16_FLOAT
|
||||
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_16_16_FLOAT
|
||||
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_16_16_16_16_FLOAT
|
||||
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_32
|
||||
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_32_32
|
||||
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_32_32_32_32
|
||||
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_32_FLOAT
|
||||
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_32_32_FLOAT
|
||||
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_32_32_32_32_FLOAT
|
||||
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_32_AS_8
|
||||
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_32_AS_8_8
|
||||
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_16_MPEG
|
||||
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_16_16_MPEG
|
||||
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_8_INTERLACED
|
||||
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_32_AS_8_INTERLACED
|
||||
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_32_AS_8_8_INTERLACED
|
||||
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_16_INTERLACED
|
||||
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_16_MPEG_INTERLACED
|
||||
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_16_16_MPEG_INTERLACED
|
||||
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_DXN
|
||||
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_8_8_8_8_AS_16_16_16_16
|
||||
{DXGI_FORMAT_BC1_UNORM, CopyMode::k64Bpb}, // k_DXT1_AS_16_16_16_16
|
||||
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_DXT2_3_AS_16_16_16_16
|
||||
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_DXT4_5_AS_16_16_16_16
|
||||
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_2_10_10_10_AS_16_16_16_16
|
||||
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_10_11_11_AS_16_16_16_16
|
||||
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_11_11_10_AS_16_16_16_16
|
||||
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_32_32_32_FLOAT
|
||||
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_DXT3A
|
||||
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_DXT5A
|
||||
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_CTX1
|
||||
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_DXT3A_AS_1_1_1_1
|
||||
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_8_8_8_8_GAMMA
|
||||
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_2_10_10_10_FLOAT
|
||||
};
|
||||
|
||||
const char* TextureCache::dimension_names_[4] = {"1D", "2D", "3D", "cube"};
|
||||
const char* const TextureCache::dimension_names_[4] = {"1D", "2D", "3D",
|
||||
"cube"};
|
||||
|
||||
const TextureCache::CopyModeInfo TextureCache::copy_mode_info_[] = {
|
||||
{texture_load_64bpb_cs, sizeof(texture_load_64bpb_cs)}};
|
||||
|
||||
TextureCache::TextureCache(D3D12CommandProcessor* command_processor,
|
||||
RegisterFile* register_file,
|
||||
|
@ -100,11 +109,105 @@ TextureCache::TextureCache(D3D12CommandProcessor* command_processor,
|
|||
TextureCache::~TextureCache() { Shutdown(); }
|
||||
|
||||
bool TextureCache::Initialize() {
|
||||
auto device =
|
||||
command_processor_->GetD3D12Context()->GetD3D12Provider()->GetDevice();
|
||||
|
||||
// Create the copying root signature.
|
||||
D3D12_ROOT_PARAMETER root_parameters[2];
|
||||
// Parameter 0 is constants (changed very often when untiling).
|
||||
root_parameters[0].ParameterType = D3D12_ROOT_PARAMETER_TYPE_CBV;
|
||||
root_parameters[0].Descriptor.ShaderRegister = 0;
|
||||
root_parameters[0].Descriptor.RegisterSpace = 0;
|
||||
root_parameters[0].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
|
||||
// Parameter 1 is source and target.
|
||||
D3D12_DESCRIPTOR_RANGE root_copy_ranges[2];
|
||||
root_copy_ranges[0].RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV;
|
||||
root_copy_ranges[0].NumDescriptors = 1;
|
||||
root_copy_ranges[0].BaseShaderRegister = 0;
|
||||
root_copy_ranges[0].RegisterSpace = 0;
|
||||
root_copy_ranges[0].OffsetInDescriptorsFromTableStart = 0;
|
||||
root_copy_ranges[1].RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_UAV;
|
||||
root_copy_ranges[1].NumDescriptors = 1;
|
||||
root_copy_ranges[1].BaseShaderRegister = 0;
|
||||
root_copy_ranges[1].RegisterSpace = 0;
|
||||
root_copy_ranges[1].OffsetInDescriptorsFromTableStart = 1;
|
||||
root_parameters[1].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
|
||||
root_parameters[1].DescriptorTable.NumDescriptorRanges = 2;
|
||||
root_parameters[1].DescriptorTable.pDescriptorRanges = root_copy_ranges;
|
||||
root_parameters[1].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
|
||||
D3D12_ROOT_SIGNATURE_DESC root_signature_desc;
|
||||
root_signature_desc.NumParameters = UINT(xe::countof(root_parameters));
|
||||
root_signature_desc.pParameters = root_parameters;
|
||||
root_signature_desc.NumStaticSamplers = 0;
|
||||
root_signature_desc.pStaticSamplers = nullptr;
|
||||
root_signature_desc.Flags = D3D12_ROOT_SIGNATURE_FLAG_NONE;
|
||||
ID3DBlob* root_signature_blob;
|
||||
ID3DBlob* root_signature_error_blob = nullptr;
|
||||
if (FAILED(D3D12SerializeRootSignature(
|
||||
&root_signature_desc, D3D_ROOT_SIGNATURE_VERSION_1,
|
||||
&root_signature_blob, &root_signature_error_blob))) {
|
||||
XELOGE("Failed to serialize the texture copying root signature");
|
||||
if (root_signature_error_blob != nullptr) {
|
||||
XELOGE("%s", reinterpret_cast<const char*>(
|
||||
root_signature_error_blob->GetBufferPointer()));
|
||||
root_signature_error_blob->Release();
|
||||
}
|
||||
Shutdown();
|
||||
return false;
|
||||
}
|
||||
if (root_signature_error_blob != nullptr) {
|
||||
root_signature_error_blob->Release();
|
||||
}
|
||||
if (FAILED(device->CreateRootSignature(
|
||||
0, root_signature_blob->GetBufferPointer(),
|
||||
root_signature_blob->GetBufferSize(),
|
||||
IID_PPV_ARGS(©_root_signature_)))) {
|
||||
XELOGE("Failed to create the texture copying root signature");
|
||||
root_signature_blob->Release();
|
||||
Shutdown();
|
||||
return false;
|
||||
}
|
||||
root_signature_blob->Release();
|
||||
|
||||
// Create the copying pipelines.
|
||||
D3D12_COMPUTE_PIPELINE_STATE_DESC pipeline_desc;
|
||||
pipeline_desc.pRootSignature = copy_root_signature_;
|
||||
pipeline_desc.NodeMask = 0;
|
||||
pipeline_desc.CachedPSO.pCachedBlob = nullptr;
|
||||
pipeline_desc.CachedPSO.CachedBlobSizeInBytes = 0;
|
||||
pipeline_desc.Flags = D3D12_PIPELINE_STATE_FLAG_NONE;
|
||||
for (uint32_t i = 0; i < uint32_t(CopyMode::kCount); ++i) {
|
||||
const CopyModeInfo& mode_info = copy_mode_info_[i];
|
||||
if (mode_info.load_shader != nullptr) {
|
||||
pipeline_desc.CS.pShaderBytecode = mode_info.load_shader;
|
||||
pipeline_desc.CS.BytecodeLength = mode_info.load_shader_size;
|
||||
if (FAILED(device->CreateComputePipelineState(
|
||||
&pipeline_desc, IID_PPV_ARGS(©_load_pipelines_[i])))) {
|
||||
XELOGE("Failed to create the texture copying pipeline for mode %u", i);
|
||||
Shutdown();
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ClearBindings();
|
||||
return true;
|
||||
}
|
||||
|
||||
void TextureCache::Shutdown() { ClearCache(); }
|
||||
void TextureCache::Shutdown() {
|
||||
ClearCache();
|
||||
|
||||
for (uint32_t i = 0; i < uint32_t(CopyMode::kCount); ++i) {
|
||||
if (copy_load_pipelines_[i] != nullptr) {
|
||||
copy_load_pipelines_[i]->Release();
|
||||
copy_load_pipelines_[i] = nullptr;
|
||||
}
|
||||
}
|
||||
if (copy_root_signature_ != nullptr) {
|
||||
copy_root_signature_->Release();
|
||||
copy_root_signature_ = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
void TextureCache::TextureFetchConstantWritten(uint32_t index) {
|
||||
texture_keys_in_sync_ &= ~(1u << index);
|
||||
|
@ -120,7 +223,6 @@ void TextureCache::BeginFrame() {
|
|||
void TextureCache::RequestTextures(uint32_t used_vertex_texture_mask,
|
||||
uint32_t used_pixel_texture_mask) {
|
||||
auto command_list = command_processor_->GetCurrentCommandList();
|
||||
assert_not_null(command_list);
|
||||
if (command_list == nullptr) {
|
||||
return;
|
||||
}
|
||||
|
@ -154,7 +256,7 @@ void TextureCache::RequestTextures(uint32_t used_vertex_texture_mask,
|
|||
continue;
|
||||
}
|
||||
|
||||
// TODO(Triang3l): Untile the texture.
|
||||
LoadTextureData(binding.texture);
|
||||
}
|
||||
|
||||
// Transition the textures to the needed usage.
|
||||
|
@ -369,7 +471,7 @@ void TextureCache::TextureKeyFromFetchConstant(
|
|||
key_out.mip_max_level = mip_max_level;
|
||||
key_out.tiled = fetch.tiled;
|
||||
key_out.packed_mips = fetch.packed_mips;
|
||||
key_out.format = TextureFormat(fetch.format);
|
||||
key_out.format = GetBaseFormat(TextureFormat(fetch.format));
|
||||
key_out.endianness = Endian(fetch.endianness);
|
||||
// Get rid of 6 and 7 values (to prevent device losses if the game has
|
||||
// something broken) the quick and dirty way - by changing them to 4 and 5.
|
||||
|
@ -380,26 +482,26 @@ void TextureCache::TextureKeyFromFetchConstant(
|
|||
void TextureCache::LogTextureKeyAction(TextureKey key, const char* action) {
|
||||
XELOGGPU(
|
||||
"%s %s %ux%ux%u %s %s texture with %u %spacked mip level%s, "
|
||||
"base at 0x%.8X, mips at 0x%.8X", action, key.tiled ? "tiled" : "linear",
|
||||
key.width, key.height, key.depth,
|
||||
"base at 0x%.8X, mips at 0x%.8X",
|
||||
action, key.tiled ? "tiled" : "linear", key.width, key.height, key.depth,
|
||||
dimension_names_[uint32_t(key.dimension)],
|
||||
FormatInfo::Get(key.format)->name, key.mip_max_level + 1,
|
||||
key.packed_mips ? "" : "un", key.mip_max_level != 0 ? "s" : "",
|
||||
key.base_page << 12, key.mip_page << 12);
|
||||
}
|
||||
|
||||
void TextureCache::LogTextureAction(const Texture& texture,
|
||||
void TextureCache::LogTextureAction(const Texture* texture,
|
||||
const char* action) {
|
||||
XELOGGPU(
|
||||
"%s %s %ux%ux%u %s %s texture with %u %spacked mip level%s, "
|
||||
"base at 0x%.8X (size %u), mips at 0x%.8X (size %u)", action,
|
||||
texture.key.tiled ? "tiled" : "linear", texture.key.width,
|
||||
texture.key.height, texture.key.depth,
|
||||
dimension_names_[uint32_t(texture.key.dimension)],
|
||||
FormatInfo::Get(texture.key.format)->name,
|
||||
texture.key.mip_max_level + 1, texture.key.packed_mips ? "" : "un",
|
||||
texture.key.mip_max_level != 0 ? "s" : "", texture.key.base_page << 12,
|
||||
texture.base_size, texture.key.mip_page << 12, texture.mip_size);
|
||||
"base at 0x%.8X (size %u), mips at 0x%.8X (size %u)",
|
||||
action, texture->key.tiled ? "tiled" : "linear", texture->key.width,
|
||||
texture->key.height, texture->key.depth,
|
||||
dimension_names_[uint32_t(texture->key.dimension)],
|
||||
FormatInfo::Get(texture->key.format)->name,
|
||||
texture->key.mip_max_level + 1, texture->key.packed_mips ? "" : "un",
|
||||
texture->key.mip_max_level != 0 ? "s" : "", texture->key.base_page << 12,
|
||||
texture->base_size, texture->key.mip_page << 12, texture->mip_size);
|
||||
}
|
||||
|
||||
TextureCache::Texture* TextureCache::FindOrCreateTexture(TextureKey key) {
|
||||
|
@ -457,19 +559,23 @@ TextureCache::Texture* TextureCache::FindOrCreateTexture(TextureKey key) {
|
|||
texture->key = key;
|
||||
texture->resource = resource;
|
||||
texture->state = state;
|
||||
texture->mip_offsets[0] = 0;
|
||||
uint32_t width_blocks, height_blocks, depth_blocks;
|
||||
if (key.base_page != 0) {
|
||||
texture_util::GetGuestMipBlocks(key.dimension, key.width, key.height,
|
||||
key.depth, key.format, 0, width_blocks,
|
||||
height_blocks, depth_blocks);
|
||||
texture->base_size = texture_util::GetGuestMipStorageSize(
|
||||
texture->base_slice_size = texture_util::GetGuestMipStorageSize(
|
||||
width_blocks, height_blocks, depth_blocks, key.tiled, key.format,
|
||||
nullptr);
|
||||
texture->mip_pitches[0]);
|
||||
texture->base_in_sync = false;
|
||||
} else {
|
||||
texture->base_slice_size = 0;
|
||||
texture->mip_pitches[0] = 0;
|
||||
// Never try to upload the base level if there is none.
|
||||
texture->base_in_sync = true;
|
||||
}
|
||||
texture->mip_slice_size = 0;
|
||||
if (key.mip_page != 0) {
|
||||
uint32_t mip_max_storage_level = key.mip_max_level;
|
||||
if (key.packed_mips) {
|
||||
|
@ -477,26 +583,241 @@ TextureCache::Texture* TextureCache::FindOrCreateTexture(TextureKey key) {
|
|||
std::min(mip_max_storage_level,
|
||||
texture_util::GetPackedMipLevel(key.width, key.height));
|
||||
}
|
||||
texture->mip_size = 0;
|
||||
for (uint32_t i = 1; i <= mip_max_storage_level; ++i) {
|
||||
texture_util::GetGuestMipBlocks(key.dimension, key.width, key.height,
|
||||
key.depth, key.format, i, width_blocks,
|
||||
height_blocks, depth_blocks);
|
||||
texture->mip_size += texture_util::GetGuestMipStorageSize(
|
||||
texture->mip_offsets[i] = texture->mip_slice_size;
|
||||
texture->mip_slice_size += texture_util::GetGuestMipStorageSize(
|
||||
width_blocks, height_blocks, depth_blocks, key.tiled, key.format,
|
||||
nullptr);
|
||||
texture->mip_pitches[i]);
|
||||
}
|
||||
// The rest are either packed levels or don't exist at all.
|
||||
for (uint32_t i = mip_max_storage_level + 1;
|
||||
i < xe::countof(texture->mip_offsets); ++i) {
|
||||
texture->mip_offsets[i] = texture->mip_offsets[mip_max_storage_level];
|
||||
texture->mip_pitches[i] = texture->mip_pitches[mip_max_storage_level];
|
||||
}
|
||||
texture->mips_in_sync = false;
|
||||
} else {
|
||||
std::memset(&texture->mip_offsets[1], 0,
|
||||
(xe::countof(texture->mip_offsets) - 1) * sizeof(uint32_t));
|
||||
std::memset(&texture->mip_pitches[1], 0,
|
||||
(xe::countof(texture->mip_pitches) - 1) * sizeof(uint32_t));
|
||||
// Never try to upload the mipmaps if there are none.
|
||||
texture->mips_in_sync = true;
|
||||
}
|
||||
texture->base_size = texture->base_slice_size;
|
||||
texture->mip_size = texture->mip_slice_size;
|
||||
if (key.dimension != Dimension::k3D) {
|
||||
texture->base_size *= key.depth;
|
||||
texture->mip_size *= key.depth;
|
||||
}
|
||||
textures_.insert(std::make_pair(map_key, texture));
|
||||
LogTextureAction(*texture, "Created");
|
||||
LogTextureAction(texture, "Created");
|
||||
|
||||
return texture;
|
||||
}
|
||||
|
||||
bool TextureCache::LoadTextureData(Texture* texture) {
|
||||
if (texture->base_in_sync && texture->mips_in_sync) {
|
||||
return true;
|
||||
}
|
||||
|
||||
auto command_list = command_processor_->GetCurrentCommandList();
|
||||
if (command_list == nullptr) {
|
||||
return false;
|
||||
}
|
||||
auto provider = command_processor_->GetD3D12Context()->GetD3D12Provider();
|
||||
auto device = provider->GetDevice();
|
||||
|
||||
// Get the pipeline.
|
||||
const HostFormat& host_format = host_formats_[uint32_t(texture->key.format)];
|
||||
if (host_format.copy_mode == CopyMode::kUnknown) {
|
||||
return false;
|
||||
}
|
||||
ID3D12PipelineState* pipeline =
|
||||
copy_load_pipelines_[uint32_t(host_format.copy_mode)];
|
||||
if (pipeline == nullptr) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Request uploading of the texture data to the shared memory.
|
||||
if (!texture->base_in_sync) {
|
||||
if (!shared_memory_->UseRange(texture->key.base_page << 12,
|
||||
texture->base_size)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
if (!texture->mips_in_sync) {
|
||||
if (!shared_memory_->UseRange(texture->key.mip_page << 12,
|
||||
texture->mip_size)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// Get the guest layout.
|
||||
bool is_3d = texture->key.dimension == Dimension::k3D;
|
||||
uint32_t width = texture->key.width;
|
||||
uint32_t height = texture->key.height;
|
||||
uint32_t depth = is_3d ? texture->key.depth : 1;
|
||||
uint32_t slice_count = is_3d ? 1 : texture->key.depth;
|
||||
TextureFormat guest_format = texture->key.format;
|
||||
const FormatInfo* guest_format_info = FormatInfo::Get(guest_format);
|
||||
uint32_t block_width = guest_format_info->block_width;
|
||||
uint32_t block_height = guest_format_info->block_height;
|
||||
|
||||
// Get the host layout and the buffer.
|
||||
D3D12_RESOURCE_DESC resource_desc = texture->resource->GetDesc();
|
||||
D3D12_PLACED_SUBRESOURCE_FOOTPRINT host_layouts[D3D12_REQ_MIP_LEVELS];
|
||||
UINT64 host_slice_size;
|
||||
device->GetCopyableFootprints(&resource_desc, 0, resource_desc.MipLevels, 0,
|
||||
host_layouts, nullptr, nullptr,
|
||||
&host_slice_size);
|
||||
D3D12_RESOURCE_STATES copy_buffer_state =
|
||||
D3D12_RESOURCE_STATE_UNORDERED_ACCESS;
|
||||
ID3D12Resource* copy_buffer = command_processor_->RequestScratchGPUBuffer(
|
||||
uint32_t(host_slice_size), copy_buffer_state);
|
||||
if (copy_buffer == nullptr) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Begin loading.
|
||||
D3D12_CPU_DESCRIPTOR_HANDLE descriptor_cpu_start;
|
||||
D3D12_GPU_DESCRIPTOR_HANDLE descriptor_gpu_start;
|
||||
if (command_processor_->RequestViewDescriptors(0, 2, 2, descriptor_cpu_start,
|
||||
descriptor_gpu_start) == 0) {
|
||||
command_processor_->ReleaseScratchGPUBuffer(copy_buffer, copy_buffer_state);
|
||||
return false;
|
||||
}
|
||||
shared_memory_->UseForReading(command_list);
|
||||
shared_memory_->CreateSRV(descriptor_cpu_start);
|
||||
D3D12_UNORDERED_ACCESS_VIEW_DESC uav_desc;
|
||||
uav_desc.Format = DXGI_FORMAT_R32_TYPELESS;
|
||||
uav_desc.ViewDimension = D3D12_UAV_DIMENSION_BUFFER;
|
||||
uav_desc.Buffer.FirstElement = 0;
|
||||
uav_desc.Buffer.NumElements = UINT(host_slice_size >> 2);
|
||||
uav_desc.Buffer.StructureByteStride = 0;
|
||||
uav_desc.Buffer.CounterOffsetInBytes = 0;
|
||||
uav_desc.Buffer.Flags = D3D12_BUFFER_UAV_FLAG_RAW;
|
||||
D3D12_CPU_DESCRIPTOR_HANDLE descriptor_cpu_uav;
|
||||
descriptor_cpu_uav.ptr =
|
||||
descriptor_cpu_start.ptr + provider->GetDescriptorSizeView();
|
||||
device->CreateUnorderedAccessView(copy_buffer, nullptr, &uav_desc,
|
||||
descriptor_cpu_uav);
|
||||
command_processor_->SetPipeline(pipeline);
|
||||
command_list->SetComputeRootSignature(copy_root_signature_);
|
||||
command_list->SetComputeRootDescriptorTable(1, descriptor_gpu_start);
|
||||
|
||||
// Submit commands.
|
||||
D3D12_RESOURCE_BARRIER barriers[2];
|
||||
barriers[0].Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
|
||||
barriers[1].Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
|
||||
if (texture->state != D3D12_RESOURCE_STATE_COPY_DEST) {
|
||||
barriers[0].Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
|
||||
barriers[0].Transition.pResource = texture->resource;
|
||||
barriers[0].Transition.Subresource =
|
||||
D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
|
||||
barriers[0].Transition.StateBefore = texture->state;
|
||||
barriers[0].Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_DEST;
|
||||
command_list->ResourceBarrier(1, barriers);
|
||||
texture->state = D3D12_RESOURCE_STATE_COPY_DEST;
|
||||
}
|
||||
uint32_t mip_first = texture->base_in_sync ? 1 : 0;
|
||||
uint32_t mip_last = texture->mips_in_sync ? 0 : resource_desc.MipLevels - 1;
|
||||
auto cbuffer_pool = command_processor_->GetConstantBufferPool();
|
||||
CopyConstants copy_constants;
|
||||
copy_constants.is_3d = is_3d ? 1 : 0;
|
||||
copy_constants.endianness = uint32_t(texture->key.endianness);
|
||||
if (!texture->key.packed_mips) {
|
||||
copy_constants.guest_mip_offset[0] = 0;
|
||||
copy_constants.guest_mip_offset[1] = 0;
|
||||
copy_constants.guest_mip_offset[2] = 0;
|
||||
}
|
||||
for (uint32_t i = 0; i < slice_count; ++i) {
|
||||
if (copy_buffer_state != D3D12_RESOURCE_STATE_UNORDERED_ACCESS) {
|
||||
barriers[0].Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
|
||||
barriers[0].Transition.pResource = copy_buffer;
|
||||
barriers[0].Transition.Subresource = 0;
|
||||
barriers[0].Transition.StateBefore = copy_buffer_state;
|
||||
barriers[0].Transition.StateAfter = D3D12_RESOURCE_STATE_UNORDERED_ACCESS;
|
||||
command_list->ResourceBarrier(1, barriers);
|
||||
copy_buffer_state = D3D12_RESOURCE_STATE_UNORDERED_ACCESS;
|
||||
}
|
||||
for (uint32_t j = mip_first; j <= mip_last; ++j) {
|
||||
if (j == 0) {
|
||||
copy_constants.guest_base =
|
||||
(texture->key.base_page << 12) + i * texture->base_slice_size;
|
||||
} else {
|
||||
copy_constants.guest_base =
|
||||
(texture->key.mip_page << 12) + i * texture->mip_slice_size;
|
||||
}
|
||||
copy_constants.guest_base += texture->mip_offsets[j];
|
||||
copy_constants.guest_pitch = texture->key.tiled
|
||||
? CopyConstants::kGuestPitchTiled
|
||||
: texture->mip_pitches[j];
|
||||
copy_constants.host_base = uint32_t(host_layouts[j].Offset);
|
||||
copy_constants.host_pitch = host_layouts[j].Footprint.RowPitch;
|
||||
copy_constants.size[0] =
|
||||
(std::max(width >> j, 1u) + (block_width - 1)) / block_width;
|
||||
copy_constants.size[1] =
|
||||
(std::max(height >> j, 1u) + (block_height - 1)) / block_height;
|
||||
copy_constants.size[2] = std::max(depth >> j, 1u);
|
||||
if (texture->key.packed_mips) {
|
||||
texture_util::GetPackedMipOffset(width, height, depth, guest_format, j,
|
||||
copy_constants.guest_mip_offset[0],
|
||||
copy_constants.guest_mip_offset[1],
|
||||
copy_constants.guest_mip_offset[2]);
|
||||
}
|
||||
D3D12_GPU_VIRTUAL_ADDRESS cbuffer_gpu_address;
|
||||
uint8_t* cbuffer_mapping = cbuffer_pool->RequestFull(
|
||||
xe::align(uint32_t(sizeof(copy_constants)), 256u), nullptr, nullptr,
|
||||
&cbuffer_gpu_address);
|
||||
if (cbuffer_mapping == nullptr) {
|
||||
command_processor_->ReleaseScratchGPUBuffer(copy_buffer,
|
||||
copy_buffer_state);
|
||||
return false;
|
||||
}
|
||||
std::memcpy(cbuffer_mapping, ©_constants, sizeof(copy_constants));
|
||||
command_list->SetComputeRootConstantBufferView(0, cbuffer_gpu_address);
|
||||
// Each thread group processes 32x32x1 blocks.
|
||||
command_list->Dispatch((copy_constants.size[0] + 31) >> 5,
|
||||
(copy_constants.size[1] + 31) >> 5,
|
||||
copy_constants.size[2]);
|
||||
}
|
||||
barriers[0].Type = D3D12_RESOURCE_BARRIER_TYPE_UAV;
|
||||
barriers[0].UAV.pResource = copy_buffer;
|
||||
barriers[1].Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
|
||||
barriers[1].Transition.pResource = copy_buffer;
|
||||
barriers[1].Transition.Subresource = 0;
|
||||
barriers[1].Transition.StateBefore = D3D12_RESOURCE_STATE_UNORDERED_ACCESS;
|
||||
barriers[1].Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_SOURCE;
|
||||
command_list->ResourceBarrier(2, barriers);
|
||||
copy_buffer_state = D3D12_RESOURCE_STATE_COPY_SOURCE;
|
||||
UINT slice_first_subresource = i * resource_desc.MipLevels;
|
||||
for (uint32_t j = mip_first; j <= mip_last; ++j) {
|
||||
D3D12_TEXTURE_COPY_LOCATION location_source, location_dest;
|
||||
location_source.pResource = copy_buffer;
|
||||
location_source.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT;
|
||||
location_source.PlacedFootprint = host_layouts[j];
|
||||
location_dest.pResource = texture->resource;
|
||||
location_dest.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX;
|
||||
location_dest.SubresourceIndex = slice_first_subresource + j;
|
||||
command_list->CopyTextureRegion(&location_dest, 0, 0, 0, &location_source,
|
||||
nullptr);
|
||||
}
|
||||
}
|
||||
|
||||
command_processor_->ReleaseScratchGPUBuffer(copy_buffer, copy_buffer_state);
|
||||
|
||||
// TODO(Triang3l): Uncomment when done testing untiling shaders.
|
||||
/* texture->base_in_sync = true;
|
||||
texture->mips_in_sync = true; */
|
||||
|
||||
LogTextureAction(texture, "Loaded");
|
||||
return true;
|
||||
}
|
||||
|
||||
void TextureCache::ClearBindings() {
|
||||
std::memset(texture_bindings_, 0, sizeof(texture_bindings_));
|
||||
texture_keys_in_sync_ = 0;
|
||||
|
|
|
@ -80,8 +80,22 @@ class TextureCache {
|
|||
D3D12_CPU_DESCRIPTOR_HANDLE handle);
|
||||
|
||||
private:
|
||||
// Shader blob used to load (copy) texture data for one copy mode.
// NOTE(review): presumably copy_mode_info_ holds one entry per CopyMode value -
// confirm against the table definition.
struct CopyModeInfo {
  // Pointer to the load shader data.
  const void* load_shader;
  // Size of the data at load_shader (presumably in bytes - confirm).
  size_t load_shader_size;
};
|
||||
|
||||
// Ways of copying guest texture data into a host texture. Each value below
// kCount indexes copy_load_pipelines_.
enum class CopyMode {
  // NOTE(review): presumably "64 bits per block" (e.g. DXT1) - confirm.
  k64Bpb,

  // Number of real copy modes; must stay after the last real mode.
  kCount,

  // Shares the value of kCount, so it never indexes a valid pipeline.
  // NOTE(review): presumably marks formats that cannot be loaded - confirm.
  kUnknown = kCount
};
|
||||
|
||||
struct HostFormat {
|
||||
DXGI_FORMAT dxgi_format;
|
||||
CopyMode copy_mode;
|
||||
};
|
||||
|
||||
union TextureKey {
|
||||
|
@ -150,16 +164,45 @@ class TextureCache {
|
|||
TextureKey key;
|
||||
ID3D12Resource* resource;
|
||||
D3D12_RESOURCE_STATES state;
|
||||
// Byte size of one array slice of the top guest mip level.
|
||||
uint32_t base_slice_size;
|
||||
// Byte size of the top guest mip level.
|
||||
uint32_t base_size;
|
||||
// Byte size of one array slice of mips between 1 and key.mip_max_level.
|
||||
uint32_t mip_slice_size;
|
||||
// Byte size of mips between 1 and key.mip_max_level.
|
||||
uint32_t mip_size;
|
||||
// Byte offsets of each mipmap within one slice.
|
||||
uint32_t mip_offsets[14];
|
||||
// Byte pitches of each mipmap within one slice (for linear layout mainly).
|
||||
uint32_t mip_pitches[14];
|
||||
// Whether the recent base level data has been loaded from the memory.
|
||||
bool base_in_sync;
|
||||
// Whether the recent mip data has been loaded from the memory.
|
||||
bool mips_in_sync;
|
||||
};
|
||||
|
||||
// Constants passed to the texture load compute shader. LoadTextureData fills
// one instance per mip level, copies it into a constant buffer and binds it as
// compute root CBV 0. The members are grouped into 16-byte rows ("vec4 N") -
// keep that grouping when adding fields.
struct CopyConstants {
  // vec4 0.
  // Byte address of the source mip data in the guest memory.
  uint32_t guest_base;
  // For linear textures - row byte pitch. For tiled textures -
  // kGuestPitchTiled.
  uint32_t guest_pitch;
  // Byte offset of the destination mip within the host copy buffer.
  uint32_t host_base;
  // Row byte pitch of the destination mip within the host copy buffer.
  uint32_t host_pitch;

  // vec4 1.
  // Size in blocks.
  uint32_t size[3];
  // 1 if the texture is 3D, 0 otherwise.
  uint32_t is_3d;

  // vec4 2.
  // Offset within the packed mip for small mips.
  uint32_t guest_mip_offset[3];
  // Endianness of the guest texture data (the texture key's endianness value
  // converted to uint32_t).
  uint32_t endianness;

  // Special guest_pitch value indicating that the guest texture is tiled.
  static constexpr uint32_t kGuestPitchTiled = UINT32_MAX;
};
|
||||
|
||||
struct TextureBinding {
|
||||
TextureKey key;
|
||||
uint32_t swizzle;
|
||||
|
@ -173,26 +216,34 @@ class TextureCache {
|
|||
uint32_t& swizzle_out);
|
||||
|
||||
static void LogTextureKeyAction(TextureKey key, const char* action);
|
||||
static void LogTextureAction(const Texture& texture, const char* action);
|
||||
static void LogTextureAction(const Texture* texture, const char* action);
|
||||
|
||||
// Returns nullptr if the key is not supported, but also if couldn't create
|
||||
// the texture - if it's nullptr, occasionally a recreation attempt should be
|
||||
// made.
|
||||
Texture* FindOrCreateTexture(TextureKey key);
|
||||
|
||||
// Writes data from the shared memory to the texture. This binds pipelines and
|
||||
// allocates descriptors!
|
||||
bool LoadTextureData(Texture* texture);
|
||||
|
||||
// Makes all bindings invalid. Also requesting textures after calling this
|
||||
// will cause another attempt to create a texture or to untile it if there was
|
||||
// an error.
|
||||
void ClearBindings();
|
||||
|
||||
static HostFormat host_formats_[64];
|
||||
static const HostFormat host_formats_[64];
|
||||
|
||||
static const char* dimension_names_[4];
|
||||
static const char* const dimension_names_[4];
|
||||
|
||||
D3D12CommandProcessor* command_processor_;
|
||||
RegisterFile* register_file_;
|
||||
SharedMemory* shared_memory_;
|
||||
|
||||
static const CopyModeInfo copy_mode_info_[];
|
||||
ID3D12RootSignature* copy_root_signature_ = nullptr;
|
||||
ID3D12PipelineState* copy_load_pipelines_[size_t(CopyMode::kCount)] = {};
|
||||
|
||||
std::unordered_multimap<uint64_t, Texture*> textures_;
|
||||
|
||||
TextureBinding texture_bindings_[32] = {};
|
||||
|
|
|
@ -56,17 +56,15 @@ void GetGuestMipBlocks(Dimension dimension, uint32_t width, uint32_t height,
|
|||
|
||||
uint32_t GetGuestMipStorageSize(uint32_t width_blocks, uint32_t height_blocks,
|
||||
uint32_t depth_blocks, bool is_tiled,
|
||||
TextureFormat format, uint32_t* row_pitch_out) {
|
||||
TextureFormat format, uint32_t& row_pitch_out) {
|
||||
const FormatInfo* format_info = FormatInfo::Get(format);
|
||||
uint32_t row_pitch =
|
||||
width_blocks * format_info->block_width * format_info->block_height *
|
||||
format_info->bits_per_pixel / 8;
|
||||
uint32_t row_pitch = width_blocks * format_info->block_width *
|
||||
format_info->block_height * format_info->bits_per_pixel /
|
||||
8;
|
||||
if (!is_tiled) {
|
||||
row_pitch = xe::align(row_pitch, 256u);
|
||||
}
|
||||
if (row_pitch_out != nullptr) {
|
||||
*row_pitch_out = row_pitch;
|
||||
}
|
||||
row_pitch_out = row_pitch;
|
||||
return xe::align(row_pitch * height_blocks * depth_blocks, 4096u);
|
||||
}
|
||||
|
||||
|
|
|
@ -33,7 +33,7 @@ void GetGuestMipBlocks(Dimension dimension, uint32_t width, uint32_t height,
|
|||
// height and depth must be obtained via GetGuestMipExtent.
|
||||
uint32_t GetGuestMipStorageSize(uint32_t width_blocks, uint32_t height_blocks,
|
||||
uint32_t depth_blocks, bool is_tiled,
|
||||
TextureFormat format, uint32_t* row_pitch_out);
|
||||
TextureFormat format, uint32_t& row_pitch_out);
|
||||
|
||||
// Gets the number of the mipmap level where the packed mips are stored.
|
||||
inline uint32_t GetPackedMipLevel(uint32_t width, uint32_t height) {
|
||||
|
|
Loading…
Reference in New Issue