[D3D12] DXT1 untiling
This commit is contained in:
parent
265d6eb9df
commit
cfd3821b83
|
@ -435,6 +435,13 @@ void D3D12CommandProcessor::ReleaseScratchGPUBuffer(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Makes `pipeline` the pipeline state of the current command list. Nothing is
// recorded when `pipeline` already equals current_pipeline_, so redundant
// state changes are skipped.
void D3D12CommandProcessor::SetPipeline(ID3D12PipelineState* pipeline) {
  if (current_pipeline_ == pipeline) {
    // Already the tracked pipeline - no command needs to be recorded.
    return;
  }
  GetCurrentCommandList()->SetPipelineState(pipeline);
  current_pipeline_ = pipeline;
}
|
||||||
|
|
||||||
bool D3D12CommandProcessor::SetupContext() {
|
bool D3D12CommandProcessor::SetupContext() {
|
||||||
if (!CommandProcessor::SetupContext()) {
|
if (!CommandProcessor::SetupContext()) {
|
||||||
XELOGE("Failed to initialize base command processor context");
|
XELOGE("Failed to initialize base command processor context");
|
||||||
|
@ -475,6 +482,10 @@ bool D3D12CommandProcessor::SetupContext() {
|
||||||
|
|
||||||
texture_cache_ = std::make_unique<TextureCache>(this, register_file_,
|
texture_cache_ = std::make_unique<TextureCache>(this, register_file_,
|
||||||
shared_memory_.get());
|
shared_memory_.get());
|
||||||
|
if (!texture_cache_->Initialize()) {
|
||||||
|
XELOGE("Failed to initialize texture cache");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
@ -653,7 +664,7 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
|
||||||
}
|
}
|
||||||
|
|
||||||
bool new_frame = BeginFrame();
|
bool new_frame = BeginFrame();
|
||||||
ID3D12GraphicsCommandList* command_list = GetCurrentCommandList();
|
auto command_list = GetCurrentCommandList();
|
||||||
|
|
||||||
// Set the primitive topology.
|
// Set the primitive topology.
|
||||||
D3D_PRIMITIVE_TOPOLOGY primitive_topology;
|
D3D_PRIMITIVE_TOPOLOGY primitive_topology;
|
||||||
|
@ -698,10 +709,7 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
|
||||||
UpdateFixedFunctionState(command_list);
|
UpdateFixedFunctionState(command_list);
|
||||||
|
|
||||||
// Bind the pipeline.
|
// Bind the pipeline.
|
||||||
if (current_pipeline_ != pipeline) {
|
SetPipeline(pipeline);
|
||||||
current_pipeline_ = pipeline;
|
|
||||||
command_list->SetPipelineState(pipeline);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Update system constants before uploading them.
|
// Update system constants before uploading them.
|
||||||
UpdateSystemConstantValues(indexed ? index_buffer_info->endianness
|
UpdateSystemConstantValues(indexed ? index_buffer_info->endianness
|
||||||
|
|
|
@ -50,6 +50,9 @@ class D3D12CommandProcessor : public CommandProcessor {
|
||||||
ID3D12RootSignature* GetRootSignature(const D3D12Shader* vertex_shader,
|
ID3D12RootSignature* GetRootSignature(const D3D12Shader* vertex_shader,
|
||||||
const D3D12Shader* pixel_shader);
|
const D3D12Shader* pixel_shader);
|
||||||
|
|
||||||
|
ui::d3d12::UploadBufferPool* GetConstantBufferPool() const {
|
||||||
|
return constant_buffer_pool_.get();
|
||||||
|
}
|
||||||
// Request and automatically rebind descriptors on the draw command list.
|
// Request and automatically rebind descriptors on the draw command list.
|
||||||
// Refer to DescriptorHeapPool::Request for partial/full update explanation.
|
// Refer to DescriptorHeapPool::Request for partial/full update explanation.
|
||||||
uint64_t RequestViewDescriptors(uint64_t previous_full_update,
|
uint64_t RequestViewDescriptors(uint64_t previous_full_update,
|
||||||
|
@ -73,6 +76,10 @@ class D3D12CommandProcessor : public CommandProcessor {
|
||||||
void ReleaseScratchGPUBuffer(ID3D12Resource* buffer,
|
void ReleaseScratchGPUBuffer(ID3D12Resource* buffer,
|
||||||
D3D12_RESOURCE_STATES new_state);
|
D3D12_RESOURCE_STATES new_state);
|
||||||
|
|
||||||
|
// Sets the current pipeline state - may be called internally or externally.
|
||||||
|
// This is for cache invalidation primarily. A frame must be open.
|
||||||
|
void SetPipeline(ID3D12PipelineState* pipeline);
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
bool SetupContext() override;
|
bool SetupContext() override;
|
||||||
void ShutdownContext() override;
|
void ShutdownContext() override;
|
||||||
|
|
|
@ -0,0 +1,19 @@
|
||||||
|
#ifndef XENIA_GPU_D3D12_SHADERS_BYTE_SWAP_HLSLI_
|
||||||
|
#define XENIA_GPU_D3D12_SHADERS_BYTE_SWAP_HLSLI_
|
||||||
|
|
||||||
|
// Defines XeByteSwap(v, endian) for the given 32-bit unsigned scalar/vector
// type. Two independent steps are applied to every 32-bit component:
//  - when bit 0 and bit 1 of `endian` differ, the two bytes inside each 16-bit
//    half are exchanged;
//  - when bit 1 of `endian` is set, the two 16-bit halves are exchanged.
// So endian 0 leaves v unchanged, 1 swaps bytes within halves, 2 reverses all
// four bytes, and 3 only swaps the halves.
#define XE_BYTE_SWAP_OVERLOAD(XeByteSwapType) \
XeByteSwapType XeByteSwap(XeByteSwapType v, uint endian) { \
  bool xe_swap_bytes_in_halves = ((endian ^ (endian >> 1u)) & 1u) != 0u; \
  bool xe_swap_halves = (endian & 2u) != 0u; \
  [flatten] if (xe_swap_bytes_in_halves) { \
    v = ((v & 0x00FF00FFu) << 8u) | ((v & 0xFF00FF00u) >> 8u); \
  } \
  [flatten] if (xe_swap_halves) { \
    v = (v << 16u) | (v >> 16u); \
  } \
  return v; \
}
|
||||||
|
XE_BYTE_SWAP_OVERLOAD(uint)
|
||||||
|
XE_BYTE_SWAP_OVERLOAD(uint2)
|
||||||
|
XE_BYTE_SWAP_OVERLOAD(uint3)
|
||||||
|
XE_BYTE_SWAP_OVERLOAD(uint4)
|
||||||
|
|
||||||
|
#endif // XENIA_GPU_D3D12_SHADERS_BYTE_SWAP_HLSLI_
|
|
@ -0,0 +1,52 @@
|
||||||
|
#ifndef XENIA_GPU_D3D12_SHADERS_TEXTURE_ADDRESS_HLSLI_
|
||||||
|
#define XENIA_GPU_D3D12_SHADERS_TEXTURE_ADDRESS_HLSLI_
|
||||||
|
|
||||||
|
// XeTextureTiledOffset functions take x/y in blocks and return byte offsets for 4
|
||||||
|
// consecutive blocks along X.
|
||||||
|
|
||||||
|
// https://github.com/gildor2/UModel/blob/de8fbd3bc922427ea056b7340202dcdcc19ccff5/Unreal/UnTexture.cpp#L495
|
||||||
|
// Returns the offsets, within a tiled 2D texture, of the 4 blocks at
// (p.x .. p.x + 3, p.y). `width` is rounded up to a multiple of 32 and
// `log2_bpb` is used as a shift amount - presumably log2 of the bytes per
// block (callers in this commit pass 3 for 8-byte blocks).
uint4 XeTextureTiledOffset2D(uint2 p, uint width, uint log2_bpb) {
  uint4 block_x = p.xxxx + uint4(0u, 1u, 2u, 3u);
  // Top bits of coordinates.
  uint aligned_width_macro = (width + 31u) >> 5u;
  uint4 macro_part = ((block_x >> 5u) + (p.y >> 5u) * aligned_width_macro)
                     << (log2_bpb + 7u);
  // Lower bits of coordinates (result is 6-bit value).
  uint4 micro_part = ((block_x & 7u) + ((p.y & 0xEu) << 2u)) << log2_bpb;
  // Mix micro/macro + add few remaining x/y bits.
  uint4 mixed = macro_part + ((micro_part & ~0xFu) << 1u) +
                (micro_part & 0xFu) + ((p.y & 1u) << 4u);
  // Mix bits again, one named term per bit group of the result.
  uint4 upper_bits = (mixed & ~0x1FFu) << 3u;  // Offset bits [*-9].
  uint next_1_bit = (p.y & 16u) << 7u;
  uint4 next_3_bits = (mixed & 0x1C0u) << 2u;  // Offset bits [8-6].
  uint4 next_2_bits = (((block_x >> 3u) + ((p.y & 8u) >> 2u)) & 3u) << 6u;
  uint4 lower_6_bits = mixed & 0x3Fu;  // Offset bits [5-0].
  return upper_bits + next_1_bit + next_3_bits + next_2_bits + lower_6_bits;
}
|
||||||
|
|
||||||
|
// Reverse-engineered from an executable.
|
||||||
|
// The base/micro/macro names were chosen pretty much at random and don't have
|
||||||
|
// the same meaning as in TiledOffset2D.
|
||||||
|
// Returns the offsets, within a tiled 3D texture, of the 4 blocks at
// (p.x .. p.x + 3, p.y, p.z). Both components of width_height are rounded up
// to a multiple of 32 before use. log2_bpb is only used as a shift amount -
// presumably log2 of the bytes per block; confirm against callers.
// NOTE(review): the bit mixing below is reverse-engineered; keep the statement
// order and masks exactly as they are.
uint4 XeTextureTiledOffset3D(uint3 p, uint2 width_height, uint log2_bpb) {
|
||||||
|
uint4 x4 = uint4(0u, 1u, 2u, 3u) + p.xxxx;
|
||||||
|
uint2 aligned_size = (width_height + 31u) & ~31u;
|
||||||
|
uint base = ((p.z >> 2u) * ((aligned_size.x * aligned_size.y) >> 4u) +
|
||||||
|
(p.y >> 4u)) * (aligned_size.x >> 5u);
|
||||||
|
uint4 micro = (((p.z >> 2u) + (p.y >> 3u)) & 1u).xxxx;
|
||||||
|
micro += (((micro << 1u) + (x4 >> 3u)) & 3u) << 1u;
|
||||||
|
uint4 macro = (((x4 & 7u) + ((p.y & 6u) << 2u)) << (log2_bpb + 6u)) >> 6u;
|
||||||
|
macro = (((((((x4 >> 5u) + base) << (log2_bpb + 6u)) & 0xFFFFFFFu) << 1u) +
|
||||||
|
(macro & ~15u)) << 1u) + (macro & 15u) +
|
||||||
|
((p.z & 3u) << (log2_bpb + 6u)) + ((p.y & 1u) << 4u);
|
||||||
|
return ((((((((macro >> 6u) & 7u) + ((micro & 1u) << 3u)) << 3u) +
|
||||||
|
(micro & ~1u)) << 2u) + (macro & ~511u)) << 3u) + (macro & 63u);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns the byte offset of block `p` in a linearly laid out guest texture.
//   p      - block coordinates (x, y, z).
//   height - rows per Z slice; rounded up to a multiple of 32 here.
//   pitch  - byte distance between two consecutive rows.
//   bpb    - bytes per block.
// The row index across all slices is z * aligned_height + y, and it is that
// row index that must be scaled by the row pitch. The previous form,
// (z * aligned_height + pitch) * y, only gave the right result for z == 0.
uint XeTextureGuestLinearOffset(uint3 p, uint height, uint pitch, uint bpb) {
  uint aligned_height = (height + 31u) & ~31u;
  uint row_index = p.z * aligned_height + p.y;
  return p.x * bpb + row_index * pitch;
}
|
||||||
|
|
||||||
|
// Returns the byte offset of block `p` in a linearly laid out host buffer.
//   p      - block coordinates (x, y, z).
//   height - rows per Z slice (used as is, without extra alignment).
//   pitch  - byte distance between two consecutive rows.
//   bpb    - bytes per block.
// The row index across all slices is z * height + y, and it is that row index
// that must be scaled by the row pitch. The previous form,
// (z * height + pitch) * y, only gave the right result for z == 0.
uint XeTextureHostLinearOffset(uint3 p, uint height, uint pitch, uint bpb) {
  uint row_index = p.z * height + p.y;
  return p.x * bpb + row_index * pitch;
}
|
||||||
|
|
||||||
|
#endif // XENIA_GPU_D3D12_SHADERS_TEXTURE_ADDRESS_HLSLI_
|
|
@ -0,0 +1,28 @@
|
||||||
|
#ifndef XENIA_GPU_D3D12_SHADERS_TEXTURE_COPY_HLSLI_
|
||||||
|
#define XENIA_GPU_D3D12_SHADERS_TEXTURE_COPY_HLSLI_
|
||||||
|
|
||||||
|
#include "byte_swap.hlsli"
|
||||||
|
#include "texture_address.hlsli"
|
||||||
|
|
||||||
|
// Constants for the texture copy shaders, bound at register b0.
// The member order defines the constant buffer layout that the CPU side has to
// match, so members must not be reordered or retyped.
// xe_texture_copy_guest_pitch equal to XeTextureCopyGuestPitchTiled selects
// the tiled addressing path in the copy shader instead of a linear row pitch.
cbuffer xe_texture_copy_constants : register(b0) {
|
||||||
|
uint xe_texture_copy_guest_base;
|
||||||
|
// For linear textures - row byte pitch.
|
||||||
|
uint xe_texture_copy_guest_pitch;
|
||||||
|
uint xe_texture_copy_host_base;
|
||||||
|
uint xe_texture_copy_host_pitch;
|
||||||
|
|
||||||
|
// Size in blocks.
|
||||||
|
uint3 xe_texture_copy_size;
|
||||||
|
bool xe_texture_copy_is_3d;
|
||||||
|
|
||||||
|
// Offset within the packed mip for small mips.
|
||||||
|
uint3 xe_texture_copy_guest_mip_offset;
|
||||||
|
uint xe_texture_copy_endianness;
|
||||||
|
};
|
||||||
|
|
||||||
|
#define XeTextureCopyGuestPitchTiled 0xFFFFFFFFu
|
||||||
|
|
||||||
|
ByteAddressBuffer xe_texture_copy_source : register(t0);
|
||||||
|
RWByteAddressBuffer xe_texture_copy_dest : register(u0);
|
||||||
|
|
||||||
|
#endif // XENIA_GPU_D3D12_SHADERS_TEXTURE_COPY_HLSLI_
|
|
@ -0,0 +1,38 @@
|
||||||
|
#include "texture_copy.hlsli"
|
||||||
|
|
||||||
|
// Copies 8-byte (uint2) blocks from the guest texture layout (tiled or linear)
// into a linear host buffer, applying the guest endian swap on the way.
// Each thread handles 4 consecutive blocks along X.
[numthreads(8, 32, 1)]
void main(uint3 xe_thread_id : SV_DispatchThreadID) {
  // First of the 4 blocks processed by this thread, in host coordinates.
  uint3 host_block = uint3(xe_thread_id.x << 2u, xe_thread_id.yz);
  [branch] if (any(host_block >= xe_texture_copy_size)) {
    return;
  }

  // Locate the same 4 blocks in the guest memory.
  uint3 guest_block = host_block + xe_texture_copy_guest_mip_offset;
  uint4 guest_offsets;
  [branch] if (xe_texture_copy_guest_pitch != XeTextureCopyGuestPitchTiled) {
    // Linear source - the 4 blocks are adjacent, 8 bytes apart.
    guest_offsets =
        XeTextureGuestLinearOffset(guest_block, xe_texture_copy_size.y,
                                   xe_texture_copy_guest_pitch, 8u) +
        uint4(0u, 8u, 16u, 24u);
  } else {
    [branch] if (xe_texture_copy_is_3d) {
      guest_offsets =
          XeTextureTiledOffset3D(guest_block, xe_texture_copy_size.xy, 3u);
    } else {
      guest_offsets =
          XeTextureTiledOffset2D(guest_block.xy, xe_texture_copy_size.x, 3u);
    }
  }
  guest_offsets += xe_texture_copy_guest_base;

  // Load two blocks per uint4 and convert them from the guest byte order.
  uint4 first_pair = XeByteSwap(
      uint4(xe_texture_copy_source.Load2(guest_offsets.x),
            xe_texture_copy_source.Load2(guest_offsets.y)),
      xe_texture_copy_endianness);
  uint4 second_pair = XeByteSwap(
      uint4(xe_texture_copy_source.Load2(guest_offsets.z),
            xe_texture_copy_source.Load2(guest_offsets.w)),
      xe_texture_copy_endianness);

  // Write all 4 blocks (32 bytes) contiguously to the host buffer.
  uint host_offset =
      XeTextureHostLinearOffset(host_block, xe_texture_copy_size.y,
                                xe_texture_copy_host_pitch, 8u) +
      xe_texture_copy_host_base;
  xe_texture_copy_dest.Store4(host_offset, first_pair);
  xe_texture_copy_dest.Store4(host_offset + 16u, second_pair);
}
|
|
@ -14,81 +14,90 @@
|
||||||
|
|
||||||
#include "xenia/base/assert.h"
|
#include "xenia/base/assert.h"
|
||||||
#include "xenia/base/logging.h"
|
#include "xenia/base/logging.h"
|
||||||
|
#include "xenia/base/math.h"
|
||||||
#include "xenia/gpu/d3d12/d3d12_command_processor.h"
|
#include "xenia/gpu/d3d12/d3d12_command_processor.h"
|
||||||
|
#include "xenia/gpu/texture_info.h"
|
||||||
#include "xenia/gpu/texture_util.h"
|
#include "xenia/gpu/texture_util.h"
|
||||||
|
|
||||||
namespace xe {
|
namespace xe {
|
||||||
namespace gpu {
|
namespace gpu {
|
||||||
namespace d3d12 {
|
namespace d3d12 {
|
||||||
|
|
||||||
TextureCache::HostFormat TextureCache::host_formats_[64] = {
|
// Generated with `xb buildhlsl`.
|
||||||
{DXGI_FORMAT_UNKNOWN}, // k_1_REVERSE
|
#include "xenia/gpu/d3d12/shaders/bin/texture_load_64bpb_cs.h"
|
||||||
{DXGI_FORMAT_UNKNOWN}, // k_1
|
|
||||||
{DXGI_FORMAT_UNKNOWN}, // k_8
|
const TextureCache::HostFormat TextureCache::host_formats_[64] = {
|
||||||
{DXGI_FORMAT_UNKNOWN}, // k_1_5_5_5
|
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_1_REVERSE
|
||||||
{DXGI_FORMAT_UNKNOWN}, // k_5_6_5
|
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_1
|
||||||
{DXGI_FORMAT_UNKNOWN}, // k_6_5_5
|
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_8
|
||||||
{DXGI_FORMAT_UNKNOWN}, // k_8_8_8_8
|
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_1_5_5_5
|
||||||
{DXGI_FORMAT_UNKNOWN}, // k_2_10_10_10
|
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_5_6_5
|
||||||
{DXGI_FORMAT_UNKNOWN}, // k_8_A
|
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_6_5_5
|
||||||
{DXGI_FORMAT_UNKNOWN}, // k_8_B
|
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_8_8_8_8
|
||||||
{DXGI_FORMAT_UNKNOWN}, // k_8_8
|
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_2_10_10_10
|
||||||
{DXGI_FORMAT_UNKNOWN}, // k_Cr_Y1_Cb_Y0
|
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_8_A
|
||||||
{DXGI_FORMAT_UNKNOWN}, // k_Y1_Cr_Y0_Cb
|
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_8_B
|
||||||
{DXGI_FORMAT_UNKNOWN}, // k_Shadow
|
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_8_8
|
||||||
{DXGI_FORMAT_UNKNOWN}, // k_8_8_8_8_A
|
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_Cr_Y1_Cb_Y0
|
||||||
{DXGI_FORMAT_UNKNOWN}, // k_4_4_4_4
|
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_Y1_Cr_Y0_Cb
|
||||||
{DXGI_FORMAT_UNKNOWN}, // k_10_11_11
|
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_Shadow
|
||||||
{DXGI_FORMAT_UNKNOWN}, // k_11_11_10
|
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_8_8_8_8_A
|
||||||
{DXGI_FORMAT_BC1_UNORM}, // k_DXT1
|
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_4_4_4_4
|
||||||
{DXGI_FORMAT_UNKNOWN}, // k_DXT2_3
|
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_10_11_11
|
||||||
{DXGI_FORMAT_UNKNOWN}, // k_DXT4_5
|
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_11_11_10
|
||||||
{DXGI_FORMAT_UNKNOWN}, // k_DXV
|
{DXGI_FORMAT_BC1_UNORM, CopyMode::k64Bpb}, // k_DXT1
|
||||||
{DXGI_FORMAT_UNKNOWN}, // k_24_8
|
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_DXT2_3
|
||||||
{DXGI_FORMAT_UNKNOWN}, // k_24_8_FLOAT
|
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_DXT4_5
|
||||||
{DXGI_FORMAT_UNKNOWN}, // k_16
|
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_DXV
|
||||||
{DXGI_FORMAT_UNKNOWN}, // k_16_16
|
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_24_8
|
||||||
{DXGI_FORMAT_UNKNOWN}, // k_16_16_16_16
|
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_24_8_FLOAT
|
||||||
{DXGI_FORMAT_UNKNOWN}, // k_16_EXPAND
|
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_16
|
||||||
{DXGI_FORMAT_UNKNOWN}, // k_16_16_EXPAND
|
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_16_16
|
||||||
{DXGI_FORMAT_UNKNOWN}, // k_16_16_16_16_EXPAND
|
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_16_16_16_16
|
||||||
{DXGI_FORMAT_UNKNOWN}, // k_16_FLOAT
|
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_16_EXPAND
|
||||||
{DXGI_FORMAT_UNKNOWN}, // k_16_16_FLOAT
|
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_16_16_EXPAND
|
||||||
{DXGI_FORMAT_UNKNOWN}, // k_16_16_16_16_FLOAT
|
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_16_16_16_16_EXPAND
|
||||||
{DXGI_FORMAT_UNKNOWN}, // k_32
|
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_16_FLOAT
|
||||||
{DXGI_FORMAT_UNKNOWN}, // k_32_32
|
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_16_16_FLOAT
|
||||||
{DXGI_FORMAT_UNKNOWN}, // k_32_32_32_32
|
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_16_16_16_16_FLOAT
|
||||||
{DXGI_FORMAT_UNKNOWN}, // k_32_FLOAT
|
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_32
|
||||||
{DXGI_FORMAT_UNKNOWN}, // k_32_32_FLOAT
|
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_32_32
|
||||||
{DXGI_FORMAT_UNKNOWN}, // k_32_32_32_32_FLOAT
|
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_32_32_32_32
|
||||||
{DXGI_FORMAT_UNKNOWN}, // k_32_AS_8
|
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_32_FLOAT
|
||||||
{DXGI_FORMAT_UNKNOWN}, // k_32_AS_8_8
|
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_32_32_FLOAT
|
||||||
{DXGI_FORMAT_UNKNOWN}, // k_16_MPEG
|
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_32_32_32_32_FLOAT
|
||||||
{DXGI_FORMAT_UNKNOWN}, // k_16_16_MPEG
|
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_32_AS_8
|
||||||
{DXGI_FORMAT_UNKNOWN}, // k_8_INTERLACED
|
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_32_AS_8_8
|
||||||
{DXGI_FORMAT_UNKNOWN}, // k_32_AS_8_INTERLACED
|
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_16_MPEG
|
||||||
{DXGI_FORMAT_UNKNOWN}, // k_32_AS_8_8_INTERLACED
|
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_16_16_MPEG
|
||||||
{DXGI_FORMAT_UNKNOWN}, // k_16_INTERLACED
|
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_8_INTERLACED
|
||||||
{DXGI_FORMAT_UNKNOWN}, // k_16_MPEG_INTERLACED
|
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_32_AS_8_INTERLACED
|
||||||
{DXGI_FORMAT_UNKNOWN}, // k_16_16_MPEG_INTERLACED
|
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_32_AS_8_8_INTERLACED
|
||||||
{DXGI_FORMAT_UNKNOWN}, // k_DXN
|
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_16_INTERLACED
|
||||||
{DXGI_FORMAT_UNKNOWN}, // k_8_8_8_8_AS_16_16_16_16
|
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_16_MPEG_INTERLACED
|
||||||
{DXGI_FORMAT_BC1_UNORM}, // k_DXT1_AS_16_16_16_16
|
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_16_16_MPEG_INTERLACED
|
||||||
{DXGI_FORMAT_UNKNOWN}, // k_DXT2_3_AS_16_16_16_16
|
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_DXN
|
||||||
{DXGI_FORMAT_UNKNOWN}, // k_DXT4_5_AS_16_16_16_16
|
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_8_8_8_8_AS_16_16_16_16
|
||||||
{DXGI_FORMAT_UNKNOWN}, // k_2_10_10_10_AS_16_16_16_16
|
{DXGI_FORMAT_BC1_UNORM, CopyMode::k64Bpb}, // k_DXT1_AS_16_16_16_16
|
||||||
{DXGI_FORMAT_UNKNOWN}, // k_10_11_11_AS_16_16_16_16
|
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_DXT2_3_AS_16_16_16_16
|
||||||
{DXGI_FORMAT_UNKNOWN}, // k_11_11_10_AS_16_16_16_16
|
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_DXT4_5_AS_16_16_16_16
|
||||||
{DXGI_FORMAT_UNKNOWN}, // k_32_32_32_FLOAT
|
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_2_10_10_10_AS_16_16_16_16
|
||||||
{DXGI_FORMAT_UNKNOWN}, // k_DXT3A
|
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_10_11_11_AS_16_16_16_16
|
||||||
{DXGI_FORMAT_UNKNOWN}, // k_DXT5A
|
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_11_11_10_AS_16_16_16_16
|
||||||
{DXGI_FORMAT_UNKNOWN}, // k_CTX1
|
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_32_32_32_FLOAT
|
||||||
{DXGI_FORMAT_UNKNOWN}, // k_DXT3A_AS_1_1_1_1
|
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_DXT3A
|
||||||
{DXGI_FORMAT_UNKNOWN}, // k_8_8_8_8_GAMMA
|
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_DXT5A
|
||||||
{DXGI_FORMAT_UNKNOWN}, // k_2_10_10_10_FLOAT
|
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_CTX1
|
||||||
|
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_DXT3A_AS_1_1_1_1
|
||||||
|
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_8_8_8_8_GAMMA
|
||||||
|
{DXGI_FORMAT_UNKNOWN, CopyMode::kUnknown}, // k_2_10_10_10_FLOAT
|
||||||
};
|
};
|
||||||
|
|
||||||
const char* TextureCache::dimension_names_[4] = {"1D", "2D", "3D", "cube"};
|
const char* const TextureCache::dimension_names_[4] = {"1D", "2D", "3D",
|
||||||
|
"cube"};
|
||||||
|
|
||||||
|
const TextureCache::CopyModeInfo TextureCache::copy_mode_info_[] = {
|
||||||
|
{texture_load_64bpb_cs, sizeof(texture_load_64bpb_cs)}};
|
||||||
|
|
||||||
TextureCache::TextureCache(D3D12CommandProcessor* command_processor,
|
TextureCache::TextureCache(D3D12CommandProcessor* command_processor,
|
||||||
RegisterFile* register_file,
|
RegisterFile* register_file,
|
||||||
|
@ -100,11 +109,105 @@ TextureCache::TextureCache(D3D12CommandProcessor* command_processor,
|
||||||
TextureCache::~TextureCache() { Shutdown(); }
|
TextureCache::~TextureCache() { Shutdown(); }
|
||||||
|
|
||||||
bool TextureCache::Initialize() {
|
bool TextureCache::Initialize() {
|
||||||
|
auto device =
|
||||||
|
command_processor_->GetD3D12Context()->GetD3D12Provider()->GetDevice();
|
||||||
|
|
||||||
|
// Create the copying root signature.
|
||||||
|
D3D12_ROOT_PARAMETER root_parameters[2];
|
||||||
|
// Parameter 0 is constants (changed very often when untiling).
|
||||||
|
root_parameters[0].ParameterType = D3D12_ROOT_PARAMETER_TYPE_CBV;
|
||||||
|
root_parameters[0].Descriptor.ShaderRegister = 0;
|
||||||
|
root_parameters[0].Descriptor.RegisterSpace = 0;
|
||||||
|
root_parameters[0].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
|
||||||
|
// Parameter 1 is source and target.
|
||||||
|
D3D12_DESCRIPTOR_RANGE root_copy_ranges[2];
|
||||||
|
root_copy_ranges[0].RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV;
|
||||||
|
root_copy_ranges[0].NumDescriptors = 1;
|
||||||
|
root_copy_ranges[0].BaseShaderRegister = 0;
|
||||||
|
root_copy_ranges[0].RegisterSpace = 0;
|
||||||
|
root_copy_ranges[0].OffsetInDescriptorsFromTableStart = 0;
|
||||||
|
root_copy_ranges[1].RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_UAV;
|
||||||
|
root_copy_ranges[1].NumDescriptors = 1;
|
||||||
|
root_copy_ranges[1].BaseShaderRegister = 0;
|
||||||
|
root_copy_ranges[1].RegisterSpace = 0;
|
||||||
|
root_copy_ranges[1].OffsetInDescriptorsFromTableStart = 1;
|
||||||
|
root_parameters[1].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
|
||||||
|
root_parameters[1].DescriptorTable.NumDescriptorRanges = 2;
|
||||||
|
root_parameters[1].DescriptorTable.pDescriptorRanges = root_copy_ranges;
|
||||||
|
root_parameters[1].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
|
||||||
|
D3D12_ROOT_SIGNATURE_DESC root_signature_desc;
|
||||||
|
root_signature_desc.NumParameters = UINT(xe::countof(root_parameters));
|
||||||
|
root_signature_desc.pParameters = root_parameters;
|
||||||
|
root_signature_desc.NumStaticSamplers = 0;
|
||||||
|
root_signature_desc.pStaticSamplers = nullptr;
|
||||||
|
root_signature_desc.Flags = D3D12_ROOT_SIGNATURE_FLAG_NONE;
|
||||||
|
ID3DBlob* root_signature_blob;
|
||||||
|
ID3DBlob* root_signature_error_blob = nullptr;
|
||||||
|
if (FAILED(D3D12SerializeRootSignature(
|
||||||
|
&root_signature_desc, D3D_ROOT_SIGNATURE_VERSION_1,
|
||||||
|
&root_signature_blob, &root_signature_error_blob))) {
|
||||||
|
XELOGE("Failed to serialize the texture copying root signature");
|
||||||
|
if (root_signature_error_blob != nullptr) {
|
||||||
|
XELOGE("%s", reinterpret_cast<const char*>(
|
||||||
|
root_signature_error_blob->GetBufferPointer()));
|
||||||
|
root_signature_error_blob->Release();
|
||||||
|
}
|
||||||
|
Shutdown();
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (root_signature_error_blob != nullptr) {
|
||||||
|
root_signature_error_blob->Release();
|
||||||
|
}
|
||||||
|
if (FAILED(device->CreateRootSignature(
|
||||||
|
0, root_signature_blob->GetBufferPointer(),
|
||||||
|
root_signature_blob->GetBufferSize(),
|
||||||
|
IID_PPV_ARGS(©_root_signature_)))) {
|
||||||
|
XELOGE("Failed to create the texture copying root signature");
|
||||||
|
root_signature_blob->Release();
|
||||||
|
Shutdown();
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
root_signature_blob->Release();
|
||||||
|
|
||||||
|
// Create the copying pipelines.
|
||||||
|
D3D12_COMPUTE_PIPELINE_STATE_DESC pipeline_desc;
|
||||||
|
pipeline_desc.pRootSignature = copy_root_signature_;
|
||||||
|
pipeline_desc.NodeMask = 0;
|
||||||
|
pipeline_desc.CachedPSO.pCachedBlob = nullptr;
|
||||||
|
pipeline_desc.CachedPSO.CachedBlobSizeInBytes = 0;
|
||||||
|
pipeline_desc.Flags = D3D12_PIPELINE_STATE_FLAG_NONE;
|
||||||
|
for (uint32_t i = 0; i < uint32_t(CopyMode::kCount); ++i) {
|
||||||
|
const CopyModeInfo& mode_info = copy_mode_info_[i];
|
||||||
|
if (mode_info.load_shader != nullptr) {
|
||||||
|
pipeline_desc.CS.pShaderBytecode = mode_info.load_shader;
|
||||||
|
pipeline_desc.CS.BytecodeLength = mode_info.load_shader_size;
|
||||||
|
if (FAILED(device->CreateComputePipelineState(
|
||||||
|
&pipeline_desc, IID_PPV_ARGS(©_load_pipelines_[i])))) {
|
||||||
|
XELOGE("Failed to create the texture copying pipeline for mode %u", i);
|
||||||
|
Shutdown();
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
ClearBindings();
|
ClearBindings();
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
void TextureCache::Shutdown() { ClearCache(); }
|
void TextureCache::Shutdown() {
|
||||||
|
ClearCache();
|
||||||
|
|
||||||
|
for (uint32_t i = 0; i < uint32_t(CopyMode::kCount); ++i) {
|
||||||
|
if (copy_load_pipelines_[i] != nullptr) {
|
||||||
|
copy_load_pipelines_[i]->Release();
|
||||||
|
copy_load_pipelines_[i] = nullptr;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (copy_root_signature_ != nullptr) {
|
||||||
|
copy_root_signature_->Release();
|
||||||
|
copy_root_signature_ = nullptr;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void TextureCache::TextureFetchConstantWritten(uint32_t index) {
|
void TextureCache::TextureFetchConstantWritten(uint32_t index) {
|
||||||
texture_keys_in_sync_ &= ~(1u << index);
|
texture_keys_in_sync_ &= ~(1u << index);
|
||||||
|
@ -120,7 +223,6 @@ void TextureCache::BeginFrame() {
|
||||||
void TextureCache::RequestTextures(uint32_t used_vertex_texture_mask,
|
void TextureCache::RequestTextures(uint32_t used_vertex_texture_mask,
|
||||||
uint32_t used_pixel_texture_mask) {
|
uint32_t used_pixel_texture_mask) {
|
||||||
auto command_list = command_processor_->GetCurrentCommandList();
|
auto command_list = command_processor_->GetCurrentCommandList();
|
||||||
assert_not_null(command_list);
|
|
||||||
if (command_list == nullptr) {
|
if (command_list == nullptr) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -154,7 +256,7 @@ void TextureCache::RequestTextures(uint32_t used_vertex_texture_mask,
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO(Triang3l): Untile the texture.
|
LoadTextureData(binding.texture);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Transition the textures to the needed usage.
|
// Transition the textures to the needed usage.
|
||||||
|
@ -369,7 +471,7 @@ void TextureCache::TextureKeyFromFetchConstant(
|
||||||
key_out.mip_max_level = mip_max_level;
|
key_out.mip_max_level = mip_max_level;
|
||||||
key_out.tiled = fetch.tiled;
|
key_out.tiled = fetch.tiled;
|
||||||
key_out.packed_mips = fetch.packed_mips;
|
key_out.packed_mips = fetch.packed_mips;
|
||||||
key_out.format = TextureFormat(fetch.format);
|
key_out.format = GetBaseFormat(TextureFormat(fetch.format));
|
||||||
key_out.endianness = Endian(fetch.endianness);
|
key_out.endianness = Endian(fetch.endianness);
|
||||||
// Get rid of 6 and 7 values (to prevent device losses if the game has
|
// Get rid of 6 and 7 values (to prevent device losses if the game has
|
||||||
// something broken) the quick and dirty way - by changing them to 4 and 5.
|
// something broken) the quick and dirty way - by changing them to 4 and 5.
|
||||||
|
@ -380,26 +482,26 @@ void TextureCache::TextureKeyFromFetchConstant(
|
||||||
void TextureCache::LogTextureKeyAction(TextureKey key, const char* action) {
|
void TextureCache::LogTextureKeyAction(TextureKey key, const char* action) {
|
||||||
XELOGGPU(
|
XELOGGPU(
|
||||||
"%s %s %ux%ux%u %s %s texture with %u %spacked mip level%s, "
|
"%s %s %ux%ux%u %s %s texture with %u %spacked mip level%s, "
|
||||||
"base at 0x%.8X, mips at 0x%.8X", action, key.tiled ? "tiled" : "linear",
|
"base at 0x%.8X, mips at 0x%.8X",
|
||||||
key.width, key.height, key.depth,
|
action, key.tiled ? "tiled" : "linear", key.width, key.height, key.depth,
|
||||||
dimension_names_[uint32_t(key.dimension)],
|
dimension_names_[uint32_t(key.dimension)],
|
||||||
FormatInfo::Get(key.format)->name, key.mip_max_level + 1,
|
FormatInfo::Get(key.format)->name, key.mip_max_level + 1,
|
||||||
key.packed_mips ? "" : "un", key.mip_max_level != 0 ? "s" : "",
|
key.packed_mips ? "" : "un", key.mip_max_level != 0 ? "s" : "",
|
||||||
key.base_page << 12, key.mip_page << 12);
|
key.base_page << 12, key.mip_page << 12);
|
||||||
}
|
}
|
||||||
|
|
||||||
void TextureCache::LogTextureAction(const Texture& texture,
|
void TextureCache::LogTextureAction(const Texture* texture,
|
||||||
const char* action) {
|
const char* action) {
|
||||||
XELOGGPU(
|
XELOGGPU(
|
||||||
"%s %s %ux%ux%u %s %s texture with %u %spacked mip level%s, "
|
"%s %s %ux%ux%u %s %s texture with %u %spacked mip level%s, "
|
||||||
"base at 0x%.8X (size %u), mips at 0x%.8X (size %u)", action,
|
"base at 0x%.8X (size %u), mips at 0x%.8X (size %u)",
|
||||||
texture.key.tiled ? "tiled" : "linear", texture.key.width,
|
action, texture->key.tiled ? "tiled" : "linear", texture->key.width,
|
||||||
texture.key.height, texture.key.depth,
|
texture->key.height, texture->key.depth,
|
||||||
dimension_names_[uint32_t(texture.key.dimension)],
|
dimension_names_[uint32_t(texture->key.dimension)],
|
||||||
FormatInfo::Get(texture.key.format)->name,
|
FormatInfo::Get(texture->key.format)->name,
|
||||||
texture.key.mip_max_level + 1, texture.key.packed_mips ? "" : "un",
|
texture->key.mip_max_level + 1, texture->key.packed_mips ? "" : "un",
|
||||||
texture.key.mip_max_level != 0 ? "s" : "", texture.key.base_page << 12,
|
texture->key.mip_max_level != 0 ? "s" : "", texture->key.base_page << 12,
|
||||||
texture.base_size, texture.key.mip_page << 12, texture.mip_size);
|
texture->base_size, texture->key.mip_page << 12, texture->mip_size);
|
||||||
}
|
}
|
||||||
|
|
||||||
TextureCache::Texture* TextureCache::FindOrCreateTexture(TextureKey key) {
|
TextureCache::Texture* TextureCache::FindOrCreateTexture(TextureKey key) {
|
||||||
|
@ -457,19 +559,23 @@ TextureCache::Texture* TextureCache::FindOrCreateTexture(TextureKey key) {
|
||||||
texture->key = key;
|
texture->key = key;
|
||||||
texture->resource = resource;
|
texture->resource = resource;
|
||||||
texture->state = state;
|
texture->state = state;
|
||||||
|
texture->mip_offsets[0] = 0;
|
||||||
uint32_t width_blocks, height_blocks, depth_blocks;
|
uint32_t width_blocks, height_blocks, depth_blocks;
|
||||||
if (key.base_page != 0) {
|
if (key.base_page != 0) {
|
||||||
texture_util::GetGuestMipBlocks(key.dimension, key.width, key.height,
|
texture_util::GetGuestMipBlocks(key.dimension, key.width, key.height,
|
||||||
key.depth, key.format, 0, width_blocks,
|
key.depth, key.format, 0, width_blocks,
|
||||||
height_blocks, depth_blocks);
|
height_blocks, depth_blocks);
|
||||||
texture->base_size = texture_util::GetGuestMipStorageSize(
|
texture->base_slice_size = texture_util::GetGuestMipStorageSize(
|
||||||
width_blocks, height_blocks, depth_blocks, key.tiled, key.format,
|
width_blocks, height_blocks, depth_blocks, key.tiled, key.format,
|
||||||
nullptr);
|
texture->mip_pitches[0]);
|
||||||
texture->base_in_sync = false;
|
texture->base_in_sync = false;
|
||||||
} else {
|
} else {
|
||||||
|
texture->base_slice_size = 0;
|
||||||
|
texture->mip_pitches[0] = 0;
|
||||||
// Never try to upload the base level if there is none.
|
// Never try to upload the base level if there is none.
|
||||||
texture->base_in_sync = true;
|
texture->base_in_sync = true;
|
||||||
}
|
}
|
||||||
|
texture->mip_slice_size = 0;
|
||||||
if (key.mip_page != 0) {
|
if (key.mip_page != 0) {
|
||||||
uint32_t mip_max_storage_level = key.mip_max_level;
|
uint32_t mip_max_storage_level = key.mip_max_level;
|
||||||
if (key.packed_mips) {
|
if (key.packed_mips) {
|
||||||
|
@ -477,26 +583,241 @@ TextureCache::Texture* TextureCache::FindOrCreateTexture(TextureKey key) {
|
||||||
std::min(mip_max_storage_level,
|
std::min(mip_max_storage_level,
|
||||||
texture_util::GetPackedMipLevel(key.width, key.height));
|
texture_util::GetPackedMipLevel(key.width, key.height));
|
||||||
}
|
}
|
||||||
texture->mip_size = 0;
|
|
||||||
for (uint32_t i = 1; i <= mip_max_storage_level; ++i) {
|
for (uint32_t i = 1; i <= mip_max_storage_level; ++i) {
|
||||||
texture_util::GetGuestMipBlocks(key.dimension, key.width, key.height,
|
texture_util::GetGuestMipBlocks(key.dimension, key.width, key.height,
|
||||||
key.depth, key.format, i, width_blocks,
|
key.depth, key.format, i, width_blocks,
|
||||||
height_blocks, depth_blocks);
|
height_blocks, depth_blocks);
|
||||||
texture->mip_size += texture_util::GetGuestMipStorageSize(
|
texture->mip_offsets[i] = texture->mip_slice_size;
|
||||||
|
texture->mip_slice_size += texture_util::GetGuestMipStorageSize(
|
||||||
width_blocks, height_blocks, depth_blocks, key.tiled, key.format,
|
width_blocks, height_blocks, depth_blocks, key.tiled, key.format,
|
||||||
nullptr);
|
texture->mip_pitches[i]);
|
||||||
|
}
|
||||||
|
// The rest are either packed levels or don't exist at all.
|
||||||
|
for (uint32_t i = mip_max_storage_level + 1;
|
||||||
|
i < xe::countof(texture->mip_offsets); ++i) {
|
||||||
|
texture->mip_offsets[i] = texture->mip_offsets[mip_max_storage_level];
|
||||||
|
texture->mip_pitches[i] = texture->mip_pitches[mip_max_storage_level];
|
||||||
}
|
}
|
||||||
texture->mips_in_sync = false;
|
texture->mips_in_sync = false;
|
||||||
} else {
|
} else {
|
||||||
|
std::memset(&texture->mip_offsets[1], 0,
|
||||||
|
(xe::countof(texture->mip_offsets) - 1) * sizeof(uint32_t));
|
||||||
|
std::memset(&texture->mip_pitches[1], 0,
|
||||||
|
(xe::countof(texture->mip_pitches) - 1) * sizeof(uint32_t));
|
||||||
// Never try to upload the mipmaps if there are none.
|
// Never try to upload the mipmaps if there are none.
|
||||||
texture->mips_in_sync = true;
|
texture->mips_in_sync = true;
|
||||||
}
|
}
|
||||||
|
texture->base_size = texture->base_slice_size;
|
||||||
|
texture->mip_size = texture->mip_slice_size;
|
||||||
|
if (key.dimension != Dimension::k3D) {
|
||||||
|
texture->base_size *= key.depth;
|
||||||
|
texture->mip_size *= key.depth;
|
||||||
|
}
|
||||||
textures_.insert(std::make_pair(map_key, texture));
|
textures_.insert(std::make_pair(map_key, texture));
|
||||||
LogTextureAction(*texture, "Created");
|
LogTextureAction(texture, "Created");
|
||||||
|
|
||||||
return texture;
|
return texture;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Writes the out-of-date guest data of `texture` (base level and/or mips) from
// the shared memory to the host texture resource.
//
// For every array slice, a compute shader selected by the host format's copy
// mode converts each stale mip level from the guest layout into a scratch
// buffer laid out according to GetCopyableFootprints, and then the scratch
// buffer is copied into the corresponding subresources of the texture.
//
// This binds a pipeline and a compute root signature and allocates view
// descriptors on the current command list.
//
// Returns true if there was nothing to load or all commands were recorded,
// false if a required resource (command list, pipeline, shared memory range,
// scratch buffer, descriptors, constant buffer space) could not be obtained.
bool TextureCache::LoadTextureData(Texture* texture) {
  if (texture->base_in_sync && texture->mips_in_sync) {
    // Nothing is out of date.
    return true;
  }

  auto command_list = command_processor_->GetCurrentCommandList();
  if (command_list == nullptr) {
    return false;
  }
  auto provider = command_processor_->GetD3D12Context()->GetD3D12Provider();
  auto device = provider->GetDevice();

  // Get the pipeline.
  const HostFormat& host_format = host_formats_[uint32_t(texture->key.format)];
  if (host_format.copy_mode == CopyMode::kUnknown) {
    return false;
  }
  ID3D12PipelineState* pipeline =
      copy_load_pipelines_[uint32_t(host_format.copy_mode)];
  if (pipeline == nullptr) {
    return false;
  }

  // Request uploading of the texture data to the shared memory.
  if (!texture->base_in_sync) {
    if (!shared_memory_->UseRange(texture->key.base_page << 12,
                                  texture->base_size)) {
      return false;
    }
  }
  if (!texture->mips_in_sync) {
    if (!shared_memory_->UseRange(texture->key.mip_page << 12,
                                  texture->mip_size)) {
      return false;
    }
  }

  // Get the guest layout.
  // For 3D textures key.depth is the depth of the single slice, for other
  // dimensions it is used as the number of array slices.
  bool is_3d = texture->key.dimension == Dimension::k3D;
  uint32_t width = texture->key.width;
  uint32_t height = texture->key.height;
  uint32_t depth = is_3d ? texture->key.depth : 1;
  uint32_t slice_count = is_3d ? 1 : texture->key.depth;
  TextureFormat guest_format = texture->key.format;
  const FormatInfo* guest_format_info = FormatInfo::Get(guest_format);
  uint32_t block_width = guest_format_info->block_width;
  uint32_t block_height = guest_format_info->block_height;

  // Get the host layout and the buffer.
  // The footprints describe all mip levels of one slice; the scratch buffer is
  // reused for every slice.
  D3D12_RESOURCE_DESC resource_desc = texture->resource->GetDesc();
  D3D12_PLACED_SUBRESOURCE_FOOTPRINT host_layouts[D3D12_REQ_MIP_LEVELS];
  UINT64 host_slice_size;
  device->GetCopyableFootprints(&resource_desc, 0, resource_desc.MipLevels, 0,
                                host_layouts, nullptr, nullptr,
                                &host_slice_size);
  D3D12_RESOURCE_STATES copy_buffer_state =
      D3D12_RESOURCE_STATE_UNORDERED_ACCESS;
  ID3D12Resource* copy_buffer = command_processor_->RequestScratchGPUBuffer(
      uint32_t(host_slice_size), copy_buffer_state);
  if (copy_buffer == nullptr) {
    return false;
  }

  // Begin loading.
  D3D12_CPU_DESCRIPTOR_HANDLE descriptor_cpu_start;
  D3D12_GPU_DESCRIPTOR_HANDLE descriptor_gpu_start;
  if (command_processor_->RequestViewDescriptors(0, 2, 2, descriptor_cpu_start,
                                                 descriptor_gpu_start) == 0) {
    command_processor_->ReleaseScratchGPUBuffer(copy_buffer, copy_buffer_state);
    return false;
  }
  // Descriptor 0 - shared memory SRV (source), descriptor 1 - scratch buffer
  // raw UAV (destination).
  shared_memory_->UseForReading(command_list);
  shared_memory_->CreateSRV(descriptor_cpu_start);
  D3D12_UNORDERED_ACCESS_VIEW_DESC uav_desc;
  uav_desc.Format = DXGI_FORMAT_R32_TYPELESS;
  uav_desc.ViewDimension = D3D12_UAV_DIMENSION_BUFFER;
  uav_desc.Buffer.FirstElement = 0;
  // Raw buffer elements are 32-bit.
  uav_desc.Buffer.NumElements = UINT(host_slice_size >> 2);
  uav_desc.Buffer.StructureByteStride = 0;
  uav_desc.Buffer.CounterOffsetInBytes = 0;
  uav_desc.Buffer.Flags = D3D12_BUFFER_UAV_FLAG_RAW;
  D3D12_CPU_DESCRIPTOR_HANDLE descriptor_cpu_uav;
  descriptor_cpu_uav.ptr =
      descriptor_cpu_start.ptr + provider->GetDescriptorSizeView();
  device->CreateUnorderedAccessView(copy_buffer, nullptr, &uav_desc,
                                    descriptor_cpu_uav);
  command_processor_->SetPipeline(pipeline);
  command_list->SetComputeRootSignature(copy_root_signature_);
  command_list->SetComputeRootDescriptorTable(1, descriptor_gpu_start);

  // Submit commands.
  D3D12_RESOURCE_BARRIER barriers[2];
  barriers[0].Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
  barriers[1].Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
  if (texture->state != D3D12_RESOURCE_STATE_COPY_DEST) {
    barriers[0].Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
    barriers[0].Transition.pResource = texture->resource;
    barriers[0].Transition.Subresource =
        D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
    barriers[0].Transition.StateBefore = texture->state;
    barriers[0].Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_DEST;
    command_list->ResourceBarrier(1, barriers);
    texture->state = D3D12_RESOURCE_STATE_COPY_DEST;
  }
  // Only the levels that are out of date are processed: level 0 is skipped if
  // the base is in sync, levels 1+ are skipped if the mips are in sync.
  uint32_t mip_first = texture->base_in_sync ? 1 : 0;
  uint32_t mip_last = texture->mips_in_sync ? 0 : resource_desc.MipLevels - 1;
  auto cbuffer_pool = command_processor_->GetConstantBufferPool();
  CopyConstants copy_constants;
  copy_constants.is_3d = is_3d ? 1 : 0;
  copy_constants.endianness = uint32_t(texture->key.endianness);
  if (!texture->key.packed_mips) {
    // Without packed mips, the offset stays zero for every level.
    copy_constants.guest_mip_offset[0] = 0;
    copy_constants.guest_mip_offset[1] = 0;
    copy_constants.guest_mip_offset[2] = 0;
  }
  for (uint32_t i = 0; i < slice_count; ++i) {
    // After the previous slice, the scratch buffer is left in the copy source
    // state - make it writable again.
    if (copy_buffer_state != D3D12_RESOURCE_STATE_UNORDERED_ACCESS) {
      barriers[0].Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
      barriers[0].Transition.pResource = copy_buffer;
      barriers[0].Transition.Subresource = 0;
      barriers[0].Transition.StateBefore = copy_buffer_state;
      barriers[0].Transition.StateAfter = D3D12_RESOURCE_STATE_UNORDERED_ACCESS;
      command_list->ResourceBarrier(1, barriers);
      copy_buffer_state = D3D12_RESOURCE_STATE_UNORDERED_ACCESS;
    }
    // Convert each stale level of this slice into the scratch buffer.
    for (uint32_t j = mip_first; j <= mip_last; ++j) {
      if (j == 0) {
        copy_constants.guest_base =
            (texture->key.base_page << 12) + i * texture->base_slice_size;
      } else {
        copy_constants.guest_base =
            (texture->key.mip_page << 12) + i * texture->mip_slice_size;
      }
      copy_constants.guest_base += texture->mip_offsets[j];
      copy_constants.guest_pitch = texture->key.tiled
                                       ? CopyConstants::kGuestPitchTiled
                                       : texture->mip_pitches[j];
      copy_constants.host_base = uint32_t(host_layouts[j].Offset);
      copy_constants.host_pitch = host_layouts[j].Footprint.RowPitch;
      // Level size in blocks, at least one texel in each direction.
      copy_constants.size[0] =
          (std::max(width >> j, 1u) + (block_width - 1)) / block_width;
      copy_constants.size[1] =
          (std::max(height >> j, 1u) + (block_height - 1)) / block_height;
      copy_constants.size[2] = std::max(depth >> j, 1u);
      if (texture->key.packed_mips) {
        texture_util::GetPackedMipOffset(width, height, depth, guest_format, j,
                                         copy_constants.guest_mip_offset[0],
                                         copy_constants.guest_mip_offset[1],
                                         copy_constants.guest_mip_offset[2]);
      }
      D3D12_GPU_VIRTUAL_ADDRESS cbuffer_gpu_address;
      uint8_t* cbuffer_mapping = cbuffer_pool->RequestFull(
          xe::align(uint32_t(sizeof(copy_constants)), 256u), nullptr, nullptr,
          &cbuffer_gpu_address);
      if (cbuffer_mapping == nullptr) {
        command_processor_->ReleaseScratchGPUBuffer(copy_buffer,
                                                    copy_buffer_state);
        return false;
      }
      std::memcpy(cbuffer_mapping, &copy_constants, sizeof(copy_constants));
      command_list->SetComputeRootConstantBufferView(0, cbuffer_gpu_address);
      // Each thread group processes 32x32x1 blocks.
      command_list->Dispatch((copy_constants.size[0] + 31) >> 5,
                             (copy_constants.size[1] + 31) >> 5,
                             copy_constants.size[2]);
    }
    // Wait for the compute writes, then make the scratch buffer a copy source.
    barriers[0].Type = D3D12_RESOURCE_BARRIER_TYPE_UAV;
    barriers[0].UAV.pResource = copy_buffer;
    barriers[1].Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
    barriers[1].Transition.pResource = copy_buffer;
    barriers[1].Transition.Subresource = 0;
    barriers[1].Transition.StateBefore = D3D12_RESOURCE_STATE_UNORDERED_ACCESS;
    barriers[1].Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_SOURCE;
    command_list->ResourceBarrier(2, barriers);
    copy_buffer_state = D3D12_RESOURCE_STATE_COPY_SOURCE;
    // Copy the converted levels into the subresources of this slice.
    UINT slice_first_subresource = i * resource_desc.MipLevels;
    for (uint32_t j = mip_first; j <= mip_last; ++j) {
      D3D12_TEXTURE_COPY_LOCATION location_source, location_dest;
      location_source.pResource = copy_buffer;
      location_source.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT;
      location_source.PlacedFootprint = host_layouts[j];
      location_dest.pResource = texture->resource;
      location_dest.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX;
      location_dest.SubresourceIndex = slice_first_subresource + j;
      command_list->CopyTextureRegion(&location_dest, 0, 0, 0, &location_source,
                                      nullptr);
    }
  }

  command_processor_->ReleaseScratchGPUBuffer(copy_buffer, copy_buffer_state);

  // TODO(Triang3l): Uncomment when done testing untiling shaders.
  /* texture->base_in_sync = true;
  texture->mips_in_sync = true; */

  LogTextureAction(texture, "Loaded");
  return true;
}
|
||||||
|
|
||||||
void TextureCache::ClearBindings() {
|
void TextureCache::ClearBindings() {
|
||||||
std::memset(texture_bindings_, 0, sizeof(texture_bindings_));
|
std::memset(texture_bindings_, 0, sizeof(texture_bindings_));
|
||||||
texture_keys_in_sync_ = 0;
|
texture_keys_in_sync_ = 0;
|
||||||
|
|
|
@ -80,8 +80,22 @@ class TextureCache {
|
||||||
D3D12_CPU_DESCRIPTOR_HANDLE handle);
|
D3D12_CPU_DESCRIPTOR_HANDLE handle);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
// Static description of one copy mode (see the copy_mode_info_ table).
// NOTE(review): load_shader / load_shader_size presumably point to the
// compiled compute shader used to build the load pipeline of the mode -
// confirm against the copy_mode_info_ initializer.
struct CopyModeInfo {
|
||||||
|
const void* load_shader;
|
||||||
|
size_t load_shader_size;
|
||||||
|
};
|
||||||
|
|
||||||
|
// Selects the pipeline used by LoadTextureData to copy guest texture data to
// the host texture. The values are used as indices into copy_load_pipelines_.
enum class CopyMode {
|
||||||
|
// NOTE(review): presumably formats with 64 bits per block (such as DXT1,
// going by the commit title) - confirm.
k64Bpb,
|
||||||
|
|
||||||
|
// Number of real copy modes - the size of copy_load_pipelines_.
kCount,
|
||||||
|
|
||||||
|
// No copy mode for the format - LoadTextureData fails for such formats.
kUnknown = kCount
|
||||||
|
};
|
||||||
|
|
||||||
// Host-side description of a guest texture format - an entry of the
// host_formats_ table, which LoadTextureData indexes with the guest format.
struct HostFormat {
|
struct HostFormat {
|
||||||
DXGI_FORMAT dxgi_format;
|
DXGI_FORMAT dxgi_format;
|
||||||
|
// How the guest data is loaded; CopyMode::kUnknown makes loading fail.
CopyMode copy_mode;
|
||||||
};
|
};
|
||||||
|
|
||||||
union TextureKey {
|
union TextureKey {
|
||||||
|
@ -150,16 +164,45 @@ class TextureCache {
|
||||||
TextureKey key;
|
TextureKey key;
|
||||||
ID3D12Resource* resource;
|
ID3D12Resource* resource;
|
||||||
D3D12_RESOURCE_STATES state;
|
D3D12_RESOURCE_STATES state;
|
||||||
|
// Byte size of one array slice of the top guest mip level.
|
||||||
|
uint32_t base_slice_size;
|
||||||
// Byte size of the top guest mip level.
|
// Byte size of the top guest mip level.
|
||||||
uint32_t base_size;
|
uint32_t base_size;
|
||||||
|
// Byte size of one array slice of mips between 1 and key.mip_max_level.
|
||||||
|
uint32_t mip_slice_size;
|
||||||
// Byte size of mips between 1 and key.mip_max_level.
|
// Byte size of mips between 1 and key.mip_max_level.
|
||||||
uint32_t mip_size;
|
uint32_t mip_size;
|
||||||
|
// Byte offsets of each mipmap within one slice.
|
||||||
|
uint32_t mip_offsets[14];
|
||||||
|
// Byte pitches of each mipmap within one slice (for linear layout mainly).
|
||||||
|
uint32_t mip_pitches[14];
|
||||||
// Whether the recent base level data has been loaded from the memory.
|
// Whether the recent base level data has been loaded from the memory.
|
||||||
bool base_in_sync;
|
bool base_in_sync;
|
||||||
// Whether the recent mip data has been loaded from the memory.
|
// Whether the recent mip data has been loaded from the memory.
|
||||||
bool mips_in_sync;
|
bool mips_in_sync;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Constants for the texture loading compute shaders. LoadTextureData fills one
// instance per mip level of each slice and memcpy's it into a constant buffer
// bound to compute root parameter 0, so the member order and sizes must not be
// changed without updating the shaders.
struct CopyConstants {
|
||||||
|
// vec4 0.
|
||||||
|
// Byte address of the level in the shared memory (page address plus slice
// and mip offsets).
uint32_t guest_base;
|
||||||
|
// For linear textures - row byte pitch.
|
||||||
|
// For tiled textures - kGuestPitchTiled.
uint32_t guest_pitch;
|
||||||
|
// Byte offset and row pitch of the level in the destination buffer, taken
// from the GetCopyableFootprints layout.
uint32_t host_base;
|
||||||
|
uint32_t host_pitch;
|
||||||
|
|
||||||
|
// vec4 1.
|
||||||
|
// Size in blocks.
|
||||||
|
uint32_t size[3];
|
||||||
|
// 1 for 3D textures, 0 otherwise.
uint32_t is_3d;
|
||||||
|
|
||||||
|
// vec4 2.
|
||||||
|
// Offset within the packed mip for small mips.
|
||||||
|
// Left as zeros when the texture has no packed mips.
uint32_t guest_mip_offset[3];
|
||||||
|
// Numeric value of the endianness field of the texture key.
uint32_t endianness;
|
||||||
|
|
||||||
|
// Special guest_pitch value indicating that the guest texture is tiled.
static constexpr uint32_t kGuestPitchTiled = UINT32_MAX;
|
||||||
|
};
|
||||||
|
|
||||||
struct TextureBinding {
|
struct TextureBinding {
|
||||||
TextureKey key;
|
TextureKey key;
|
||||||
uint32_t swizzle;
|
uint32_t swizzle;
|
||||||
|
@ -173,26 +216,34 @@ class TextureCache {
|
||||||
uint32_t& swizzle_out);
|
uint32_t& swizzle_out);
|
||||||
|
|
||||||
static void LogTextureKeyAction(TextureKey key, const char* action);
|
static void LogTextureKeyAction(TextureKey key, const char* action);
|
||||||
static void LogTextureAction(const Texture& texture, const char* action);
|
static void LogTextureAction(const Texture* texture, const char* action);
|
||||||
|
|
||||||
// Returns nullptr if the key is not supported, but also if couldn't create
|
// Returns nullptr if the key is not supported, but also if couldn't create
|
||||||
// the texture - if it's nullptr, occasionally a recreation attempt should be
|
// the texture - if it's nullptr, occasionally a recreation attempt should be
|
||||||
// made.
|
// made.
|
||||||
Texture* FindOrCreateTexture(TextureKey key);
|
Texture* FindOrCreateTexture(TextureKey key);
|
||||||
|
|
||||||
|
// Writes data from the shared memory to the texture. This binds pipelines and
|
||||||
|
// allocates descriptors!
|
||||||
|
bool LoadTextureData(Texture* texture);
|
||||||
|
|
||||||
// Makes all bindings invalid. Also requesting textures after calling this
|
// Makes all bindings invalid. Also requesting textures after calling this
|
||||||
// will cause another attempt to create a texture or to untile it if there was
|
// will cause another attempt to create a texture or to untile it if there was
|
||||||
// an error.
|
// an error.
|
||||||
void ClearBindings();
|
void ClearBindings();
|
||||||
|
|
||||||
static HostFormat host_formats_[64];
|
static const HostFormat host_formats_[64];
|
||||||
|
|
||||||
static const char* dimension_names_[4];
|
static const char* const dimension_names_[4];
|
||||||
|
|
||||||
D3D12CommandProcessor* command_processor_;
|
D3D12CommandProcessor* command_processor_;
|
||||||
RegisterFile* register_file_;
|
RegisterFile* register_file_;
|
||||||
SharedMemory* shared_memory_;
|
SharedMemory* shared_memory_;
|
||||||
|
|
||||||
|
static const CopyModeInfo copy_mode_info_[];
|
||||||
|
ID3D12RootSignature* copy_root_signature_ = nullptr;
|
||||||
|
ID3D12PipelineState* copy_load_pipelines_[size_t(CopyMode::kCount)] = {};
|
||||||
|
|
||||||
std::unordered_multimap<uint64_t, Texture*> textures_;
|
std::unordered_multimap<uint64_t, Texture*> textures_;
|
||||||
|
|
||||||
TextureBinding texture_bindings_[32] = {};
|
TextureBinding texture_bindings_[32] = {};
|
||||||
|
|
|
@ -56,17 +56,15 @@ void GetGuestMipBlocks(Dimension dimension, uint32_t width, uint32_t height,
|
||||||
|
|
||||||
uint32_t GetGuestMipStorageSize(uint32_t width_blocks, uint32_t height_blocks,
|
uint32_t GetGuestMipStorageSize(uint32_t width_blocks, uint32_t height_blocks,
|
||||||
uint32_t depth_blocks, bool is_tiled,
|
uint32_t depth_blocks, bool is_tiled,
|
||||||
TextureFormat format, uint32_t* row_pitch_out) {
|
TextureFormat format, uint32_t& row_pitch_out) {
|
||||||
const FormatInfo* format_info = FormatInfo::Get(format);
|
const FormatInfo* format_info = FormatInfo::Get(format);
|
||||||
uint32_t row_pitch =
|
uint32_t row_pitch = width_blocks * format_info->block_width *
|
||||||
width_blocks * format_info->block_width * format_info->block_height *
|
format_info->block_height * format_info->bits_per_pixel /
|
||||||
format_info->bits_per_pixel / 8;
|
8;
|
||||||
if (!is_tiled) {
|
if (!is_tiled) {
|
||||||
row_pitch = xe::align(row_pitch, 256u);
|
row_pitch = xe::align(row_pitch, 256u);
|
||||||
}
|
}
|
||||||
if (row_pitch_out != nullptr) {
|
row_pitch_out = row_pitch;
|
||||||
*row_pitch_out = row_pitch;
|
|
||||||
}
|
|
||||||
return xe::align(row_pitch * height_blocks * depth_blocks, 4096u);
|
return xe::align(row_pitch * height_blocks * depth_blocks, 4096u);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -33,7 +33,7 @@ void GetGuestMipBlocks(Dimension dimension, uint32_t width, uint32_t height,
|
||||||
// height and depth must be obtained via GetGuestMipExtent.
|
// height and depth must be obtained via GetGuestMipExtent.
|
||||||
uint32_t GetGuestMipStorageSize(uint32_t width_blocks, uint32_t height_blocks,
|
uint32_t GetGuestMipStorageSize(uint32_t width_blocks, uint32_t height_blocks,
|
||||||
uint32_t depth_blocks, bool is_tiled,
|
uint32_t depth_blocks, bool is_tiled,
|
||||||
TextureFormat format, uint32_t* row_pitch_out);
|
TextureFormat format, uint32_t& row_pitch_out);
|
||||||
|
|
||||||
// Gets the number of the mipmap level where the packed mips are stored.
|
// Gets the number of the mipmap level where the packed mips are stored.
|
||||||
inline uint32_t GetPackedMipLevel(uint32_t width, uint32_t height) {
|
inline uint32_t GetPackedMipLevel(uint32_t width, uint32_t height) {
|
||||||
|
|
Loading…
Reference in New Issue