[D3D12] Bindless textures/samplers

This commit is contained in:
Triang3l 2020-06-19 23:52:33 +03:00
parent 9f789e01b6
commit 40e335e2a9
23 changed files with 3565 additions and 1747 deletions

30
src/xenia/base/hash.h Normal file
View File

@ -0,0 +1,30 @@
/**
 ******************************************************************************
 * Xenia : Xbox 360 Emulator Research Project *
 ******************************************************************************
 * Copyright 2020 Ben Vanik. All rights reserved. *
 * Released under the BSD license - see LICENSE in the root for more details. *
 ******************************************************************************
 */
#ifndef XENIA_BASE_HASH_H_
#define XENIA_BASE_HASH_H_
#include <cstddef>
namespace xe {
namespace hash {
// For use in unordered_sets and unordered_maps (primarily multisets and
// multimaps, with manual collision resolution), where the hash is calculated
// externally (for instance, as XXH64), possibly requiring context data rather
// than a pure function to calculate the hash.
//
// The key itself is assumed to already be the precomputed hash value;
// this functor merely forwards it. Note that if Key is wider than size_t,
// the static_cast truncates the value — callers are expected to pass a
// hash type that fits in size_t (or accept losing the upper bits).
template <typename Key>
struct IdentityHasher {
  // noexcept: unordered containers conventionally expect a non-throwing
  // hasher (std::hash specializations are noexcept), and a trivial cast
  // cannot throw anyway.
  size_t operator()(const Key& key) const noexcept {
    return static_cast<size_t>(key);
  }
};
}  // namespace hash
}  // namespace xe
#endif  // XENIA_BASE_HASH_H_

File diff suppressed because it is too large Load Diff

View File

@ -29,6 +29,7 @@
#include "xenia/gpu/xenos.h"
#include "xenia/kernel/kernel_state.h"
#include "xenia/ui/d3d12/d3d12_context.h"
#include "xenia/ui/d3d12/d3d12_util.h"
#include "xenia/ui/d3d12/pools.h"
namespace xe {
@ -53,8 +54,8 @@ class D3D12CommandProcessor : public CommandProcessor {
void RestoreEDRAMSnapshot(const void* snapshot) override;
// Needed by everything that owns transient objects.
xe::ui::d3d12::D3D12Context* GetD3D12Context() const {
return static_cast<xe::ui::d3d12::D3D12Context*>(context_.get());
ui::d3d12::D3D12Context* GetD3D12Context() const {
return static_cast<ui::d3d12::D3D12Context*>(context_.get());
}
// Returns the deferred drawing command list for the currently open
@ -95,18 +96,43 @@ class D3D12CommandProcessor : public CommandProcessor {
ui::d3d12::UploadBufferPool* GetConstantBufferPool() const {
return constant_buffer_pool_.get();
}
// Request and automatically rebind descriptors on the draw command list.
// Refer to DescriptorHeapPool::Request for partial/full update explanation.
uint64_t RequestViewDescriptors(uint64_t previous_heap_index,
uint32_t count_for_partial_update,
uint32_t count_for_full_update,
D3D12_CPU_DESCRIPTOR_HANDLE& cpu_handle_out,
D3D12_GPU_DESCRIPTOR_HANDLE& gpu_handle_out);
uint64_t RequestSamplerDescriptors(
uint64_t previous_heap_index, uint32_t count_for_partial_update,
uint32_t count_for_full_update,
D3D12_CPU_DESCRIPTOR_HANDLE& cpu_handle_out,
D3D12_GPU_DESCRIPTOR_HANDLE& gpu_handle_out);
D3D12_CPU_DESCRIPTOR_HANDLE GetViewBindlessHeapCPUStart() const {
assert_true(bindless_resources_used_);
return view_bindless_heap_cpu_start_;
}
D3D12_GPU_DESCRIPTOR_HANDLE GetViewBindlessHeapGPUStart() const {
assert_true(bindless_resources_used_);
return view_bindless_heap_gpu_start_;
}
// Returns UINT32_MAX if no free descriptors.
uint32_t RequestPersistentViewBindlessDescriptor();
void ReleaseViewBindlessDescriptorImmediately(uint32_t descriptor_index);
// Request non-contiguous SRV/UAV descriptors for use only within the next
// draw or dispatch command done for internal purposes. May change the current
// descriptor heap.
bool RequestOneUseSingleViewDescriptors(
uint32_t count, ui::d3d12::util::DescriptorCPUGPUHandlePair* handles_out);
// These are needed often, so they are always allocated.
enum class SystemBindlessView : uint32_t {
kNullTexture2DArray,
kNullTexture3D,
kNullTextureCube,
kSharedMemoryRawSRV,
kSharedMemoryRawUAV,
kEDRAMR32UintUAV,
kEDRAMRawSRV,
kEDRAMRawUAV,
kGammaRampNormalSRV,
kGammaRampPWLSRV,
kCount,
};
ui::d3d12::util::DescriptorCPUGPUHandlePair GetSystemBindlessViewHandlePair(
SystemBindlessView view) const;
// Returns a single temporary GPU-side buffer within a submission for tasks
// like texture untiling and resolving.
@ -148,6 +174,10 @@ class D3D12CommandProcessor : public CommandProcessor {
bool changing_viewport = true, bool changing_blend_factor = false,
bool changing_stencil_ref = false);
// For the pipeline state cache to call when binding layout UIDs may be
// reused.
void NotifyShaderBindingsLayoutUIDsInvalidated();
// Returns the text to display in the GPU backend name in the window title.
std::string GetWindowTitleText() const;
@ -180,36 +210,66 @@ class D3D12CommandProcessor : public CommandProcessor {
static constexpr uint32_t kQueueFrames = 3;
enum RootParameter : UINT {
// Keep the size of the root signature at each stage 13 dwords or less
// (better 12 or less) so it fits in user data on AMD. Descriptor tables are
// 1 dword, root descriptors are 2 dwords (however, root descriptors require
// less setup on the CPU - balance needs to be maintained).
// CBVs are set in both bindful and bindless cases via root descriptors.
// - Bindful resources - multiple root signatures depending on extra
// parameters.
// These are always present.
// Very frequently changed, especially for UI draws, and for models drawn in
// multiple parts - contains vertex and texture fetch constants.
kRootParameter_FetchConstants,
kRootParameter_Bindful_FetchConstants = 0, // +2 dwords = 2 in all.
// Quite frequently changed (for one object drawn multiple times, for
// instance - may contain projection matrices).
kRootParameter_FloatConstantsVertex,
kRootParameter_Bindful_FloatConstantsVertex, // +2 = 4 in VS.
// Less frequently changed (per-material).
kRootParameter_FloatConstantsPixel,
// Rarely changed - system constants like viewport and alpha testing.
kRootParameter_SystemConstants,
kRootParameter_Bindful_FloatConstantsPixel, // +2 = 4 in PS.
// May stay the same across many draws.
kRootParameter_Bindful_SystemConstants, // +2 = 6 in all.
// Pretty rarely used and rarely changed - flow control constants.
kRootParameter_BoolLoopConstants,
kRootParameter_Bindful_BoolLoopConstants, // +2 = 8 in all.
// Never changed except for when starting a new descriptor heap - shared
// memory byte address buffer, and, if ROV is used for EDRAM, EDRAM UAV.
kRootParameter_SharedMemoryAndEDRAM,
// memory byte address buffer, and, if ROV is used for EDRAM, EDRAM R32_UINT
// UAV.
// SRV/UAV descriptor table.
kRootParameter_Bindful_SharedMemoryAndEDRAM, // +1 = 9 in all.
kRootParameter_Count_Base,
kRootParameter_Bindful_Count_Base,
// Extra parameter that may or may not exist:
// - Pixel textures (t1+).
// - Pixel samplers (s0+).
// - Vertex textures (t1+).
// - Vertex samplers (s0+).
// - Pixel textures (+1 = 10 in PS).
// - Pixel samplers (+1 = 11 in PS).
// - Vertex textures (+1 = 10 in VS).
// - Vertex samplers (+1 = 11 in VS).
kRootParameter_Count_Max = kRootParameter_Count_Base + 4,
kRootParameter_Bindful_Count_Max = kRootParameter_Bindful_Count_Base + 4,
// - Bindless resources - two global root signatures (for non-tessellated
// and tessellated drawing), so these are always present.
kRootParameter_Bindless_FetchConstants = 0, // +2 = 2 in all.
kRootParameter_Bindless_FloatConstantsVertex, // +2 = 4 in VS.
kRootParameter_Bindless_FloatConstantsPixel, // +2 = 4 in PS.
// Changed per-material, texture and sampler descriptor indices.
kRootParameter_Bindless_DescriptorIndicesPixel, // +2 = 6 in PS.
kRootParameter_Bindless_DescriptorIndicesVertex, // +2 = 6 in VS.
kRootParameter_Bindless_SystemConstants, // +2 = 8 in all.
kRootParameter_Bindless_BoolLoopConstants, // +2 = 10 in all.
// Unbounded sampler descriptor table - changed in case of overflow.
kRootParameter_Bindless_SamplerHeap, // +1 = 11 in all.
// Unbounded SRV/UAV descriptor table - never changed.
kRootParameter_Bindless_ViewHeap, // +1 = 12 in all.
kRootParameter_Bindless_Count,
};
struct RootExtraParameterIndices {
struct RootBindfulExtraParameterIndices {
uint32_t textures_pixel;
uint32_t samplers_pixel;
uint32_t textures_vertex;
@ -218,9 +278,9 @@ class D3D12CommandProcessor : public CommandProcessor {
};
// Gets the indices of optional root parameters. Returns the total parameter
// count.
static uint32_t GetRootExtraParameterIndices(
static uint32_t GetRootBindfulExtraParameterIndices(
const D3D12Shader* vertex_shader, const D3D12Shader* pixel_shader,
RootExtraParameterIndices& indices_out);
RootBindfulExtraParameterIndices& indices_out);
// BeginSubmission and EndSubmission may be called at any time. If there's an
// open non-frame submission, BeginSubmission(true) will promote it to a
@ -247,6 +307,20 @@ class D3D12CommandProcessor : public CommandProcessor {
// Need to await submission completion before calling.
void ClearCommandAllocatorCache();
// Request descriptors and automatically rebind the descriptor heap on the
// draw command list. Refer to DescriptorHeapPool::Request for partial/full
// update explanation. Doesn't work when bindless descriptors are used.
uint64_t RequestViewBindfulDescriptors(
uint64_t previous_heap_index, uint32_t count_for_partial_update,
uint32_t count_for_full_update,
D3D12_CPU_DESCRIPTOR_HANDLE& cpu_handle_out,
D3D12_GPU_DESCRIPTOR_HANDLE& gpu_handle_out);
uint64_t RequestSamplerBindfulDescriptors(
uint64_t previous_heap_index, uint32_t count_for_partial_update,
uint32_t count_for_full_update,
D3D12_CPU_DESCRIPTOR_HANDLE& cpu_handle_out,
D3D12_GPU_DESCRIPTOR_HANDLE& gpu_handle_out);
void UpdateFixedFunctionState(bool primitive_two_faced);
void UpdateSystemConstantValues(
bool shared_memory_is_uav, bool primitive_two_faced,
@ -268,6 +342,8 @@ class D3D12CommandProcessor : public CommandProcessor {
// synchronizing immediately after use. Always in COPY_DEST state.
ID3D12Resource* RequestReadbackBuffer(uint32_t size);
void WriteGammaRampSRV(bool is_pwl, D3D12_CPU_DESCRIPTOR_HANDLE handle) const;
bool cache_clear_requested_ = false;
bool submission_open_ = false;
@ -298,28 +374,89 @@ class D3D12CommandProcessor : public CommandProcessor {
ID3D12GraphicsCommandList1* command_list_1_ = nullptr;
std::unique_ptr<DeferredCommandList> deferred_command_list_ = nullptr;
std::unique_ptr<SharedMemory> shared_memory_ = nullptr;
// Root signatures for different descriptor counts.
std::unordered_map<uint32_t, ID3D12RootSignature*> root_signatures_;
std::unique_ptr<PipelineCache> pipeline_cache_ = nullptr;
// Should bindless textures and samplers be used - many times faster
// UpdateBindings than bindful (that becomes a significant bottleneck with
// bindful - mainly because of CopyDescriptorsSimple, which takes the majority
// of UpdateBindings time, and that's outside the emulator's control even).
bool bindless_resources_used_ = false;
// Should a rasterizer-ordered UAV of the EDRAM buffer with format conversion
// and blending performed in pixel shaders be used instead of host render
// targets.
bool edram_rov_used_ = false;
std::unique_ptr<ui::d3d12::UploadBufferPool> constant_buffer_pool_ = nullptr;
static constexpr uint32_t kViewBindfulHeapSize = 32768;
static_assert(kViewBindfulHeapSize <=
D3D12_MAX_SHADER_VISIBLE_DESCRIPTOR_HEAP_SIZE_TIER_1);
std::unique_ptr<ui::d3d12::DescriptorHeapPool> view_bindful_heap_pool_ =
nullptr;
// Currently bound descriptor heap - updated by RequestViewBindfulDescriptors.
ID3D12DescriptorHeap* view_bindful_heap_current_;
// Rationale: textures have 4 KB alignment in guest memory, and there can be
// 512 MB / 4 KB in total of them at most, and multiply by 3 for different
// swizzles, signedness, and multiple host textures for one guest texture, and
// transient descriptors. Though in reality there will be a lot fewer of
// course, this is just a "safe" value. The limit is 1000000 for resource
// binding tier 2.
static constexpr uint32_t kViewBindlessHeapSize = 262144;
static_assert(kViewBindlessHeapSize <=
D3D12_MAX_SHADER_VISIBLE_DESCRIPTOR_HEAP_SIZE_TIER_2);
ID3D12DescriptorHeap* view_bindless_heap_ = nullptr;
D3D12_CPU_DESCRIPTOR_HANDLE view_bindless_heap_cpu_start_;
D3D12_GPU_DESCRIPTOR_HANDLE view_bindless_heap_gpu_start_;
uint32_t view_bindless_heap_allocated_ = 0;
std::vector<uint32_t> view_bindless_heap_free_;
// <Descriptor index, submission where requested>, sorted by the submission
// number.
std::deque<std::pair<uint32_t, uint64_t>> view_bindless_one_use_descriptors_;
// Direct3D 12 only allows shader-visible heaps with no more than 2048
// samplers (due to Nvidia addressing). However, there's also possibly a weird
// bug in the Nvidia driver (tested on 440.97 and earlier on Windows 10 1803)
// that caused the sampler with index 2047 not to work if a heap with 8 or
// less samplers also exists - in case of Xenia, it's the immediate drawer's
// sampler heap.
// FIXME(Triang3l): Investigate the issue with the sampler 2047 on Nvidia.
static constexpr uint32_t kSamplerHeapSize = 2000;
static_assert(kSamplerHeapSize <= D3D12_MAX_SHADER_VISIBLE_SAMPLER_HEAP_SIZE);
std::unique_ptr<ui::d3d12::DescriptorHeapPool> sampler_bindful_heap_pool_ =
nullptr;
ID3D12DescriptorHeap* sampler_bindful_heap_current_;
ID3D12DescriptorHeap* sampler_bindless_heap_current_ = nullptr;
D3D12_CPU_DESCRIPTOR_HANDLE sampler_bindless_heap_cpu_start_;
D3D12_GPU_DESCRIPTOR_HANDLE sampler_bindless_heap_gpu_start_;
// Currently the sampler heap is used only for texture cache samplers, so
// individual samplers are never freed, and using a simple linear allocator
// inside the current heap without a free list.
uint32_t sampler_bindless_heap_allocated_ = 0;
// <Heap, overflow submission number>, if total sampler count used so far
// exceeds kSamplerHeapSize, and the heap has been switched (this is not a
// totally impossible situation considering Direct3D 9 has sampler parameter
// state instead of sampler objects, and having one "unimportant" parameter
// changed may result in doubling of sampler count). Sorted by the submission
// number (so checking if the first can be reused is enough).
std::deque<std::pair<ID3D12DescriptorHeap*, uint64_t>>
sampler_bindless_heaps_overflowed_;
// TextureCache::SamplerParameters::value -> indices within the current
// bindless sampler heap.
std::unordered_map<uint32_t, uint32_t> texture_cache_bindless_sampler_map_;
// Root signatures for different descriptor counts.
std::unordered_map<uint32_t, ID3D12RootSignature*> root_signatures_bindful_;
ID3D12RootSignature* root_signature_bindless_vs_ = nullptr;
ID3D12RootSignature* root_signature_bindless_ds_ = nullptr;
std::unique_ptr<SharedMemory> shared_memory_ = nullptr;
std::unique_ptr<PipelineCache> pipeline_cache_ = nullptr;
std::unique_ptr<TextureCache> texture_cache_ = nullptr;
std::unique_ptr<RenderTargetCache> render_target_cache_ = nullptr;
std::unique_ptr<PrimitiveConverter> primitive_converter_ = nullptr;
std::unique_ptr<ui::d3d12::UploadBufferPool> constant_buffer_pool_ = nullptr;
std::unique_ptr<ui::d3d12::DescriptorHeapPool> view_heap_pool_ = nullptr;
std::unique_ptr<ui::d3d12::DescriptorHeapPool> sampler_heap_pool_ = nullptr;
// Mip 0 contains the normal gamma ramp (256 entries), mip 1 contains the PWL
// ramp (128 entries). DXGI_FORMAT_R10G10B10A2_UNORM 1D.
ID3D12Resource* gamma_ramp_texture_ = nullptr;
@ -348,11 +485,8 @@ class D3D12CommandProcessor : public CommandProcessor {
// Unsubmitted barrier batch.
std::vector<D3D12_RESOURCE_BARRIER> barriers_;
struct BufferForDeletion {
ID3D12Resource* buffer;
uint64_t last_usage_submission;
};
std::deque<BufferForDeletion> buffers_for_deletion_;
// <Resource, submission where requested>, sorted by the submission number.
std::deque<std::pair<ID3D12Resource*, uint64_t>> buffers_for_deletion_;
static constexpr uint32_t kScratchBufferSizeIncrement = 16 * 1024 * 1024;
ID3D12Resource* scratch_buffer_ = nullptr;
@ -390,18 +524,12 @@ class D3D12CommandProcessor : public CommandProcessor {
// Currently bound graphics root signature.
ID3D12RootSignature* current_graphics_root_signature_;
// Extra parameters which may or may not be present.
RootExtraParameterIndices current_graphics_root_extras_;
RootBindfulExtraParameterIndices current_graphics_root_bindful_extras_;
// Whether root parameters are up to date - reset if a new signature is bound.
uint32_t current_graphics_root_up_to_date_;
// Currently bound descriptor heaps - update by RequestViewDescriptors and
// RequestSamplerDescriptors.
ID3D12DescriptorHeap* current_view_heap_;
ID3D12DescriptorHeap* current_sampler_heap_;
// System shader constants.
DxbcShaderTranslator::SystemConstants system_constants_;
ColorRenderTargetFormat system_constants_color_formats_[4];
// Float constant usage masks of the last draw call.
uint64_t current_float_constant_map_vertex_[4];
@ -409,45 +537,48 @@ class D3D12CommandProcessor : public CommandProcessor {
// Constant buffer bindings.
struct ConstantBufferBinding {
D3D12_GPU_VIRTUAL_ADDRESS buffer_address;
D3D12_GPU_VIRTUAL_ADDRESS address;
bool up_to_date;
};
ConstantBufferBinding cbuffer_bindings_system_;
ConstantBufferBinding cbuffer_bindings_float_vertex_;
ConstantBufferBinding cbuffer_bindings_float_pixel_;
ConstantBufferBinding cbuffer_bindings_bool_loop_;
ConstantBufferBinding cbuffer_bindings_fetch_;
ConstantBufferBinding cbuffer_binding_system_;
ConstantBufferBinding cbuffer_binding_float_vertex_;
ConstantBufferBinding cbuffer_binding_float_pixel_;
ConstantBufferBinding cbuffer_binding_bool_loop_;
ConstantBufferBinding cbuffer_binding_fetch_;
ConstantBufferBinding cbuffer_binding_descriptor_indices_vertex_;
ConstantBufferBinding cbuffer_binding_descriptor_indices_pixel_;
// Pages with the descriptors currently used for handling Xenos draw calls.
uint64_t draw_view_heap_index_;
uint64_t draw_sampler_heap_index_;
uint64_t draw_view_bindful_heap_index_;
uint64_t draw_sampler_bindful_heap_index_;
// Whether the last used texture bindings have been written to the current
// view descriptor heap.
bool texture_bindings_written_vertex_;
bool texture_bindings_written_pixel_;
// Hashes of the last texture bindings written to the current view descriptor
// heap with the last used descriptor layout. Valid only when the
// corresponding "written" variables are true.
uint64_t current_texture_bindings_hash_vertex_;
uint64_t current_texture_bindings_hash_pixel_;
// Whether the last used texture sampler bindings have been written to the
// current view descriptor heap.
bool bindful_textures_written_vertex_;
bool bindful_textures_written_pixel_;
bool bindful_samplers_written_vertex_;
bool bindful_samplers_written_pixel_;
// Layout UIDs and last texture and sampler bindings written to the current
// descriptor heaps (for bindful) or descriptor index constant buffer (for
// bindless) with the last used descriptor layout. Valid only when:
// - For bindful, when bindful_#_written_#_ is true.
// - For bindless, when cbuffer_binding_descriptor_indices_#_.up_to_date is
// true.
size_t current_texture_layout_uid_vertex_;
size_t current_texture_layout_uid_pixel_;
size_t current_sampler_layout_uid_vertex_;
size_t current_sampler_layout_uid_pixel_;
// Size of these should be ignored when checking whether these are up to date,
// layout UID should be checked first (they will be different for different
// binding counts).
std::vector<TextureCache::TextureSRVKey> current_texture_srv_keys_vertex_;
std::vector<TextureCache::TextureSRVKey> current_texture_srv_keys_pixel_;
std::vector<TextureCache::SamplerParameters> current_samplers_vertex_;
std::vector<TextureCache::SamplerParameters> current_samplers_pixel_;
std::vector<uint32_t> current_sampler_bindless_indices_vertex_;
std::vector<uint32_t> current_sampler_bindless_indices_pixel_;
// Whether the last used samplers have been written to the current sampler
// descriptor heap.
bool samplers_written_vertex_;
bool samplers_written_pixel_;
// Hashes of the last sampler parameters written to the current sampler
// descriptor heap with the last used descriptor layout. Valid only when the
// corresponding "written" variables are true.
uint64_t current_samplers_hash_vertex_;
uint64_t current_samplers_hash_pixel_;
// Latest descriptor handles used for handling Xenos draw calls.
D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle_system_constants_;
D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle_float_constants_vertex_;
D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle_float_constants_pixel_;
D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle_bool_loop_constants_;
D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle_fetch_constants_;
// Latest bindful descriptor handles used for handling Xenos draw calls.
D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle_shared_memory_and_edram_;
D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle_textures_vertex_;
D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle_textures_pixel_;

View File

@ -9,6 +9,8 @@
#include "xenia/gpu/d3d12/d3d12_shader.h"
#include <cstring>
#include "xenia/base/assert.h"
#include "xenia/base/logging.h"
#include "xenia/gpu/gpu_flags.h"
@ -18,8 +20,8 @@ namespace xe {
namespace gpu {
namespace d3d12 {
constexpr uint32_t D3D12Shader::kMaxTextureSRVIndexBits;
constexpr uint32_t D3D12Shader::kMaxTextureSRVs;
constexpr uint32_t D3D12Shader::kMaxTextureBindingIndexBits;
constexpr uint32_t D3D12Shader::kMaxTextureBindings;
constexpr uint32_t D3D12Shader::kMaxSamplerBindingIndexBits;
constexpr uint32_t D3D12Shader::kMaxSamplerBindings;
@ -28,34 +30,40 @@ D3D12Shader::D3D12Shader(ShaderType shader_type, uint64_t data_hash,
: Shader(shader_type, data_hash, dword_ptr, dword_count) {}
void D3D12Shader::SetTexturesAndSamplers(
const DxbcShaderTranslator::TextureSRV* texture_srvs,
uint32_t texture_srv_count,
const DxbcShaderTranslator::TextureBinding* texture_bindings,
uint32_t texture_binding_count,
const DxbcShaderTranslator::SamplerBinding* sampler_bindings,
uint32_t sampler_binding_count) {
texture_srvs_.clear();
texture_srvs_.reserve(texture_srv_count);
texture_bindings_.clear();
texture_bindings_.reserve(texture_binding_count);
used_texture_mask_ = 0;
for (uint32_t i = 0; i < texture_srv_count; ++i) {
TextureSRV srv;
const DxbcShaderTranslator::TextureSRV& translator_srv = texture_srvs[i];
srv.fetch_constant = translator_srv.fetch_constant;
srv.dimension = translator_srv.dimension;
srv.is_signed = translator_srv.is_signed;
texture_srvs_.push_back(srv);
used_texture_mask_ |= 1u << translator_srv.fetch_constant;
for (uint32_t i = 0; i < texture_binding_count; ++i) {
TextureBinding& binding = texture_bindings_.emplace_back();
// For a stable hash.
std::memset(&binding, 0, sizeof(binding));
const DxbcShaderTranslator::TextureBinding& translator_binding =
texture_bindings[i];
binding.bindless_descriptor_index =
translator_binding.bindless_descriptor_index;
binding.fetch_constant = translator_binding.fetch_constant;
binding.dimension = translator_binding.dimension;
binding.is_signed = translator_binding.is_signed;
used_texture_mask_ |= 1u << translator_binding.fetch_constant;
}
sampler_bindings_.clear();
sampler_bindings_.reserve(sampler_binding_count);
for (uint32_t i = 0; i < sampler_binding_count; ++i) {
SamplerBinding sampler;
const DxbcShaderTranslator::SamplerBinding& translator_sampler =
SamplerBinding binding;
const DxbcShaderTranslator::SamplerBinding& translator_binding =
sampler_bindings[i];
sampler.fetch_constant = translator_sampler.fetch_constant;
sampler.mag_filter = translator_sampler.mag_filter;
sampler.min_filter = translator_sampler.min_filter;
sampler.mip_filter = translator_sampler.mip_filter;
sampler.aniso_filter = translator_sampler.aniso_filter;
sampler_bindings_.push_back(sampler);
binding.bindless_descriptor_index =
translator_binding.bindless_descriptor_index;
binding.fetch_constant = translator_binding.fetch_constant;
binding.mag_filter = translator_binding.mag_filter;
binding.min_filter = translator_binding.min_filter;
binding.mip_filter = translator_binding.mip_filter;
binding.aniso_filter = translator_binding.aniso_filter;
sampler_bindings_.push_back(binding);
}
}

View File

@ -26,8 +26,8 @@ class D3D12Shader : public Shader {
const uint32_t* dword_ptr, uint32_t dword_count);
void SetTexturesAndSamplers(
const DxbcShaderTranslator::TextureSRV* texture_srvs,
uint32_t texture_srv_count,
const DxbcShaderTranslator::TextureBinding* texture_bindings,
uint32_t texture_binding_count,
const DxbcShaderTranslator::SamplerBinding* sampler_bindings,
uint32_t sampler_binding_count);
@ -44,18 +44,22 @@ class D3D12Shader : public Shader {
bool DisassembleDxbc(const ui::d3d12::D3D12Provider* provider);
static constexpr uint32_t kMaxTextureSRVIndexBits =
DxbcShaderTranslator::kMaxTextureSRVIndexBits;
static constexpr uint32_t kMaxTextureSRVs =
DxbcShaderTranslator::kMaxTextureSRVs;
struct TextureSRV {
static constexpr uint32_t kMaxTextureBindingIndexBits =
DxbcShaderTranslator::kMaxTextureBindingIndexBits;
static constexpr uint32_t kMaxTextureBindings =
DxbcShaderTranslator::kMaxTextureBindings;
struct TextureBinding {
uint32_t bindless_descriptor_index;
uint32_t fetch_constant;
// Stacked and 3D are separate TextureBindings, even for bindless for null
// descriptor handling simplicity.
TextureDimension dimension;
bool is_signed;
};
const TextureSRV* GetTextureSRVs(uint32_t& count_out) const {
count_out = uint32_t(texture_srvs_.size());
return texture_srvs_.data();
// Safe to hash and compare with memcmp for layout hashing.
const TextureBinding* GetTextureBindings(uint32_t& count_out) const {
count_out = uint32_t(texture_bindings_.size());
return texture_bindings_.data();
}
const uint32_t GetUsedTextureMask() const { return used_texture_mask_; }
@ -64,6 +68,7 @@ class D3D12Shader : public Shader {
static constexpr uint32_t kMaxSamplerBindings =
DxbcShaderTranslator::kMaxSamplerBindings;
struct SamplerBinding {
uint32_t bindless_descriptor_index;
uint32_t fetch_constant;
TextureFilter mag_filter;
TextureFilter min_filter;
@ -75,10 +80,29 @@ class D3D12Shader : public Shader {
return sampler_bindings_.data();
}
// For owning subsystems like the pipeline state cache, accessors for unique
// identifiers (used instead of hashes to make sure collisions can't happen)
// of binding layouts used by the shader, for invalidation if a shader with an
// incompatible layout was bound.
size_t GetTextureBindingLayoutUserUID() const {
return texture_binding_layout_user_uid_;
}
void SetTextureBindingLayoutUserUID(size_t uid) {
texture_binding_layout_user_uid_ = uid;
}
size_t GetSamplerBindingLayoutUserUID() const {
return sampler_binding_layout_user_uid_;
}
void SetSamplerBindingLayoutUserUID(size_t uid) {
sampler_binding_layout_user_uid_ = uid;
}
private:
std::vector<TextureSRV> texture_srvs_;
uint32_t used_texture_mask_ = 0;
std::vector<TextureBinding> texture_bindings_;
std::vector<SamplerBinding> sampler_bindings_;
size_t texture_binding_layout_user_uid_ = 0;
size_t sampler_binding_layout_user_uid_ = 0;
uint32_t used_texture_mask_ = 0;
std::vector<uint8_t> forced_early_z_shader_;
};

View File

@ -61,19 +61,22 @@ namespace d3d12 {
#include "xenia/gpu/d3d12/shaders/dxbc/primitive_rectangle_list_gs.h"
#include "xenia/gpu/d3d12/shaders/dxbc/tessellation_vs.h"
constexpr size_t PipelineCache::kLayoutUIDEmpty;
constexpr uint32_t PipelineCache::PipelineDescription::kVersion;
PipelineCache::PipelineCache(D3D12CommandProcessor* command_processor,
RegisterFile* register_file, bool edram_rov_used,
RegisterFile* register_file,
bool bindless_resources_used, bool edram_rov_used,
uint32_t resolution_scale)
: command_processor_(command_processor),
register_file_(register_file),
bindless_resources_used_(bindless_resources_used),
edram_rov_used_(edram_rov_used),
resolution_scale_(resolution_scale) {
auto provider = command_processor_->GetD3D12Context()->GetD3D12Provider();
shader_translator_ = std::make_unique<DxbcShaderTranslator>(
provider->GetAdapterVendorID(), edram_rov_used_,
provider->GetAdapterVendorID(), bindless_resources_used_, edram_rov_used_,
provider->GetGraphicsAnalysis() != nullptr);
if (edram_rov_used_) {
@ -178,6 +181,13 @@ void PipelineCache::ClearCache(bool shutting_down) {
COUNT_profile_set("gpu/pipeline_cache/pipeline_states", 0);
// Destroy all shaders.
command_processor_->NotifyShaderBindingsLayoutUIDsInvalidated();
if (bindless_resources_used_) {
bindless_sampler_layout_map_.clear();
bindless_sampler_layouts_.clear();
}
texture_binding_layout_map_.clear();
texture_binding_layouts_.clear();
for (auto it : shader_map_) {
delete it.second;
}
@ -264,8 +274,8 @@ void PipelineCache::InitializeShaderStorage(
auto shader_translation_thread_function = [&]() {
auto provider = command_processor_->GetD3D12Context()->GetD3D12Provider();
DxbcShaderTranslator translator(
provider->GetAdapterVendorID(), edram_rov_used_,
provider->GetGraphicsAnalysis() != nullptr);
provider->GetAdapterVendorID(), bindless_resources_used_,
edram_rov_used_, provider->GetGraphicsAnalysis() != nullptr);
for (;;) {
std::pair<ShaderStoredHeader, D3D12Shader*> shader_to_translate;
for (;;) {
@ -287,11 +297,11 @@ void PipelineCache::InitializeShaderStorage(
translator, shader_to_translate.second,
shader_to_translate.first.sq_program_cntl,
shader_to_translate.first.host_vertex_shader_type)) {
std::unique_lock<std::mutex> lock(shaders_failed_to_translate_mutex);
std::lock_guard<std::mutex> lock(shaders_failed_to_translate_mutex);
shaders_failed_to_translate.push_back(shader_to_translate.second);
}
{
std::unique_lock<std::mutex> lock(shaders_translation_thread_mutex);
std::lock_guard<std::mutex> lock(shaders_translation_thread_mutex);
--shader_translation_threads_busy;
}
}
@ -340,7 +350,7 @@ void PipelineCache::InitializeShaderStorage(
// one.
size_t shader_translation_threads_needed;
{
std::unique_lock<std::mutex> lock(shaders_translation_thread_mutex);
std::lock_guard<std::mutex> lock(shaders_translation_thread_mutex);
shader_translation_threads_needed =
std::min(shader_translation_threads_busy +
shaders_to_translate.size() + size_t(1),
@ -353,7 +363,7 @@ void PipelineCache::InitializeShaderStorage(
shader_translation_threads.back()->set_name("Shader Translation");
}
{
std::unique_lock<std::mutex> lock(shaders_translation_thread_mutex);
std::lock_guard<std::mutex> lock(shaders_translation_thread_mutex);
shaders_to_translate.emplace_back(shader_header, shader);
}
shaders_translation_thread_cond.notify_one();
@ -362,7 +372,7 @@ void PipelineCache::InitializeShaderStorage(
}
if (!shader_translation_threads.empty()) {
{
std::unique_lock<std::mutex> lock(shaders_translation_thread_mutex);
std::lock_guard<std::mutex> lock(shaders_translation_thread_mutex);
shader_translation_threads_shutdown = true;
}
shaders_translation_thread_cond.notify_all();
@ -662,7 +672,7 @@ void PipelineCache::EndSubmission() {
if (shader_storage_file_flush_needed_ ||
pipeline_state_storage_file_flush_needed_) {
{
std::unique_lock<std::mutex> lock(storage_write_request_lock_);
std::lock_guard<std::mutex> lock(storage_write_request_lock_);
if (shader_storage_file_flush_needed_) {
storage_write_flush_shaders_ = true;
}
@ -955,16 +965,6 @@ bool PipelineCache::TranslateShader(
return false;
}
uint32_t texture_srv_count;
const DxbcShaderTranslator::TextureSRV* texture_srvs =
translator.GetTextureSRVs(texture_srv_count);
uint32_t sampler_binding_count;
const DxbcShaderTranslator::SamplerBinding* sampler_bindings =
translator.GetSamplerBindings(sampler_binding_count);
shader->SetTexturesAndSamplers(texture_srvs, texture_srv_count,
sampler_bindings, sampler_binding_count);
if (shader->is_valid()) {
const char* host_shader_type;
if (shader->type() == ShaderType::kVertex) {
switch (shader->host_vertex_shader_type()) {
@ -992,10 +992,138 @@ bool PipelineCache::TranslateShader(
} else {
host_shader_type = "pixel";
}
XELOGGPU("Generated {} shader ({}b) - hash {:016X}:\n{}\n",
host_shader_type, shader->ucode_dword_count() * 4,
shader->ucode_data_hash(), shader->ucode_disassembly().c_str());
XELOGGPU("Generated {} shader ({}b) - hash {:016X}:\n{}\n", host_shader_type,
shader->ucode_dword_count() * 4, shader->ucode_data_hash(),
shader->ucode_disassembly().c_str());
// Set up texture and sampler bindings.
uint32_t texture_binding_count;
const DxbcShaderTranslator::TextureBinding* translator_texture_bindings =
translator.GetTextureBindings(texture_binding_count);
uint32_t sampler_binding_count;
const DxbcShaderTranslator::SamplerBinding* sampler_bindings =
translator.GetSamplerBindings(sampler_binding_count);
shader->SetTexturesAndSamplers(translator_texture_bindings,
texture_binding_count, sampler_bindings,
sampler_binding_count);
assert_false(bindless_resources_used_ &&
texture_binding_count + sampler_binding_count >
D3D12_REQ_CONSTANT_BUFFER_ELEMENT_COUNT * 4);
// Get hashable texture bindings, without translator-specific info.
const D3D12Shader::TextureBinding* texture_bindings =
shader->GetTextureBindings(texture_binding_count);
size_t texture_binding_layout_bytes =
texture_binding_count * sizeof(*texture_bindings);
uint64_t texture_binding_layout_hash = 0;
if (texture_binding_count) {
texture_binding_layout_hash =
XXH64(texture_bindings, texture_binding_layout_bytes, 0);
}
uint32_t bindless_sampler_count =
bindless_resources_used_ ? sampler_binding_count : 0;
uint64_t bindless_sampler_layout_hash = 0;
if (bindless_sampler_count) {
XXH64_state_t hash_state;
XXH64_reset(&hash_state, 0);
for (uint32_t i = 0; i < bindless_sampler_count; ++i) {
XXH64_update(&hash_state, &sampler_bindings[i].bindless_descriptor_index,
sizeof(sampler_bindings[i].bindless_descriptor_index));
}
bindless_sampler_layout_hash = XXH64_digest(&hash_state);
}
// Obtain the unique IDs of binding layouts if there are any texture bindings
// or bindless samplers, for invalidation in the command processor.
size_t texture_binding_layout_uid = kLayoutUIDEmpty;
// Use sampler count for the bindful case because it's the only thing that
// must be the same for layouts to be compatible in this case
// (instruction-specified parameters are used as overrides for actual
// samplers).
static_assert(
kLayoutUIDEmpty == 0,
"Empty layout UID is assumed to be 0 because for bindful samplers, the "
"UID is their count");
size_t sampler_binding_layout_uid = bindless_resources_used_
? kLayoutUIDEmpty
: size_t(sampler_binding_count);
if (texture_binding_count || bindless_sampler_count) {
std::lock_guard<std::mutex> layouts_mutex_(layouts_mutex_);
if (texture_binding_count) {
auto found_range =
texture_binding_layout_map_.equal_range(texture_binding_layout_hash);
for (auto it = found_range.first; it != found_range.second; ++it) {
if (it->second.vector_span_length == texture_binding_count &&
!std::memcmp(
texture_binding_layouts_.data() + it->second.vector_span_offset,
texture_bindings, texture_binding_layout_bytes)) {
texture_binding_layout_uid = it->second.uid;
break;
}
}
if (texture_binding_layout_uid == kLayoutUIDEmpty) {
static_assert(
kLayoutUIDEmpty == 0,
"Layout UID is size + 1 because it's assumed that 0 is the UID for "
"an empty layout");
texture_binding_layout_uid = texture_binding_layout_map_.size() + 1;
LayoutUID new_uid;
new_uid.uid = texture_binding_layout_uid;
new_uid.vector_span_offset = texture_binding_layouts_.size();
new_uid.vector_span_length = texture_binding_count;
texture_binding_layouts_.resize(new_uid.vector_span_offset +
texture_binding_count);
std::memcpy(
texture_binding_layouts_.data() + new_uid.vector_span_offset,
texture_bindings, texture_binding_layout_bytes);
texture_binding_layout_map_.insert(
{texture_binding_layout_hash, new_uid});
}
}
if (bindless_sampler_count) {
auto found_range =
bindless_sampler_layout_map_.equal_range(sampler_binding_layout_uid);
for (auto it = found_range.first; it != found_range.second; ++it) {
if (it->second.vector_span_length != bindless_sampler_count) {
continue;
}
sampler_binding_layout_uid = it->second.uid;
const uint32_t* vector_bindless_sampler_layout =
bindless_sampler_layouts_.data() + it->second.vector_span_offset;
for (uint32_t i = 0; i < bindless_sampler_count; ++i) {
if (vector_bindless_sampler_layout[i] !=
sampler_bindings[i].bindless_descriptor_index) {
sampler_binding_layout_uid = kLayoutUIDEmpty;
break;
}
}
if (sampler_binding_layout_uid != kLayoutUIDEmpty) {
break;
}
}
if (sampler_binding_layout_uid == kLayoutUIDEmpty) {
sampler_binding_layout_uid = bindless_sampler_layout_map_.size();
LayoutUID new_uid;
static_assert(
kLayoutUIDEmpty == 0,
"Layout UID is size + 1 because it's assumed that 0 is the UID for "
"an empty layout");
new_uid.uid = sampler_binding_layout_uid + 1;
new_uid.vector_span_offset = bindless_sampler_layouts_.size();
new_uid.vector_span_length = sampler_binding_count;
bindless_sampler_layouts_.resize(new_uid.vector_span_offset +
sampler_binding_count);
uint32_t* vector_bindless_sampler_layout =
bindless_sampler_layouts_.data() + new_uid.vector_span_offset;
for (uint32_t i = 0; i < bindless_sampler_count; ++i) {
vector_bindless_sampler_layout[i] =
sampler_bindings[i].bindless_descriptor_index;
}
bindless_sampler_layout_map_.insert(
{bindless_sampler_layout_hash, new_uid});
}
}
}
shader->SetTextureBindingLayoutUserUID(texture_binding_layout_uid);
shader->SetSamplerBindingLayoutUserUID(sampler_binding_layout_uid);
// Create a version of the shader with early depth/stencil forced by Xenia
// itself when it's safe to do so or when EARLY_Z_ENABLE is set in
@ -1856,7 +1984,7 @@ void PipelineCache::CreationThread(size_t thread_index) {
// set the completion event if needed (at the next iteration, or in some
// other thread).
{
std::unique_lock<std::mutex> lock(creation_request_lock_);
std::lock_guard<std::mutex> lock(creation_request_lock_);
--creation_threads_busy_;
}
}
@ -1867,7 +1995,7 @@ void PipelineCache::CreateQueuedPipelineStatesOnProcessorThread() {
while (true) {
PipelineState* pipeline_state_to_create;
{
std::unique_lock<std::mutex> lock(creation_request_lock_);
std::lock_guard<std::mutex> lock(creation_request_lock_);
if (creation_queue_.empty()) {
break;
}

View File

@ -21,6 +21,7 @@
#include <utility>
#include <vector>
#include "xenia/base/hash.h"
#include "xenia/base/platform.h"
#include "xenia/base/threading.h"
#include "xenia/gpu/d3d12/d3d12_shader.h"
@ -37,9 +38,11 @@ class D3D12CommandProcessor;
class PipelineCache {
public:
static constexpr size_t kLayoutUIDEmpty = 0;
PipelineCache(D3D12CommandProcessor* command_processor,
RegisterFile* register_file, bool edram_rov_used,
uint32_t resolution_scale);
RegisterFile* register_file, bool bindless_resources_used,
bool edram_rov_used, uint32_t resolution_scale);
~PipelineCache();
bool Initialize();
@ -217,6 +220,7 @@ class PipelineCache {
PipelineDescription description;
};
// Can be called from multiple threads.
bool TranslateShader(DxbcShaderTranslator& translator, D3D12Shader* shader,
reg::SQ_PROGRAM_CNTL cntl,
Shader::HostVertexShaderType host_vertex_shader_type =
@ -233,13 +237,37 @@ class PipelineCache {
D3D12CommandProcessor* command_processor_;
RegisterFile* register_file_;
bool bindless_resources_used_;
bool edram_rov_used_;
uint32_t resolution_scale_;
// Reusable shader translator.
std::unique_ptr<DxbcShaderTranslator> shader_translator_ = nullptr;
// All loaded shaders mapped by their guest hash key.
std::unordered_map<uint64_t, D3D12Shader*> shader_map_;
std::unordered_map<uint64_t, D3D12Shader*, xe::hash::IdentityHasher<uint64_t>>
shader_map_;
struct LayoutUID {
size_t uid;
size_t vector_span_offset;
size_t vector_span_length;
};
std::mutex layouts_mutex_;
// Texture binding layouts of different shaders, for obtaining layout UIDs.
std::vector<D3D12Shader::TextureBinding> texture_binding_layouts_;
// Map of texture binding layouts used by shaders, for obtaining UIDs. Keys
// are XXH64 hashes of layouts, values need manual collision resolution using
// layout_vector_offset:layout_length of texture_binding_layouts_.
std::unordered_multimap<uint64_t, LayoutUID,
xe::hash::IdentityHasher<uint64_t>>
texture_binding_layout_map_;
// Bindless sampler indices of different shaders, for obtaining layout UIDs.
// For bindful, sampler count is used as the UID instead.
std::vector<uint32_t> bindless_sampler_layouts_;
// Keys are XXH64 hashes of used bindless sampler indices.
std::unordered_multimap<uint64_t, LayoutUID,
xe::hash::IdentityHasher<uint64_t>>
bindless_sampler_layout_map_;
// Empty depth-only pixel shader for writing to depth buffer via ROV when no
// Xenos pixel shader provided.
@ -252,7 +280,9 @@ class PipelineCache {
};
// All previously generated pipeline state objects identified by hash and the
// description.
std::unordered_multimap<uint64_t, PipelineState*> pipeline_states_;
std::unordered_multimap<uint64_t, PipelineState*,
xe::hash::IdentityHasher<uint64_t>>
pipeline_states_;
// Previously used pipeline state object. This matches our current state
// settings and allows us to quickly(ish) reuse the pipeline state if no

View File

@ -102,10 +102,12 @@ const RenderTargetCache::EDRAMLoadStoreModeInfo
// Captures backend configuration flags (bindless resource usage, ROV-based
// EDRAM emulation) and the collaborators shared by all render target
// operations; heavy resource creation is deferred to Initialize().
RenderTargetCache::RenderTargetCache(D3D12CommandProcessor* command_processor,
                                     RegisterFile* register_file,
                                     TraceWriter* trace_writer,
                                     bool bindless_resources_used,
                                     bool edram_rov_used)
    : command_processor_(command_processor),
      register_file_(register_file),
      trace_writer_(trace_writer),
      bindless_resources_used_(bindless_resources_used),
      edram_rov_used_(edram_rov_used) {}
RenderTargetCache::~RenderTargetCache() {
  // Release everything owned by the cache before destruction.
  Shutdown();
}
@ -181,10 +183,10 @@ bool RenderTargetCache::Initialize(const TextureCache* texture_cache) {
edram_buffer_, nullptr, &edram_buffer_uint32_uav_desc,
provider->OffsetViewDescriptor(
edram_buffer_descriptor_heap_start_,
uint32_t(EDRAMBufferDescriptorIndex::kUint32UAV)));
uint32_t(EDRAMBufferDescriptorIndex::kR32UintUAV)));
// Create the root signature for EDRAM buffer load/store.
D3D12_ROOT_PARAMETER load_store_root_parameters[2];
D3D12_ROOT_PARAMETER load_store_root_parameters[3];
// Parameter 0 is constants (changed for each render target binding).
load_store_root_parameters[0].ParameterType =
D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS;
@ -193,24 +195,32 @@ bool RenderTargetCache::Initialize(const TextureCache* texture_cache) {
load_store_root_parameters[0].Constants.Num32BitValues =
sizeof(EDRAMLoadStoreRootConstants) / sizeof(uint32_t);
load_store_root_parameters[0].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
// Parameter 1 is source and target.
D3D12_DESCRIPTOR_RANGE load_store_root_ranges[2];
load_store_root_ranges[0].RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV;
load_store_root_ranges[0].NumDescriptors = 1;
load_store_root_ranges[0].BaseShaderRegister = 0;
load_store_root_ranges[0].RegisterSpace = 0;
load_store_root_ranges[0].OffsetInDescriptorsFromTableStart = 0;
load_store_root_ranges[1].RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_UAV;
load_store_root_ranges[1].NumDescriptors = 1;
load_store_root_ranges[1].BaseShaderRegister = 0;
load_store_root_ranges[1].RegisterSpace = 0;
load_store_root_ranges[1].OffsetInDescriptorsFromTableStart = 1;
// Parameter 1 is the destination.
D3D12_DESCRIPTOR_RANGE load_store_root_dest_range;
load_store_root_dest_range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_UAV;
load_store_root_dest_range.NumDescriptors = 1;
load_store_root_dest_range.BaseShaderRegister = 0;
load_store_root_dest_range.RegisterSpace = 0;
load_store_root_dest_range.OffsetInDescriptorsFromTableStart = 0;
load_store_root_parameters[1].ParameterType =
D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
load_store_root_parameters[1].DescriptorTable.NumDescriptorRanges = 2;
load_store_root_parameters[1].DescriptorTable.NumDescriptorRanges = 1;
load_store_root_parameters[1].DescriptorTable.pDescriptorRanges =
load_store_root_ranges;
&load_store_root_dest_range;
load_store_root_parameters[1].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
// Parameter 2 is the source.
D3D12_DESCRIPTOR_RANGE load_store_root_source_range;
load_store_root_source_range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV;
load_store_root_source_range.NumDescriptors = 1;
load_store_root_source_range.BaseShaderRegister = 0;
load_store_root_source_range.RegisterSpace = 0;
load_store_root_source_range.OffsetInDescriptorsFromTableStart = 0;
load_store_root_parameters[2].ParameterType =
D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
load_store_root_parameters[2].DescriptorTable.NumDescriptorRanges = 1;
load_store_root_parameters[2].DescriptorTable.pDescriptorRanges =
&load_store_root_source_range;
load_store_root_parameters[2].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
D3D12_ROOT_SIGNATURE_DESC load_store_root_desc;
load_store_root_desc.NumParameters =
UINT(xe::countof(load_store_root_parameters));
@ -226,10 +236,8 @@ bool RenderTargetCache::Initialize(const TextureCache* texture_cache) {
Shutdown();
return false;
}
// Create the clear root signature (the same, but with the UAV only).
load_store_root_ranges[1].OffsetInDescriptorsFromTableStart = 0;
load_store_root_parameters[1].DescriptorTable.NumDescriptorRanges = 1;
++load_store_root_parameters[1].DescriptorTable.pDescriptorRanges;
// Create the clear root signature (the same, but with the destination only).
load_store_root_desc.NumParameters = 2;
edram_clear_root_signature_ =
ui::d3d12::util::CreateRootSignature(provider, load_store_root_desc);
if (edram_clear_root_signature_ == nullptr) {
@ -1359,8 +1367,8 @@ bool RenderTargetCache::ResolveCopy(SharedMemory* shared_memory,
// bilinear filtering), applying exponent bias and swapping red and blue in
// a format-agnostic way, then the resulting color is written to a temporary
// RTV of the destination format.
auto provider = command_processor_->GetD3D12Context()->GetD3D12Provider();
auto device = provider->GetDevice();
auto device =
command_processor_->GetD3D12Context()->GetD3D12Provider()->GetDevice();
uint32_t resolution_scale_log2 = resolution_scale_2x_ ? 1 : 0;
// Check if we need to apply the hack to remove the gap on the left and top
// sides of the screen caused by half-pixel offset becoming whole pixel offset
@ -1423,33 +1431,50 @@ bool RenderTargetCache::ResolveCopy(SharedMemory* shared_memory,
}
// Write the source and destination descriptors.
D3D12_CPU_DESCRIPTOR_HANDLE descriptor_cpu_start;
D3D12_GPU_DESCRIPTOR_HANDLE descriptor_gpu_start;
if (command_processor_->RequestViewDescriptors(
ui::d3d12::DescriptorHeapPool::kHeapIndexInvalid, 2, 2,
descriptor_cpu_start, descriptor_gpu_start) ==
ui::d3d12::DescriptorHeapPool::kHeapIndexInvalid) {
ui::d3d12::util::DescriptorCPUGPUHandlePair descriptor_dest;
ui::d3d12::util::DescriptorCPUGPUHandlePair descriptor_source;
if (bindless_resources_used_) {
if (resolution_scale_2x_) {
if (!command_processor_->RequestOneUseSingleViewDescriptors(
1, &descriptor_dest)) {
return false;
}
} else {
descriptor_dest = command_processor_->GetSystemBindlessViewHandlePair(
D3D12CommandProcessor::SystemBindlessView::kSharedMemoryRawUAV);
}
descriptor_source = command_processor_->GetSystemBindlessViewHandlePair(
D3D12CommandProcessor::SystemBindlessView::kEDRAMRawSRV);
} else {
ui::d3d12::util::DescriptorCPUGPUHandlePair descriptors[2];
if (!command_processor_->RequestOneUseSingleViewDescriptors(
2, descriptors)) {
return false;
}
descriptor_dest = descriptors[0];
if (!resolution_scale_2x_) {
shared_memory->WriteRawUAVDescriptor(descriptor_dest.first);
}
descriptor_source = descriptors[1];
WriteEDRAMRawSRVDescriptor(descriptor_source.first);
}
TransitionEDRAMBuffer(D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE);
WriteEDRAMRawSRVDescriptor(descriptor_cpu_start);
if (resolution_scale_2x_) {
texture_cache->UseScaledResolveBufferForWriting();
// Can't address more than 512 MB directly on Nvidia - binding only a part
// of the buffer.
texture_cache->CreateScaledResolveBufferRawUAV(
provider->OffsetViewDescriptor(descriptor_cpu_start, 1),
dest_address >> 12,
descriptor_dest.first, dest_address >> 12,
((dest_address + dest_size - 1) >> 12) - (dest_address >> 12) + 1);
} else {
shared_memory->UseForWriting();
shared_memory->WriteRawUAVDescriptor(
provider->OffsetViewDescriptor(descriptor_cpu_start, 1));
// Descriptor already written.
}
command_processor_->SubmitBarriers();
// Dispatch the computation.
command_list->D3DSetComputeRootSignature(edram_load_store_root_signature_);
command_list->D3DSetComputeRootDescriptorTable(2, descriptor_source.second);
command_list->D3DSetComputeRootDescriptorTable(1, descriptor_dest.second);
EDRAMLoadStoreRootConstants root_constants;
// Address is adjusted to the first modified tile, so using & 31 as the
// destination offset.
@ -1488,10 +1513,11 @@ bool RenderTargetCache::ResolveCopy(SharedMemory* shared_memory,
}
command_list->D3DSetComputeRoot32BitConstants(
0, sizeof(root_constants) / sizeof(uint32_t), &root_constants, 0);
command_list->D3DSetComputeRootDescriptorTable(1, descriptor_gpu_start);
command_processor_->SetComputePipeline(
src_64bpp ? edram_tile_sample_64bpp_pipeline_
: edram_tile_sample_32bpp_pipeline_);
command_processor_->SubmitBarriers();
// 1 group per destination 80x16 region.
uint32_t group_count_x = row_width_ss_div_80, group_count_y = rows;
if (msaa_samples >= MsaaSamples::k2X) {
@ -1572,15 +1598,31 @@ bool RenderTargetCache::ResolveCopy(SharedMemory* shared_memory,
if (resolve_target == nullptr) {
return false;
}
// Descriptors. 2 for EDRAM load, 1 for conversion.
D3D12_CPU_DESCRIPTOR_HANDLE descriptor_cpu_start;
D3D12_GPU_DESCRIPTOR_HANDLE descriptor_gpu_start;
if (command_processor_->RequestViewDescriptors(
ui::d3d12::DescriptorHeapPool::kHeapIndexInvalid, 3, 3,
descriptor_cpu_start, descriptor_gpu_start) ==
ui::d3d12::DescriptorHeapPool::kHeapIndexInvalid) {
// Descriptors.
ui::d3d12::util::DescriptorCPUGPUHandlePair descriptor_copy_buffer;
ui::d3d12::util::DescriptorCPUGPUHandlePair descriptor_rt;
ui::d3d12::util::DescriptorCPUGPUHandlePair descriptor_edram;
if (bindless_resources_used_) {
ui::d3d12::util::DescriptorCPUGPUHandlePair descriptors[2];
if (!command_processor_->RequestOneUseSingleViewDescriptors(
2, descriptors)) {
return false;
}
descriptor_copy_buffer = descriptors[0];
descriptor_rt = descriptors[1];
descriptor_edram = command_processor_->GetSystemBindlessViewHandlePair(
D3D12CommandProcessor::SystemBindlessView::kEDRAMRawSRV);
} else {
ui::d3d12::util::DescriptorCPUGPUHandlePair descriptors[3];
if (!command_processor_->RequestOneUseSingleViewDescriptors(
3, descriptors)) {
return false;
}
descriptor_copy_buffer = descriptors[0];
descriptor_rt = descriptors[1];
descriptor_edram = descriptors[2];
WriteEDRAMRawSRVDescriptor(descriptor_edram.first);
}
// Buffer for copying.
D3D12_RESOURCE_STATES copy_buffer_state =
D3D12_RESOURCE_STATE_UNORDERED_ACCESS;
@ -1616,11 +1658,12 @@ bool RenderTargetCache::ResolveCopy(SharedMemory* shared_memory,
0, sizeof(load_root_constants) / sizeof(uint32_t), &load_root_constants,
0);
WriteEDRAMRawSRVDescriptor(descriptor_cpu_start);
ui::d3d12::util::CreateRawBufferUAV(
device, provider->OffsetViewDescriptor(descriptor_cpu_start, 1),
copy_buffer, render_target->copy_buffer_size);
command_list->D3DSetComputeRootDescriptorTable(1, descriptor_gpu_start);
command_list->D3DSetComputeRootDescriptorTable(2, descriptor_edram.second);
ui::d3d12::util::CreateRawBufferUAV(device, descriptor_copy_buffer.first,
copy_buffer,
render_target->copy_buffer_size);
command_list->D3DSetComputeRootDescriptorTable(
1, descriptor_copy_buffer.second);
EDRAMLoadStoreMode mode = GetLoadStoreMode(false, src_format);
command_processor_->SetComputePipeline(
@ -1630,13 +1673,6 @@ bool RenderTargetCache::ResolveCopy(SharedMemory* shared_memory,
command_list->D3DDispatch(row_width_ss_div_80, rows, 1);
command_processor_->PushUAVBarrier(copy_buffer);
// Go to the next descriptor set.
descriptor_cpu_start =
provider->OffsetViewDescriptor(descriptor_cpu_start, 2);
descriptor_gpu_start =
provider->OffsetViewDescriptor(descriptor_gpu_start, 2);
// Copy the EDRAM buffer contents to the source texture.
#if 0
@ -1770,8 +1806,8 @@ bool RenderTargetCache::ResolveCopy(SharedMemory* shared_memory,
rt_srv_desc.Texture2D.PlaneSlice = 0;
rt_srv_desc.Texture2D.ResourceMinLODClamp = 0.0f;
device->CreateShaderResourceView(render_target->resource, &rt_srv_desc,
descriptor_cpu_start);
command_list->D3DSetGraphicsRootDescriptorTable(1, descriptor_gpu_start);
descriptor_rt.first);
command_list->D3DSetGraphicsRootDescriptorTable(1, descriptor_rt.second);
command_processor_->SubmitBarriers();
command_processor_->SetSamplePositions(MsaaSamples::k1X);
@ -1878,18 +1914,18 @@ bool RenderTargetCache::ResolveClear(uint32_t edram_base,
uint32_t samples_x_log2 = msaa_samples >= MsaaSamples::k4X ? 1 : 0;
uint32_t samples_y_log2 = msaa_samples >= MsaaSamples::k2X ? 1 : 0;
// Get everything needed for clearing.
auto command_list = command_processor_->GetDeferredCommandList();
auto device =
command_processor_->GetD3D12Context()->GetD3D12Provider()->GetDevice();
D3D12_CPU_DESCRIPTOR_HANDLE descriptor_cpu_start;
D3D12_GPU_DESCRIPTOR_HANDLE descriptor_gpu_start;
if (command_processor_->RequestViewDescriptors(
ui::d3d12::DescriptorHeapPool::kHeapIndexInvalid, 1, 1,
descriptor_cpu_start, descriptor_gpu_start) ==
ui::d3d12::DescriptorHeapPool::kHeapIndexInvalid) {
// Get transient data needed for clearing.
ui::d3d12::util::DescriptorCPUGPUHandlePair descriptor_edram;
if (bindless_resources_used_) {
descriptor_edram = command_processor_->GetSystemBindlessViewHandlePair(
D3D12CommandProcessor::SystemBindlessView::kEDRAMRawUAV);
} else {
if (!command_processor_->RequestOneUseSingleViewDescriptors(
1, &descriptor_edram)) {
return false;
}
WriteEDRAMRawUAVDescriptor(descriptor_edram.first);
}
// Submit the clear.
TransitionEDRAMBuffer(D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
@ -1935,11 +1971,11 @@ bool RenderTargetCache::ResolveClear(uint32_t edram_base,
root_constants.clear_color_high = regs[reg].u32;
command_processor_->SetComputePipeline(edram_clear_32bpp_pipeline_);
}
auto command_list = command_processor_->GetDeferredCommandList();
command_list->D3DSetComputeRootSignature(edram_clear_root_signature_);
command_list->D3DSetComputeRoot32BitConstants(
0, sizeof(root_constants) / sizeof(uint32_t), &root_constants, 0);
WriteEDRAMRawUAVDescriptor(descriptor_cpu_start);
command_list->D3DSetComputeRootDescriptorTable(1, descriptor_gpu_start);
command_list->D3DSetComputeRootDescriptorTable(1, descriptor_edram.second);
// 1 group per 80x16 samples. Resolution scale handled in the shader itself.
command_list->D3DDispatch(row_width_ss_div_80, rows, 1);
CommitEDRAMBufferUAVWrites(true);
@ -2150,7 +2186,7 @@ void RenderTargetCache::FlushAndUnbindRenderTargets() {
ClearBindings();
}
void RenderTargetCache::WriteEDRAMUint32UAVDescriptor(
void RenderTargetCache::WriteEDRAMR32UintUAVDescriptor(
D3D12_CPU_DESCRIPTOR_HANDLE handle) {
auto provider = command_processor_->GetD3D12Context()->GetD3D12Provider();
auto device = provider->GetDevice();
@ -2158,7 +2194,31 @@ void RenderTargetCache::WriteEDRAMUint32UAVDescriptor(
1, handle,
provider->OffsetViewDescriptor(
edram_buffer_descriptor_heap_start_,
uint32_t(EDRAMBufferDescriptorIndex::kUint32UAV)),
uint32_t(EDRAMBufferDescriptorIndex::kR32UintUAV)),
D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
}
// Copies the EDRAM buffer's persistent raw SRV descriptor into the
// caller-provided descriptor slot (typically in a shader-visible heap).
void RenderTargetCache::WriteEDRAMRawSRVDescriptor(
    D3D12_CPU_DESCRIPTOR_HANDLE handle) {
  auto* const d3d12_provider =
      command_processor_->GetD3D12Context()->GetD3D12Provider();
  const D3D12_CPU_DESCRIPTOR_HANDLE source_descriptor =
      d3d12_provider->OffsetViewDescriptor(
          edram_buffer_descriptor_heap_start_,
          uint32_t(EDRAMBufferDescriptorIndex::kRawSRV));
  d3d12_provider->GetDevice()->CopyDescriptorsSimple(
      1, handle, source_descriptor, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
}
// Duplicates the cached raw UAV descriptor of the EDRAM buffer into the
// destination descriptor slot supplied by the caller.
void RenderTargetCache::WriteEDRAMRawUAVDescriptor(
    D3D12_CPU_DESCRIPTOR_HANDLE handle) {
  auto* const provider =
      command_processor_->GetD3D12Context()->GetD3D12Provider();
  provider->GetDevice()->CopyDescriptorsSimple(
      1, handle,
      provider->OffsetViewDescriptor(
          edram_buffer_descriptor_heap_start_,
          uint32_t(EDRAMBufferDescriptorIndex::kRawUAV)),
      D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
}
@ -2283,13 +2343,22 @@ void RenderTargetCache::RestoreEDRAMSnapshot(const void* snapshot) {
// Clear and ignore the old 32-bit float depth - the non-ROV path is
// inaccurate anyway, and this is backend-specific, not a part of a guest
// trace.
D3D12_CPU_DESCRIPTOR_HANDLE shader_visbile_descriptor_cpu;
D3D12_GPU_DESCRIPTOR_HANDLE shader_visbile_descriptor_gpu;
if (command_processor_->RequestViewDescriptors(
ui::d3d12::DescriptorHeapPool::kHeapIndexInvalid, 1, 1,
shader_visbile_descriptor_cpu, shader_visbile_descriptor_gpu) !=
ui::d3d12::DescriptorHeapPool::kHeapIndexInvalid) {
WriteEDRAMUint32UAVDescriptor(shader_visbile_descriptor_cpu);
bool edram_shader_visible_r32_uav_obtained;
ui::d3d12::util::DescriptorCPUGPUHandlePair edram_shader_visible_r32_uav;
if (bindless_resources_used_) {
edram_shader_visible_r32_uav_obtained = true;
edram_shader_visible_r32_uav =
command_processor_->GetSystemBindlessViewHandlePair(
D3D12CommandProcessor::SystemBindlessView::kEDRAMR32UintUAV);
} else {
edram_shader_visible_r32_uav_obtained =
command_processor_->RequestOneUseSingleViewDescriptors(
1, &edram_shader_visible_r32_uav);
if (edram_shader_visible_r32_uav_obtained) {
WriteEDRAMR32UintUAVDescriptor(edram_shader_visible_r32_uav.first);
}
}
if (edram_shader_visible_r32_uav_obtained) {
UINT clear_value[4] = {0, 0, 0, 0};
D3D12_RECT clear_rect;
clear_rect.left = kEDRAMSize >> 2;
@ -2301,13 +2370,11 @@ void RenderTargetCache::RestoreEDRAMSnapshot(const void* snapshot) {
// ClearUnorderedAccessView takes a shader-visible GPU descriptor and a
// non-shader-visible CPU descriptor.
command_list->D3DClearUnorderedAccessViewUint(
shader_visbile_descriptor_gpu,
edram_shader_visible_r32_uav.second,
provider->OffsetViewDescriptor(
edram_buffer_descriptor_heap_start_,
uint32_t(EDRAMBufferDescriptorIndex::kUint32UAV)),
uint32_t(EDRAMBufferDescriptorIndex::kR32UintUAV)),
edram_buffer_, clear_value, 1, &clear_rect);
} else {
XELOGE("Failed to get a UAV descriptor for invalidating 32-bit depth");
}
}
}
@ -2343,30 +2410,6 @@ void RenderTargetCache::CommitEDRAMBufferUAVWrites(bool force) {
edram_buffer_modified_ = false;
}
// Copies the EDRAM buffer's raw SRV descriptor (kRawSRV slot of the cache's
// internal descriptor heap) into the given descriptor slot.
void RenderTargetCache::WriteEDRAMRawSRVDescriptor(
    D3D12_CPU_DESCRIPTOR_HANDLE handle) {
  auto provider = command_processor_->GetD3D12Context()->GetD3D12Provider();
  auto device = provider->GetDevice();
  device->CopyDescriptorsSimple(
      1, handle,
      provider->OffsetViewDescriptor(
          edram_buffer_descriptor_heap_start_,
          uint32_t(EDRAMBufferDescriptorIndex::kRawSRV)),
      D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
}
// Copies the EDRAM buffer's raw UAV descriptor (kRawUAV slot of the cache's
// internal descriptor heap) into the given descriptor slot.
void RenderTargetCache::WriteEDRAMRawUAVDescriptor(
    D3D12_CPU_DESCRIPTOR_HANDLE handle) {
  auto provider = command_processor_->GetD3D12Context()->GetD3D12Provider();
  auto device = provider->GetDevice();
  device->CopyDescriptorsSimple(
      1, handle,
      provider->OffsetViewDescriptor(
          edram_buffer_descriptor_heap_start_,
          uint32_t(EDRAMBufferDescriptorIndex::kRawUAV)),
      D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
}
void RenderTargetCache::ClearBindings() {
current_surface_pitch_ = 0;
current_msaa_samples_ = MsaaSamples::k1X;
@ -2710,14 +2753,25 @@ void RenderTargetCache::StoreRenderTargetsToEDRAM() {
}
// Allocate descriptors for the buffers.
D3D12_CPU_DESCRIPTOR_HANDLE descriptor_cpu_start;
D3D12_GPU_DESCRIPTOR_HANDLE descriptor_gpu_start;
if (command_processor_->RequestViewDescriptors(
ui::d3d12::DescriptorHeapPool::kHeapIndexInvalid, 2, 2,
descriptor_cpu_start, descriptor_gpu_start) ==
ui::d3d12::DescriptorHeapPool::kHeapIndexInvalid) {
ui::d3d12::util::DescriptorCPUGPUHandlePair descriptor_edram;
ui::d3d12::util::DescriptorCPUGPUHandlePair descriptor_source;
if (bindless_resources_used_) {
if (!command_processor_->RequestOneUseSingleViewDescriptors(
1, &descriptor_source)) {
return;
}
descriptor_edram = command_processor_->GetSystemBindlessViewHandlePair(
D3D12CommandProcessor::SystemBindlessView::kEDRAMRawUAV);
} else {
ui::d3d12::util::DescriptorCPUGPUHandlePair descriptors[2];
if (!command_processor_->RequestOneUseSingleViewDescriptors(2,
descriptors)) {
return;
}
descriptor_edram = descriptors[0];
WriteEDRAMRawUAVDescriptor(descriptor_edram.first);
descriptor_source = descriptors[1];
}
// Get the buffer for copying.
D3D12_RESOURCE_STATES copy_buffer_state = D3D12_RESOURCE_STATE_COPY_DEST;
@ -2740,14 +2794,13 @@ void RenderTargetCache::StoreRenderTargetsToEDRAM() {
TransitionEDRAMBuffer(D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
// Set up the bindings.
auto provider = command_processor_->GetD3D12Context()->GetD3D12Provider();
auto device = provider->GetDevice();
auto device =
command_processor_->GetD3D12Context()->GetD3D12Provider()->GetDevice();
command_list->D3DSetComputeRootSignature(edram_load_store_root_signature_);
ui::d3d12::util::CreateRawBufferSRV(device, descriptor_cpu_start, copy_buffer,
copy_buffer_size);
WriteEDRAMRawUAVDescriptor(
provider->OffsetViewDescriptor(descriptor_cpu_start, 1));
command_list->D3DSetComputeRootDescriptorTable(1, descriptor_gpu_start);
ui::d3d12::util::CreateRawBufferSRV(device, descriptor_source.first,
copy_buffer, copy_buffer_size);
command_list->D3DSetComputeRootDescriptorTable(2, descriptor_source.second);
command_list->D3DSetComputeRootDescriptorTable(1, descriptor_edram.second);
// Sort the bindings in ascending order of EDRAM base so data in the render
// targets placed farther in EDRAM isn't lost in case of overlap.
@ -2857,14 +2910,24 @@ void RenderTargetCache::LoadRenderTargetsFromEDRAM(
auto command_list = command_processor_->GetDeferredCommandList();
// Allocate descriptors for the buffers.
D3D12_CPU_DESCRIPTOR_HANDLE descriptor_cpu_start;
D3D12_GPU_DESCRIPTOR_HANDLE descriptor_gpu_start;
if (command_processor_->RequestViewDescriptors(
ui::d3d12::DescriptorHeapPool::kHeapIndexInvalid, 2, 2,
descriptor_cpu_start, descriptor_gpu_start) ==
ui::d3d12::DescriptorHeapPool::kHeapIndexInvalid) {
ui::d3d12::util::DescriptorCPUGPUHandlePair descriptor_dest, descriptor_edram;
if (bindless_resources_used_) {
if (!command_processor_->RequestOneUseSingleViewDescriptors(
1, &descriptor_dest)) {
return;
}
descriptor_edram = command_processor_->GetSystemBindlessViewHandlePair(
D3D12CommandProcessor::SystemBindlessView::kEDRAMRawSRV);
} else {
ui::d3d12::util::DescriptorCPUGPUHandlePair descriptors[2];
if (!command_processor_->RequestOneUseSingleViewDescriptors(2,
descriptors)) {
return;
}
descriptor_dest = descriptors[0];
descriptor_edram = descriptors[1];
WriteEDRAMRawSRVDescriptor(descriptor_edram.first);
}
// Get the buffer for copying.
uint32_t copy_buffer_size = 0;
@ -2892,14 +2955,13 @@ void RenderTargetCache::LoadRenderTargetsFromEDRAM(
TransitionEDRAMBuffer(D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE);
// Set up the bindings.
auto provider = command_processor_->GetD3D12Context()->GetD3D12Provider();
auto device = provider->GetDevice();
auto device =
command_processor_->GetD3D12Context()->GetD3D12Provider()->GetDevice();
command_list->D3DSetComputeRootSignature(edram_load_store_root_signature_);
WriteEDRAMRawSRVDescriptor(descriptor_cpu_start);
ui::d3d12::util::CreateRawBufferUAV(
device, provider->OffsetViewDescriptor(descriptor_cpu_start, 1),
command_list->D3DSetComputeRootDescriptorTable(2, descriptor_edram.second);
ui::d3d12::util::CreateRawBufferUAV(device, descriptor_dest.first,
copy_buffer, copy_buffer_size);
command_list->D3DSetComputeRootDescriptorTable(1, descriptor_gpu_start);
command_list->D3DSetComputeRootDescriptorTable(1, descriptor_dest.second);
// Load each render target.
for (uint32_t i = 0; i < render_target_count; ++i) {

View File

@ -251,7 +251,7 @@ class RenderTargetCache {
RenderTargetCache(D3D12CommandProcessor* command_processor,
RegisterFile* register_file, TraceWriter* trace_writer,
bool edram_rov_used);
bool bindless_resources_used, bool edram_rov_used);
~RenderTargetCache();
bool Initialize(const TextureCache* texture_cache);
@ -284,7 +284,9 @@ class RenderTargetCache {
// the command processor takes over framebuffer bindings to draw something
// special. May change the CBV/SRV/UAV descriptor heap.
void FlushAndUnbindRenderTargets();
void WriteEDRAMUint32UAVDescriptor(D3D12_CPU_DESCRIPTOR_HANDLE handle);
void WriteEDRAMR32UintUAVDescriptor(D3D12_CPU_DESCRIPTOR_HANDLE handle);
void WriteEDRAMRawSRVDescriptor(D3D12_CPU_DESCRIPTOR_HANDLE handle);
void WriteEDRAMRawUAVDescriptor(D3D12_CPU_DESCRIPTOR_HANDLE handle);
// Totally necessary to rely on the base format - Too Human switches between
// 2_10_10_10_FLOAT and 2_10_10_10_FLOAT_AS_16_16_16_16 every draw.
@ -436,9 +438,6 @@ class RenderTargetCache {
void TransitionEDRAMBuffer(D3D12_RESOURCE_STATES new_state);
void CommitEDRAMBufferUAVWrites(bool force);
void WriteEDRAMRawSRVDescriptor(D3D12_CPU_DESCRIPTOR_HANDLE handle);
void WriteEDRAMRawUAVDescriptor(D3D12_CPU_DESCRIPTOR_HANDLE handle);
void ClearBindings();
#if 0
@ -518,6 +517,7 @@ class RenderTargetCache {
D3D12CommandProcessor* command_processor_;
RegisterFile* register_file_;
TraceWriter* trace_writer_;
bool bindless_resources_used_;
bool edram_rov_used_;
// Whether 1 guest pixel is rendered as 2x2 host pixels (currently only
@ -538,7 +538,7 @@ class RenderTargetCache {
kRawSRV,
kRawUAV,
// For ROV access primarily.
kUint32UAV,
kR32UintUAV,
kCount,
};

View File

@ -47,10 +47,10 @@ cbuffer XeEDRAMLoadStoreConstants : register(b0) {
#define xe_edram_clear_depth24 (xe_edram_load_store_constants.z)
#define xe_edram_clear_depth32 (xe_edram_load_store_constants.w)
RWByteAddressBuffer xe_edram_load_store_dest : register(u0);
#ifndef XE_EDRAM_WRITE_ONLY
ByteAddressBuffer xe_edram_load_store_source : register(t0);
#endif
RWByteAddressBuffer xe_edram_load_store_dest : register(u0);
uint2 XeEDRAMSampleCountLog2() {
return (xe_edram_base_samples_2x_depth_pitch >> uint2(12u, 11u)) & 1u;

View File

@ -27,8 +27,8 @@ cbuffer XeTextureLoadConstants : register(b0) {
#define XeTextureLoadGuestPitchTiled 0xFFFFFFFFu
ByteAddressBuffer xe_texture_load_source : register(t0);
RWByteAddressBuffer xe_texture_load_dest : register(u0);
ByteAddressBuffer xe_texture_load_source : register(t0);
// bpb and bpb_log2 are separate because bpb may be not a power of 2 (like 96).
uint4 XeTextureLoadGuestBlockOffsets(uint3 block_index, uint bpb,

View File

@ -494,7 +494,8 @@ bool SharedMemory::AreTiledResourcesUsed() const {
auto provider = command_processor_->GetD3D12Context()->GetD3D12Provider();
// As of October 8th, 2018, PIX doesn't support tiled buffers.
// FIXME(Triang3l): Re-enable tiled resources with PIX once fixed.
return provider->GetTiledResourcesTier() >= 1 &&
return provider->GetTiledResourcesTier() !=
D3D12_TILED_RESOURCES_TIER_NOT_SUPPORTED &&
provider->GetGraphicsAnalysis() == nullptr;
}

View File

@ -12,6 +12,7 @@
#include "third_party/xxhash/xxhash.h"
#include <algorithm>
#include <cfloat>
#include <cstring>
#include "xenia/base/assert.h"
@ -92,7 +93,6 @@ namespace d3d12 {
#include "xenia/gpu/d3d12/shaders/dxbc/texture_tile_r10g11b11_rgba16_cs.h"
#include "xenia/gpu/d3d12/shaders/dxbc/texture_tile_r11g11b10_rgba16_cs.h"
constexpr uint32_t TextureCache::Texture::kCachedSRVDescriptorSwizzleMissing;
constexpr uint32_t TextureCache::SRVDescriptorCachePage::kHeapSize;
constexpr uint32_t TextureCache::LoadConstants::kGuestPitchTiled;
constexpr uint32_t TextureCache::kScaledResolveBufferSizeLog2;
@ -905,9 +905,11 @@ const TextureCache::ResolveTileModeInfo
TextureCache::TextureCache(D3D12CommandProcessor* command_processor,
RegisterFile* register_file,
bool bindless_resources_used,
SharedMemory* shared_memory)
: command_processor_(command_processor),
register_file_(register_file),
bindless_resources_used_(bindless_resources_used),
shared_memory_(shared_memory) {}
TextureCache::~TextureCache() { Shutdown(); }
@ -920,7 +922,8 @@ bool TextureCache::Initialize(bool edram_rov_used) {
// Not currently supported with the RTV/DSV output path for various reasons.
// As of November 27th, 2018, PIX doesn't support tiled buffers.
if (cvars::d3d12_resolution_scale >= 2 && edram_rov_used &&
provider->GetTiledResourcesTier() >= 1 &&
provider->GetTiledResourcesTier() !=
D3D12_TILED_RESOURCES_TIER_NOT_SUPPORTED &&
provider->GetGraphicsAnalysis() == nullptr &&
provider->GetVirtualAddressBitsPerResource() >=
kScaledResolveBufferSizeLog2) {
@ -947,28 +950,34 @@ bool TextureCache::Initialize(bool edram_rov_used) {
scaled_resolve_heap_count_ = 0;
// Create the loading root signature.
D3D12_ROOT_PARAMETER root_parameters[2];
// Parameter 0 is constants (changed very often when untiling).
D3D12_ROOT_PARAMETER root_parameters[3];
// Parameter 0 is constants (changed multiple times when untiling).
root_parameters[0].ParameterType = D3D12_ROOT_PARAMETER_TYPE_CBV;
root_parameters[0].Descriptor.ShaderRegister = 0;
root_parameters[0].Descriptor.RegisterSpace = 0;
root_parameters[0].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
// Parameter 1 is source and target.
D3D12_DESCRIPTOR_RANGE root_copy_ranges[2];
root_copy_ranges[0].RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV;
root_copy_ranges[0].NumDescriptors = 1;
root_copy_ranges[0].BaseShaderRegister = 0;
root_copy_ranges[0].RegisterSpace = 0;
root_copy_ranges[0].OffsetInDescriptorsFromTableStart = 0;
root_copy_ranges[1].RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_UAV;
root_copy_ranges[1].NumDescriptors = 1;
root_copy_ranges[1].BaseShaderRegister = 0;
root_copy_ranges[1].RegisterSpace = 0;
root_copy_ranges[1].OffsetInDescriptorsFromTableStart = 1;
// Parameter 1 is the destination.
D3D12_DESCRIPTOR_RANGE root_dest_range;
root_dest_range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_UAV;
root_dest_range.NumDescriptors = 1;
root_dest_range.BaseShaderRegister = 0;
root_dest_range.RegisterSpace = 0;
root_dest_range.OffsetInDescriptorsFromTableStart = 0;
root_parameters[1].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
root_parameters[1].DescriptorTable.NumDescriptorRanges = 2;
root_parameters[1].DescriptorTable.pDescriptorRanges = root_copy_ranges;
root_parameters[1].DescriptorTable.NumDescriptorRanges = 1;
root_parameters[1].DescriptorTable.pDescriptorRanges = &root_dest_range;
root_parameters[1].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
// Parameter 2 is the source.
D3D12_DESCRIPTOR_RANGE root_source_range;
root_source_range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV;
root_source_range.NumDescriptors = 1;
root_source_range.BaseShaderRegister = 0;
root_source_range.RegisterSpace = 0;
root_source_range.OffsetInDescriptorsFromTableStart = 0;
root_parameters[2].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
root_parameters[2].DescriptorTable.NumDescriptorRanges = 1;
root_parameters[2].DescriptorTable.pDescriptorRanges = &root_source_range;
root_parameters[2].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
D3D12_ROOT_SIGNATURE_DESC root_signature_desc;
root_signature_desc.NumParameters = UINT(xe::countof(root_parameters));
root_signature_desc.pParameters = root_parameters;
@ -1033,6 +1042,8 @@ bool TextureCache::Initialize(bool edram_rov_used) {
}
}
srv_descriptor_cache_allocated_ = 0;
// Create a heap with null SRV descriptors, since it's faster to copy a
// descriptor than to create an SRV, and null descriptors are used a lot (for
// the signed version when only unsigned is used, for instance).
@ -1137,6 +1148,14 @@ void TextureCache::ClearCache() {
Texture* texture = texture_pair.second;
shared_memory_->UnwatchMemoryRange(texture->base_watch_handle);
shared_memory_->UnwatchMemoryRange(texture->mip_watch_handle);
// Bindful descriptor cache will be cleared entirely now, so only release
// bindless descriptors.
if (bindless_resources_used_) {
for (auto descriptor_pair : texture->srv_descriptors) {
command_processor_->ReleaseViewBindlessDescriptorImmediately(
descriptor_pair.second);
}
}
texture->resource->Release();
delete texture;
}
@ -1148,6 +1167,7 @@ void TextureCache::ClearCache() {
// Clear texture descriptor cache.
srv_descriptor_cache_free_.clear();
srv_descriptor_cache_allocated_ = 0;
for (auto& page : srv_descriptor_cache_) {
page.heap->Release();
}
@ -1155,7 +1175,7 @@ void TextureCache::ClearCache() {
}
void TextureCache::TextureFetchConstantWritten(uint32_t index) {
texture_keys_in_sync_ &= ~(1u << index);
texture_bindings_in_sync_ &= ~(1u << index);
}
void TextureCache::BeginFrame() {
@ -1214,12 +1234,18 @@ void TextureCache::BeginFrame() {
// Exclude the texture from the memory usage counter.
textures_total_size_ -= texture->resource_size;
// Destroy the texture.
if (texture->cached_srv_descriptor_swizzle !=
Texture::kCachedSRVDescriptorSwizzleMissing) {
srv_descriptor_cache_free_.push_back(texture->cached_srv_descriptor);
}
shared_memory_->UnwatchMemoryRange(texture->base_watch_handle);
shared_memory_->UnwatchMemoryRange(texture->mip_watch_handle);
if (bindless_resources_used_) {
for (auto descriptor_pair : texture->srv_descriptors) {
command_processor_->ReleaseViewBindlessDescriptorImmediately(
descriptor_pair.second);
}
} else {
for (auto descriptor_pair : texture->srv_descriptors) {
srv_descriptor_cache_free_.push_back(descriptor_pair.second);
}
}
texture->resource->Release();
delete texture;
}
@ -1262,8 +1288,10 @@ void TextureCache::RequestTextures(uint32_t used_texture_mask) {
// loading may be needed in some draw call later, which may have the same
// key for some binding as before the invalidation, but texture_invalidated_
// being false (menu background in Halo 3).
std::memset(texture_bindings_, 0, sizeof(texture_bindings_));
texture_keys_in_sync_ = 0;
for (size_t i = 0; i < xe::countof(texture_bindings_); ++i) {
texture_bindings_[i].Clear();
}
texture_bindings_in_sync_ = 0;
}
// Update the texture keys and the textures.
@ -1272,7 +1300,7 @@ void TextureCache::RequestTextures(uint32_t used_texture_mask) {
while (xe::bit_scan_forward(textures_remaining, &index)) {
uint32_t index_bit = uint32_t(1) << index;
textures_remaining &= ~index_bit;
if (texture_keys_in_sync_ & index_bit) {
if (texture_bindings_in_sync_ & index_bit) {
continue;
}
TextureBinding& binding = texture_bindings_[index];
@ -1282,10 +1310,12 @@ void TextureCache::RequestTextures(uint32_t used_texture_mask) {
uint8_t old_swizzled_signs = binding.swizzled_signs;
BindingInfoFromFetchConstant(fetch, binding.key, &binding.host_swizzle,
&binding.swizzled_signs);
texture_keys_in_sync_ |= index_bit;
texture_bindings_in_sync_ |= index_bit;
if (binding.key.IsInvalid()) {
binding.texture = nullptr;
binding.texture_signed = nullptr;
binding.descriptor_index = UINT32_MAX;
binding.descriptor_index_signed = UINT32_MAX;
continue;
}
@ -1305,27 +1335,64 @@ void TextureCache::RequestTextures(uint32_t used_texture_mask) {
if (key_changed ||
!texture_util::IsAnySignNotSigned(old_swizzled_signs)) {
binding.texture = FindOrCreateTexture(binding.key);
binding.descriptor_index =
binding.texture
? FindOrCreateTextureDescriptor(*binding.texture, false,
binding.host_swizzle)
: UINT32_MAX;
load_unsigned_data = true;
}
} else {
binding.texture = nullptr;
binding.descriptor_index = UINT32_MAX;
}
if (texture_util::IsAnySignSigned(binding.swizzled_signs)) {
if (key_changed || !texture_util::IsAnySignSigned(old_swizzled_signs)) {
TextureKey signed_key = binding.key;
signed_key.signed_separate = 1;
binding.texture_signed = FindOrCreateTexture(signed_key);
binding.descriptor_index_signed =
binding.texture
? FindOrCreateTextureDescriptor(*binding.texture_signed, true,
binding.host_swizzle)
: UINT32_MAX;
load_signed_data = true;
}
} else {
binding.texture_signed = nullptr;
binding.descriptor_index_signed = UINT32_MAX;
}
} else {
// Same resource for both unsigned and signed, but descriptor formats may
// be different.
if (key_changed) {
binding.texture = FindOrCreateTexture(binding.key);
load_unsigned_data = true;
}
binding.texture_signed = nullptr;
if (texture_util::IsAnySignNotSigned(binding.swizzled_signs)) {
if (key_changed ||
!texture_util::IsAnySignNotSigned(old_swizzled_signs)) {
binding.descriptor_index =
binding.texture
? FindOrCreateTextureDescriptor(*binding.texture, false,
binding.host_swizzle)
: UINT32_MAX;
}
} else {
binding.descriptor_index = UINT32_MAX;
}
if (texture_util::IsAnySignSigned(binding.swizzled_signs)) {
if (key_changed || !texture_util::IsAnySignSigned(old_swizzled_signs)) {
binding.descriptor_index_signed =
binding.texture
? FindOrCreateTextureDescriptor(*binding.texture, true,
binding.host_swizzle)
: UINT32_MAX;
}
} else {
binding.descriptor_index_signed = UINT32_MAX;
}
}
if (load_unsigned_data && binding.texture != nullptr) {
LoadTextureData(binding.texture);
@ -1368,206 +1435,130 @@ void TextureCache::RequestTextures(uint32_t used_texture_mask) {
}
}
uint64_t TextureCache::GetDescriptorHashForActiveTextures(
const D3D12Shader::TextureSRV* texture_srvs,
uint32_t texture_srv_count) const {
XXH64_state_t hash_state;
XXH64_reset(&hash_state, 0);
for (uint32_t i = 0; i < texture_srv_count; ++i) {
const D3D12Shader::TextureSRV& texture_srv = texture_srvs[i];
// There can be multiple SRVs of the same texture.
XXH64_update(&hash_state, &texture_srv.dimension,
sizeof(texture_srv.dimension));
XXH64_update(&hash_state, &texture_srv.is_signed,
sizeof(texture_srv.is_signed));
bool TextureCache::AreActiveTextureSRVKeysUpToDate(
const TextureSRVKey* keys,
const D3D12Shader::TextureBinding* host_shader_bindings,
uint32_t host_shader_binding_count) const {
for (uint32_t i = 0; i < host_shader_binding_count; ++i) {
const TextureSRVKey& key = keys[i];
const TextureBinding& binding =
texture_bindings_[texture_srv.fetch_constant];
XXH64_update(&hash_state, &binding.key, sizeof(binding.key));
XXH64_update(&hash_state, &binding.host_swizzle,
sizeof(binding.host_swizzle));
XXH64_update(&hash_state, &binding.swizzled_signs,
sizeof(binding.swizzled_signs));
texture_bindings_[host_shader_bindings[i].fetch_constant];
if (key.key != binding.key || key.host_swizzle != binding.host_swizzle ||
key.swizzled_signs != binding.swizzled_signs) {
return false;
}
return XXH64_digest(&hash_state);
}
return true;
}
void TextureCache::WriteTextureSRV(const D3D12Shader::TextureSRV& texture_srv,
void TextureCache::WriteActiveTextureSRVKeys(
TextureSRVKey* keys,
const D3D12Shader::TextureBinding* host_shader_bindings,
uint32_t host_shader_binding_count) const {
for (uint32_t i = 0; i < host_shader_binding_count; ++i) {
TextureSRVKey& key = keys[i];
const TextureBinding& binding =
texture_bindings_[host_shader_bindings[i].fetch_constant];
key.key = binding.key;
key.host_swizzle = binding.host_swizzle;
key.swizzled_signs = binding.swizzled_signs;
}
}
void TextureCache::WriteActiveTextureBindfulSRV(
const D3D12Shader::TextureBinding& host_shader_binding,
D3D12_CPU_DESCRIPTOR_HANDLE handle) {
D3D12_SHADER_RESOURCE_VIEW_DESC desc;
desc.Format = DXGI_FORMAT_UNKNOWN;
Dimension binding_dimension;
uint32_t mip_max_level, array_size;
assert_false(bindless_resources_used_);
const TextureBinding& binding =
texture_bindings_[host_shader_binding.fetch_constant];
uint32_t descriptor_index = UINT32_MAX;
Texture* texture = nullptr;
ID3D12Resource* resource = nullptr;
const TextureBinding& binding = texture_bindings_[texture_srv.fetch_constant];
if (!binding.key.IsInvalid()) {
TextureFormat format = binding.key.format;
if (IsSignedVersionSeparate(format) && texture_srv.is_signed) {
texture = binding.texture_signed;
} else {
texture = binding.texture;
}
if (texture != nullptr) {
resource = texture->resource;
}
if (texture_srv.is_signed) {
if (!binding.key.IsInvalid() &&
AreDimensionsCompatible(host_shader_binding.dimension,
binding.key.dimension)) {
if (host_shader_binding.is_signed) {
// Not supporting signed compressed textures - hopefully DXN and DXT5A are
// not used as signed.
if (texture_util::IsAnySignSigned(binding.swizzled_signs)) {
desc.Format = host_formats_[uint32_t(format)].dxgi_format_snorm;
if (desc.Format == DXGI_FORMAT_UNKNOWN) {
unsupported_format_features_used_[uint32_t(format)] |=
kUnsupportedSnormBit;
}
descriptor_index = binding.descriptor_index_signed;
texture = IsSignedVersionSeparate(binding.key.format)
? binding.texture_signed
: binding.texture;
}
} else {
if (texture_util::IsAnySignNotSigned(binding.swizzled_signs)) {
desc.Format = GetDXGIUnormFormat(binding.key);
if (desc.Format == DXGI_FORMAT_UNKNOWN) {
unsupported_format_features_used_[uint32_t(format)] |=
kUnsupportedUnormBit;
descriptor_index = binding.descriptor_index;
texture = binding.texture;
}
}
}
binding_dimension = binding.key.dimension;
mip_max_level = binding.key.mip_max_level;
array_size = binding.key.depth;
// XE_GPU_SWIZZLE and D3D12_SHADER_COMPONENT_MAPPING are the same except for
// one bit.
desc.Shader4ComponentMapping =
binding.host_swizzle |
D3D12_SHADER_COMPONENT_MAPPING_ALWAYS_SET_BIT_AVOIDING_ZEROMEM_MISTAKES;
auto provider = command_processor_->GetD3D12Context()->GetD3D12Provider();
D3D12_CPU_DESCRIPTOR_HANDLE source_handle;
if (descriptor_index != UINT32_MAX) {
assert_not_null(texture);
MarkTextureUsed(texture);
source_handle = GetTextureDescriptorCPUHandle(descriptor_index);
} else {
binding_dimension = Dimension::k2D;
mip_max_level = 0;
array_size = 1;
desc.Shader4ComponentMapping = D3D12_ENCODE_SHADER_4_COMPONENT_MAPPING(
D3D12_SHADER_COMPONENT_MAPPING_FORCE_VALUE_0,
D3D12_SHADER_COMPONENT_MAPPING_FORCE_VALUE_0,
D3D12_SHADER_COMPONENT_MAPPING_FORCE_VALUE_0,
D3D12_SHADER_COMPONENT_MAPPING_FORCE_VALUE_0);
}
if (desc.Format == DXGI_FORMAT_UNKNOWN) {
// A null descriptor must still have a valid format.
desc.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
resource = nullptr;
}
NullSRVDescriptorIndex null_descriptor_index;
switch (texture_srv.dimension) {
switch (host_shader_binding.dimension) {
case TextureDimension::k3D:
desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE3D;
desc.Texture3D.MostDetailedMip = 0;
desc.Texture3D.MipLevels = mip_max_level + 1;
desc.Texture3D.ResourceMinLODClamp = 0.0f;
if (binding_dimension != Dimension::k3D) {
// Create a null descriptor so it's safe to sample this texture even
// though it has different dimensions.
resource = nullptr;
}
null_descriptor_index = NullSRVDescriptorIndex::k3D;
break;
case TextureDimension::kCube:
desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURECUBE;
desc.TextureCube.MostDetailedMip = 0;
desc.TextureCube.MipLevels = mip_max_level + 1;
desc.TextureCube.ResourceMinLODClamp = 0.0f;
if (binding_dimension != Dimension::kCube) {
resource = nullptr;
}
null_descriptor_index = NullSRVDescriptorIndex::kCube;
break;
default:
desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2DARRAY;
desc.Texture2DArray.MostDetailedMip = 0;
desc.Texture2DArray.MipLevels = mip_max_level + 1;
desc.Texture2DArray.FirstArraySlice = 0;
desc.Texture2DArray.ArraySize = array_size;
desc.Texture2DArray.PlaneSlice = 0;
desc.Texture2DArray.ResourceMinLODClamp = 0.0f;
if (binding_dimension == Dimension::k3D ||
binding_dimension == Dimension::kCube) {
resource = nullptr;
}
assert_true(host_shader_binding.dimension == TextureDimension::k1D ||
host_shader_binding.dimension == TextureDimension::k2D);
null_descriptor_index = NullSRVDescriptorIndex::k2DArray;
break;
}
auto provider = command_processor_->GetD3D12Context()->GetD3D12Provider();
source_handle = provider->OffsetViewDescriptor(
null_srv_descriptor_heap_start_, uint32_t(null_descriptor_index));
}
auto device = provider->GetDevice();
if (resource == nullptr) {
// Copy a pre-made null descriptor since it's faster than to create an SRV.
device->CopyDescriptorsSimple(
1, handle,
provider->OffsetViewDescriptor(null_srv_descriptor_heap_start_,
uint32_t(null_descriptor_index)),
{
#if FINE_GRAINED_DRAW_SCOPES
SCOPE_profile_cpu_i(
"gpu",
"xe::gpu::d3d12::TextureCache::WriteActiveTextureBindfulSRV->"
"CopyDescriptorsSimple");
#endif // FINE_GRAINED_DRAW_SCOPES
device->CopyDescriptorsSimple(1, handle, source_handle,
D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
return;
}
MarkTextureUsed(texture);
// Take the descriptor from the cache if it's cached, or create a new one in
// the cache, or directly if this texture was already used with a different
// swizzle. Profiling results say that CreateShaderResourceView takes the
// longest time of draw call processing, and it's very noticeable in many
// games.
bool cached_handle_available = false;
D3D12_CPU_DESCRIPTOR_HANDLE cached_handle = {};
assert_not_null(texture);
if (texture->cached_srv_descriptor_swizzle !=
Texture::kCachedSRVDescriptorSwizzleMissing) {
// Use an existing cached descriptor if it has the needed swizzle.
if (binding.host_swizzle == texture->cached_srv_descriptor_swizzle) {
cached_handle_available = true;
cached_handle = texture->cached_srv_descriptor;
}
uint32_t TextureCache::GetActiveTextureBindlessSRVIndex(
const D3D12Shader::TextureBinding& host_shader_binding) {
assert_true(bindless_resources_used_);
uint32_t descriptor_index = UINT32_MAX;
const TextureBinding& binding =
texture_bindings_[host_shader_binding.fetch_constant];
if (!binding.key.IsInvalid() &&
AreDimensionsCompatible(host_shader_binding.dimension,
binding.key.dimension)) {
descriptor_index = host_shader_binding.is_signed
? binding.descriptor_index_signed
: binding.descriptor_index;
}
} else {
// Try to create a new cached descriptor if it doesn't exist yet.
if (!srv_descriptor_cache_free_.empty()) {
cached_handle_available = true;
cached_handle = srv_descriptor_cache_free_.back();
srv_descriptor_cache_free_.pop_back();
} else if (srv_descriptor_cache_.empty() ||
srv_descriptor_cache_.back().current_usage >=
SRVDescriptorCachePage::kHeapSize) {
D3D12_DESCRIPTOR_HEAP_DESC new_heap_desc;
new_heap_desc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV;
new_heap_desc.NumDescriptors = SRVDescriptorCachePage::kHeapSize;
new_heap_desc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_NONE;
new_heap_desc.NodeMask = 0;
ID3D12DescriptorHeap* new_heap;
if (SUCCEEDED(device->CreateDescriptorHeap(&new_heap_desc,
IID_PPV_ARGS(&new_heap)))) {
SRVDescriptorCachePage new_page;
new_page.heap = new_heap;
new_page.heap_start = new_heap->GetCPUDescriptorHandleForHeapStart();
new_page.current_usage = 1;
cached_handle_available = true;
cached_handle = new_page.heap_start;
srv_descriptor_cache_.push_back(new_page);
}
} else {
SRVDescriptorCachePage& page = srv_descriptor_cache_.back();
cached_handle_available = true;
cached_handle =
provider->OffsetViewDescriptor(page.heap_start, page.current_usage);
++page.current_usage;
}
if (cached_handle_available) {
device->CreateShaderResourceView(resource, &desc, cached_handle);
texture->cached_srv_descriptor = cached_handle;
texture->cached_srv_descriptor_swizzle = binding.host_swizzle;
if (descriptor_index == UINT32_MAX) {
switch (host_shader_binding.dimension) {
case TextureDimension::k3D:
descriptor_index =
uint32_t(D3D12CommandProcessor::SystemBindlessView::kNullTexture3D);
break;
case TextureDimension::kCube:
descriptor_index = uint32_t(
D3D12CommandProcessor::SystemBindlessView::kNullTextureCube);
break;
default:
assert_true(host_shader_binding.dimension == TextureDimension::k1D ||
host_shader_binding.dimension == TextureDimension::k2D);
descriptor_index = uint32_t(
D3D12CommandProcessor::SystemBindlessView::kNullTexture2DArray);
}
}
if (cached_handle_available) {
device->CopyDescriptorsSimple(1, handle, cached_handle,
D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
} else {
device->CreateShaderResourceView(resource, &desc, handle);
}
return descriptor_index;
}
TextureCache::SamplerParameters TextureCache::GetSamplerParameters(
@ -1583,12 +1574,11 @@ TextureCache::SamplerParameters TextureCache::GetSamplerParameters(
parameters.clamp_z = fetch.clamp_z;
parameters.border_color = fetch.border_color;
uint32_t mip_min_level, mip_max_level;
uint32_t mip_min_level;
texture_util::GetSubresourcesFromFetchConstant(
fetch, nullptr, nullptr, nullptr, nullptr, nullptr, &mip_min_level,
&mip_max_level, binding.mip_filter);
nullptr, binding.mip_filter);
parameters.mip_min_level = mip_min_level;
parameters.mip_max_level = std::max(mip_max_level, mip_min_level);
AnisoFilter aniso_filter = binding.aniso_filter == AnisoFilter::kUseFetchConst
? fetch.aniso_filter
@ -1675,7 +1665,8 @@ void TextureCache::WriteSampler(SamplerParameters parameters,
desc.BorderColor[3] = 0.0f;
}
desc.MinLOD = float(parameters.mip_min_level);
desc.MaxLOD = float(parameters.mip_max_level);
// Maximum mip level is in the texture resource itself.
desc.MaxLOD = FLT_MAX;
auto device =
command_processor_->GetD3D12Context()->GetD3D12Provider()->GetDevice();
device->CreateSampler(&desc, handle);
@ -1737,8 +1728,8 @@ bool TextureCache::TileResolvedTexture(
resolve_tile_mode_info_[uint32_t(resolve_tile_mode)];
auto command_list = command_processor_->GetDeferredCommandList();
auto provider = command_processor_->GetD3D12Context()->GetD3D12Provider();
auto device = provider->GetDevice();
auto device =
command_processor_->GetD3D12Context()->GetD3D12Provider()->GetDevice();
uint32_t resolution_scale_log2 = IsResolutionScale2X() ? 1 : 0;
texture_base &= 0x1FFFFFFF;
@ -1811,12 +1802,8 @@ bool TextureCache::TileResolvedTexture(
}
// Tile the texture.
D3D12_CPU_DESCRIPTOR_HANDLE descriptor_cpu_start;
D3D12_GPU_DESCRIPTOR_HANDLE descriptor_gpu_start;
if (command_processor_->RequestViewDescriptors(
ui::d3d12::DescriptorHeapPool::kHeapIndexInvalid, 2, 2,
descriptor_cpu_start, descriptor_gpu_start) ==
ui::d3d12::DescriptorHeapPool::kHeapIndexInvalid) {
ui::d3d12::util::DescriptorCPUGPUHandlePair descriptors[2];
if (!command_processor_->RequestOneUseSingleViewDescriptors(2, descriptors)) {
return false;
}
if (resolution_scale_log2) {
@ -1826,19 +1813,15 @@ bool TextureCache::TileResolvedTexture(
}
command_processor_->SubmitBarriers();
command_list->D3DSetComputeRootSignature(resolve_tile_root_signature_);
ResolveTileConstants resolve_tile_constants;
resolve_tile_constants.info = uint32_t(endian) | (uint32_t(format) << 3) |
(resolution_scale_log2 << 9) |
((texture_pitch >> 5) << 10) |
(is_3d ? ((texture_height >> 5) << 19) : 0);
resolve_tile_constants.offset = offset_x | (offset_y << 5) | (offset_z << 10);
resolve_tile_constants.size = resolve_width | (resolve_height << 16);
resolve_tile_constants.host_base = uint32_t(footprint.Offset);
resolve_tile_constants.host_pitch = uint32_t(footprint.Footprint.RowPitch);
ui::d3d12::util::CreateRawBufferSRV(device, descriptor_cpu_start, buffer,
// TODO(Triang3l): Use precreated bindless descriptors here after overall
// cleanup/optimization involving typed buffers.
ui::d3d12::util::CreateRawBufferSRV(device, descriptors[1].first, buffer,
buffer_size);
D3D12_CPU_DESCRIPTOR_HANDLE descriptor_cpu_uav =
provider->OffsetViewDescriptor(descriptor_cpu_start, 1);
command_list->D3DSetComputeRootDescriptorTable(2, descriptors[1].second);
if (resolve_tile_mode_info.typed_uav_format != DXGI_FORMAT_UNKNOWN) {
// Not sure if this alignment is actually needed in Direct3D 12, but for
// safety. Also not using the full 512 MB buffer as a typed UAV because
@ -1862,22 +1845,32 @@ bool TextureCache::TileResolvedTexture(
device->CreateUnorderedAccessView(resolution_scale_log2
? scaled_resolve_buffer_
: shared_memory_->GetBuffer(),
nullptr, &uav_desc, descriptor_cpu_uav);
nullptr, &uav_desc, descriptors[0].first);
} else {
if (resolution_scale_log2) {
resolve_tile_constants.guest_base = texture_base & 0xFFF;
CreateScaledResolveBufferRawUAV(
descriptor_cpu_uav, texture_base >> 12,
descriptors[0].first, texture_base >> 12,
((texture_base + texture_size - 1) >> 12) - (texture_base >> 12) + 1);
} else {
resolve_tile_constants.guest_base = texture_base;
shared_memory_->WriteRawUAVDescriptor(descriptor_cpu_uav);
shared_memory_->WriteRawUAVDescriptor(descriptors[0].first);
}
}
command_list->D3DSetComputeRootDescriptorTable(1, descriptor_gpu_start);
command_list->D3DSetComputeRootDescriptorTable(1, descriptors[0].second);
resolve_tile_constants.info = uint32_t(endian) | (uint32_t(format) << 3) |
(resolution_scale_log2 << 9) |
((texture_pitch >> 5) << 10) |
(is_3d ? ((texture_height >> 5) << 19) : 0);
resolve_tile_constants.offset = offset_x | (offset_y << 5) | (offset_z << 10);
resolve_tile_constants.size = resolve_width | (resolve_height << 16);
resolve_tile_constants.host_base = uint32_t(footprint.Offset);
resolve_tile_constants.host_pitch = uint32_t(footprint.Footprint.RowPitch);
command_list->D3DSetComputeRoot32BitConstants(
0, sizeof(resolve_tile_constants) / sizeof(uint32_t),
&resolve_tile_constants, 0);
command_processor_->SetComputePipeline(
resolve_tile_pipelines_[uint32_t(resolve_tile_mode)]);
// Each group processes 32x32 texels after resolution scaling has been
@ -2339,8 +2332,6 @@ TextureCache::Texture* TextureCache::FindOrCreateTexture(TextureKey key) {
}
texture->base_watch_handle = nullptr;
texture->mip_watch_handle = nullptr;
texture->cached_srv_descriptor_swizzle =
Texture::kCachedSRVDescriptorSwizzleMissing;
textures_.insert(std::make_pair(map_key, texture));
COUNT_profile_set("gpu/texture_cache/textures", textures_.size());
textures_total_size_ += texture->resource_size;
@ -2364,8 +2355,8 @@ bool TextureCache::LoadTextureData(Texture* texture) {
}
auto command_list = command_processor_->GetDeferredCommandList();
auto provider = command_processor_->GetD3D12Context()->GetD3D12Provider();
auto device = provider->GetDevice();
auto device =
command_processor_->GetD3D12Context()->GetD3D12Provider()->GetDevice();
// Get the pipeline.
LoadMode load_mode = GetLoadMode(texture->key);
@ -2453,16 +2444,19 @@ bool TextureCache::LoadTextureData(Texture* texture) {
// descriptors for base and mips.
bool separate_base_and_mips_descriptors =
scaled_resolve && mip_first == 0 && mip_last != 0;
// TODO(Triang3l): Use precreated bindless descriptors here after overall
// cleanup/optimization involving typed buffers.
uint32_t descriptor_count = separate_base_and_mips_descriptors ? 4 : 2;
D3D12_CPU_DESCRIPTOR_HANDLE descriptor_cpu_start;
D3D12_GPU_DESCRIPTOR_HANDLE descriptor_gpu_start;
if (command_processor_->RequestViewDescriptors(
ui::d3d12::DescriptorHeapPool::kHeapIndexInvalid, descriptor_count,
descriptor_count, descriptor_cpu_start, descriptor_gpu_start) ==
ui::d3d12::DescriptorHeapPool::kHeapIndexInvalid) {
command_processor_->ReleaseScratchGPUBuffer(copy_buffer, copy_buffer_state);
ui::d3d12::util::DescriptorCPUGPUHandlePair descriptors[4];
if (!command_processor_->RequestOneUseSingleViewDescriptors(descriptor_count,
descriptors)) {
return false;
}
// Create two destination descriptors since the table has both.
for (uint32_t i = 0; i < descriptor_count; i += 2) {
ui::d3d12::util::CreateRawBufferUAV(device, descriptors[i].first,
copy_buffer, uint32_t(host_slice_size));
}
if (scaled_resolve) {
// TODO(Triang3l): Allow partial invalidation of scaled textures - send a
// part of scaled_resolve_pages_ to the shader and choose the source
@ -2470,35 +2464,28 @@ bool TextureCache::LoadTextureData(Texture* texture) {
// it's not, duplicate the texels from the unscaled version - will be
// blocky with filtering, but better than nothing.
UseScaledResolveBufferForReading();
uint32_t srv_descriptor_offset = 0;
uint32_t source_descriptor_index = 1;
if (mip_first == 0) {
CreateScaledResolveBufferRawSRV(
provider->OffsetViewDescriptor(descriptor_cpu_start,
srv_descriptor_offset),
texture->key.base_page, (texture->base_size + 0xFFF) >> 12);
srv_descriptor_offset += 2;
descriptors[source_descriptor_index].first, texture->key.base_page,
(texture->base_size + 0xFFF) >> 12);
source_descriptor_index += 2;
}
if (mip_last != 0) {
CreateScaledResolveBufferRawSRV(
provider->OffsetViewDescriptor(descriptor_cpu_start,
srv_descriptor_offset),
texture->key.mip_page, (texture->mip_size + 0xFFF) >> 12);
descriptors[source_descriptor_index].first, texture->key.mip_page,
(texture->mip_size + 0xFFF) >> 12);
}
} else {
shared_memory_->UseForReading();
shared_memory_->WriteRawSRVDescriptor(descriptor_cpu_start);
}
// Create two destination descriptors since the table has both.
for (uint32_t i = 1; i < descriptor_count; i += 2) {
ui::d3d12::util::CreateRawBufferUAV(
device, provider->OffsetViewDescriptor(descriptor_cpu_start, i),
copy_buffer, uint32_t(host_slice_size));
shared_memory_->WriteRawSRVDescriptor(descriptors[1].first);
}
command_processor_->SetComputePipeline(pipeline);
command_list->D3DSetComputeRootSignature(load_root_signature_);
if (!separate_base_and_mips_descriptors) {
// Will be bound later.
command_list->D3DSetComputeRootDescriptorTable(1, descriptor_gpu_start);
// Will be bound later if separate base and mip descriptors.
command_list->D3DSetComputeRootDescriptorTable(2, descriptors[1].second);
command_list->D3DSetComputeRootDescriptorTable(1, descriptors[0].second);
}
// Submit commands.
@ -2575,14 +2562,11 @@ bool TextureCache::LoadTextureData(Texture* texture) {
}
std::memcpy(cbuffer_mapping, &load_constants, sizeof(load_constants));
command_list->D3DSetComputeRootConstantBufferView(0, cbuffer_gpu_address);
if (separate_base_and_mips_descriptors) {
if (j == 0) {
command_list->D3DSetComputeRootDescriptorTable(1,
descriptor_gpu_start);
} else if (j == 1) {
if (separate_base_and_mips_descriptors && j <= 1) {
command_list->D3DSetComputeRootDescriptorTable(
1, provider->OffsetViewDescriptor(descriptor_gpu_start, 2));
}
2, descriptors[j * 2 + 1].second);
command_list->D3DSetComputeRootDescriptorTable(
1, descriptors[j * 2].second);
}
command_processor_->SubmitBarriers();
// Each thread group processes 32x32x1 blocks after resolution scaling has
@ -2642,6 +2626,138 @@ bool TextureCache::LoadTextureData(Texture* texture) {
return true;
}
// Returns the index of an existing or a newly created descriptor viewing the
// texture with the requested signedness and host component swizzle, or
// UINT32_MAX on failure. For bindful binding the index is into the
// non-shader-visible SRV descriptor cache pages; for bindless, into the
// global shader-visible view descriptor heap.
uint32_t TextureCache::FindOrCreateTextureDescriptor(Texture& texture,
                                                     bool is_signed,
                                                     uint32_t host_swizzle) {
  // Signedness and swizzle together fully identify one view of the texture.
  uint32_t descriptor_key = uint32_t(is_signed) | (host_swizzle << 1);
  // Try to find an existing descriptor.
  auto it = texture.srv_descriptors.find(descriptor_key);
  if (it != texture.srv_descriptors.end()) {
    return it->second;
  }
  // Create a new bindless or cached descriptor if supported.
  D3D12_SHADER_RESOURCE_VIEW_DESC desc;
  TextureFormat format = texture.key.format;
  if (IsSignedVersionSeparate(format) &&
      texture.key.signed_separate != uint32_t(is_signed)) {
    // Not the version with the needed signedness.
    return UINT32_MAX;
  }
  if (is_signed) {
    // Not supporting signed compressed textures - hopefully DXN and DXT5A are
    // not used as signed.
    desc.Format = host_formats_[uint32_t(format)].dxgi_format_snorm;
  } else {
    desc.Format = GetDXGIUnormFormat(texture.key);
  }
  if (desc.Format == DXGI_FORMAT_UNKNOWN) {
    // Record that the format was requested with this signedness but couldn't
    // be served, for unsupported-feature reporting.
    unsupported_format_features_used_[uint32_t(format)] |=
        is_signed ? kUnsupportedSnormBit : kUnsupportedUnormBit;
    return UINT32_MAX;
  }
  uint32_t mip_levels = texture.key.mip_max_level + 1;
  switch (texture.key.dimension) {
    case Dimension::k1D:
    case Dimension::k2D:
      // 1D and 2D guest textures are both viewed as 2D arrays on the host.
      desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2DARRAY;
      desc.Texture2DArray.MostDetailedMip = 0;
      desc.Texture2DArray.MipLevels = mip_levels;
      desc.Texture2DArray.FirstArraySlice = 0;
      desc.Texture2DArray.ArraySize = texture.key.depth;
      desc.Texture2DArray.PlaneSlice = 0;
      desc.Texture2DArray.ResourceMinLODClamp = 0.0f;
      break;
    case Dimension::k3D:
      desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE3D;
      desc.Texture3D.MostDetailedMip = 0;
      desc.Texture3D.MipLevels = mip_levels;
      desc.Texture3D.ResourceMinLODClamp = 0.0f;
      break;
    case Dimension::kCube:
      desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURECUBE;
      desc.TextureCube.MostDetailedMip = 0;
      desc.TextureCube.MipLevels = mip_levels;
      desc.TextureCube.ResourceMinLODClamp = 0.0f;
      break;
    default:
      assert_unhandled_case(texture.key.dimension);
      return UINT32_MAX;
  }
  desc.Shader4ComponentMapping =
      host_swizzle |
      D3D12_SHADER_COMPONENT_MAPPING_ALWAYS_SET_BIT_AVOIDING_ZEROMEM_MISTAKES;
  auto device =
      command_processor_->GetD3D12Context()->GetD3D12Provider()->GetDevice();
  uint32_t descriptor_index;
  if (bindless_resources_used_) {
    // Bindless - allocate a persistent slot in the global shader-visible view
    // heap from the command processor.
    descriptor_index =
        command_processor_->RequestPersistentViewBindlessDescriptor();
    if (descriptor_index == UINT32_MAX) {
      XELOGE(
          "Failed to create a texture descriptor - no free bindless view "
          "descriptors");
      return UINT32_MAX;
    }
  } else {
    // Bindful - reuse a descriptor slot freed by a deleted texture if one is
    // available, otherwise grow the non-shader-visible descriptor cache.
    if (!srv_descriptor_cache_free_.empty()) {
      descriptor_index = srv_descriptor_cache_free_.back();
      srv_descriptor_cache_free_.pop_back();
    } else {
      // Allocated + 1 (including the descriptor that is being added), rounded
      // up to SRVDescriptorCachePage::kHeapSize, (allocated + 1 + size - 1).
      uint32_t cache_pages_needed = (srv_descriptor_cache_allocated_ +
                                     SRVDescriptorCachePage::kHeapSize) /
                                    SRVDescriptorCachePage::kHeapSize;
      if (srv_descriptor_cache_.size() < cache_pages_needed) {
        D3D12_DESCRIPTOR_HEAP_DESC cache_heap_desc;
        cache_heap_desc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV;
        cache_heap_desc.NumDescriptors = SRVDescriptorCachePage::kHeapSize;
        cache_heap_desc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_NONE;
        cache_heap_desc.NodeMask = 0;
        while (srv_descriptor_cache_.size() < cache_pages_needed) {
          SRVDescriptorCachePage cache_page;
          if (FAILED(device->CreateDescriptorHeap(
                  &cache_heap_desc, IID_PPV_ARGS(&cache_page.heap)))) {
            XELOGE(
                "Failed to create a texture descriptor - couldn't create a "
                "descriptor cache heap");
            return UINT32_MAX;
          }
          cache_page.heap_start =
              cache_page.heap->GetCPUDescriptorHandleForHeapStart();
          srv_descriptor_cache_.push_back(cache_page);
        }
      }
      // Only consume an index once the page holding it surely exists, so the
      // allocation counter stays consistent with the cache pages on failure.
      descriptor_index = srv_descriptor_cache_allocated_++;
    }
  }
  // Write the view into the chosen slot and remember it for later lookups
  // with the same key.
  device->CreateShaderResourceView(
      texture.resource, &desc, GetTextureDescriptorCPUHandle(descriptor_index));
  texture.srv_descriptors.insert({descriptor_key, descriptor_index});
  return descriptor_index;
}
// Resolves a descriptor index returned by FindOrCreateTextureDescriptor to a
// CPU handle - a slot in the shader-visible bindless heap when bindless
// resources are used, or in a non-shader-visible descriptor cache page
// otherwise.
D3D12_CPU_DESCRIPTOR_HANDLE TextureCache::GetTextureDescriptorCPUHandle(
    uint32_t descriptor_index) const {
  auto d3d12_provider =
      command_processor_->GetD3D12Context()->GetD3D12Provider();
  if (bindless_resources_used_) {
    // Bindless indices are offsets into the global view heap.
    return d3d12_provider->OffsetViewDescriptor(
        command_processor_->GetViewBindlessHeapCPUStart(), descriptor_index);
  }
  // Bindful - locate the cache page containing the descriptor, then offset
  // within that page.
  uint32_t page_index = descriptor_index / SRVDescriptorCachePage::kHeapSize;
  uint32_t index_in_page = descriptor_index % SRVDescriptorCachePage::kHeapSize;
  return d3d12_provider->OffsetViewDescriptor(
      srv_descriptor_cache_[page_index].heap_start, index_in_page);
}
void TextureCache::MarkTextureUsed(Texture* texture) {
uint64_t current_frame = command_processor_->GetCurrentFrame();
// This is called very frequently, don't relink unless needed for caching.
@ -2687,8 +2803,10 @@ void TextureCache::WatchCallback(Texture* texture, bool is_mip) {
}
void TextureCache::ClearBindings() {
std::memset(texture_bindings_, 0, sizeof(texture_bindings_));
texture_keys_in_sync_ = 0;
for (size_t i = 0; i < xe::countof(texture_bindings_); ++i) {
texture_bindings_[i].Clear();
}
texture_bindings_in_sync_ = 0;
// Already reset everything.
texture_invalidated_.store(false, std::memory_order_relaxed);
}

View File

@ -11,7 +11,9 @@
#define XENIA_GPU_D3D12_TEXTURE_CACHE_H_
#include <atomic>
#include <cstring>
#include <unordered_map>
#include <utility>
#include "xenia/base/mutex.h"
#include "xenia/gpu/d3d12/d3d12_shader.h"
@ -55,9 +57,84 @@ class D3D12CommandProcessor;
// MipAddress but no BaseAddress to save memory because textures are streamed
// this way anyway.
class TextureCache {
  // 88-bit key fully identifying one guest texture. The bit-field view and
  // the map_key/bucket_key view alias the same storage.
  union TextureKey {
    struct {
      // Physical 4 KB page with the base mip level, disregarding A/C/E address
      // range prefix.
      uint32_t base_page : 17;  // 17 total
      Dimension dimension : 2;  // 19
      uint32_t width : 13;      // 32
      uint32_t height : 13;     // 45
      uint32_t tiled : 1;       // 46
      uint32_t packed_mips : 1;  // 47
      // Physical 4 KB page with mip 1 and smaller.
      uint32_t mip_page : 17;  // 64
      // Layers for stacked and 3D, 6 for cube, 1 for other dimensions.
      uint32_t depth : 10;         // 74
      uint32_t mip_max_level : 4;  // 78
      TextureFormat format : 6;    // 84
      Endian endianness : 2;       // 86
      // Whether this texture is signed and has a different host representation
      // than an unsigned view of the same guest texture.
      uint32_t signed_separate : 1;  // 87
      // Whether this texture is a 2x-scaled resolve target.
      uint32_t scaled_resolve : 1;  // 88
    };
    struct {
      // The key used for unordered_multimap lookup. Single uint32_t instead of
      // a uint64_t so XXH hash can be calculated in a stable way due to no
      // padding.
      uint32_t map_key[2];
      // The key used to identify one texture within unordered_multimap buckets.
      uint32_t bucket_key;
    };
    // Default-constructed keys are invalid, with all storage zeroed.
    TextureKey() { MakeInvalid(); }
    // Copy and compare through the raw uint32_t view rather than through the
    // bit-fields, so every stored bit is transferred.
    TextureKey(const TextureKey& key) {
      SetMapKey(key.GetMapKey());
      bucket_key = key.bucket_key;
    }
    TextureKey& operator=(const TextureKey& key) {
      SetMapKey(key.GetMapKey());
      bucket_key = key.bucket_key;
      return *this;
    }
    bool operator==(const TextureKey& key) const {
      return GetMapKey() == key.GetMapKey() && bucket_key == key.bucket_key;
    }
    bool operator!=(const TextureKey& key) const {
      return GetMapKey() != key.GetMapKey() || bucket_key != key.bucket_key;
    }
    // The 64-bit portion hashed for unordered_multimap lookup.
    inline uint64_t GetMapKey() const {
      return uint64_t(map_key[0]) | (uint64_t(map_key[1]) << 32);
    }
    inline void SetMapKey(uint64_t key) {
      map_key[0] = uint32_t(key);
      map_key[1] = uint32_t(key >> 32);
    }
    inline bool IsInvalid() const {
      // Zero base and zero width is enough for a binding to be invalid.
      return map_key[0] == 0;
    }
    inline void MakeInvalid() {
      // Reset all for a stable hash.
      SetMapKey(0);
      bucket_key = 0;
    }
  };
public:
  // Key that can be stored externally to check whether the descriptors for
  // host shader bindings are still up to date.
  struct TextureSRVKey {
    // Texture parameters of the binding.
    TextureKey key;
    // Host component swizzle of the binding.
    uint32_t host_swizzle;
    // Post-swizzle signedness of the binding's components.
    uint8_t swizzled_signs;
  };
// Sampler parameters that can be directly converted to a host sampler or used
// for binding hashing.
// for binding checking validity whether samplers are up to date.
union SamplerParameters {
struct {
ClampMode clamp_x : 3; // 3
@ -70,7 +147,7 @@ class TextureCache {
uint32_t mip_linear : 1; // 14
AnisoFilter aniso_filter : 3; // 17
uint32_t mip_min_level : 4; // 21
uint32_t mip_max_level : 4; // 25
// Maximum mip level is in the texture resource itself.
};
uint32_t value;
@ -91,7 +168,8 @@ class TextureCache {
};
TextureCache(D3D12CommandProcessor* command_processor,
RegisterFile* register_file, SharedMemory* shared_memory);
RegisterFile* register_file, bool bindless_resources_used,
SharedMemory* shared_memory);
~TextureCache();
bool Initialize(bool edram_rov_used);
@ -109,19 +187,33 @@ class TextureCache {
// binding the actual drawing pipeline.
void RequestTextures(uint32_t used_texture_mask);
// Returns the hash of the current bindings (must be called after
// RequestTextures) for the provided SRV descriptor layout.
uint64_t GetDescriptorHashForActiveTextures(
const D3D12Shader::TextureSRV* texture_srvs,
uint32_t texture_srv_count) const;
// "ActiveTexture" means as of the latest RequestTextures call.
// Returns whether texture SRV keys stored externally are still valid for the
// current bindings and host shader binding layout. Both keys and
// host_shader_bindings must have host_shader_binding_count elements
// (otherwise they are incompatible - like if this function returned false).
bool AreActiveTextureSRVKeysUpToDate(
const TextureSRVKey* keys,
const D3D12Shader::TextureBinding* host_shader_bindings,
uint32_t host_shader_binding_count) const;
// Exports the current binding data to texture SRV keys so they can be stored
// for checking whether subsequent draw calls can keep using the same
// bindings. Writes host_shader_binding_count keys.
void WriteActiveTextureSRVKeys(
TextureSRVKey* keys,
const D3D12Shader::TextureBinding* host_shader_bindings,
uint32_t host_shader_binding_count) const;
// Returns the post-swizzle signedness of a currently bound texture (must be
// called after RequestTextures).
uint8_t GetActiveTextureSwizzledSigns(uint32_t index) const {
return texture_bindings_[index].swizzled_signs;
}
void WriteTextureSRV(const D3D12Shader::TextureSRV& texture_srv,
void WriteActiveTextureBindfulSRV(
const D3D12Shader::TextureBinding& host_shader_binding,
D3D12_CPU_DESCRIPTOR_HANDLE handle);
uint32_t GetActiveTextureBindlessSRVIndex(
const D3D12Shader::TextureBinding& host_shader_binding);
SamplerParameters GetSamplerParameters(
const D3D12Shader::SamplerBinding& binding) const;
@ -276,73 +368,6 @@ class TextureCache {
uint8_t swizzle[4];
};
  // 88-bit key fully identifying one guest texture. The bit-field view and
  // the map_key/bucket_key view alias the same storage.
  union TextureKey {
    struct {
      // Physical 4 KB page with the base mip level, disregarding A/C/E address
      // range prefix.
      uint32_t base_page : 17;  // 17 total
      Dimension dimension : 2;  // 19
      uint32_t width : 13;      // 32
      uint32_t height : 13;     // 45
      uint32_t tiled : 1;       // 46
      uint32_t packed_mips : 1;  // 47
      // Physical 4 KB page with mip 1 and smaller.
      uint32_t mip_page : 17;  // 64
      // Layers for stacked and 3D, 6 for cube, 1 for other dimensions.
      uint32_t depth : 10;         // 74
      uint32_t mip_max_level : 4;  // 78
      TextureFormat format : 6;    // 84
      Endian endianness : 2;       // 86
      // Whether this texture is signed and has a different host representation
      // than an unsigned view of the same guest texture.
      uint32_t signed_separate : 1;  // 87
      // Whether this texture is a 2x-scaled resolve target.
      uint32_t scaled_resolve : 1;  // 88
    };
    struct {
      // The key used for unordered_multimap lookup. Single uint32_t instead of
      // a uint64_t so XXH hash can be calculated in a stable way due to no
      // padding.
      uint32_t map_key[2];
      // The key used to identify one texture within unordered_multimap buckets.
      uint32_t bucket_key;
    };
    // Default-constructed keys are invalid, with all storage zeroed.
    TextureKey() { MakeInvalid(); }
    // Copy and compare through the raw uint32_t view rather than through the
    // bit-fields, so every stored bit is transferred.
    TextureKey(const TextureKey& key) {
      SetMapKey(key.GetMapKey());
      bucket_key = key.bucket_key;
    }
    TextureKey& operator=(const TextureKey& key) {
      SetMapKey(key.GetMapKey());
      bucket_key = key.bucket_key;
      return *this;
    }
    bool operator==(const TextureKey& key) const {
      return GetMapKey() == key.GetMapKey() && bucket_key == key.bucket_key;
    }
    bool operator!=(const TextureKey& key) const {
      return GetMapKey() != key.GetMapKey() || bucket_key != key.bucket_key;
    }
    // The 64-bit portion hashed for unordered_multimap lookup.
    inline uint64_t GetMapKey() const {
      return uint64_t(map_key[0]) | (uint64_t(map_key[1]) << 32);
    }
    inline void SetMapKey(uint64_t key) {
      map_key[0] = uint32_t(key);
      map_key[1] = uint32_t(key >> 32);
    }
    inline bool IsInvalid() const {
      // Zero base and zero width is enough for a binding to be invalid.
      return map_key[0] == 0;
    }
    inline void MakeInvalid() {
      // Reset all for a stable hash.
      SetMapKey(0);
      bucket_key = 0;
    }
  };
struct Texture {
TextureKey key;
ID3D12Resource* resource;
@ -367,13 +392,11 @@ class TextureCache {
// Row pitches on each mip level (for linear layout mainly).
uint32_t pitches[14];
// SRV descriptor from the cache, for the first swizzle the texture was used
// with (which is usually determined by the format, such as RGBA or BGRA).
// If swizzle is kCachedSRVDescriptorSwizzleMissing, the cached descriptor
// doesn't exist yet (there are no invalid D3D descriptor handle values).
D3D12_CPU_DESCRIPTOR_HANDLE cached_srv_descriptor;
static constexpr uint32_t kCachedSRVDescriptorSwizzleMissing = UINT32_MAX;
uint32_t cached_srv_descriptor_swizzle;
// For bindful - indices in the non-shader-visible descriptor cache for
// copying to the shader-visible heap (much faster than recreating, which,
// according to profiling, was often a bottleneck in many games).
// For bindless - indices in the global shader-visible descriptor heap.
std::unordered_map<uint32_t, uint32_t> srv_descriptors;
// These are to be accessed within the global critical region to synchronize
// with shared memory.
@ -390,7 +413,6 @@ class TextureCache {
static constexpr uint32_t kHeapSize = 65536;
ID3D12DescriptorHeap* heap;
D3D12_CPU_DESCRIPTOR_HANDLE heap_start;
uint32_t current_usage;
};
struct LoadConstants {
@ -459,6 +481,14 @@ class TextureCache {
// Signed version of the texture if the data in the signed version is
// different on the host.
Texture* texture_signed;
// Descriptor indices of texture and texture_signed returned from
// FindOrCreateTextureDescriptor.
uint32_t descriptor_index;
uint32_t descriptor_index_signed;
    // Resets the binding to the unbound state - zeroes the whole structure,
    // then marks both descriptor indices as not created (UINT32_MAX must be
    // restored after the memset since zero is a valid descriptor index).
    void Clear() {
      std::memset(this, 0, sizeof(*this));
      descriptor_index = descriptor_index_signed = UINT32_MAX;
    }
};
// Whether the signed version of the texture has a different representation on
@ -505,6 +535,22 @@ class TextureCache {
const xenos::xe_gpu_texture_fetch_t& fetch, TextureKey& key_out,
uint32_t* host_swizzle_out, uint8_t* swizzled_signs_out);
static constexpr bool AreDimensionsCompatible(
TextureDimension binding_dimension, Dimension resource_dimension) {
switch (binding_dimension) {
case TextureDimension::k1D:
case TextureDimension::k2D:
return resource_dimension == Dimension::k1D ||
resource_dimension == Dimension::k2D;
case TextureDimension::k3D:
return resource_dimension == Dimension::k3D;
case TextureDimension::kCube:
return resource_dimension == Dimension::kCube;
default:
return false;
}
}
static void LogTextureKeyAction(TextureKey key, const char* action);
static void LogTextureAction(const Texture* texture, const char* action);
@ -517,6 +563,14 @@ class TextureCache {
// allocates descriptors and copies!
bool LoadTextureData(Texture* texture);
// Returns the index of an existing or a newly created non-shader-visible
// cached (for bindful) or a shader-visible global (for bindless) descriptor,
// or UINT32_MAX if failed to create.
uint32_t FindOrCreateTextureDescriptor(Texture& texture, bool is_signed,
uint32_t host_swizzle);
D3D12_CPU_DESCRIPTOR_HANDLE GetTextureDescriptorCPUHandle(
uint32_t descriptor_index) const;
// For LRU caching - updates the last usage frame and moves the texture to
// the end of the usage queue. Must be called any time the texture is
// referenced by any command list to make sure it's not destroyed while still
@ -552,6 +606,7 @@ class TextureCache {
D3D12CommandProcessor* command_processor_;
RegisterFile* register_file_;
bool bindless_resources_used_;
SharedMemory* shared_memory_;
static const LoadModeInfo load_mode_info_[];
@ -571,8 +626,9 @@ class TextureCache {
uint64_t texture_current_usage_time_;
std::vector<SRVDescriptorCachePage> srv_descriptor_cache_;
// Cached descriptors used by deleted textures, for reuse.
std::vector<D3D12_CPU_DESCRIPTOR_HANDLE> srv_descriptor_cache_free_;
uint32_t srv_descriptor_cache_allocated_;
// Indices of cached descriptors used by deleted textures, for reuse.
std::vector<uint32_t> srv_descriptor_cache_free_;
enum class NullSRVDescriptorIndex {
k2DArray,
@ -587,9 +643,9 @@ class TextureCache {
D3D12_CPU_DESCRIPTOR_HANDLE null_srv_descriptor_heap_start_;
TextureBinding texture_bindings_[32] = {};
// Bit vector with bits reset on fetch constant writes to avoid getting
// texture keys from the fetch constants again and again.
uint32_t texture_keys_in_sync_ = 0;
// Bit vector with bits reset on fetch constant writes to avoid parsing fetch
// constants again and again.
uint32_t texture_bindings_in_sync_ = 0;
// Whether a texture has been invalidated (a watch has been triggered), so
// need to try to reload textures, disregarding whether fetch constants have

View File

@ -62,8 +62,8 @@ using namespace ucode;
// S#/T#/U# binding index, and the second is the s#/t#/u# register index
// within its space.
constexpr uint32_t DxbcShaderTranslator::kMaxTextureSRVIndexBits;
constexpr uint32_t DxbcShaderTranslator::kMaxTextureSRVs;
constexpr uint32_t DxbcShaderTranslator::kMaxTextureBindingIndexBits;
constexpr uint32_t DxbcShaderTranslator::kMaxTextureBindings;
constexpr uint32_t DxbcShaderTranslator::kMaxSamplerBindingIndexBits;
constexpr uint32_t DxbcShaderTranslator::kMaxSamplerBindings;
constexpr uint32_t DxbcShaderTranslator::kInterpolatorCount;
@ -74,13 +74,16 @@ constexpr uint32_t DxbcShaderTranslator::kSwizzleXXXX;
constexpr uint32_t DxbcShaderTranslator::kSwizzleYYYY;
constexpr uint32_t DxbcShaderTranslator::kSwizzleZZZZ;
constexpr uint32_t DxbcShaderTranslator::kSwizzleWWWW;
constexpr uint32_t DxbcShaderTranslator::kCbufferIndexUnallocated;
constexpr uint32_t DxbcShaderTranslator::kBindingIndexUnallocated;
constexpr uint32_t DxbcShaderTranslator::kCfExecBoolConstantNone;
DxbcShaderTranslator::DxbcShaderTranslator(uint32_t vendor_id,
bool bindless_resources_used,
bool edram_rov_used,
bool force_emit_source_map)
: vendor_id_(vendor_id), edram_rov_used_(edram_rov_used) {
: vendor_id_(vendor_id),
bindless_resources_used_(bindless_resources_used),
edram_rov_used_(edram_rov_used) {
emit_source_map_ = force_emit_source_map || cvars::dxbc_source_map;
// Don't allocate again and again for the first shader.
shader_code_.reserve(8192);
@ -154,9 +157,10 @@ void DxbcShaderTranslator::Reset() {
cbuffer_count_ = 0;
// System constants always used in prologues/epilogues.
cbuffer_index_system_constants_ = cbuffer_count_++;
cbuffer_index_float_constants_ = kCbufferIndexUnallocated;
cbuffer_index_bool_loop_constants_ = kCbufferIndexUnallocated;
cbuffer_index_fetch_constants_ = kCbufferIndexUnallocated;
cbuffer_index_float_constants_ = kBindingIndexUnallocated;
cbuffer_index_bool_loop_constants_ = kBindingIndexUnallocated;
cbuffer_index_fetch_constants_ = kBindingIndexUnallocated;
cbuffer_index_descriptor_indices_ = kBindingIndexUnallocated;
system_constants_used_ = 0;
@ -172,7 +176,19 @@ void DxbcShaderTranslator::Reset() {
cf_instruction_predicate_if_open_ = false;
cf_exec_predicate_written_ = false;
texture_srvs_.clear();
srv_count_ = 0;
srv_index_shared_memory_ = kBindingIndexUnallocated;
srv_index_bindless_textures_2d_ = kBindingIndexUnallocated;
srv_index_bindless_textures_3d_ = kBindingIndexUnallocated;
srv_index_bindless_textures_cube_ = kBindingIndexUnallocated;
texture_bindings_.clear();
texture_bindings_for_bindful_srv_indices_.clear();
uav_count_ = 0;
uav_index_shared_memory_ = kBindingIndexUnallocated;
uav_index_edram_ = kBindingIndexUnallocated;
sampler_bindings_.clear();
memexport_alloc_current_count_ = 0;
@ -1369,7 +1385,7 @@ DxbcShaderTranslator::DxbcSrc DxbcShaderTranslator::LoadOperand(
}
} break;
case InstructionStorageSource::kConstantFloat: {
if (cbuffer_index_float_constants_ == kCbufferIndexUnallocated) {
if (cbuffer_index_float_constants_ == kBindingIndexUnallocated) {
cbuffer_index_float_constants_ = cbuffer_count_++;
}
if (operand.storage_addressing_mode ==
@ -1600,7 +1616,7 @@ void DxbcShaderTranslator::UpdateExecConditionalsAndEmitDisassembly(
if (type == ParsedExecInstruction::Type::kConditional) {
uint32_t bool_constant_test_temp = PushSystemTemp();
// Check the bool constant value.
if (cbuffer_index_bool_loop_constants_ == kCbufferIndexUnallocated) {
if (cbuffer_index_bool_loop_constants_ == kBindingIndexUnallocated) {
cbuffer_index_bool_loop_constants_ = cbuffer_count_++;
}
DxbcOpAnd(DxbcDest::R(bool_constant_test_temp, 0b0001),
@ -1755,7 +1771,7 @@ void DxbcShaderTranslator::ProcessLoopStartInstruction(
// Count (unsigned) in bits 0:7 of the loop constant, initial aL (unsigned) in
// 8:15. Starting from vector 2 because of bool constants.
if (cbuffer_index_bool_loop_constants_ == kCbufferIndexUnallocated) {
if (cbuffer_index_bool_loop_constants_ == kBindingIndexUnallocated) {
cbuffer_index_bool_loop_constants_ = cbuffer_count_++;
}
DxbcSrc loop_constant_src(
@ -1843,7 +1859,7 @@ void DxbcShaderTranslator::ProcessLoopEndInstruction(
uint32_t aL_add_temp = PushSystemTemp();
// Extract the value to add to aL (signed, in bits 16:23 of the loop
// constant). Starting from vector 2 because of bool constants.
if (cbuffer_index_bool_loop_constants_ == kCbufferIndexUnallocated) {
if (cbuffer_index_bool_loop_constants_ == kBindingIndexUnallocated) {
cbuffer_index_bool_loop_constants_ = cbuffer_count_++;
}
DxbcOpIBFE(DxbcDest::R(aL_add_temp, 0b0001), DxbcSrc::LU(8),
@ -1963,6 +1979,10 @@ const DxbcShaderTranslator::RdefType DxbcShaderTranslator::rdef_types_[size_t(
// kUint4Array48
{nullptr, DxbcRdefVariableClass::kVector, DxbcRdefVariableType::kUInt, 1, 4,
48, 0, RdefTypeIndex::kUint4, nullptr},
// kUint4DescriptorIndexArray - bindless descriptor indices - size written
// dynamically.
{nullptr, DxbcRdefVariableClass::kVector, DxbcRdefVariableType::kUInt, 1, 4,
0, 0, RdefTypeIndex::kUint4, nullptr},
};
const DxbcShaderTranslator::SystemConstantRdef DxbcShaderTranslator::
@ -2042,22 +2062,17 @@ void DxbcShaderTranslator::WriteResourceDefinitions() {
shader_object_.push_back(cbuffer_count_);
// Constant buffer offset (set later).
shader_object_.push_back(0);
// Bound resource count (samplers, SRV, UAV, CBV).
uint32_t resource_count = cbuffer_count_;
if (!is_depth_only_pixel_shader_) {
// + 2 for shared memory SRV and UAV (vfetches can appear in pixel shaders
// too, and the UAV is needed for memexport, however, the choice between
// SRV and UAV is per-pipeline, not per-shader - a resource can't be in a
// read-only state (SRV, IBV) if it's in a read/write state such as UAV).
resource_count +=
uint32_t(sampler_bindings_.size()) + 2 + uint32_t(texture_srvs_.size());
}
if (IsDxbcPixelShader() && edram_rov_used_) {
// EDRAM.
// Bindful resource count.
uint32_t resource_count = srv_count_ + uav_count_ + cbuffer_count_;
if (!sampler_bindings_.empty()) {
if (bindless_resources_used_) {
++resource_count;
} else {
resource_count += uint32_t(sampler_bindings_.size());
}
}
shader_object_.push_back(resource_count);
// Bound resource buffer offset (set later).
// Bindful resource buffer offset (set later).
shader_object_.push_back(0);
if (IsDxbcVertexShader()) {
// vs_5_1
@ -2119,12 +2134,18 @@ void DxbcShaderTranslator::WriteResourceDefinitions() {
shader_object_.push_back(uint32_t(type.variable_class) |
(uint32_t(type.variable_type) << 16));
shader_object_.push_back(type.row_count | (type.column_count << 16));
if (RdefTypeIndex(i) == RdefTypeIndex::kFloat4ConstantArray) {
switch (RdefTypeIndex(i)) {
case RdefTypeIndex::kFloat4ConstantArray:
// Declaring a 0-sized array may not be safe, so write something valid
// even if they aren't used.
shader_object_.push_back(
std::max(constant_register_map().float_count, uint32_t(1)));
} else {
break;
case RdefTypeIndex::kUint4DescriptorIndexArray:
shader_object_.push_back(std::max(
uint32_t((GetBindlessResourceCount() + 3) >> 2), uint32_t(1)));
break;
default:
shader_object_.push_back(type.element_count |
(type.struct_member_count << 16));
}
@ -2177,33 +2198,37 @@ void DxbcShaderTranslator::WriteResourceDefinitions() {
new_offset = (uint32_t(shader_object_.size()) - chunk_position_dwords) *
sizeof(uint32_t);
uint32_t constant_name_offsets_system[kSysConst_Count];
if (cbuffer_index_system_constants_ != kCbufferIndexUnallocated) {
if (cbuffer_index_system_constants_ != kBindingIndexUnallocated) {
for (uint32_t i = 0; i < kSysConst_Count; ++i) {
constant_name_offsets_system[i] = new_offset;
new_offset += AppendString(shader_object_, system_constant_rdef_[i].name);
}
}
uint32_t constant_name_offset_float = new_offset;
if (cbuffer_index_float_constants_ != kCbufferIndexUnallocated) {
if (cbuffer_index_float_constants_ != kBindingIndexUnallocated) {
new_offset += AppendString(shader_object_, "xe_float_constants");
}
uint32_t constant_name_offset_bool = new_offset;
uint32_t constant_name_offset_loop = constant_name_offset_bool;
if (cbuffer_index_bool_loop_constants_ != kCbufferIndexUnallocated) {
uint32_t constant_name_offset_loop = new_offset;
if (cbuffer_index_bool_loop_constants_ != kBindingIndexUnallocated) {
new_offset += AppendString(shader_object_, "xe_bool_constants");
constant_name_offset_loop = new_offset;
new_offset += AppendString(shader_object_, "xe_loop_constants");
}
uint32_t constant_name_offset_fetch = new_offset;
if (constant_name_offset_fetch != kCbufferIndexUnallocated) {
if (cbuffer_index_fetch_constants_ != kBindingIndexUnallocated) {
new_offset += AppendString(shader_object_, "xe_fetch_constants");
}
uint32_t constant_name_offset_descriptor_indices = new_offset;
if (cbuffer_index_descriptor_indices_ != kBindingIndexUnallocated) {
new_offset += AppendString(shader_object_, "xe_descriptor_indices");
}
const uint32_t constant_size = 10 * sizeof(uint32_t);
// System constants.
uint32_t constant_offset_system = new_offset;
if (cbuffer_index_system_constants_ != kCbufferIndexUnallocated) {
if (cbuffer_index_system_constants_ != kBindingIndexUnallocated) {
uint32_t system_cbuffer_constant_offset = 0;
for (uint32_t i = 0; i < kSysConst_Count; ++i) {
const SystemConstantRdef& constant = system_constant_rdef_[i];
@ -2229,11 +2254,11 @@ void DxbcShaderTranslator::WriteResourceDefinitions() {
// Float constants.
uint32_t constant_offset_float = new_offset;
if (cbuffer_index_float_constants_ != kCbufferIndexUnallocated) {
if (cbuffer_index_float_constants_ != kBindingIndexUnallocated) {
assert_not_zero(constant_register_map().float_count);
shader_object_.push_back(constant_name_offset_float);
shader_object_.push_back(0);
shader_object_.push_back(
std::max(constant_register_map().float_count, uint32_t(1)) * 4 *
shader_object_.push_back(constant_register_map().float_count * 4 *
sizeof(float));
shader_object_.push_back(kDxbcRdefVariableFlagUsed);
shader_object_.push_back(types_offset +
@ -2249,7 +2274,7 @@ void DxbcShaderTranslator::WriteResourceDefinitions() {
// Bool and loop constants.
uint32_t constant_offset_bool_loop = new_offset;
if (cbuffer_index_bool_loop_constants_ != kCbufferIndexUnallocated) {
if (cbuffer_index_bool_loop_constants_ != kBindingIndexUnallocated) {
shader_object_.push_back(constant_name_offset_bool);
shader_object_.push_back(0);
shader_object_.push_back(2 * 4 * sizeof(uint32_t));
@ -2279,7 +2304,7 @@ void DxbcShaderTranslator::WriteResourceDefinitions() {
// Fetch constants.
uint32_t constant_offset_fetch = new_offset;
if (cbuffer_index_fetch_constants_ != kCbufferIndexUnallocated) {
if (cbuffer_index_fetch_constants_ != kBindingIndexUnallocated) {
shader_object_.push_back(constant_name_offset_fetch);
shader_object_.push_back(0);
shader_object_.push_back(32 * 6 * sizeof(uint32_t));
@ -2294,6 +2319,26 @@ void DxbcShaderTranslator::WriteResourceDefinitions() {
new_offset += constant_size;
}
// Bindless descriptor indices.
uint32_t constant_offset_descriptor_indices = new_offset;
if (cbuffer_index_descriptor_indices_ != kBindingIndexUnallocated) {
assert_not_zero(GetBindlessResourceCount());
shader_object_.push_back(constant_name_offset_descriptor_indices);
shader_object_.push_back(0);
shader_object_.push_back(
xe::align(GetBindlessResourceCount(), uint32_t(4)) * sizeof(uint32_t));
shader_object_.push_back(kDxbcRdefVariableFlagUsed);
shader_object_.push_back(
types_offset +
uint32_t(RdefTypeIndex::kUint4DescriptorIndexArray) * type_size);
shader_object_.push_back(0);
shader_object_.push_back(0xFFFFFFFFu);
shader_object_.push_back(0);
shader_object_.push_back(0xFFFFFFFFu);
shader_object_.push_back(0);
new_offset += constant_size;
}
// ***************************************************************************
// Constant buffers
// ***************************************************************************
@ -2302,21 +2347,25 @@ void DxbcShaderTranslator::WriteResourceDefinitions() {
new_offset = (uint32_t(shader_object_.size()) - chunk_position_dwords) *
sizeof(uint32_t);
uint32_t cbuffer_name_offset_system = new_offset;
if (cbuffer_index_system_constants_ != kCbufferIndexUnallocated) {
if (cbuffer_index_system_constants_ != kBindingIndexUnallocated) {
new_offset += AppendString(shader_object_, "xe_system_cbuffer");
}
uint32_t cbuffer_name_offset_float = new_offset;
if (cbuffer_index_float_constants_ != kCbufferIndexUnallocated) {
if (cbuffer_index_float_constants_ != kBindingIndexUnallocated) {
new_offset += AppendString(shader_object_, "xe_float_cbuffer");
}
uint32_t cbuffer_name_offset_bool_loop = new_offset;
if (cbuffer_index_bool_loop_constants_ != kCbufferIndexUnallocated) {
if (cbuffer_index_bool_loop_constants_ != kBindingIndexUnallocated) {
new_offset += AppendString(shader_object_, "xe_bool_loop_cbuffer");
}
uint32_t cbuffer_name_offset_fetch = new_offset;
if (cbuffer_index_fetch_constants_ != kCbufferIndexUnallocated) {
if (cbuffer_index_fetch_constants_ != kBindingIndexUnallocated) {
new_offset += AppendString(shader_object_, "xe_fetch_cbuffer");
}
uint32_t cbuffer_name_offset_descriptor_indices = new_offset;
if (cbuffer_index_descriptor_indices_ != kBindingIndexUnallocated) {
new_offset += AppendString(shader_object_, "xe_descriptor_indices_cbuffer");
}
// Write the offset to the header.
shader_object_[chunk_position_dwords + 1] = new_offset;
@ -2333,11 +2382,11 @@ void DxbcShaderTranslator::WriteResourceDefinitions() {
// No D3D_SHADER_CBUFFER_FLAGS.
shader_object_.push_back(0);
} else if (i == cbuffer_index_float_constants_) {
assert_not_zero(constant_register_map().float_count);
shader_object_.push_back(cbuffer_name_offset_float);
shader_object_.push_back(1);
shader_object_.push_back(constant_offset_float);
shader_object_.push_back(
std::max(constant_register_map().float_count, uint32_t(1)) * 4 *
shader_object_.push_back(constant_register_map().float_count * 4 *
sizeof(float));
shader_object_.push_back(uint32_t(DxbcRdefCbufferType::kCbuffer));
shader_object_.push_back(0);
@ -2356,6 +2405,18 @@ void DxbcShaderTranslator::WriteResourceDefinitions() {
shader_object_.push_back(32 * 6 * sizeof(uint32_t));
shader_object_.push_back(uint32_t(DxbcRdefCbufferType::kCbuffer));
shader_object_.push_back(0);
} else if (i == cbuffer_index_descriptor_indices_) {
assert_not_zero(GetBindlessResourceCount());
shader_object_.push_back(cbuffer_name_offset_descriptor_indices);
shader_object_.push_back(1);
shader_object_.push_back(constant_offset_descriptor_indices);
shader_object_.push_back(
xe::align(GetBindlessResourceCount(), uint32_t(4)) *
sizeof(uint32_t));
shader_object_.push_back(uint32_t(DxbcRdefCbufferType::kCbuffer));
shader_object_.push_back(0);
} else {
assert_unhandled_case(i);
}
}
@ -2367,38 +2428,81 @@ void DxbcShaderTranslator::WriteResourceDefinitions() {
// their names already.
new_offset = (uint32_t(shader_object_.size()) - chunk_position_dwords) *
sizeof(uint32_t);
uint32_t sampler_name_offset = 0;
uint32_t shared_memory_srv_name_offset = 0;
uint32_t texture_name_offset = 0;
uint32_t shared_memory_uav_name_offset = 0;
if (!is_depth_only_pixel_shader_) {
sampler_name_offset = new_offset;
uint32_t sampler_name_offset = new_offset;
if (!sampler_bindings_.empty()) {
if (bindless_resources_used_) {
new_offset += AppendString(shader_object_, "xe_samplers");
} else {
for (uint32_t i = 0; i < uint32_t(sampler_bindings_.size()); ++i) {
new_offset +=
AppendString(shader_object_, sampler_bindings_[i].name.c_str());
}
shared_memory_srv_name_offset = new_offset;
new_offset += AppendString(shader_object_, "xe_shared_memory_srv");
texture_name_offset = new_offset;
for (uint32_t i = 0; i < uint32_t(texture_srvs_.size()); ++i) {
new_offset += AppendString(shader_object_, texture_srvs_[i].name.c_str());
}
shared_memory_uav_name_offset = new_offset;
}
uint32_t shared_memory_srv_name_offset = new_offset;
if (srv_index_shared_memory_ != kBindingIndexUnallocated) {
new_offset += AppendString(shader_object_, "xe_shared_memory_srv");
}
uint32_t bindless_textures_2d_name_offset = new_offset;
uint32_t bindless_textures_3d_name_offset = new_offset;
uint32_t bindless_textures_cube_name_offset = new_offset;
if (bindless_resources_used_) {
if (srv_index_bindless_textures_2d_ != kBindingIndexUnallocated) {
bindless_textures_2d_name_offset = new_offset;
new_offset += AppendString(shader_object_, "xe_textures_2d");
}
if (srv_index_bindless_textures_3d_ != kBindingIndexUnallocated) {
bindless_textures_3d_name_offset = new_offset;
new_offset += AppendString(shader_object_, "xe_textures_3d");
}
if (srv_index_bindless_textures_cube_ != kBindingIndexUnallocated) {
bindless_textures_cube_name_offset = new_offset;
new_offset += AppendString(shader_object_, "xe_textures_cube");
}
} else {
for (TextureBinding& texture_binding : texture_bindings_) {
texture_binding.bindful_srv_rdef_name_offset = new_offset;
new_offset += AppendString(shader_object_, texture_binding.name.c_str());
}
}
uint32_t shared_memory_uav_name_offset = new_offset;
if (uav_index_shared_memory_ != kBindingIndexUnallocated) {
new_offset += AppendString(shader_object_, "xe_shared_memory_uav");
}
uint32_t edram_name_offset = new_offset;
if (IsDxbcPixelShader() && edram_rov_used_) {
if (uav_index_edram_ != kBindingIndexUnallocated) {
new_offset += AppendString(shader_object_, "xe_edram");
}
// Write the offset to the header.
shader_object_[chunk_position_dwords + 3] = new_offset;
if (!is_depth_only_pixel_shader_) {
// Samplers.
if (!sampler_bindings_.empty()) {
if (bindless_resources_used_) {
// Bindless sampler heap.
shader_object_.push_back(sampler_name_offset);
shader_object_.push_back(uint32_t(DxbcRdefInputType::kSampler));
shader_object_.push_back(uint32_t(DxbcRdefReturnType::kVoid));
shader_object_.push_back(uint32_t(DxbcRdefDimension::kUnknown));
// Multisampling not applicable.
shader_object_.push_back(0);
// Registers s0:*.
shader_object_.push_back(0);
// Unbounded number of bindings.
shader_object_.push_back(0);
// No DxbcRdefInputFlags.
shader_object_.push_back(0);
// Register space 0.
shader_object_.push_back(0);
// Sampler ID S0.
shader_object_.push_back(0);
} else {
// Bindful samplers.
uint32_t sampler_current_name_offset = sampler_name_offset;
for (uint32_t i = 0; i < uint32_t(sampler_bindings_.size()); ++i) {
const SamplerBinding& sampler_binding = sampler_bindings_[i];
shader_object_.push_back(sampler_name_offset);
shader_object_.push_back(sampler_current_name_offset);
shader_object_.push_back(uint32_t(DxbcRdefInputType::kSampler));
shader_object_.push_back(uint32_t(DxbcRdefReturnType::kVoid));
shader_object_.push_back(uint32_t(DxbcRdefDimension::kUnknown));
@ -2414,9 +2518,15 @@ void DxbcShaderTranslator::WriteResourceDefinitions() {
shader_object_.push_back(0);
// Sampler ID S[i].
shader_object_.push_back(i);
sampler_name_offset += GetStringLength(sampler_binding.name.c_str());
sampler_current_name_offset +=
GetStringLength(sampler_binding.name.c_str());
}
}
}
// Shader resource views, sorted by binding index.
for (uint32_t i = 0; i < srv_count_; ++i) {
if (i == srv_index_shared_memory_) {
// Shared memory (when memexport isn't used in the pipeline).
shader_object_.push_back(shared_memory_srv_name_offset);
shader_object_.push_back(uint32_t(DxbcRdefInputType::kByteAddress));
@ -2430,40 +2540,73 @@ void DxbcShaderTranslator::WriteResourceDefinitions() {
// No DxbcRdefInputFlags.
shader_object_.push_back(0);
shader_object_.push_back(uint32_t(SRVSpace::kMain));
// SRV ID T0.
shader_object_.push_back(0);
for (uint32_t i = 0; i < uint32_t(texture_srvs_.size()); ++i) {
const TextureSRV& texture_srv = texture_srvs_[i];
} else {
uint32_t texture_name_offset;
DxbcRdefDimension texture_dimension;
uint32_t texture_register;
uint32_t texture_register_count;
SRVSpace texture_register_space;
if (bindless_resources_used_) {
// Bindless texture heap.
if (i == srv_index_bindless_textures_3d_) {
texture_name_offset = bindless_textures_3d_name_offset;
texture_dimension = DxbcRdefDimension::kSRVTexture3D;
texture_register_space = SRVSpace::kBindlessTextures3D;
} else if (i == srv_index_bindless_textures_cube_) {
texture_name_offset = bindless_textures_cube_name_offset;
texture_dimension = DxbcRdefDimension::kSRVTextureCube;
texture_register_space = SRVSpace::kBindlessTexturesCube;
} else {
assert_true(i == srv_index_bindless_textures_2d_);
texture_name_offset = bindless_textures_2d_name_offset;
texture_dimension = DxbcRdefDimension::kSRVTexture2DArray;
texture_register_space = SRVSpace::kBindlessTextures2DArray;
}
texture_register = 0;
texture_register_count = 0;
} else {
// Bindful texture.
auto it = texture_bindings_for_bindful_srv_indices_.find(i);
assert_true(it != texture_bindings_for_bindful_srv_indices_.end());
uint32_t texture_binding_index = it->second;
const TextureBinding& texture_binding =
texture_bindings_[texture_binding_index];
texture_name_offset = texture_binding.bindful_srv_rdef_name_offset;
switch (texture_binding.dimension) {
case TextureDimension::k3D:
texture_dimension = DxbcRdefDimension::kSRVTexture3D;
break;
case TextureDimension::kCube:
texture_dimension = DxbcRdefDimension::kSRVTextureCube;
break;
default:
assert_true(texture_binding.dimension == TextureDimension::k2D);
texture_dimension = DxbcRdefDimension::kSRVTexture2DArray;
}
texture_register = uint32_t(SRVMainRegister::kBindfulTexturesStart) +
texture_binding_index;
texture_register_count = 1;
texture_register_space = SRVSpace::kMain;
}
shader_object_.push_back(texture_name_offset);
shader_object_.push_back(uint32_t(DxbcRdefInputType::kTexture));
shader_object_.push_back(uint32_t(DxbcRdefReturnType::kFloat));
switch (texture_srv.dimension) {
case TextureDimension::k3D:
shader_object_.push_back(uint32_t(DxbcRdefDimension::kSRVTexture3D));
break;
case TextureDimension::kCube:
shader_object_.push_back(
uint32_t(DxbcRdefDimension::kSRVTextureCube));
break;
default:
shader_object_.push_back(
uint32_t(DxbcRdefDimension::kSRVTexture2DArray));
}
shader_object_.push_back(uint32_t(texture_dimension));
// Not multisampled.
shader_object_.push_back(0xFFFFFFFFu);
shader_object_.push_back(uint32_t(SRVMainRegister::kBoundTexturesStart) +
i);
// One binding.
shader_object_.push_back(1);
shader_object_.push_back(texture_register);
shader_object_.push_back(texture_register_count);
// 4-component.
shader_object_.push_back(DxbcRdefInputFlagsComponents);
shader_object_.push_back(uint32_t(SRVSpace::kMain));
// SRV ID T[1 + i] - T0 is shared memory.
shader_object_.push_back(1 + i);
texture_name_offset += GetStringLength(texture_srv.name.c_str());
shader_object_.push_back(uint32_t(texture_register_space));
}
// SRV ID T[i].
shader_object_.push_back(i);
}
// Unordered access views, sorted by binding index.
for (uint32_t i = 0; i < uav_count_; ++i) {
if (i == uav_index_shared_memory_) {
// Shared memory (when memexport is used in the pipeline).
shader_object_.push_back(shared_memory_uav_name_offset);
shader_object_.push_back(uint32_t(DxbcRdefInputType::kUAVRWByteAddress));
@ -2478,12 +2621,8 @@ void DxbcShaderTranslator::WriteResourceDefinitions() {
shader_object_.push_back(0);
// Register space 0.
shader_object_.push_back(0);
// UAV ID U0.
shader_object_.push_back(0);
}
if (IsDxbcPixelShader() && edram_rov_used_) {
// EDRAM uint32 buffer.
} else if (i == uav_index_edram_) {
// EDRAM R32_UINT buffer.
shader_object_.push_back(edram_name_offset);
shader_object_.push_back(uint32_t(DxbcRdefInputType::kUAVRWTyped));
shader_object_.push_back(uint32_t(DxbcRdefReturnType::kUInt));
@ -2497,8 +2636,11 @@ void DxbcShaderTranslator::WriteResourceDefinitions() {
shader_object_.push_back(0);
// Register space 0.
shader_object_.push_back(0);
// UAV ID U1 or U0 depending on whether there's U0.
shader_object_.push_back(ROV_GetEDRAMUAVIndex());
} else {
assert_unhandled_case(i);
}
// UAV ID U[i].
shader_object_.push_back(i);
}
// Constant buffers.
@ -2516,6 +2658,11 @@ void DxbcShaderTranslator::WriteResourceDefinitions() {
} else if (i == cbuffer_index_fetch_constants_) {
shader_object_.push_back(cbuffer_name_offset_fetch);
register_index = uint32_t(CbufferRegister::kFetchConstants);
} else if (i == cbuffer_index_descriptor_indices_) {
shader_object_.push_back(cbuffer_name_offset_descriptor_indices);
register_index = uint32_t(CbufferRegister::kDescriptorIndices);
} else {
assert_unhandled_case(i);
}
shader_object_.push_back(uint32_t(DxbcRdefInputType::kCbuffer));
shader_object_.push_back(uint32_t(DxbcRdefReturnType::kVoid));
@ -3180,7 +3327,8 @@ void DxbcShaderTranslator::WriteShaderCode() {
// Constant buffers, from most frequenly accessed to least frequently accessed
// (the order is a hint to the driver according to the DXBC header).
if (cbuffer_index_float_constants_ != kCbufferIndexUnallocated) {
if (cbuffer_index_float_constants_ != kBindingIndexUnallocated) {
assert_not_zero(constant_register_map().float_count);
shader_object_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_CONSTANT_BUFFER) |
ENCODE_D3D10_SB_D3D10_SB_CONSTANT_BUFFER_ACCESS_PATTERN(
@ -3196,7 +3344,7 @@ void DxbcShaderTranslator::WriteShaderCode() {
shader_object_.push_back(constant_register_map().float_count);
shader_object_.push_back(0);
}
if (cbuffer_index_system_constants_ != kCbufferIndexUnallocated) {
if (cbuffer_index_system_constants_ != kBindingIndexUnallocated) {
shader_object_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_CONSTANT_BUFFER) |
ENCODE_D3D10_SB_D3D10_SB_CONSTANT_BUFFER_ACCESS_PATTERN(
@ -3210,7 +3358,7 @@ void DxbcShaderTranslator::WriteShaderCode() {
shader_object_.push_back((sizeof(SystemConstants) + 15) >> 4);
shader_object_.push_back(0);
}
if (cbuffer_index_fetch_constants_ != kCbufferIndexUnallocated) {
if (cbuffer_index_fetch_constants_ != kBindingIndexUnallocated) {
shader_object_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_CONSTANT_BUFFER) |
ENCODE_D3D10_SB_D3D10_SB_CONSTANT_BUFFER_ACCESS_PATTERN(
@ -3224,7 +3372,22 @@ void DxbcShaderTranslator::WriteShaderCode() {
shader_object_.push_back(48);
shader_object_.push_back(0);
}
if (cbuffer_index_bool_loop_constants_ != kCbufferIndexUnallocated) {
if (cbuffer_index_descriptor_indices_ != kBindingIndexUnallocated) {
assert_not_zero(GetBindlessResourceCount());
shader_object_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_CONSTANT_BUFFER) |
ENCODE_D3D10_SB_D3D10_SB_CONSTANT_BUFFER_ACCESS_PATTERN(
D3D10_SB_CONSTANT_BUFFER_IMMEDIATE_INDEXED) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
shader_object_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, kSwizzleXYZW, 3));
shader_object_.push_back(cbuffer_index_descriptor_indices_);
shader_object_.push_back(uint32_t(CbufferRegister::kDescriptorIndices));
shader_object_.push_back(uint32_t(CbufferRegister::kDescriptorIndices));
shader_object_.push_back((GetBindlessResourceCount() + 3) >> 2);
shader_object_.push_back(0);
}
if (cbuffer_index_bool_loop_constants_ != kBindingIndexUnallocated) {
shader_object_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_CONSTANT_BUFFER) |
ENCODE_D3D10_SB_D3D10_SB_CONSTANT_BUFFER_ACCESS_PATTERN(
@ -3239,8 +3402,22 @@ void DxbcShaderTranslator::WriteShaderCode() {
shader_object_.push_back(0);
}
if (!is_depth_only_pixel_shader_) {
// Samplers.
if (!sampler_bindings_.empty()) {
if (bindless_resources_used_) {
// Bindless sampler heap.
shader_object_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_SAMPLER) |
ENCODE_D3D10_SB_SAMPLER_MODE(D3D10_SB_SAMPLER_MODE_DEFAULT) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(6));
shader_object_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_SAMPLER, kSwizzleXYZW, 3));
shader_object_.push_back(0);
shader_object_.push_back(0);
shader_object_.push_back(UINT32_MAX);
shader_object_.push_back(0);
} else {
// Bindful samplers.
for (uint32_t i = 0; i < uint32_t(sampler_bindings_.size()); ++i) {
const SamplerBinding& sampler_binding = sampler_bindings_[i];
shader_object_.push_back(
@ -3254,23 +3431,50 @@ void DxbcShaderTranslator::WriteShaderCode() {
shader_object_.push_back(i);
shader_object_.push_back(0);
}
}
}
// Shader resources.
// Shader resource views, sorted by binding index.
for (uint32_t i = 0; i < srv_count_; ++i) {
if (i == srv_index_shared_memory_) {
// Shared memory ByteAddressBuffer.
shader_object_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D11_SB_OPCODE_DCL_RESOURCE_RAW) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(6));
shader_object_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_RESOURCE, kSwizzleXYZW, 3));
shader_object_.push_back(0);
shader_object_.push_back(srv_index_shared_memory_);
shader_object_.push_back(uint32_t(SRVMainRegister::kSharedMemory));
shader_object_.push_back(uint32_t(SRVMainRegister::kSharedMemory));
shader_object_.push_back(uint32_t(SRVSpace::kMain));
// Textures.
for (uint32_t i = 0; i < uint32_t(texture_srvs_.size()); ++i) {
const TextureSRV& texture_srv = texture_srvs_[i];
} else {
// Texture or texture heap.
D3D10_SB_RESOURCE_DIMENSION texture_srv_dimension;
switch (texture_srv.dimension) {
uint32_t texture_register_first, texture_register_last;
SRVSpace texture_register_space;
if (bindless_resources_used_) {
// Bindless texture heap.
texture_register_first = 0;
texture_register_last = UINT32_MAX;
if (i == srv_index_bindless_textures_3d_) {
texture_srv_dimension = D3D10_SB_RESOURCE_DIMENSION_TEXTURE3D;
texture_register_space = SRVSpace::kBindlessTextures3D;
} else if (i == srv_index_bindless_textures_cube_) {
texture_srv_dimension = D3D10_SB_RESOURCE_DIMENSION_TEXTURECUBE;
texture_register_space = SRVSpace::kBindlessTexturesCube;
} else {
assert_true(i == srv_index_bindless_textures_2d_);
texture_srv_dimension = D3D10_SB_RESOURCE_DIMENSION_TEXTURE2DARRAY;
texture_register_space = SRVSpace::kBindlessTextures2DArray;
}
} else {
// Bindful texture.
auto it = texture_bindings_for_bindful_srv_indices_.find(i);
assert_true(it != texture_bindings_for_bindful_srv_indices_.end());
uint32_t texture_binding_index = it->second;
const TextureBinding& texture_binding =
texture_bindings_[texture_binding_index];
switch (texture_binding.dimension) {
case TextureDimension::k3D:
texture_srv_dimension = D3D10_SB_RESOURCE_DIMENSION_TEXTURE3D;
break;
@ -3278,31 +3482,35 @@ void DxbcShaderTranslator::WriteShaderCode() {
texture_srv_dimension = D3D10_SB_RESOURCE_DIMENSION_TEXTURECUBE;
break;
default:
assert_true(texture_binding.dimension == TextureDimension::k2D);
texture_srv_dimension = D3D10_SB_RESOURCE_DIMENSION_TEXTURE2DARRAY;
}
texture_register_first = texture_register_last =
uint32_t(SRVMainRegister::kBindfulTexturesStart) +
texture_binding_index;
texture_register_space = SRVSpace::kMain;
}
shader_object_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_RESOURCE) |
ENCODE_D3D10_SB_RESOURCE_DIMENSION(texture_srv_dimension) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
shader_object_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_RESOURCE, kSwizzleXYZW, 3));
// T0 is shared memory.
shader_object_.push_back(1 + i);
shader_object_.push_back(uint32_t(SRVMainRegister::kBoundTexturesStart) +
i);
shader_object_.push_back(uint32_t(SRVMainRegister::kBoundTexturesStart) +
i);
shader_object_.push_back(i);
shader_object_.push_back(texture_register_first);
shader_object_.push_back(texture_register_last);
shader_object_.push_back(
ENCODE_D3D10_SB_RESOURCE_RETURN_TYPE(D3D10_SB_RETURN_TYPE_FLOAT, 0) |
ENCODE_D3D10_SB_RESOURCE_RETURN_TYPE(D3D10_SB_RETURN_TYPE_FLOAT, 1) |
ENCODE_D3D10_SB_RESOURCE_RETURN_TYPE(D3D10_SB_RETURN_TYPE_FLOAT, 2) |
ENCODE_D3D10_SB_RESOURCE_RETURN_TYPE(D3D10_SB_RETURN_TYPE_FLOAT, 3));
shader_object_.push_back(uint32_t(SRVSpace::kMain));
shader_object_.push_back(uint32_t(texture_register_space));
}
}
// Unordered access views.
if (!is_depth_only_pixel_shader_) {
// Unordered access views, sorted by binding index.
for (uint32_t i = 0; i < uav_count_; ++i) {
if (i == uav_index_shared_memory_) {
// Shared memory RWByteAddressBuffer.
shader_object_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(
@ -3310,22 +3518,22 @@ void DxbcShaderTranslator::WriteShaderCode() {
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(6));
shader_object_.push_back(EncodeVectorSwizzledOperand(
D3D11_SB_OPERAND_TYPE_UNORDERED_ACCESS_VIEW, kSwizzleXYZW, 3));
shader_object_.push_back(0);
shader_object_.push_back(uav_index_shared_memory_);
shader_object_.push_back(uint32_t(UAVRegister::kSharedMemory));
shader_object_.push_back(uint32_t(UAVRegister::kSharedMemory));
shader_object_.push_back(0);
}
if (IsDxbcPixelShader() && edram_rov_used_) {
// EDRAM uint32 rasterizer-ordered buffer.
} else if (i == uav_index_edram_) {
// EDRAM buffer R32_UINT rasterizer-ordered view.
shader_object_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(
D3D11_SB_OPCODE_DCL_UNORDERED_ACCESS_VIEW_TYPED) |
ENCODE_D3D10_SB_RESOURCE_DIMENSION(D3D10_SB_RESOURCE_DIMENSION_BUFFER) |
ENCODE_D3D10_SB_RESOURCE_DIMENSION(
D3D10_SB_RESOURCE_DIMENSION_BUFFER) |
D3D11_SB_RASTERIZER_ORDERED_ACCESS |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
shader_object_.push_back(EncodeVectorSwizzledOperand(
D3D11_SB_OPERAND_TYPE_UNORDERED_ACCESS_VIEW, kSwizzleXYZW, 3));
shader_object_.push_back(ROV_GetEDRAMUAVIndex());
shader_object_.push_back(uav_index_edram_);
shader_object_.push_back(uint32_t(UAVRegister::kEDRAM));
shader_object_.push_back(uint32_t(UAVRegister::kEDRAM));
shader_object_.push_back(
@ -3334,6 +3542,9 @@ void DxbcShaderTranslator::WriteShaderCode() {
ENCODE_D3D10_SB_RESOURCE_RETURN_TYPE(D3D10_SB_RETURN_TYPE_UINT, 2) |
ENCODE_D3D10_SB_RESOURCE_RETURN_TYPE(D3D10_SB_RETURN_TYPE_UINT, 3));
shader_object_.push_back(0);
} else {
assert_unhandled_case(i);
}
}
// Inputs and outputs.

View File

@ -101,8 +101,8 @@ namespace gpu {
// 0 for NaN.
class DxbcShaderTranslator : public ShaderTranslator {
public:
DxbcShaderTranslator(uint32_t vendor_id, bool edram_rov_used,
bool force_emit_source_map = false);
DxbcShaderTranslator(uint32_t vendor_id, bool bindless_resources_used,
bool edram_rov_used, bool force_emit_source_map = false);
~DxbcShaderTranslator() override;
// Constant buffer bindings in space 0.
@ -111,6 +111,7 @@ class DxbcShaderTranslator : public ShaderTranslator {
kFloatConstants,
kBoolLoopConstants,
kFetchConstants,
kDescriptorIndices,
};
// Some are referenced in xenos_draw.hlsli - check it too when updating!
@ -331,30 +332,39 @@ class DxbcShaderTranslator : public ShaderTranslator {
enum class SRVSpace {
// SRVMainSpaceRegister t# layout.
kMain,
kBindlessTextures2DArray,
kBindlessTextures3D,
kBindlessTexturesCube,
};
// Shader resource view bindings in SRVSpace::kMain.
enum class SRVMainRegister {
kSharedMemory,
kBoundTexturesStart,
kBindfulTexturesStart,
};
// 192 textures at most because there are 32 fetch constants, and textures can
// be 2D array, 3D or cube, and also signed and unsigned.
static constexpr uint32_t kMaxTextureSRVIndexBits = 8;
static constexpr uint32_t kMaxTextureSRVs =
(1 << kMaxTextureSRVIndexBits) - 1;
struct TextureSRV {
static constexpr uint32_t kMaxTextureBindingIndexBits = 8;
static constexpr uint32_t kMaxTextureBindings =
(1 << kMaxTextureBindingIndexBits) - 1;
struct TextureBinding {
uint32_t bindful_srv_index;
// Temporary for WriteResourceDefinitions.
uint32_t bindful_srv_rdef_name_offset;
uint32_t bindless_descriptor_index;
uint32_t fetch_constant;
// Stacked and 3D are separate TextureBindings, even for bindless for null
// descriptor handling simplicity.
TextureDimension dimension;
bool is_signed;
std::string name;
};
// The first binding returned is at t[SRVMainRegister::kBoundTexturesStart]
// The first binding returned is at t[SRVMainRegister::kBindfulTexturesStart]
// of space SRVSpace::kMain.
const TextureSRV* GetTextureSRVs(uint32_t& count_out) const {
count_out = uint32_t(texture_srvs_.size());
return texture_srvs_.data();
const TextureBinding* GetTextureBindings(uint32_t& count_out) const {
count_out = uint32_t(texture_bindings_.size());
return texture_bindings_.data();
}
// Arbitrary limit - there can't be more than 2048 in a shader-visible
@ -369,6 +379,7 @@ class DxbcShaderTranslator : public ShaderTranslator {
static constexpr uint32_t kMaxSamplerBindings =
(1 << kMaxSamplerBindingIndexBits) - 1;
struct SamplerBinding {
uint32_t bindless_descriptor_index;
uint32_t fetch_constant;
TextureFilter mag_filter;
TextureFilter min_filter;
@ -381,6 +392,12 @@ class DxbcShaderTranslator : public ShaderTranslator {
return sampler_bindings_.data();
}
// Returns the number of texture SRV and sampler offsets that need to be
// passed via a constant buffer to the shader.
uint32_t GetBindlessResourceCount() const {
return uint32_t(texture_bindings_.size() + sampler_bindings_.size());
}
// Unordered access view bindings in space 0.
enum class UAVRegister {
kSharedMemory,
@ -2144,11 +2161,6 @@ class DxbcShaderTranslator : public ShaderTranslator {
uint32_t piece_temp_component, uint32_t accumulator_temp,
uint32_t accumulator_temp_component);
inline uint32_t ROV_GetEDRAMUAVIndex() const {
// xe_edram is U1 when there's xe_shared_memory_uav which is U0, but when
// there's no xe_shared_memory_uav, it's U0.
return is_depth_only_pixel_shader_ ? 0 : 1;
}
// Whether it's possible and worth skipping running the translated shader for
// 2x2 quads.
bool ROV_IsDepthStencilEarly() const {
@ -2328,9 +2340,9 @@ class DxbcShaderTranslator : public ShaderTranslator {
void CloseInstructionPredication();
void JumpToLabel(uint32_t address);
DxbcSrc FindOrAddTextureSRV(uint32_t fetch_constant,
uint32_t FindOrAddTextureBinding(uint32_t fetch_constant,
TextureDimension dimension, bool is_signed);
DxbcSrc FindOrAddSamplerBinding(uint32_t fetch_constant,
uint32_t FindOrAddSamplerBinding(uint32_t fetch_constant,
TextureFilter mag_filter,
TextureFilter min_filter,
TextureFilter mip_filter,
@ -2340,7 +2352,7 @@ class DxbcShaderTranslator : public ShaderTranslator {
// constant.
DxbcSrc RequestTextureFetchConstantWordPair(uint32_t fetch_constant_index,
uint32_t pair_index) {
if (cbuffer_index_fetch_constants_ == kCbufferIndexUnallocated) {
if (cbuffer_index_fetch_constants_ == kBindingIndexUnallocated) {
cbuffer_index_fetch_constants_ = cbuffer_count_++;
}
uint32_t total_pair_index = fetch_constant_index * 3 + pair_index;
@ -2392,6 +2404,9 @@ class DxbcShaderTranslator : public ShaderTranslator {
// Vendor ID of the GPU manufacturer, for toggling unsupported features.
uint32_t vendor_id_;
// Whether textures and samplers should be bindless.
bool bindless_resources_used_;
// Whether the output merger should be emulated in pixel shaders.
bool edram_rov_used_;
@ -2422,6 +2437,8 @@ class DxbcShaderTranslator : public ShaderTranslator {
kUint4Array8,
// Fetch constants.
kUint4Array48,
// Descriptor indices - size written dynamically.
kUint4DescriptorIndexArray,
kCount,
kUnknown = kCount
@ -2448,14 +2465,16 @@ class DxbcShaderTranslator : public ShaderTranslator {
};
static const RdefType rdef_types_[size_t(RdefTypeIndex::kCount)];
static constexpr uint32_t kBindingIndexUnallocated = UINT32_MAX;
// Number of constant buffer bindings used in this shader - also used for
// generation of indices of constant buffers that are optional.
uint32_t cbuffer_count_;
static constexpr uint32_t kCbufferIndexUnallocated = UINT32_MAX;
uint32_t cbuffer_index_system_constants_;
uint32_t cbuffer_index_float_constants_;
uint32_t cbuffer_index_bool_loop_constants_;
uint32_t cbuffer_index_fetch_constants_;
uint32_t cbuffer_index_descriptor_indices_;
struct SystemConstantRdef {
const char* name;
@ -2582,7 +2601,24 @@ class DxbcShaderTranslator : public ShaderTranslator {
// predicate condition anymore.
bool cf_exec_predicate_written_;
std::vector<TextureSRV> texture_srvs_;
// Number of SRV resources used in this shader - also used for generation of
// indices of SRV resources that are optional.
uint32_t srv_count_;
uint32_t srv_index_shared_memory_;
uint32_t srv_index_bindless_textures_2d_;
uint32_t srv_index_bindless_textures_3d_;
uint32_t srv_index_bindless_textures_cube_;
std::vector<TextureBinding> texture_bindings_;
std::unordered_map<uint32_t, uint32_t>
texture_bindings_for_bindful_srv_indices_;
// Number of UAV resources used in this shader - also used for generation of
// indices of UAV resources that are optional.
uint32_t uav_count_;
uint32_t uav_index_shared_memory_;
uint32_t uav_index_edram_;
std::vector<SamplerBinding> sampler_bindings_;
// Number of `alloc export`s encountered so far in the translation. The index

View File

@ -44,7 +44,7 @@ void DxbcShaderTranslator::ProcessVertexFetchInstruction(
// Create a 2-component DxbcSrc for the fetch constant (vf0 is in [0].xy of
// the fetch constants array, vf1 is in [0].zw, vf2 is in [1].xy).
if (cbuffer_index_fetch_constants_ == kCbufferIndexUnallocated) {
if (cbuffer_index_fetch_constants_ == kBindingIndexUnallocated) {
cbuffer_index_fetch_constants_ = cbuffer_count_++;
}
DxbcSrc fetch_constant_src(DxbcSrc::CB(
@ -135,13 +135,21 @@ void DxbcShaderTranslator::ProcessVertexFetchInstruction(
.Select(kSysConst_Flags_Comp),
DxbcSrc::LU(kSysFlag_SharedMemoryIsUAV));
DxbcOpIf(false, DxbcSrc::R(system_temp_result_, DxbcSrc::kXXXX));
if (srv_index_shared_memory_ == kBindingIndexUnallocated) {
srv_index_shared_memory_ = srv_count_++;
}
if (uav_index_shared_memory_ == kBindingIndexUnallocated) {
uav_index_shared_memory_ = uav_count_++;
}
for (uint32_t i = 0; i < 2; ++i) {
if (i) {
DxbcOpElse();
}
DxbcSrc shared_memory_src(
i ? DxbcSrc::U(0, uint32_t(UAVRegister::kSharedMemory))
: DxbcSrc::T(0, uint32_t(SRVMainRegister::kSharedMemory)));
i ? DxbcSrc::U(uav_index_shared_memory_,
uint32_t(UAVRegister::kSharedMemory))
: DxbcSrc::T(srv_index_shared_memory_,
uint32_t(SRVMainRegister::kSharedMemory)));
uint32_t needed_words_remaining = needed_words;
uint32_t word_index_previous = first_word_index;
while (needed_words_remaining) {
@ -438,7 +446,7 @@ void DxbcShaderTranslator::ProcessVertexFetchInstruction(
StoreResult(instr.result, DxbcSrc::R(system_temp_result_));
}
DxbcShaderTranslator::DxbcSrc DxbcShaderTranslator::FindOrAddTextureSRV(
uint32_t DxbcShaderTranslator::FindOrAddTextureBinding(
uint32_t fetch_constant, TextureDimension dimension, bool is_signed) {
// 1D and 2D textures (including stacked ones) are treated as 2D arrays for
// binding and coordinate simplicity.
@ -446,24 +454,34 @@ DxbcShaderTranslator::DxbcSrc DxbcShaderTranslator::FindOrAddTextureSRV(
dimension = TextureDimension::k2D;
}
uint32_t srv_index = UINT32_MAX;
for (uint32_t i = 0; i < uint32_t(texture_srvs_.size()); ++i) {
TextureSRV& texture_srv = texture_srvs_[i];
if (texture_srv.fetch_constant == fetch_constant &&
texture_srv.dimension == dimension &&
texture_srv.is_signed == is_signed) {
srv_index = i;
break;
for (uint32_t i = 0; i < uint32_t(texture_bindings_.size()); ++i) {
TextureBinding& texture_binding = texture_bindings_[i];
if (texture_binding.fetch_constant == fetch_constant &&
texture_binding.dimension == dimension &&
texture_binding.is_signed == is_signed) {
return i;
}
}
if (srv_index == UINT32_MAX) {
if (texture_srvs_.size() >= kMaxTextureSRVs) {
if (texture_bindings_.size() >= kMaxTextureBindings) {
assert_always();
srv_index = kMaxTextureSRVs - 1;
return kMaxTextureBindings - 1;
}
uint32_t texture_binding_index = uint32_t(texture_bindings_.size());
TextureBinding new_texture_binding;
if (!bindless_resources_used_) {
new_texture_binding.bindful_srv_index = srv_count_++;
texture_bindings_for_bindful_srv_indices_.insert(
{new_texture_binding.bindful_srv_index, texture_binding_index});
} else {
TextureSRV new_texture_srv;
new_texture_srv.fetch_constant = fetch_constant;
new_texture_srv.dimension = dimension;
new_texture_srv.is_signed = is_signed;
new_texture_binding.bindful_srv_index = kBindingIndexUnallocated;
}
new_texture_binding.bindful_srv_rdef_name_offset = 0;
// Consistently 0 if not bindless as it may be used for hashing.
new_texture_binding.bindless_descriptor_index =
bindless_resources_used_ ? GetBindlessResourceCount() : 0;
new_texture_binding.fetch_constant = fetch_constant;
new_texture_binding.dimension = dimension;
new_texture_binding.is_signed = is_signed;
const char* dimension_name;
switch (dimension) {
case TextureDimension::k3D:
@ -475,18 +493,13 @@ DxbcShaderTranslator::DxbcSrc DxbcShaderTranslator::FindOrAddTextureSRV(
default:
dimension_name = "2d";
}
new_texture_srv.name = fmt::format("xe_texture{}_{}_{}", fetch_constant,
new_texture_binding.name = fmt::format("xe_texture{}_{}_{}", fetch_constant,
dimension_name, is_signed ? 's' : 'u');
srv_index = uint32_t(texture_srvs_.size());
texture_srvs_.emplace_back(std::move(new_texture_srv));
}
}
// T0 is shared memory.
return DxbcSrc::T(1 + srv_index,
uint32_t(SRVMainRegister::kBoundTexturesStart) + srv_index);
texture_bindings_.emplace_back(std::move(new_texture_binding));
return texture_binding_index;
}
DxbcShaderTranslator::DxbcSrc DxbcShaderTranslator::FindOrAddSamplerBinding(
uint32_t DxbcShaderTranslator::FindOrAddSamplerBinding(
uint32_t fetch_constant, TextureFilter mag_filter, TextureFilter min_filter,
TextureFilter mip_filter, AnisoFilter aniso_filter) {
// In Direct3D 12, anisotropic filtering implies linear filtering.
@ -505,15 +518,13 @@ DxbcShaderTranslator::DxbcSrc DxbcShaderTranslator::FindOrAddSamplerBinding(
sampler_binding.min_filter == min_filter &&
sampler_binding.mip_filter == mip_filter &&
sampler_binding.aniso_filter == aniso_filter) {
sampler_index = i;
break;
return i;
}
}
if (sampler_index == UINT32_MAX) {
if (sampler_bindings_.size() >= kMaxSamplerBindings) {
assert_always();
sampler_index = kMaxSamplerBindings - 1;
} else {
return kMaxSamplerBindings - 1;
}
std::ostringstream name;
name << "xe_sampler" << fetch_constant;
if (aniso_filter != AnisoFilter::kUseFetchConst) {
@ -531,17 +542,18 @@ DxbcShaderTranslator::DxbcSrc DxbcShaderTranslator::FindOrAddSamplerBinding(
<< kFilterSuffixes[uint32_t(mip_filter)];
}
SamplerBinding new_sampler_binding;
// Consistently 0 if not bindless as it may be used for hashing.
new_sampler_binding.bindless_descriptor_index =
bindless_resources_used_ ? GetBindlessResourceCount() : 0;
new_sampler_binding.fetch_constant = fetch_constant;
new_sampler_binding.mag_filter = mag_filter;
new_sampler_binding.min_filter = min_filter;
new_sampler_binding.mip_filter = mip_filter;
new_sampler_binding.aniso_filter = aniso_filter;
new_sampler_binding.name = name.str();
sampler_index = uint32_t(sampler_bindings_.size());
uint32_t sampler_binding_index = uint32_t(sampler_bindings_.size());
sampler_bindings_.emplace_back(std::move(new_sampler_binding));
}
}
return DxbcSrc::S(sampler_index, sampler_index);
return sampler_binding_index;
}
void DxbcShaderTranslator::ProcessTextureFetchInstruction(
@ -893,7 +905,6 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
LoadOperand(instr.operands[0], used_result_nonzero_components,
coord_operand_temp_pushed);
DxbcSrc coord_src(coord_operand);
uint32_t coord_temp = UINT32_MAX;
uint32_t offsets_needed = offsets_not_zero & used_result_nonzero_components;
if (!instr.attributes.unnormalized_coordinates || offsets_needed) {
// Using system_temp_result_ as a temporary for coordinate denormalization
@ -948,7 +959,9 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
// - 1D, 2D array - need to be padded to 2D array coordinates.
// - 3D - Z needs to be unnormalized for stacked and normalized for 3D.
// - Cube - coordinates need to be transformed into the cube space.
uint32_t coord_temp = PushSystemTemp();
// Bindless sampler index will be loaded to W after loading the coordinates
// (so W can be used as a temporary for coordinate loading).
uint32_t coord_and_sampler_temp = PushSystemTemp();
// Need normalized coordinates (except for Z - keep it as is, will be
// converted later according to whether the texture is 3D). For cube maps,
@ -978,50 +991,53 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
normalized_components);
if (offsets_not_zero & normalized_components) {
// FIXME(Triang3l): Offsets need to be applied at the LOD being fetched.
DxbcOpAdd(DxbcDest::R(coord_temp, normalized_components), coord_operand,
DxbcSrc::LP(offsets));
DxbcOpAdd(DxbcDest::R(coord_and_sampler_temp, normalized_components),
coord_operand, DxbcSrc::LP(offsets));
assert_not_zero(normalized_components & 0b011);
DxbcOpDiv(DxbcDest::R(coord_temp, normalized_components & 0b011),
DxbcSrc::R(coord_temp), DxbcSrc::R(size_and_is_3d_temp));
DxbcOpDiv(
DxbcDest::R(coord_and_sampler_temp, normalized_components & 0b011),
DxbcSrc::R(coord_and_sampler_temp),
DxbcSrc::R(size_and_is_3d_temp));
if (instr.dimension == TextureDimension::k3D) {
// Normalize if 3D.
assert_true((size_needed_components & 0b1100) == 0b1100);
DxbcOpIf(true, DxbcSrc::R(size_and_is_3d_temp, DxbcSrc::kWWWW));
DxbcOpDiv(DxbcDest::R(coord_temp, 0b0100),
DxbcSrc::R(coord_temp, DxbcSrc::kZZZZ),
DxbcOpDiv(DxbcDest::R(coord_and_sampler_temp, 0b0100),
DxbcSrc::R(coord_and_sampler_temp, DxbcSrc::kZZZZ),
DxbcSrc::R(size_and_is_3d_temp, DxbcSrc::kZZZZ));
DxbcOpEndIf();
}
} else {
DxbcOpDiv(DxbcDest::R(coord_temp, normalized_components), coord_operand,
DxbcSrc::R(size_and_is_3d_temp));
DxbcOpDiv(DxbcDest::R(coord_and_sampler_temp, normalized_components),
coord_operand, DxbcSrc::R(size_and_is_3d_temp));
if (instr.dimension == TextureDimension::k3D) {
// Don't normalize if stacked.
assert_true((size_needed_components & 0b1000) == 0b1000);
DxbcOpMovC(DxbcDest::R(coord_temp, 0b0100),
DxbcOpMovC(DxbcDest::R(coord_and_sampler_temp, 0b0100),
DxbcSrc::R(size_and_is_3d_temp, DxbcSrc::kWWWW),
DxbcSrc::R(coord_temp, DxbcSrc::kZZZZ),
DxbcSrc::R(coord_and_sampler_temp, DxbcSrc::kZZZZ),
coord_operand.SelectFromSwizzled(2));
}
}
} else {
// Normalized coordinates - apply offsets to XY or copy them to
// coord_temp, and if stacked, denormalize Z.
// coord_and_sampler_temp, and if stacked, denormalize Z.
uint32_t coords_with_offset = offsets_not_zero & normalized_components;
if (coords_with_offset) {
// FIXME(Triang3l): Offsets need to be applied at the LOD being fetched.
assert_true((size_needed_components & coords_with_offset) ==
coords_with_offset);
DxbcOpDiv(DxbcDest::R(coord_temp, coords_with_offset),
DxbcOpDiv(DxbcDest::R(coord_and_sampler_temp, coords_with_offset),
DxbcSrc::LP(offsets), DxbcSrc::R(size_and_is_3d_temp));
DxbcOpAdd(DxbcDest::R(coord_temp, coords_with_offset), coord_operand,
DxbcSrc::R(coord_temp));
DxbcOpAdd(DxbcDest::R(coord_and_sampler_temp, coords_with_offset),
coord_operand, DxbcSrc::R(coord_and_sampler_temp));
}
uint32_t coords_without_offset =
~coords_with_offset & normalized_components;
// 3D/stacked without offset is handled separately.
if (coords_without_offset & 0b011) {
DxbcOpMov(DxbcDest::R(coord_temp, coords_without_offset & 0b011),
DxbcOpMov(
DxbcDest::R(coord_and_sampler_temp, coords_without_offset & 0b011),
coord_operand);
}
if (instr.dimension == TextureDimension::k3D) {
@ -1030,73 +1046,79 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
// Denormalize and offset Z (re-apply the offset not to lose precision
// as a result of division) if stacked.
DxbcOpIf(false, DxbcSrc::R(size_and_is_3d_temp, DxbcSrc::kWWWW));
DxbcOpMAd(DxbcDest::R(coord_temp, 0b0100),
DxbcOpMAd(DxbcDest::R(coord_and_sampler_temp, 0b0100),
coord_operand.SelectFromSwizzled(2),
DxbcSrc::R(size_and_is_3d_temp, DxbcSrc::kZZZZ),
DxbcSrc::LF(offsets[2]));
DxbcOpEndIf();
} else {
// Denormalize Z if stacked, and revert to normalized if 3D.
DxbcOpMul(DxbcDest::R(coord_temp, 0b0100),
DxbcOpMul(DxbcDest::R(coord_and_sampler_temp, 0b0100),
coord_operand.SelectFromSwizzled(2),
DxbcSrc::R(size_and_is_3d_temp, DxbcSrc::kZZZZ));
DxbcOpMovC(DxbcDest::R(coord_temp, 0b0100),
DxbcOpMovC(DxbcDest::R(coord_and_sampler_temp, 0b0100),
DxbcSrc::R(size_and_is_3d_temp, DxbcSrc::kWWWW),
coord_operand.SelectFromSwizzled(2),
DxbcSrc::R(coord_temp, DxbcSrc::kZZZZ));
DxbcSrc::R(coord_and_sampler_temp, DxbcSrc::kZZZZ));
}
}
}
switch (instr.dimension) {
case TextureDimension::k1D:
// Pad to 2D array coordinates.
DxbcOpMov(DxbcDest::R(coord_temp, 0b0110), DxbcSrc::LF(0.0f));
DxbcOpMov(DxbcDest::R(coord_and_sampler_temp, 0b0110),
DxbcSrc::LF(0.0f));
break;
case TextureDimension::k2D:
// Pad to 2D array coordinates.
DxbcOpMov(DxbcDest::R(coord_temp, 0b0100), DxbcSrc::LF(0.0f));
DxbcOpMov(DxbcDest::R(coord_and_sampler_temp, 0b0100),
DxbcSrc::LF(0.0f));
break;
case TextureDimension::kCube: {
// Transform from the major axis SC/TC plus 1 into cube coordinates.
// Move SC/TC from 1...2 to -1...1.
DxbcOpMAd(DxbcDest::R(coord_temp, 0b0011), DxbcSrc::R(coord_temp),
DxbcSrc::LF(2.0f), DxbcSrc::LF(-3.0f));
DxbcOpMAd(DxbcDest::R(coord_and_sampler_temp, 0b0011),
DxbcSrc::R(coord_and_sampler_temp), DxbcSrc::LF(2.0f),
DxbcSrc::LF(-3.0f));
// Get the face index (floored, within 0...5) as an integer to
// coord_temp.z.
// coord_and_sampler_temp.z.
if (offsets[2]) {
DxbcOpAdd(DxbcDest::R(coord_temp, 0b0100),
DxbcOpAdd(DxbcDest::R(coord_and_sampler_temp, 0b0100),
coord_operand.SelectFromSwizzled(2),
DxbcSrc::LF(offsets[2]));
DxbcOpFToU(DxbcDest::R(coord_temp, 0b0100),
DxbcSrc::R(coord_temp, DxbcSrc::kZZZZ));
DxbcOpFToU(DxbcDest::R(coord_and_sampler_temp, 0b0100),
DxbcSrc::R(coord_and_sampler_temp, DxbcSrc::kZZZZ));
} else {
DxbcOpFToU(DxbcDest::R(coord_temp, 0b0100),
DxbcOpFToU(DxbcDest::R(coord_and_sampler_temp, 0b0100),
coord_operand.SelectFromSwizzled(2));
}
DxbcOpUMin(DxbcDest::R(coord_temp, 0b0100),
DxbcSrc::R(coord_temp, DxbcSrc::kZZZZ), DxbcSrc::LU(5));
DxbcOpUMin(DxbcDest::R(coord_and_sampler_temp, 0b0100),
DxbcSrc::R(coord_and_sampler_temp, DxbcSrc::kZZZZ),
DxbcSrc::LU(5));
// Split the face index into axis and sign (0 - positive, 1 - negative)
// to coord_temp.zw (sign in W so it won't be overwritten).
DxbcOpUBFE(DxbcDest::R(coord_temp, 0b1100), DxbcSrc::LU(0, 0, 2, 1),
DxbcSrc::LU(0, 0, 1, 0),
DxbcSrc::R(coord_temp, DxbcSrc::kZZZZ));
// to coord_and_sampler_temp.zw (sign in W so it won't be overwritten).
// Fine to overwrite W at this point, the sampler index hasn't been
// loaded yet.
DxbcOpUBFE(DxbcDest::R(coord_and_sampler_temp, 0b1100),
DxbcSrc::LU(0, 0, 2, 1), DxbcSrc::LU(0, 0, 1, 0),
DxbcSrc::R(coord_and_sampler_temp, DxbcSrc::kZZZZ));
// Remap the axes in a way opposite to the ALU cube instruction.
DxbcOpSwitch(DxbcSrc::R(coord_temp, DxbcSrc::kZZZZ));
DxbcOpSwitch(DxbcSrc::R(coord_and_sampler_temp, DxbcSrc::kZZZZ));
DxbcOpCase(DxbcSrc::LU(0));
{
// X is the major axis.
// Y = -TC (TC overwritten).
DxbcOpMov(DxbcDest::R(coord_temp, 0b0010),
-DxbcSrc::R(coord_temp, DxbcSrc::kYYYY));
DxbcOpMov(DxbcDest::R(coord_and_sampler_temp, 0b0010),
-DxbcSrc::R(coord_and_sampler_temp, DxbcSrc::kYYYY));
// Z = neg ? SC : -SC.
DxbcOpMovC(DxbcDest::R(coord_temp, 0b0100),
DxbcSrc::R(coord_temp, DxbcSrc::kWWWW),
DxbcSrc::R(coord_temp, DxbcSrc::kXXXX),
-DxbcSrc::R(coord_temp, DxbcSrc::kXXXX));
DxbcOpMovC(DxbcDest::R(coord_and_sampler_temp, 0b0100),
DxbcSrc::R(coord_and_sampler_temp, DxbcSrc::kWWWW),
DxbcSrc::R(coord_and_sampler_temp, DxbcSrc::kXXXX),
-DxbcSrc::R(coord_and_sampler_temp, DxbcSrc::kXXXX));
// X = neg ? -1 : 1 (SC overwritten).
DxbcOpMovC(DxbcDest::R(coord_temp, 0b0001),
DxbcSrc::R(coord_temp, DxbcSrc::kWWWW), DxbcSrc::LF(-1.0f),
DxbcSrc::LF(1.0f));
DxbcOpMovC(DxbcDest::R(coord_and_sampler_temp, 0b0001),
DxbcSrc::R(coord_and_sampler_temp, DxbcSrc::kWWWW),
DxbcSrc::LF(-1.0f), DxbcSrc::LF(1.0f));
}
DxbcOpBreak();
DxbcOpCase(DxbcSrc::LU(1));
@ -1104,31 +1126,31 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
// Y is the major axis.
// X = SC (already there).
// Z = neg ? -TC : TC.
DxbcOpMovC(DxbcDest::R(coord_temp, 0b0100),
DxbcSrc::R(coord_temp, DxbcSrc::kWWWW),
-DxbcSrc::R(coord_temp, DxbcSrc::kYYYY),
DxbcSrc::R(coord_temp, DxbcSrc::kYYYY));
DxbcOpMovC(DxbcDest::R(coord_and_sampler_temp, 0b0100),
DxbcSrc::R(coord_and_sampler_temp, DxbcSrc::kWWWW),
-DxbcSrc::R(coord_and_sampler_temp, DxbcSrc::kYYYY),
DxbcSrc::R(coord_and_sampler_temp, DxbcSrc::kYYYY));
// Y = neg ? -1 : 1 (TC overwritten).
DxbcOpMovC(DxbcDest::R(coord_temp, 0b0010),
DxbcSrc::R(coord_temp, DxbcSrc::kWWWW), DxbcSrc::LF(-1.0f),
DxbcSrc::LF(1.0f));
DxbcOpMovC(DxbcDest::R(coord_and_sampler_temp, 0b0010),
DxbcSrc::R(coord_and_sampler_temp, DxbcSrc::kWWWW),
DxbcSrc::LF(-1.0f), DxbcSrc::LF(1.0f));
}
DxbcOpBreak();
DxbcOpDefault();
{
// Z is the major axis.
// X = neg ? -SC : SC (SC overwritten).
DxbcOpMovC(DxbcDest::R(coord_temp, 0b0001),
DxbcSrc::R(coord_temp, DxbcSrc::kWWWW),
-DxbcSrc::R(coord_temp, DxbcSrc::kXXXX),
DxbcSrc::R(coord_temp, DxbcSrc::kXXXX));
DxbcOpMovC(DxbcDest::R(coord_and_sampler_temp, 0b0001),
DxbcSrc::R(coord_and_sampler_temp, DxbcSrc::kWWWW),
-DxbcSrc::R(coord_and_sampler_temp, DxbcSrc::kXXXX),
DxbcSrc::R(coord_and_sampler_temp, DxbcSrc::kXXXX));
// Y = -TC (TC overwritten).
DxbcOpMov(DxbcDest::R(coord_temp, 0b0010),
-DxbcSrc::R(coord_temp, DxbcSrc::kYYYY));
DxbcOpMov(DxbcDest::R(coord_and_sampler_temp, 0b0010),
-DxbcSrc::R(coord_and_sampler_temp, DxbcSrc::kYYYY));
// Z = neg ? -1 : 1.
DxbcOpMovC(DxbcDest::R(coord_temp, 0b0100),
DxbcSrc::R(coord_temp, DxbcSrc::kWWWW), DxbcSrc::LF(-1.0f),
DxbcSrc::LF(1.0f));
DxbcOpMovC(DxbcDest::R(coord_and_sampler_temp, 0b0100),
DxbcSrc::R(coord_and_sampler_temp, DxbcSrc::kWWWW),
DxbcSrc::LF(-1.0f), DxbcSrc::LF(1.0f));
}
DxbcOpBreak();
DxbcOpEndSwitch();
@ -1145,10 +1167,26 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
// since the return value can be used with bias later, forcing linear mip
// filtering (the XNA assembler also doesn't accept MipFilter overrides
// for getCompTexLOD).
DxbcSrc sampler(FindOrAddSamplerBinding(
uint32_t sampler_binding_index = FindOrAddSamplerBinding(
tfetch_index, instr.attributes.mag_filter,
instr.attributes.min_filter, TextureFilter::kLinear,
instr.attributes.aniso_filter));
instr.attributes.aniso_filter);
DxbcSrc sampler(DxbcSrc::S(sampler_binding_index, sampler_binding_index));
if (bindless_resources_used_) {
// Load the sampler index to coord_and_sampler_temp.w and use relative
// sampler indexing.
if (cbuffer_index_descriptor_indices_ == kBindingIndexUnallocated) {
cbuffer_index_descriptor_indices_ = cbuffer_count_++;
}
uint32_t sampler_bindless_descriptor_index =
sampler_bindings_[sampler_binding_index].bindless_descriptor_index;
DxbcOpMov(DxbcDest::R(coord_and_sampler_temp, 0b1000),
DxbcSrc::CB(cbuffer_index_descriptor_indices_,
uint32_t(CbufferRegister::kDescriptorIndices),
sampler_bindless_descriptor_index >> 2)
.Select(sampler_bindless_descriptor_index & 3));
sampler = DxbcSrc::S(0, DxbcIndex(coord_and_sampler_temp, 3));
}
// Check which SRV needs to be accessed - signed or unsigned. If there is
// at least one non-signed component, will be using the unsigned one.
uint32_t is_unsigned_temp = PushSystemTemp();
@ -1158,9 +1196,89 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
DxbcOpINE(DxbcDest::R(is_unsigned_temp, 0b0001),
DxbcSrc::R(is_unsigned_temp, DxbcSrc::kXXXX),
DxbcSrc::LU(uint32_t(TextureSign::kSigned) * 0b01010101));
if (bindless_resources_used_) {
// Bindless path - select the SRV index between unsigned and signed to
// query.
if (instr.dimension == TextureDimension::k3D) {
// Check if 3D.
assert_true((size_needed_components & 0b1000) == 0b1000);
DxbcOpIf(true, DxbcSrc::R(size_and_is_3d_temp, DxbcSrc::kWWWW));
}
for (uint32_t is_stacked = 0;
is_stacked < (instr.dimension == TextureDimension::k3D ? 2u : 1u);
++is_stacked) {
TextureDimension srv_dimension = instr.dimension;
if (is_stacked) {
srv_dimension = TextureDimension::k2D;
DxbcOpElse();
}
uint32_t texture_binding_index_unsigned =
FindOrAddTextureBinding(tfetch_index, srv_dimension, false);
uint32_t texture_binding_index_signed =
FindOrAddTextureBinding(tfetch_index, srv_dimension, true);
uint32_t texture_bindless_descriptor_index_unsigned =
texture_bindings_[texture_binding_index_unsigned]
.bindless_descriptor_index;
uint32_t texture_bindless_descriptor_index_signed =
texture_bindings_[texture_binding_index_signed]
.bindless_descriptor_index;
if (cbuffer_index_descriptor_indices_ == kBindingIndexUnallocated) {
cbuffer_index_descriptor_indices_ = cbuffer_count_++;
}
DxbcOpMovC(
DxbcDest::R(is_unsigned_temp, 0b0001),
DxbcSrc::R(is_unsigned_temp, DxbcSrc::kXXXX),
DxbcSrc::CB(cbuffer_index_descriptor_indices_,
uint32_t(CbufferRegister::kDescriptorIndices),
texture_bindless_descriptor_index_unsigned >> 2)
.Select(texture_bindless_descriptor_index_unsigned & 3),
DxbcSrc::CB(cbuffer_index_descriptor_indices_,
uint32_t(CbufferRegister::kDescriptorIndices),
texture_bindless_descriptor_index_signed >> 2)
.Select(texture_bindless_descriptor_index_signed & 3));
// Always 3 coordinate components (1D and 2D are padded to 2D
// arrays, 3D and cube have 3 coordinate dimensions). Not caring
// about normalization of the array layer because it doesn't
// participate in LOD calculation in Direct3D 12.
// The `lod` instruction returns the unclamped LOD (probably need
// unclamped so it can be biased back into the range later) in the Y
// component, and the resource swizzle is the return value swizzle.
// FIXME(Triang3l): Gradient exponent adjustment from the fetch
// constant needs to be applied here, would require math involving
// SV_Position parity, replacing coordinates for one pixel with 0
// and for another with the adjusted gradient, but possibly not used
// by any games.
assert_true(used_result_nonzero_components == 0b0001);
uint32_t* bindless_srv_index = nullptr;
switch (srv_dimension) {
case TextureDimension::k1D:
case TextureDimension::k2D:
bindless_srv_index = &srv_index_bindless_textures_2d_;
break;
case TextureDimension::k3D:
bindless_srv_index = &srv_index_bindless_textures_3d_;
break;
case TextureDimension::kCube:
bindless_srv_index = &srv_index_bindless_textures_cube_;
break;
}
assert_not_null(bindless_srv_index);
if (*bindless_srv_index == kBindingIndexUnallocated) {
*bindless_srv_index = srv_count_++;
}
DxbcOpLOD(DxbcDest::R(system_temp_result_, 0b0001),
DxbcSrc::R(coord_and_sampler_temp), 3,
DxbcSrc::T(*bindless_srv_index,
DxbcIndex(is_unsigned_temp, 0), DxbcSrc::kYYYY),
sampler);
}
if (instr.dimension == TextureDimension::k3D) {
// Close the 3D/stacked check.
DxbcOpEndIf();
}
} else {
// Bindful path - conditionally query one of the SRVs.
DxbcOpIf(true, DxbcSrc::R(is_unsigned_temp, DxbcSrc::kXXXX));
// Release is_unsigned_temp.
PopSystemTemp();
for (uint32_t is_signed = 0; is_signed < 2; ++is_signed) {
if (is_signed) {
DxbcOpElse();
@ -1171,30 +1289,25 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
DxbcOpIf(true, DxbcSrc::R(size_and_is_3d_temp, DxbcSrc::kWWWW));
}
for (uint32_t is_stacked = 0;
is_stacked < (instr.dimension == TextureDimension::k3D ? 2u : 1u);
is_stacked <
(instr.dimension == TextureDimension::k3D ? 2u : 1u);
++is_stacked) {
if (is_stacked) {
DxbcOpElse();
}
// Always 3 coordinate components (1D and 2D are padded to 2D arrays,
// 3D and cube have 3 coordinate dimensions). Not caring about
// normalization of the array layer because it doesn't participate in
// LOD calculation in Direct3D 12.
// The `lod` instruction returns the unclamped LOD (probably need
// unclamped so it can be biased back into the range later) in the Y
// component, and the resource swizzle is the return value swizzle.
// FIXME(Triang3l): Gradient exponent adjustment from the fetch
// constant needs to be applied here, would require SV_Position.xy & 1
// math, replacing coordinates for one pixel with 0 and for another
// with the adjusted gradient, but possibly not used by any games.
assert_true(used_result_nonzero_components == 0b0001);
DxbcOpLOD(DxbcDest::R(system_temp_result_, 0b0001),
DxbcSrc::R(coord_temp), 3,
FindOrAddTextureSRV(
uint32_t texture_binding_index = FindOrAddTextureBinding(
tfetch_index,
is_stacked ? TextureDimension::k2D : instr.dimension,
is_signed != 0)
.Select(1),
is_signed != 0);
DxbcOpLOD(
DxbcDest::R(system_temp_result_, 0b0001),
DxbcSrc::R(coord_and_sampler_temp), 3,
DxbcSrc::T(
texture_bindings_[texture_binding_index].bindful_srv_index,
uint32_t(SRVMainRegister::kBindfulTexturesStart) +
texture_binding_index,
DxbcSrc::kYYYY),
sampler);
}
if (instr.dimension == TextureDimension::k3D) {
@ -1204,6 +1317,9 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
}
// Close the signedness check.
DxbcOpEndIf();
}
// Release is_unsigned_temp.
PopSystemTemp();
} else {
// - Gradients or LOD to be passed to the sample_d/sample_l.
@ -1322,11 +1438,11 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
} else {
// Coarse is according to the Direct3D 11.3 specification.
DxbcOpDerivRTXCoarse(DxbcDest::R(grad_h_lod_temp, grad_mask),
DxbcSrc::R(coord_temp));
DxbcSrc::R(coord_and_sampler_temp));
DxbcOpMul(DxbcDest::R(grad_h_lod_temp, grad_mask),
DxbcSrc::R(grad_h_lod_temp), lod_src);
DxbcOpDerivRTYCoarse(DxbcDest::R(grad_v_temp, grad_mask),
DxbcSrc::R(coord_temp));
DxbcSrc::R(coord_and_sampler_temp));
// FIXME(Triang3l): Gradient exponent adjustment is currently not
// done in getCompTexLOD, so don't do it here too.
#if 0
@ -1357,11 +1473,27 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
// doesn't allow mixing anisotropic and point filtering. Possibly
// anistropic filtering should be disabled when explicit LOD is used - do
// this here.
DxbcSrc sampler(FindOrAddSamplerBinding(
uint32_t sampler_binding_index = FindOrAddSamplerBinding(
tfetch_index, instr.attributes.mag_filter,
instr.attributes.min_filter, instr.attributes.mip_filter,
use_computed_lod ? instr.attributes.aniso_filter
: AnisoFilter::kDisabled));
: AnisoFilter::kDisabled);
DxbcSrc sampler(DxbcSrc::S(sampler_binding_index, sampler_binding_index));
if (bindless_resources_used_) {
// Load the sampler index to coord_and_sampler_temp.w and use relative
// sampler indexing.
if (cbuffer_index_descriptor_indices_ == kBindingIndexUnallocated) {
cbuffer_index_descriptor_indices_ = cbuffer_count_++;
}
uint32_t sampler_bindless_descriptor_index =
sampler_bindings_[sampler_binding_index].bindless_descriptor_index;
DxbcOpMov(DxbcDest::R(coord_and_sampler_temp, 0b1000),
DxbcSrc::CB(cbuffer_index_descriptor_indices_,
uint32_t(CbufferRegister::kDescriptorIndices),
sampler_bindless_descriptor_index >> 2)
.Select(sampler_bindless_descriptor_index & 3));
sampler = DxbcSrc::S(0, DxbcIndex(coord_and_sampler_temp, 3));
}
// Break result register dependencies because textures will be sampled
// conditionally, including the primary signs.
@ -1389,9 +1521,12 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
// - srv_selection_temp.z - if stacked and not forced to be point-sampled,
// the lerp factor between two layers, wrapped by layer_lerp_factor_src
// with l(0.0) fallback for the point sampling case.
// - srv_selection_temp.w - scratch for calculations involving these.
// - srv_selection_temp.w - first, scratch for calculations involving
// these, then, unsigned or signed SRV description index.
DxbcSrc layer_lerp_factor_src(DxbcSrc::LF(0.0f));
uint32_t srv_selection_temp = UINT32_MAX;
// W is always needed for bindless.
uint32_t srv_selection_temp =
bindless_resources_used_ ? PushSystemTemp() : UINT32_MAX;
if (instr.dimension == TextureDimension::k3D) {
bool vol_mag_filter_is_fetch_const =
instr.attributes.vol_mag_filter == TextureFilter::kUseFetchConst;
@ -1469,10 +1604,11 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
}
// For linear filtering, subtract 0.5 from the coordinates and store
// the lerp factor. Flooring will be done later.
DxbcOpAdd(DxbcDest::R(coord_temp, 0b0100),
DxbcSrc::R(coord_temp, DxbcSrc::kZZZZ), DxbcSrc::LF(-0.5f));
DxbcOpAdd(DxbcDest::R(coord_and_sampler_temp, 0b0100),
DxbcSrc::R(coord_and_sampler_temp, DxbcSrc::kZZZZ),
DxbcSrc::LF(-0.5f));
DxbcOpFrc(DxbcDest::R(srv_selection_temp, 0b0100),
DxbcSrc::R(coord_temp, DxbcSrc::kZZZZ));
DxbcSrc::R(coord_and_sampler_temp, DxbcSrc::kZZZZ));
// Close the linear check.
DxbcOpEndIf();
// Close the stacked check.
@ -1505,11 +1641,11 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
}
// For linear filtering, subtract 0.5 from the coordinates and store
// the lerp factor. Flooring will be done later.
DxbcOpAdd(DxbcDest::R(coord_temp, 0b0100),
DxbcSrc::R(coord_temp, DxbcSrc::kZZZZ),
DxbcOpAdd(DxbcDest::R(coord_and_sampler_temp, 0b0100),
DxbcSrc::R(coord_and_sampler_temp, DxbcSrc::kZZZZ),
DxbcSrc::LF(-0.5f));
DxbcOpFrc(DxbcDest::R(srv_selection_temp, 0b0100),
DxbcSrc::R(coord_temp, DxbcSrc::kZZZZ));
DxbcSrc::R(coord_and_sampler_temp, DxbcSrc::kZZZZ));
if (vol_mag_filter_is_fetch_const) {
// Close the fetch constant linear filtering mode check.
DxbcOpEndIf();
@ -1578,13 +1714,50 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
// for the layer index, but on the Xbox 360, addressing is similar to
// that of 3D textures). This is needed for both point and linear
// filtering (with linear, 0.5 was subtracted previously).
DxbcOpRoundNI(DxbcDest::R(coord_temp, 0b0100),
DxbcSrc::R(coord_temp, DxbcSrc::kZZZZ));
DxbcOpRoundNI(DxbcDest::R(coord_and_sampler_temp, 0b0100),
DxbcSrc::R(coord_and_sampler_temp, DxbcSrc::kZZZZ));
}
uint32_t texture_binding_index_unsigned =
FindOrAddTextureBinding(tfetch_index, srv_dimension, false);
const TextureBinding& texture_binding_unsigned =
texture_bindings_[texture_binding_index_unsigned];
uint32_t texture_binding_index_signed =
FindOrAddTextureBinding(tfetch_index, srv_dimension, true);
const TextureBinding& texture_binding_signed =
texture_bindings_[texture_binding_index_signed];
DxbcSrc srv_unsigned(DxbcSrc::LF(0.0f)), srv_signed(DxbcSrc::LF(0.0f));
if (bindless_resources_used_) {
uint32_t* bindless_srv_index = nullptr;
switch (srv_dimension) {
case TextureDimension::k1D:
case TextureDimension::k2D:
bindless_srv_index = &srv_index_bindless_textures_2d_;
break;
case TextureDimension::k3D:
bindless_srv_index = &srv_index_bindless_textures_3d_;
break;
case TextureDimension::kCube:
bindless_srv_index = &srv_index_bindless_textures_cube_;
break;
}
assert_not_null(bindless_srv_index);
if (*bindless_srv_index == kBindingIndexUnallocated) {
*bindless_srv_index = srv_count_++;
}
assert_true(srv_selection_temp != UINT32_MAX);
srv_unsigned =
DxbcSrc::T(*bindless_srv_index, DxbcIndex(srv_selection_temp, 3));
srv_signed = srv_unsigned;
} else {
srv_unsigned =
DxbcSrc::T(texture_binding_unsigned.bindful_srv_index,
uint32_t(SRVMainRegister::kBindfulTexturesStart) +
texture_binding_index_unsigned);
srv_signed =
DxbcSrc::T(texture_binding_signed.bindful_srv_index,
uint32_t(SRVMainRegister::kBindfulTexturesStart) +
texture_binding_index_signed);
}
DxbcSrc srv_unsigned(
FindOrAddTextureSRV(tfetch_index, srv_dimension, false));
DxbcSrc srv_signed(
FindOrAddTextureSRV(tfetch_index, srv_dimension, true));
for (uint32_t layer = 0; layer < (layer_lerp_needed ? 2u : 1u);
++layer) {
uint32_t layer_value_temp = system_temp_result_;
@ -1596,8 +1769,8 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
// If the lerp factor is not zero, sample the next layer.
DxbcOpIf(true, DxbcSrc::R(layer_value_temp, DxbcSrc::kXXXX));
// Go to the next layer.
DxbcOpAdd(DxbcDest::R(coord_temp, 0b0100),
DxbcSrc::R(coord_temp, DxbcSrc::kZZZZ),
DxbcOpAdd(DxbcDest::R(coord_and_sampler_temp, 0b0100),
DxbcSrc::R(coord_and_sampler_temp, DxbcSrc::kZZZZ),
DxbcSrc::LF(1.0f));
}
// Always 3 coordinate components (1D and 2D are padded to 2D arrays,
@ -1605,17 +1778,34 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
DxbcOpIf(false, is_all_signed_src);
{
// Sample the unsigned texture.
if (bindless_resources_used_) {
// Load the unsigned texture descriptor index.
assert_true(srv_selection_temp != UINT32_MAX);
if (cbuffer_index_descriptor_indices_ ==
kBindingIndexUnallocated) {
cbuffer_index_descriptor_indices_ = cbuffer_count_++;
}
uint32_t texture_bindless_descriptor_index =
texture_binding_unsigned.bindless_descriptor_index;
DxbcOpMov(
DxbcDest::R(srv_selection_temp, 0b1000),
DxbcSrc::CB(cbuffer_index_descriptor_indices_,
uint32_t(CbufferRegister::kDescriptorIndices),
texture_bindless_descriptor_index >> 2)
.Select(texture_bindless_descriptor_index & 3));
}
if (grad_v_temp != UINT32_MAX) {
assert_not_zero(grad_component_count);
DxbcOpSampleD(
DxbcDest::R(layer_value_temp, used_result_nonzero_components),
DxbcSrc::R(coord_temp), 3, srv_unsigned, sampler,
DxbcSrc::R(coord_and_sampler_temp), 3, srv_unsigned, sampler,
DxbcSrc::R(grad_h_lod_temp), DxbcSrc::R(grad_v_temp),
srv_grad_component_count);
} else {
DxbcOpSampleL(
DxbcDest::R(layer_value_temp, used_result_nonzero_components),
DxbcSrc::R(coord_temp), 3, srv_unsigned, sampler, lod_src);
DxbcSrc::R(coord_and_sampler_temp), 3, srv_unsigned, sampler,
lod_src);
}
}
DxbcOpEndIf();
@ -1623,17 +1813,34 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
{
// Sample the signed texture.
uint32_t signed_temp = PushSystemTemp();
if (bindless_resources_used_) {
// Load the signed texture descriptor index.
assert_true(srv_selection_temp != UINT32_MAX);
if (cbuffer_index_descriptor_indices_ ==
kBindingIndexUnallocated) {
cbuffer_index_descriptor_indices_ = cbuffer_count_++;
}
uint32_t texture_bindless_descriptor_index =
texture_binding_signed.bindless_descriptor_index;
DxbcOpMov(
DxbcDest::R(srv_selection_temp, 0b1000),
DxbcSrc::CB(cbuffer_index_descriptor_indices_,
uint32_t(CbufferRegister::kDescriptorIndices),
texture_bindless_descriptor_index >> 2)
.Select(texture_bindless_descriptor_index & 3));
}
if (grad_v_temp != UINT32_MAX) {
assert_not_zero(grad_component_count);
DxbcOpSampleD(
DxbcDest::R(signed_temp, used_result_nonzero_components),
DxbcSrc::R(coord_temp), 3, srv_signed, sampler,
DxbcSrc::R(coord_and_sampler_temp), 3, srv_signed, sampler,
DxbcSrc::R(grad_h_lod_temp), DxbcSrc::R(grad_v_temp),
srv_grad_component_count);
} else {
DxbcOpSampleL(
DxbcDest::R(signed_temp, used_result_nonzero_components),
DxbcSrc::R(coord_temp), 3, srv_signed, sampler, lod_src);
DxbcSrc::R(coord_and_sampler_temp), 3, srv_signed, sampler,
lod_src);
}
DxbcOpMovC(
DxbcDest::R(layer_value_temp, used_result_nonzero_components),
@ -1680,7 +1887,7 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
}
}
// Release coord_temp.
// Release coord_and_sampler_temp.
PopSystemTemp();
// Apply the bias and gamma correction (gamma is after filtering here,

View File

@ -435,8 +435,12 @@ void DxbcShaderTranslator::ExportToMemory() {
DxbcOpSwitch(element_size_src);
for (uint32_t k = 1; k <= 4; k <<= 1) {
DxbcOpCase(DxbcSrc::LU(k * 4));
if (uav_index_shared_memory_ == kBindingIndexUnallocated) {
uav_index_shared_memory_ = uav_count_++;
}
DxbcOpStoreRaw(
DxbcDest::U(0, uint32_t(UAVRegister::kSharedMemory), (1 << k) - 1),
DxbcDest::U(uav_index_shared_memory_,
uint32_t(UAVRegister::kSharedMemory), (1 << k) - 1),
address_src, eM_src);
DxbcOpBreak();
}

View File

@ -1575,8 +1575,11 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() {
DxbcOpIf(true, temp_x_src);
{
// Write the new depth/stencil.
if (uav_index_edram_ == kBindingIndexUnallocated) {
uav_index_edram_ = uav_count_++;
}
DxbcOpStoreUAVTyped(
DxbcDest::U(ROV_GetEDRAMUAVIndex(), uint32_t(UAVRegister::kEDRAM)),
DxbcDest::U(uav_index_edram_, uint32_t(UAVRegister::kEDRAM)),
DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kYYYY), 1,
DxbcSrc::R(system_temp_rov_depth_stencil_).Select(i));
}
@ -1955,10 +1958,13 @@ void DxbcShaderTranslator::
// Load the old depth/stencil value to VGPR [0].z.
// VGPR [0].x = new depth
// VGPR [0].z = old depth/stencil
if (uav_index_edram_ == kBindingIndexUnallocated) {
uav_index_edram_ = uav_count_++;
}
DxbcOpLdUAVTyped(DxbcDest::R(system_temps_subroutine_, 0b0100),
DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kYYYY), 1,
DxbcSrc::U(ROV_GetEDRAMUAVIndex(),
uint32_t(UAVRegister::kEDRAM), DxbcSrc::kXXXX));
DxbcSrc::U(uav_index_edram_, uint32_t(UAVRegister::kEDRAM),
DxbcSrc::kXXXX));
// Extract the old depth part to VGPR [0].w.
// VGPR [0].x = new depth
// VGPR [0].z = old depth/stencil
@ -2398,8 +2404,11 @@ void DxbcShaderTranslator::
// Write the new depth/stencil.
// VGPR [0].x = new depth/stencil
// VGPR [0].y = depth/stencil test failure
if (uav_index_edram_ == kBindingIndexUnallocated) {
uav_index_edram_ = uav_count_++;
}
DxbcOpStoreUAVTyped(
DxbcDest::U(ROV_GetEDRAMUAVIndex(), uint32_t(UAVRegister::kEDRAM)),
DxbcDest::U(uav_index_edram_, uint32_t(UAVRegister::kEDRAM)),
DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kYYYY), 1,
DxbcSrc::R(system_temps_subroutine_, DxbcSrc::kXXXX));
if (depth_stencil_early) {
@ -2499,10 +2508,13 @@ void DxbcShaderTranslator::CompleteShaderCode_ROV_ColorSampleSubroutine(
// Load the lower 32 bits of the 64bpp color to VGPR [0].z.
// VGPRs [0].xy - packed source color/alpha if not blending.
// VGPR [0].z - lower 32 bits of the packed color.
if (uav_index_edram_ == kBindingIndexUnallocated) {
uav_index_edram_ = uav_count_++;
}
DxbcOpLdUAVTyped(
DxbcDest::R(system_temps_subroutine_, 0b0100),
DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kWWWW), 1,
DxbcSrc::U(ROV_GetEDRAMUAVIndex(), uint32_t(UAVRegister::kEDRAM),
DxbcSrc::U(uav_index_edram_, uint32_t(UAVRegister::kEDRAM),
DxbcSrc::kXXXX));
// Get the address of the upper 32 bits of the color to VGPR [0].w.
// VGPRs [0].xy - packed source color/alpha if not blending.
@ -2514,10 +2526,13 @@ void DxbcShaderTranslator::CompleteShaderCode_ROV_ColorSampleSubroutine(
// Load the upper 32 bits of the 64bpp color to VGPR [0].w.
// VGPRs [0].xy - packed source color/alpha if not blending.
// VGPRs [0].zw - packed destination color/alpha.
if (uav_index_edram_ == kBindingIndexUnallocated) {
uav_index_edram_ = uav_count_++;
}
DxbcOpLdUAVTyped(
DxbcDest::R(system_temps_subroutine_, 0b1000),
DxbcSrc::R(system_temps_subroutine_, DxbcSrc::kWWWW), 1,
DxbcSrc::U(ROV_GetEDRAMUAVIndex(), uint32_t(UAVRegister::kEDRAM),
DxbcSrc::U(uav_index_edram_, uint32_t(UAVRegister::kEDRAM),
DxbcSrc::kXXXX));
}
// The color is 32bpp.
@ -2526,10 +2541,13 @@ void DxbcShaderTranslator::CompleteShaderCode_ROV_ColorSampleSubroutine(
// Load the 32bpp color to VGPR [0].z.
// VGPRs [0].xy - packed source color/alpha if not blending.
// VGPR [0].z - packed 32bpp destination color.
if (uav_index_edram_ == kBindingIndexUnallocated) {
uav_index_edram_ = uav_count_++;
}
DxbcOpLdUAVTyped(
DxbcDest::R(system_temps_subroutine_, 0b0100),
DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kZZZZ), 1,
DxbcSrc::U(ROV_GetEDRAMUAVIndex(), uint32_t(UAVRegister::kEDRAM),
DxbcSrc::U(uav_index_edram_, uint32_t(UAVRegister::kEDRAM),
DxbcSrc::kXXXX));
// Break register dependency in VGPR [0].w if the color is 32bpp.
// VGPRs [0].xy - packed source color/alpha if not blending.
@ -3276,8 +3294,11 @@ void DxbcShaderTranslator::CompleteShaderCode_ROV_ColorSampleSubroutine(
DxbcOpIf(true, DxbcSrc::R(system_temps_subroutine_, DxbcSrc::kZZZZ));
{
// Store the lower 32 bits of the 64bpp color.
if (uav_index_edram_ == kBindingIndexUnallocated) {
uav_index_edram_ = uav_count_++;
}
DxbcOpStoreUAVTyped(
DxbcDest::U(ROV_GetEDRAMUAVIndex(), uint32_t(UAVRegister::kEDRAM)),
DxbcDest::U(uav_index_edram_, uint32_t(UAVRegister::kEDRAM)),
DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kWWWW), 1,
DxbcSrc::R(system_temps_subroutine_, DxbcSrc::kXXXX));
// Get the address of the upper 32 bits of the color to VGPR [0].z (can't
@ -3289,8 +3310,11 @@ void DxbcShaderTranslator::CompleteShaderCode_ROV_ColorSampleSubroutine(
DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kWWWW),
DxbcSrc::LU(1));
// Store the upper 32 bits of the 64bpp color.
if (uav_index_edram_ == kBindingIndexUnallocated) {
uav_index_edram_ = uav_count_++;
}
DxbcOpStoreUAVTyped(
DxbcDest::U(ROV_GetEDRAMUAVIndex(), uint32_t(UAVRegister::kEDRAM)),
DxbcDest::U(uav_index_edram_, uint32_t(UAVRegister::kEDRAM)),
DxbcSrc::R(system_temps_subroutine_, DxbcSrc::kZZZZ), 1,
DxbcSrc::R(system_temps_subroutine_, DxbcSrc::kYYYY));
}
@ -3298,8 +3322,11 @@ void DxbcShaderTranslator::CompleteShaderCode_ROV_ColorSampleSubroutine(
DxbcOpElse();
{
// Store the 32bpp color.
if (uav_index_edram_ == kBindingIndexUnallocated) {
uav_index_edram_ = uav_count_++;
}
DxbcOpStoreUAVTyped(
DxbcDest::U(ROV_GetEDRAMUAVIndex(), uint32_t(UAVRegister::kEDRAM)),
DxbcDest::U(uav_index_edram_, uint32_t(UAVRegister::kEDRAM)),
DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kZZZZ), 1,
DxbcSrc::R(system_temps_subroutine_, DxbcSrc::kXXXX));
}

View File

@ -41,6 +41,8 @@ DEFINE_string(
"[vertex or unspecified, linedomaincp, linedomainpatch, triangledomaincp, "
"triangledomainpatch, quaddomaincp, quaddomainpatch].",
"GPU");
DEFINE_bool(shader_output_bindless_resources, false,
"Output host shader with bindless resources used.", "GPU");
DEFINE_bool(shader_output_dxbc_rov, false,
"Output ROV-based output-merger code in DXBC pixel shaders.",
"GPU");
@ -109,7 +111,8 @@ int shader_compiler_main(const std::vector<std::string>& args) {
} else if (cvars::shader_output_type == "dxbc" ||
cvars::shader_output_type == "dxbctext") {
translator = std::make_unique<DxbcShaderTranslator>(
0, cvars::shader_output_dxbc_rov);
0, cvars::shader_output_bindless_resources,
cvars::shader_output_dxbc_rov);
} else {
translator = std::make_unique<UcodeShaderTranslator>();
}

View File

@ -327,19 +327,22 @@ bool D3D12Provider::Initialize() {
// Check if optional features are supported.
rasterizer_ordered_views_supported_ = false;
tiled_resources_tier_ = 0;
resource_binding_tier_ = D3D12_RESOURCE_BINDING_TIER_1;
tiled_resources_tier_ = D3D12_TILED_RESOURCES_TIER_NOT_SUPPORTED;
D3D12_FEATURE_DATA_D3D12_OPTIONS options;
if (SUCCEEDED(device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS,
&options, sizeof(options)))) {
rasterizer_ordered_views_supported_ = options.ROVsSupported ? true : false;
tiled_resources_tier_ = uint32_t(options.TiledResourcesTier);
resource_binding_tier_ = options.ResourceBindingTier;
tiled_resources_tier_ = options.TiledResourcesTier;
}
programmable_sample_positions_tier_ = 0;
programmable_sample_positions_tier_ =
D3D12_PROGRAMMABLE_SAMPLE_POSITIONS_TIER_NOT_SUPPORTED;
D3D12_FEATURE_DATA_D3D12_OPTIONS2 options2;
if (SUCCEEDED(device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS2,
&options2, sizeof(options2)))) {
programmable_sample_positions_tier_ =
uint32_t(options2.ProgrammableSamplePositionsTier);
options2.ProgrammableSamplePositionsTier;
}
virtual_address_bits_per_resource_ = 0;
D3D12_FEATURE_DATA_GPU_VIRTUAL_ADDRESS_SUPPORT virtual_address_support;
@ -349,14 +352,17 @@ bool D3D12Provider::Initialize() {
virtual_address_bits_per_resource_ =
virtual_address_support.MaxGPUVirtualAddressBitsPerResource;
}
XELOGD3D("Direct3D 12 device features:");
XELOGD3D("* Max GPU virtual address bits per resource: {}",
virtual_address_bits_per_resource_);
XELOGD3D("* Programmable sample positions: tier {}",
programmable_sample_positions_tier_);
XELOGD3D("* Rasterizer-ordered views: {}",
rasterizer_ordered_views_supported_ ? "yes" : "no");
XELOGD3D("* Tiled resources: tier {}", tiled_resources_tier_);
XELOGD3D(
"Direct3D 12 device features:\n"
"Max GPU virtual address bits per resource: {}\n"
"Programmable sample positions: tier {}\n"
"Rasterizer-ordered views: {}\n"
"Resource binding: tier {}\n"
"Tiled resources: tier {}\n",
virtual_address_bits_per_resource_,
uint32_t(programmable_sample_positions_tier_),
rasterizer_ordered_views_supported_ ? "yes" : "no",
uint32_t(resource_binding_tier_), uint32_t(tiled_resources_tier_));
// Get the graphics analysis interface, will silently fail if PIX is not
// attached.

View File

@ -68,13 +68,19 @@ class D3D12Provider : public GraphicsProvider {
uint32_t GetAdapterVendorID() const { return adapter_vendor_id_; }
// Device features.
uint32_t GetProgrammableSamplePositionsTier() const {
D3D12_PROGRAMMABLE_SAMPLE_POSITIONS_TIER
GetProgrammableSamplePositionsTier() const {
return programmable_sample_positions_tier_;
}
bool AreRasterizerOrderedViewsSupported() const {
return rasterizer_ordered_views_supported_;
}
uint32_t GetTiledResourcesTier() const { return tiled_resources_tier_; }
D3D12_RESOURCE_BINDING_TIER GetResourceBindingTier() const {
return resource_binding_tier_;
}
D3D12_TILED_RESOURCES_TIER GetTiledResourcesTier() const {
return tiled_resources_tier_;
}
uint32_t GetVirtualAddressBitsPerResource() const {
return virtual_address_bits_per_resource_;
}
@ -128,9 +134,10 @@ class D3D12Provider : public GraphicsProvider {
uint32_t adapter_vendor_id_;
uint32_t programmable_sample_positions_tier_;
D3D12_PROGRAMMABLE_SAMPLE_POSITIONS_TIER programmable_sample_positions_tier_;
bool rasterizer_ordered_views_supported_;
uint32_t tiled_resources_tier_;
D3D12_RESOURCE_BINDING_TIER resource_binding_tier_;
D3D12_TILED_RESOURCES_TIER tiled_resources_tier_;
uint32_t virtual_address_bits_per_resource_;
};

View File

@ -10,6 +10,8 @@
#ifndef XENIA_UI_D3D12_D3D12_UTIL_H_
#define XENIA_UI_D3D12_D3D12_UTIL_H_
#include <utility>
#include "xenia/ui/d3d12/d3d12_provider.h"
namespace xe {
@ -17,6 +19,9 @@ namespace ui {
namespace d3d12 {
namespace util {
using DescriptorCPUGPUHandlePair =
std::pair<D3D12_CPU_DESCRIPTOR_HANDLE, D3D12_GPU_DESCRIPTOR_HANDLE>;
extern const D3D12_HEAP_PROPERTIES kHeapPropertiesDefault;
extern const D3D12_HEAP_PROPERTIES kHeapPropertiesUpload;
extern const D3D12_HEAP_PROPERTIES kHeapPropertiesReadback;